1 | The following changes since commit 86f4c7e05b1c44dbe1b329a51f311f10aef6ff34: | 1 | The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8: |
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180302' into staging (2018-03-02 14:37:10 +0000) | 3 | Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to 9d9b4b640f9e583ff4b24dc762630945f3ccc16d: | 9 | for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8: |
10 | 10 | ||
11 | Merge remote-tracking branch 'mreitz/tags/pull-block-2018-03-02' into queue-block (2018-03-02 18:45:03 +0100) | 11 | block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches | 14 | Block layer patches |
15 | 15 | ||
16 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
17 | Alberto Garcia (3): | 17 | Doug Gale (1): |
18 | specs/qcow2: Fix documentation of the compressed cluster descriptor | 18 | nvme: Add tracing |
19 | docs: document how to use the l2-cache-entry-size parameter | ||
20 | qcow2: Replace align_offset() with ROUND_UP() | ||
21 | 19 | ||
22 | Anton Nefedov (2): | 20 | Edgar Kaziakhmedov (1): |
23 | block: fix write with zero flag set and iovector provided | 21 | qcow2: get rid of qcow2_backing_read1 routine |
24 | iotest 033: add misaligned write-zeroes test via truncate | ||
25 | 22 | ||
26 | Eric Blake (21): | 23 | Fam Zheng (2): |
27 | block: Add .bdrv_co_block_status() callback | 24 | block: Open backing image in force share mode for size probe |
28 | nvme: Drop pointless .bdrv_co_get_block_status() | 25 | block: Remove unused bdrv_requests_pending |
29 | block: Switch passthrough drivers to .bdrv_co_block_status() | ||
30 | file-posix: Switch to .bdrv_co_block_status() | ||
31 | gluster: Switch to .bdrv_co_block_status() | ||
32 | iscsi: Switch cluster_sectors to byte-based | ||
33 | iscsi: Switch iscsi_allocmap_update() to byte-based | ||
34 | iscsi: Switch to .bdrv_co_block_status() | ||
35 | null: Switch to .bdrv_co_block_status() | ||
36 | parallels: Switch to .bdrv_co_block_status() | ||
37 | qcow: Switch to .bdrv_co_block_status() | ||
38 | qcow2: Switch to .bdrv_co_block_status() | ||
39 | qed: Switch to .bdrv_co_block_status() | ||
40 | raw: Switch to .bdrv_co_block_status() | ||
41 | sheepdog: Switch to .bdrv_co_block_status() | ||
42 | vdi: Avoid bitrot of debugging code | ||
43 | vdi: Switch to .bdrv_co_block_status() | ||
44 | vmdk: Switch to .bdrv_co_block_status() | ||
45 | vpc: Switch to .bdrv_co_block_status() | ||
46 | vvfat: Switch to .bdrv_co_block_status() | ||
47 | block: Drop unused .bdrv_co_get_block_status() | ||
48 | 26 | ||
49 | Kevin Wolf (2): | 27 | John Snow (1): |
50 | block: test blk_aio_flush() with blk->root == NULL | 28 | iotests: fix 197 for vpc |
51 | Merge remote-tracking branch 'mreitz/tags/pull-block-2018-03-02' into queue-block | ||
52 | 29 | ||
53 | Max Reitz (4): | 30 | Kevin Wolf (27): |
54 | qemu-img: Make resize error message more general | 31 | block: Formats don't need CONSISTENT_READ with NO_IO |
55 | block/ssh: Pull ssh_grow_file() from ssh_create() | 32 | block: Make bdrv_drain_invoke() recursive |
56 | block/ssh: Make ssh_grow_file() blocking | 33 | block: Call .drain_begin only once in bdrv_drain_all_begin() |
57 | block/ssh: Add basic .bdrv_truncate() | 34 | test-bdrv-drain: Test BlockDriver callbacks for drain |
35 | block: bdrv_drain_recurse(): Remove unused begin parameter | ||
36 | block: Don't wait for requests in bdrv_drain*_end() | ||
37 | block: Unify order in drain functions | ||
38 | block: Don't acquire AioContext in hmp_qemu_io() | ||
39 | block: Document that x-blockdev-change breaks quorum children list | ||
40 | block: Assert drain_all is only called from main AioContext | ||
41 | block: Make bdrv_drain() driver callbacks non-recursive | ||
42 | test-bdrv-drain: Test callback for bdrv_drain | ||
43 | test-bdrv-drain: Test bs->quiesce_counter | ||
44 | blockjob: Pause job on draining any job BDS | ||
45 | test-bdrv-drain: Test drain vs. block jobs | ||
46 | block: Don't block_job_pause_all() in bdrv_drain_all() | ||
47 | block: Nested drain_end must still call callbacks | ||
48 | test-bdrv-drain: Test nested drain sections | ||
49 | block: Don't notify parents in drain call chain | ||
50 | block: Add bdrv_subtree_drained_begin/end() | ||
51 | test-bdrv-drain: Tests for bdrv_subtree_drain | ||
52 | test-bdrv-drain: Test behaviour in coroutine context | ||
53 | test-bdrv-drain: Recursive draining with multiple parents | ||
54 | block: Allow graph changes in subtree drained section | ||
55 | test-bdrv-drain: Test graph changes in drained section | ||
56 | commit: Simplify reopen of base | ||
57 | block: Keep nodes drained between reopen_queue/multiple | ||
58 | 58 | ||
59 | Stefan Hajnoczi (6): | 59 | Thomas Huth (3): |
60 | aio: rename aio_context_in_iothread() to in_aio_context_home_thread() | 60 | block: Remove the obsolete -drive boot=on|off parameter |
61 | block: extract AIO_WAIT_WHILE() from BlockDriverState | 61 | block: Remove the deprecated -hdachs option |
62 | block: add BlockBackend->in_flight counter | 62 | block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter |
63 | Revert "IDE: Do not flush empty CDROM drives" | ||
64 | block: rename .bdrv_create() to .bdrv_co_create_opts() | ||
65 | qcow2: make qcow2_co_create2() a coroutine_fn | ||
66 | 63 | ||
67 | docs/interop/qcow2.txt | 16 ++++- | 64 | qapi/block-core.json | 4 + |
68 | docs/qcow2-cache.txt | 46 ++++++++++++- | 65 | block/qcow2.h | 3 - |
69 | block/qcow2.h | 6 -- | 66 | include/block/block.h | 15 +- |
70 | include/block/aio-wait.h | 116 ++++++++++++++++++++++++++++++++ | 67 | include/block/block_int.h | 6 +- |
71 | include/block/aio.h | 7 +- | 68 | block.c | 75 ++++- |
72 | include/block/block.h | 54 ++++----------- | 69 | block/commit.c | 8 +- |
73 | include/block/block_int.h | 61 ++++++++++------- | 70 | block/io.c | 164 +++++++--- |
74 | block.c | 11 ++- | 71 | block/qcow2.c | 51 +-- |
75 | block/blkdebug.c | 20 +++--- | 72 | block/replication.c | 6 + |
76 | block/block-backend.c | 60 +++++++++++++++-- | 73 | blockdev.c | 11 - |
77 | block/commit.c | 2 +- | 74 | blockjob.c | 22 +- |
78 | block/crypto.c | 8 +-- | 75 | hmp.c | 6 - |
79 | block/file-posix.c | 79 +++++++++++----------- | 76 | hw/block/nvme.c | 349 +++++++++++++++++---- |
80 | block/file-win32.c | 5 +- | 77 | qemu-io-cmds.c | 3 + |
81 | block/gluster.c | 83 ++++++++++++----------- | 78 | tests/test-bdrv-drain.c | 651 +++++++++++++++++++++++++++++++++++++++ |
82 | block/io.c | 98 +++++++++++---------------- | 79 | vl.c | 86 +----- |
83 | block/iscsi.c | 164 ++++++++++++++++++++++++--------------------- | 80 | hw/block/trace-events | 93 ++++++ |
84 | block/mirror.c | 2 +- | 81 | qemu-doc.texi | 29 +- |
85 | block/nfs.c | 5 +- | 82 | qemu-options.hx | 19 +- |
86 | block/null.c | 23 ++++--- | 83 | tests/Makefile.include | 2 + |
87 | block/nvme.c | 14 ---- | 84 | tests/qemu-iotests/197 | 4 + |
88 | block/parallels.c | 28 +++++--- | 85 | tests/qemu-iotests/common.filter | 3 +- |
89 | block/qcow.c | 32 +++++---- | 86 | 22 files changed, 1294 insertions(+), 316 deletions(-) |
90 | block/qcow2-bitmap.c | 4 +- | 87 | create mode 100644 tests/test-bdrv-drain.c |
91 | block/qcow2-cluster.c | 4 +- | ||
92 | block/qcow2-refcount.c | 4 +- | ||
93 | block/qcow2-snapshot.c | 10 +-- | ||
94 | block/qcow2.c | 60 +++++++++-------- | ||
95 | block/qed.c | 82 ++++++++--------------- | ||
96 | block/raw-format.c | 21 +++--- | ||
97 | block/rbd.c | 6 +- | ||
98 | block/sheepdog.c | 36 +++++----- | ||
99 | block/ssh.c | 66 +++++++++++++++--- | ||
100 | block/throttle.c | 2 +- | ||
101 | block/vdi.c | 50 +++++++------- | ||
102 | block/vhdx.c | 5 +- | ||
103 | block/vmdk.c | 43 +++++------- | ||
104 | block/vpc.c | 50 +++++++------- | ||
105 | block/vvfat.c | 16 ++--- | ||
106 | hw/ide/core.c | 10 +-- | ||
107 | qemu-img.c | 2 +- | ||
108 | tests/test-block-backend.c | 82 +++++++++++++++++++++++ | ||
109 | util/aio-wait.c | 40 +++++++++++ | ||
110 | tests/Makefile.include | 2 + | ||
111 | tests/qemu-iotests/033 | 29 ++++++++ | ||
112 | tests/qemu-iotests/033.out | 13 ++++ | ||
113 | util/Makefile.objs | 2 +- | ||
114 | 47 files changed, 973 insertions(+), 606 deletions(-) | ||
115 | create mode 100644 include/block/aio-wait.h | ||
116 | create mode 100644 tests/test-block-backend.c | ||
117 | create mode 100644 util/aio-wait.c | ||
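For orientation: the larger of the two series above (the 2018-03-02 pull request) is dominated by Eric Blake's conversion of the block drivers from the sector-based .bdrv_co_get_block_status() callback to a byte-based .bdrv_co_block_status(). The shape of the new callback, distilled from the per-driver patches further down (the coroutine_fn annotation is dropped and BlockDriverState is forward-declared so the snippet stands alone):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct BlockDriverState BlockDriverState;

    /* Byte-based replacement for the old sector-based callback: offset and
     * bytes are in bytes, *pnum returns the length of the described range,
     * and the host offset comes back through *map instead of being encoded
     * in the return value. */
    typedef int (*bdrv_co_block_status_fn)(BlockDriverState *bs,
                                           bool want_zero,
                                           int64_t offset, int64_t bytes,
                                           int64_t *pnum, int64_t *map,
                                           BlockDriverState **file);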
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently |
2 | in use as a mirror target. It is not enough for image formats, though, | ||
3 | as these still unconditionally request BLK_PERM_CONSISTENT_READ. | ||
2 | 4 | ||
3 | BlockBackend currently relies on BlockDriverState->in_flight to track | 5 | As this permission is geared towards whether the guest-visible data is |
4 | requests for blk_drain(). There is a corner case where | 6 | consistent, and has no impact on whether the metadata is sane, and |
5 | BlockDriverState->in_flight cannot be used though: blk->root can be NULL | 7 | 'qemu-img info' does not read guest-visible data (except for the raw |
6 | when there is no medium. This results in a segfault when the NULL | 8 | format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there |
7 | pointer is dereferenced. | 9 | is not going to be any guest I/O performed, regardless of image format. |
8 | |||
9 | Introduce a BlockBackend->in_flight counter for aio requests so it works | ||
10 | even when blk->root == NULL. | ||
11 | |||
12 | Based on a patch by Kevin Wolf <kwolf@redhat.com>. | ||
13 | 10 | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
17 | --- | 12 | --- |
18 | block.c | 2 +- | 13 | block.c | 6 +++++- |
19 | block/block-backend.c | 60 +++++++++++++++++++++++++++++++++++++++++++++------ | 14 | 1 file changed, 5 insertions(+), 1 deletion(-) |
20 | 2 files changed, 54 insertions(+), 8 deletions(-) | ||
21 | 15 | ||
22 | diff --git a/block.c b/block.c | 16 | diff --git a/block.c b/block.c |
23 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/block.c | 18 | --- a/block.c |
25 | +++ b/block.c | 19 | +++ b/block.c |
26 | @@ -XXX,XX +XXX,XX @@ out: | 20 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, |
27 | 21 | assert(role == &child_backing || role == &child_file); | |
28 | AioContext *bdrv_get_aio_context(BlockDriverState *bs) | 22 | |
29 | { | 23 | if (!backing) { |
30 | - return bs->aio_context; | 24 | + int flags = bdrv_reopen_get_flags(reopen_queue, bs); |
31 | + return bs ? bs->aio_context : qemu_get_aio_context(); | ||
32 | } | ||
33 | |||
34 | AioWait *bdrv_get_aio_wait(BlockDriverState *bs) | ||
35 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/block/block-backend.c | ||
38 | +++ b/block/block-backend.c | ||
39 | @@ -XXX,XX +XXX,XX @@ struct BlockBackend { | ||
40 | int quiesce_counter; | ||
41 | VMChangeStateEntry *vmsh; | ||
42 | bool force_allow_inactivate; | ||
43 | + | 25 | + |
44 | + /* Number of in-flight aio requests. BlockDriverState also counts | 26 | /* Apart from the modifications below, the same permissions are |
45 | + * in-flight requests but aio requests can exist even when blk->root is | 27 | * forwarded and left alone as for filters */ |
46 | + * NULL, so we cannot rely on its counter for that case. | 28 | bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, |
47 | + * Accessed with atomic ops. | 29 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, |
48 | + */ | 30 | |
49 | + unsigned int in_flight; | 31 | /* bs->file always needs to be consistent because of the metadata. We |
50 | + AioWait wait; | 32 | * can never allow other users to resize or write to it. */ |
51 | }; | 33 | - perm |= BLK_PERM_CONSISTENT_READ; |
52 | 34 | + if (!(flags & BDRV_O_NO_IO)) { | |
53 | typedef struct BlockBackendAIOCB { | 35 | + perm |= BLK_PERM_CONSISTENT_READ; |
54 | @@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) | 36 | + } |
55 | return bdrv_make_zero(blk->root, flags); | 37 | shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); |
56 | } | 38 | } else { |
57 | 39 | /* We want consistent read from backing files if the parent needs it. | |
58 | +static void blk_inc_in_flight(BlockBackend *blk) | ||
59 | +{ | ||
60 | + atomic_inc(&blk->in_flight); | ||
61 | +} | ||
62 | + | ||
63 | +static void blk_dec_in_flight(BlockBackend *blk) | ||
64 | +{ | ||
65 | + atomic_dec(&blk->in_flight); | ||
66 | + aio_wait_kick(&blk->wait); | ||
67 | +} | ||
68 | + | ||
69 | static void error_callback_bh(void *opaque) | ||
70 | { | ||
71 | struct BlockBackendAIOCB *acb = opaque; | ||
72 | |||
73 | - bdrv_dec_in_flight(acb->common.bs); | ||
74 | + blk_dec_in_flight(acb->blk); | ||
75 | acb->common.cb(acb->common.opaque, acb->ret); | ||
76 | qemu_aio_unref(acb); | ||
77 | } | ||
78 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, | ||
79 | { | ||
80 | struct BlockBackendAIOCB *acb; | ||
81 | |||
82 | - bdrv_inc_in_flight(blk_bs(blk)); | ||
83 | + blk_inc_in_flight(blk); | ||
84 | acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); | ||
85 | acb->blk = blk; | ||
86 | acb->ret = ret; | ||
87 | @@ -XXX,XX +XXX,XX @@ static const AIOCBInfo blk_aio_em_aiocb_info = { | ||
88 | static void blk_aio_complete(BlkAioEmAIOCB *acb) | ||
89 | { | ||
90 | if (acb->has_returned) { | ||
91 | - bdrv_dec_in_flight(acb->common.bs); | ||
92 | + blk_dec_in_flight(acb->rwco.blk); | ||
93 | acb->common.cb(acb->common.opaque, acb->rwco.ret); | ||
94 | qemu_aio_unref(acb); | ||
95 | } | ||
96 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, | ||
97 | BlkAioEmAIOCB *acb; | ||
98 | Coroutine *co; | ||
99 | |||
100 | - bdrv_inc_in_flight(blk_bs(blk)); | ||
101 | + blk_inc_in_flight(blk); | ||
102 | acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); | ||
103 | acb->rwco = (BlkRwCo) { | ||
104 | .blk = blk, | ||
105 | @@ -XXX,XX +XXX,XX @@ int blk_flush(BlockBackend *blk) | ||
106 | |||
107 | void blk_drain(BlockBackend *blk) | ||
108 | { | ||
109 | - if (blk_bs(blk)) { | ||
110 | - bdrv_drain(blk_bs(blk)); | ||
111 | + BlockDriverState *bs = blk_bs(blk); | ||
112 | + | ||
113 | + if (bs) { | ||
114 | + bdrv_drained_begin(bs); | ||
115 | + } | ||
116 | + | ||
117 | + /* We may have -ENOMEDIUM completions in flight */ | ||
118 | + AIO_WAIT_WHILE(&blk->wait, | ||
119 | + blk_get_aio_context(blk), | ||
120 | + atomic_mb_read(&blk->in_flight) > 0); | ||
121 | + | ||
122 | + if (bs) { | ||
123 | + bdrv_drained_end(bs); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | void blk_drain_all(void) | ||
128 | { | ||
129 | - bdrv_drain_all(); | ||
130 | + BlockBackend *blk = NULL; | ||
131 | + | ||
132 | + bdrv_drain_all_begin(); | ||
133 | + | ||
134 | + while ((blk = blk_all_next(blk)) != NULL) { | ||
135 | + AioContext *ctx = blk_get_aio_context(blk); | ||
136 | + | ||
137 | + aio_context_acquire(ctx); | ||
138 | + | ||
139 | + /* We may have -ENOMEDIUM completions in flight */ | ||
140 | + AIO_WAIT_WHILE(&blk->wait, ctx, | ||
141 | + atomic_mb_read(&blk->in_flight) > 0); | ||
142 | + | ||
143 | + aio_context_release(ctx); | ||
144 | + } | ||
145 | + | ||
146 | + bdrv_drain_all_end(); | ||
147 | } | ||
148 | |||
149 | void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, | ||
150 | -- | 40 | -- |
151 | 2.13.6 | 41 | 2.13.6 |
152 | 42 | ||
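A note on the BlockBackend->in_flight counter added above: it is the usual atomic in-flight counter around asynchronous requests, incremented before a request is issued and decremented (plus a waiter wake-up) on completion, so draining can simply wait for it to reach zero even when blk->root is NULL. A minimal stand-alone sketch of that pattern, using C11 atomics and a caller-supplied polling hook rather than the real AIO_WAIT_WHILE() machinery (Backend, request_start, request_complete and drain are illustrative names, not QEMU APIs):

    #include <stdatomic.h>

    typedef struct Backend {
        atomic_uint in_flight;          /* requests issued but not completed */
    } Backend;

    static void request_start(Backend *b)
    {
        atomic_fetch_add(&b->in_flight, 1);
    }

    static void request_complete(Backend *b)
    {
        atomic_fetch_sub(&b->in_flight, 1);
        /* the real code also kicks the waiter here (aio_wait_kick()) */
    }

    /* Drain: run the event loop until every request has completed. */
    static void drain(Backend *b, void (*poll_once)(void))
    {
        while (atomic_load(&b->in_flight) > 0) {
            poll_once();                /* dispatches completion callbacks */
        }
    }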
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: John Snow <jsnow@redhat.com> |
2 | 2 | ||
3 | libssh2 does not seem to offer real truncation support, so we can only | 3 | VPC has some difficulty creating geometries of a particular size. |
4 | grow files -- but that is better than nothing. | 4 | However, we can indeed force it to use a literal size, so let's |
5 | do that for the sake of test 197, which is testing some specific | ||
6 | offsets. | ||
5 | 7 | ||
6 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 8 | Signed-off-by: John Snow <jsnow@redhat.com> |
7 | Message-id: 20180214204915.7980-4-mreitz@redhat.com | ||
8 | Reviewed-by: Eric Blake <eblake@redhat.com> | 9 | Reviewed-by: Eric Blake <eblake@redhat.com> |
9 | Reviewed-by: Richard W.M. Jones <rjones@redhat.com> | 10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | Reviewed-by: Lukáš Doktor <ldoktor@redhat.com> | ||
11 | --- | 13 | --- |
12 | block/ssh.c | 24 ++++++++++++++++++++++++ | 14 | tests/qemu-iotests/197 | 4 ++++ |
13 | 1 file changed, 24 insertions(+) | 15 | tests/qemu-iotests/common.filter | 3 ++- |
16 | 2 files changed, 6 insertions(+), 1 deletion(-) | ||
14 | 17 | ||
15 | diff --git a/block/ssh.c b/block/ssh.c | 18 | diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 |
19 | index XXXXXXX..XXXXXXX 100755 | ||
20 | --- a/tests/qemu-iotests/197 | ||
21 | +++ b/tests/qemu-iotests/197 | ||
22 | @@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ===' | ||
23 | echo | ||
24 | |||
25 | # Prep the images | ||
26 | +# VPC rounds image sizes to a specific geometry, force a specific size. | ||
27 | +if [ "$IMGFMT" = "vpc" ]; then | ||
28 | + IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size") | ||
29 | +fi | ||
30 | _make_test_img 4G | ||
31 | $QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io | ||
32 | IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \ | ||
33 | diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter | ||
16 | index XXXXXXX..XXXXXXX 100644 | 34 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/ssh.c | 35 | --- a/tests/qemu-iotests/common.filter |
18 | +++ b/block/ssh.c | 36 | +++ b/tests/qemu-iotests/common.filter |
19 | @@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs) | 37 | @@ -XXX,XX +XXX,XX @@ _filter_img_create() |
20 | return length; | 38 | -e "s# log_size=[0-9]\\+##g" \ |
39 | -e "s# refcount_bits=[0-9]\\+##g" \ | ||
40 | -e "s# key-secret=[a-zA-Z0-9]\\+##g" \ | ||
41 | - -e "s# iter-time=[0-9]\\+##g" | ||
42 | + -e "s# iter-time=[0-9]\\+##g" \ | ||
43 | + -e "s# force_size=\\(on\\|off\\)##g" | ||
21 | } | 44 | } |
22 | 45 | ||
23 | +static int ssh_truncate(BlockDriverState *bs, int64_t offset, | 46 | _filter_img_info() |
24 | + PreallocMode prealloc, Error **errp) | ||
25 | +{ | ||
26 | + BDRVSSHState *s = bs->opaque; | ||
27 | + | ||
28 | + if (prealloc != PREALLOC_MODE_OFF) { | ||
29 | + error_setg(errp, "Unsupported preallocation mode '%s'", | ||
30 | + PreallocMode_str(prealloc)); | ||
31 | + return -ENOTSUP; | ||
32 | + } | ||
33 | + | ||
34 | + if (offset < s->attrs.filesize) { | ||
35 | + error_setg(errp, "ssh driver does not support shrinking files"); | ||
36 | + return -ENOTSUP; | ||
37 | + } | ||
38 | + | ||
39 | + if (offset == s->attrs.filesize) { | ||
40 | + return 0; | ||
41 | + } | ||
42 | + | ||
43 | + return ssh_grow_file(s, offset, errp); | ||
44 | +} | ||
45 | + | ||
46 | static BlockDriver bdrv_ssh = { | ||
47 | .format_name = "ssh", | ||
48 | .protocol_name = "ssh", | ||
49 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_ssh = { | ||
50 | .bdrv_co_readv = ssh_co_readv, | ||
51 | .bdrv_co_writev = ssh_co_writev, | ||
52 | .bdrv_getlength = ssh_getlength, | ||
53 | + .bdrv_truncate = ssh_truncate, | ||
54 | .bdrv_co_flush_to_disk = ssh_co_flush, | ||
55 | .create_opts = &ssh_create_opts, | ||
56 | }; | ||
57 | -- | 47 | -- |
58 | 2.13.6 | 48 | 2.13.6 |
59 | 49 | ||
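On the VPC test fix above: a VHD image's nominal size is normally derived from its CHS geometry, so the byte count requested at creation is generally adjusted to whatever the chosen geometry can express, which is why test 197 pins the size with the force_size creation option. A small stand-alone illustration of the mismatch; the geometry values here are made up for the example, not what block/vpc.c would actually pick:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* hypothetical cylinders/heads/sectors-per-track for a ~4G image */
        uint64_t cyls = 8322, heads = 16, secs = 63;
        uint64_t geom_bytes = cyls * heads * secs * 512ULL;

        printf("geometry size: %llu bytes\n", (unsigned long long)geom_bytes);
        printf("requested:     %llu bytes\n", 4ULL * 1024 * 1024 * 1024);
        /* the two differ, so tests that poke exact offsets need force_size */
        return 0;
    }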
1 | From: Max Reitz <mreitz@redhat.com> | 1 | This change separates bdrv_drain_invoke(), which calls the BlockDriver |
2 | drain callbacks, from bdrv_drain_recurse(). Instead, the function | ||
3 | performs its own recursion now. | ||
2 | 4 | ||
3 | If we ever want to offer even rudimentary truncation functionality for | 5 | One reason for this is that bdrv_drain_recurse() can be called multiple |
4 | ssh, we should put the respective code into a reusable function. | 6 | times by bdrv_drain_all_begin(), but the callbacks may only be called |
7 | once. The separation is necessary to fix this bug. | ||
5 | 8 | ||
6 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 9 | The other reason is that we intend to go to a model where we call all |
7 | Message-id: 20180214204915.7980-2-mreitz@redhat.com | 10 | driver callbacks first, and only then start polling. This is not fully |
8 | Reviewed-by: Eric Blake <eblake@redhat.com> | 11 | achieved yet with this patch, as bdrv_drain_invoke() contains a |
9 | Reviewed-by: Richard W.M. Jones <rjones@redhat.com> | 12 | BDRV_POLL_WHILE() loop for the block driver callbacks, which can still |
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 13 | call callbacks for any unrelated event. It's a step in this direction |
14 | anyway. | ||
15 | |||
16 | Cc: qemu-stable@nongnu.org | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | 19 | --- |
12 | block/ssh.c | 30 ++++++++++++++++++++++-------- | 20 | block/io.c | 14 +++++++++++--- |
13 | 1 file changed, 22 insertions(+), 8 deletions(-) | 21 | 1 file changed, 11 insertions(+), 3 deletions(-) |
14 | 22 | ||
15 | diff --git a/block/ssh.c b/block/ssh.c | 23 | diff --git a/block/io.c b/block/io.c |
16 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/ssh.c | 25 | --- a/block/io.c |
18 | +++ b/block/ssh.c | 26 | +++ b/block/io.c |
19 | @@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, | 27 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
20 | return ret; | 28 | bdrv_wakeup(bs); |
21 | } | 29 | } |
22 | 30 | ||
23 | +static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp) | 31 | +/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ |
24 | +{ | 32 | static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
25 | + ssize_t ret; | 33 | { |
26 | + char c[1] = { '\0' }; | 34 | + BdrvChild *child, *tmp; |
35 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; | ||
36 | |||
37 | if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || | ||
38 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | ||
39 | data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data); | ||
40 | bdrv_coroutine_enter(bs, data.co); | ||
41 | BDRV_POLL_WHILE(bs, !data.done); | ||
27 | + | 42 | + |
28 | + /* offset must be strictly greater than the current size so we do | 43 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { |
29 | + * not overwrite anything */ | 44 | + bdrv_drain_invoke(child->bs, begin); |
30 | + assert(offset > 0 && offset > s->attrs.filesize); | ||
31 | + | ||
32 | + libssh2_sftp_seek64(s->sftp_handle, offset - 1); | ||
33 | + ret = libssh2_sftp_write(s->sftp_handle, c, 1); | ||
34 | + if (ret < 0) { | ||
35 | + sftp_error_setg(errp, s, "Failed to grow file"); | ||
36 | + return -EIO; | ||
37 | + } | 45 | + } |
38 | + | 46 | } |
39 | + s->attrs.filesize = offset; | 47 | |
40 | + return 0; | 48 | static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
41 | +} | 49 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
42 | + | 50 | BdrvChild *child, *tmp; |
43 | static QemuOptsList ssh_create_opts = { | 51 | bool waited; |
44 | .name = "ssh-create-opts", | 52 | |
45 | .head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head), | 53 | - /* Ensure any pending metadata writes are submitted to bs->file. */ |
46 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, | 54 | - bdrv_drain_invoke(bs, begin); |
47 | int64_t total_size = 0; | 55 | - |
48 | QDict *uri_options = NULL; | 56 | /* Wait for drained requests to finish */ |
49 | BDRVSSHState s; | 57 | waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); |
50 | - ssize_t r2; | 58 | |
51 | - char c[1] = { '\0' }; | 59 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
52 | 60 | bdrv_parent_drained_begin(bs); | |
53 | ssh_state_init(&s); | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, | ||
56 | } | 61 | } |
57 | 62 | ||
58 | if (total_size > 0) { | 63 | + bdrv_drain_invoke(bs, true); |
59 | - libssh2_sftp_seek64(s.sftp_handle, total_size-1); | 64 | bdrv_drain_recurse(bs, true); |
60 | - r2 = libssh2_sftp_write(s.sftp_handle, c, 1); | 65 | } |
61 | - if (r2 < 0) { | 66 | |
62 | - sftp_error_setg(errp, &s, "truncate failed"); | 67 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
63 | - ret = -EINVAL; | ||
64 | + ret = ssh_grow_file(&s, total_size, errp); | ||
65 | + if (ret < 0) { | ||
66 | goto out; | ||
67 | } | ||
68 | - s.attrs.filesize = total_size; | ||
69 | } | 68 | } |
70 | 69 | ||
71 | ret = 0; | 70 | bdrv_parent_drained_end(bs); |
71 | + bdrv_drain_invoke(bs, false); | ||
72 | bdrv_drain_recurse(bs, false); | ||
73 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
74 | } | ||
75 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
76 | aio_context_acquire(aio_context); | ||
77 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
78 | if (aio_context == bdrv_get_aio_context(bs)) { | ||
79 | + /* FIXME Calling this multiple times is wrong */ | ||
80 | + bdrv_drain_invoke(bs, true); | ||
81 | waited |= bdrv_drain_recurse(bs, true); | ||
82 | } | ||
83 | } | ||
84 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
85 | aio_context_acquire(aio_context); | ||
86 | aio_enable_external(aio_context); | ||
87 | bdrv_parent_drained_end(bs); | ||
88 | + bdrv_drain_invoke(bs, false); | ||
89 | bdrv_drain_recurse(bs, false); | ||
90 | aio_context_release(aio_context); | ||
91 | } | ||
72 | -- | 92 | -- |
73 | 2.13.6 | 93 | 2.13.6 |
74 | 94 | ||
75 | 95 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver |
---|---|---|---|
2 | callback inside its polling loop. This means that how many times it got | ||
3 | called for each node depended on long it had to poll the event loop. | ||
2 | 4 | ||
3 | At runtime (that is, during a future ssh_truncate()), the SSH session is | 5 | This is obviously not right and results in nodes that stay drained even |
4 | non-blocking. However, ssh_truncate() (or rather, bdrv_truncate() in | 6 | after bdrv_drain_all_end(), which calls .bdrv_co_drain_begin() once per |
5 | general) is not a coroutine, so this resize operation needs to block. | 7 | node. |
6 | 8 | ||
7 | For ssh_create(), that is fine, too; the session is never set to | 9 | Fix bdrv_drain_all_begin() to call the callback only once, too. |
8 | non-blocking anyway. | ||
9 | 10 | ||
10 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 11 | Cc: qemu-stable@nongnu.org |
11 | Message-id: 20180214204915.7980-3-mreitz@redhat.com | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | Reviewed-by: Eric Blake <eblake@redhat.com> | 13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
13 | Reviewed-by: Richard W.M. Jones <rjones@redhat.com> | ||
14 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
15 | --- | 14 | --- |
16 | block/ssh.c | 7 +++++++ | 15 | block/io.c | 3 +-- |
17 | 1 file changed, 7 insertions(+) | 16 | 1 file changed, 1 insertion(+), 2 deletions(-) |
18 | 17 | ||
19 | diff --git a/block/ssh.c b/block/ssh.c | 18 | diff --git a/block/io.c b/block/io.c |
20 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/ssh.c | 20 | --- a/block/io.c |
22 | +++ b/block/ssh.c | 21 | +++ b/block/io.c |
23 | @@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, | 22 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
24 | return ret; | 23 | aio_context_acquire(aio_context); |
25 | } | 24 | bdrv_parent_drained_begin(bs); |
26 | 25 | aio_disable_external(aio_context); | |
27 | +/* Note: This is a blocking operation */ | 26 | + bdrv_drain_invoke(bs, true); |
28 | static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp) | 27 | aio_context_release(aio_context); |
29 | { | 28 | |
30 | ssize_t ret; | 29 | if (!g_slist_find(aio_ctxs, aio_context)) { |
31 | char c[1] = { '\0' }; | 30 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
32 | + int was_blocking = libssh2_session_get_blocking(s->session); | 31 | aio_context_acquire(aio_context); |
33 | 32 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | |
34 | /* offset must be strictly greater than the current size so we do | 33 | if (aio_context == bdrv_get_aio_context(bs)) { |
35 | * not overwrite anything */ | 34 | - /* FIXME Calling this multiple times is wrong */ |
36 | assert(offset > 0 && offset > s->attrs.filesize); | 35 | - bdrv_drain_invoke(bs, true); |
37 | 36 | waited |= bdrv_drain_recurse(bs, true); | |
38 | + libssh2_session_set_blocking(s->session, 1); | 37 | } |
39 | + | 38 | } |
40 | libssh2_sftp_seek64(s->sftp_handle, offset - 1); | ||
41 | ret = libssh2_sftp_write(s->sftp_handle, c, 1); | ||
42 | + | ||
43 | + libssh2_session_set_blocking(s->session, was_blocking); | ||
44 | + | ||
45 | if (ret < 0) { | ||
46 | sftp_error_setg(errp, s, "Failed to grow file"); | ||
47 | return -EIO; | ||
48 | -- | 39 | -- |
49 | 2.13.6 | 40 | 2.13.6 |
50 | 41 | ||
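Regarding the blocking-mode handling above: bdrv_truncate() is not a coroutine, so ssh_grow_file() has to force the libssh2 session into blocking mode for the duration of the write and then restore whatever mode was set before. The save/override/restore pattern in isolation, using the same libssh2 calls as the patch (blocking_write is an illustrative wrapper, not a QEMU or libssh2 function):

    #include <libssh2.h>
    #include <libssh2_sftp.h>

    static ssize_t blocking_write(LIBSSH2_SESSION *session,
                                  LIBSSH2_SFTP_HANDLE *handle,
                                  const char *buf, size_t len)
    {
        int was_blocking = libssh2_session_get_blocking(session);
        ssize_t ret;

        libssh2_session_set_blocking(session, 1);   /* force blocking I/O */
        ret = libssh2_sftp_write(handle, buf, len);
        libssh2_session_set_blocking(session, was_blocking);  /* restore */

        return ret;
    }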
1 | This patch adds test cases for the scenario where blk_aio_flush() is | 1 | This adds a test case that the BlockDriver callbacks for drain are |
2 | called on a BlockBackend with no root. Calling drain afterwards should | 2 | called in bdrv_drain_all_begin/end(), and that both of them are called |
3 | complete the requests with -ENOMEDIUM. | 3 | exactly once. |
4 | 4 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
7 | Reviewed-by: Eric Blake <eblake@redhat.com> | 7 | Reviewed-by: Eric Blake <eblake@redhat.com> |
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
9 | --- | 8 | --- |
10 | tests/test-block-backend.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++ | 9 | tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ |
11 | tests/Makefile.include | 2 ++ | 10 | tests/Makefile.include | 2 + |
12 | 2 files changed, 84 insertions(+) | 11 | 2 files changed, 139 insertions(+) |
13 | create mode 100644 tests/test-block-backend.c | 12 | create mode 100644 tests/test-bdrv-drain.c |
14 | 13 | ||
15 | diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c | 14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
16 | new file mode 100644 | 15 | new file mode 100644 |
17 | index XXXXXXX..XXXXXXX | 16 | index XXXXXXX..XXXXXXX |
18 | --- /dev/null | 17 | --- /dev/null |
19 | +++ b/tests/test-block-backend.c | 18 | +++ b/tests/test-bdrv-drain.c |
20 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ |
21 | +/* | 20 | +/* |
22 | + * BlockBackend tests | 21 | + * Block node draining tests |
23 | + * | 22 | + * |
24 | + * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> | 23 | + * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> |
25 | + * | 24 | + * |
26 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | 25 | + * Permission is hereby granted, free of charge, to any person obtaining a copy |
27 | + * of this software and associated documentation files (the "Software"), to deal | 26 | + * of this software and associated documentation files (the "Software"), to deal |
... | ... | ||
45 | +#include "qemu/osdep.h" | 44 | +#include "qemu/osdep.h" |
46 | +#include "block/block.h" | 45 | +#include "block/block.h" |
47 | +#include "sysemu/block-backend.h" | 46 | +#include "sysemu/block-backend.h" |
48 | +#include "qapi/error.h" | 47 | +#include "qapi/error.h" |
49 | + | 48 | + |
50 | +static void test_drain_aio_error_flush_cb(void *opaque, int ret) | 49 | +typedef struct BDRVTestState { |
50 | + int drain_count; | ||
51 | +} BDRVTestState; | ||
52 | + | ||
53 | +static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) | ||
51 | +{ | 54 | +{ |
52 | + bool *completed = opaque; | 55 | + BDRVTestState *s = bs->opaque; |
53 | + | 56 | + s->drain_count++; |
54 | + g_assert(ret == -ENOMEDIUM); | ||
55 | + *completed = true; | ||
56 | +} | 57 | +} |
57 | + | 58 | + |
58 | +static void test_drain_aio_error(void) | 59 | +static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) |
59 | +{ | 60 | +{ |
60 | + BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | 61 | + BDRVTestState *s = bs->opaque; |
61 | + BlockAIOCB *acb; | 62 | + s->drain_count--; |
62 | + bool completed = false; | ||
63 | + | ||
64 | + acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed); | ||
65 | + g_assert(acb != NULL); | ||
66 | + g_assert(completed == false); | ||
67 | + | ||
68 | + blk_drain(blk); | ||
69 | + g_assert(completed == true); | ||
70 | + | ||
71 | + blk_unref(blk); | ||
72 | +} | 63 | +} |
73 | + | 64 | + |
74 | +static void test_drain_all_aio_error(void) | 65 | +static void bdrv_test_close(BlockDriverState *bs) |
75 | +{ | 66 | +{ |
76 | + BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | 67 | + BDRVTestState *s = bs->opaque; |
68 | + g_assert_cmpint(s->drain_count, >, 0); | ||
69 | +} | ||
70 | + | ||
71 | +static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, | ||
72 | + uint64_t offset, uint64_t bytes, | ||
73 | + QEMUIOVector *qiov, int flags) | ||
74 | +{ | ||
75 | + /* We want this request to stay until the polling loop in drain waits for | ||
76 | + * it to complete. We need to sleep a while as bdrv_drain_invoke() comes | ||
77 | + * first and polls its result, too, but it shouldn't accidentally complete | ||
78 | + * this request yet. */ | ||
79 | + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); | ||
80 | + | ||
81 | + return 0; | ||
82 | +} | ||
83 | + | ||
84 | +static BlockDriver bdrv_test = { | ||
85 | + .format_name = "test", | ||
86 | + .instance_size = sizeof(BDRVTestState), | ||
87 | + | ||
88 | + .bdrv_close = bdrv_test_close, | ||
89 | + .bdrv_co_preadv = bdrv_test_co_preadv, | ||
90 | + | ||
91 | + .bdrv_co_drain_begin = bdrv_test_co_drain_begin, | ||
92 | + .bdrv_co_drain_end = bdrv_test_co_drain_end, | ||
93 | +}; | ||
94 | + | ||
95 | +static void aio_ret_cb(void *opaque, int ret) | ||
96 | +{ | ||
97 | + int *aio_ret = opaque; | ||
98 | + *aio_ret = ret; | ||
99 | +} | ||
100 | + | ||
101 | +static void test_drv_cb_drain_all(void) | ||
102 | +{ | ||
103 | + BlockBackend *blk; | ||
104 | + BlockDriverState *bs; | ||
105 | + BDRVTestState *s; | ||
77 | + BlockAIOCB *acb; | 106 | + BlockAIOCB *acb; |
78 | + bool completed = false; | 107 | + int aio_ret; |
79 | + | 108 | + |
80 | + acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed); | 109 | + QEMUIOVector qiov; |
110 | + struct iovec iov = { | ||
111 | + .iov_base = NULL, | ||
112 | + .iov_len = 0, | ||
113 | + }; | ||
114 | + qemu_iovec_init_external(&qiov, &iov, 1); | ||
115 | + | ||
116 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
117 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, | ||
118 | + &error_abort); | ||
119 | + s = bs->opaque; | ||
120 | + blk_insert_bs(blk, bs, &error_abort); | ||
121 | + | ||
122 | + /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ | ||
123 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
124 | + bdrv_drain_all_begin(); | ||
125 | + g_assert_cmpint(s->drain_count, ==, 1); | ||
126 | + bdrv_drain_all_end(); | ||
127 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
128 | + | ||
129 | + /* Now do the same while a request is pending */ | ||
130 | + aio_ret = -EINPROGRESS; | ||
131 | + acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); | ||
81 | + g_assert(acb != NULL); | 132 | + g_assert(acb != NULL); |
82 | + g_assert(completed == false); | 133 | + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); |
83 | + | 134 | + |
84 | + blk_drain_all(); | 135 | + g_assert_cmpint(s->drain_count, ==, 0); |
85 | + g_assert(completed == true); | 136 | + bdrv_drain_all_begin(); |
137 | + g_assert_cmpint(aio_ret, ==, 0); | ||
138 | + g_assert_cmpint(s->drain_count, ==, 1); | ||
139 | + bdrv_drain_all_end(); | ||
140 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
86 | + | 141 | + |
142 | + bdrv_unref(bs); | ||
87 | + blk_unref(blk); | 143 | + blk_unref(blk); |
88 | +} | 144 | +} |
89 | + | 145 | + |
90 | +int main(int argc, char **argv) | 146 | +int main(int argc, char **argv) |
91 | +{ | 147 | +{ |
92 | + bdrv_init(); | 148 | + bdrv_init(); |
93 | + qemu_init_main_loop(&error_abort); | 149 | + qemu_init_main_loop(&error_abort); |
94 | + | 150 | + |
95 | + g_test_init(&argc, &argv, NULL); | 151 | + g_test_init(&argc, &argv, NULL); |
96 | + | 152 | + |
97 | + g_test_add_func("/block-backend/drain_aio_error", test_drain_aio_error); | 153 | + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
98 | + g_test_add_func("/block-backend/drain_all_aio_error", | ||
99 | + test_drain_all_aio_error); | ||
100 | + | 154 | + |
101 | + return g_test_run(); | 155 | + return g_test_run(); |
102 | +} | 156 | +} |
103 | diff --git a/tests/Makefile.include b/tests/Makefile.include | 157 | diff --git a/tests/Makefile.include b/tests/Makefile.include |
104 | index XXXXXXX..XXXXXXX 100644 | 158 | index XXXXXXX..XXXXXXX 100644 |
105 | --- a/tests/Makefile.include | 159 | --- a/tests/Makefile.include |
106 | +++ b/tests/Makefile.include | 160 | +++ b/tests/Makefile.include |
107 | @@ -XXX,XX +XXX,XX @@ gcov-files-test-hbitmap-y = blockjob.c | 161 | @@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c |
108 | check-unit-y += tests/test-bdrv-drain$(EXESUF) | 162 | gcov-files-test-hbitmap-y = util/hbitmap.c |
163 | check-unit-y += tests/test-hbitmap$(EXESUF) | ||
164 | gcov-files-test-hbitmap-y = blockjob.c | ||
165 | +check-unit-y += tests/test-bdrv-drain$(EXESUF) | ||
109 | check-unit-y += tests/test-blockjob$(EXESUF) | 166 | check-unit-y += tests/test-blockjob$(EXESUF) |
110 | check-unit-y += tests/test-blockjob-txn$(EXESUF) | 167 | check-unit-y += tests/test-blockjob-txn$(EXESUF) |
111 | +check-unit-y += tests/test-block-backend$(EXESUF) | ||
112 | check-unit-y += tests/test-x86-cpuid$(EXESUF) | 168 | check-unit-y += tests/test-x86-cpuid$(EXESUF) |
113 | # all code tested by test-x86-cpuid is inside topology.h | 169 | @@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y) |
114 | gcov-files-test-x86-cpuid-y = | 170 | tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y) |
115 | @@ -XXX,XX +XXX,XX @@ tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) | 171 | tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y) |
116 | tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y) | 172 | tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) |
173 | +tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y) | ||
117 | tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) | 174 | tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) |
118 | tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) | 175 | tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) |
119 | +tests/test-block-backend$(EXESUF): tests/test-block-backend.o $(test-block-obj-y) $(test-util-obj-y) | ||
120 | tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) | 176 | tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) |
121 | tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y) | ||
122 | tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) $(test-crypto-obj-y) | ||
123 | -- | 177 | -- |
124 | 2.13.6 | 178 | 2.13.6 |
125 | 179 | ||
1 | From: Eric Blake <eblake@redhat.com> | 1 | Now that the bdrv_drain_invoke() calls are pulled up to the callers of |
2 | bdrv_drain_recurse(), the 'begin' parameter isn't needed any more. | ||
2 | 3 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | byte-based. Update the vpc driver accordingly. | 5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
6 | --- | ||
7 | block/io.c | 12 ++++++------ | ||
8 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
5 | 9 | ||
6 | Signed-off-by: Eric Blake <eblake@redhat.com> | 10 | diff --git a/block/io.c b/block/io.c |
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | ||
11 | block/vpc.c | 45 +++++++++++++++++++++++---------------------- | ||
12 | 1 file changed, 23 insertions(+), 22 deletions(-) | ||
13 | |||
14 | diff --git a/block/vpc.c b/block/vpc.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/vpc.c | 12 | --- a/block/io.c |
17 | +++ b/block/vpc.c | 13 | +++ b/block/io.c |
18 | @@ -XXX,XX +XXX,XX @@ fail: | 14 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
19 | return ret; | 15 | } |
20 | } | 16 | } |
21 | 17 | ||
22 | -static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, | 18 | -static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
23 | - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) | 19 | +static bool bdrv_drain_recurse(BlockDriverState *bs) |
24 | +static int coroutine_fn vpc_co_block_status(BlockDriverState *bs, | ||
25 | + bool want_zero, | ||
26 | + int64_t offset, int64_t bytes, | ||
27 | + int64_t *pnum, int64_t *map, | ||
28 | + BlockDriverState **file) | ||
29 | { | 20 | { |
30 | BDRVVPCState *s = bs->opaque; | 21 | BdrvChild *child, *tmp; |
31 | VHDFooter *footer = (VHDFooter*) s->footer_buf; | 22 | bool waited; |
32 | - int64_t start, offset; | 23 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
33 | + int64_t image_offset; | 24 | */ |
34 | bool allocated; | 25 | bdrv_ref(bs); |
35 | - int64_t ret; | 26 | } |
36 | - int n; | 27 | - waited |= bdrv_drain_recurse(bs, begin); |
37 | + int ret; | 28 | + waited |= bdrv_drain_recurse(bs); |
38 | + int64_t n; | 29 | if (in_main_loop) { |
39 | 30 | bdrv_unref(bs); | |
40 | if (be32_to_cpu(footer->type) == VHD_FIXED) { | 31 | } |
41 | - *pnum = nb_sectors; | 32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
42 | + *pnum = bytes; | ||
43 | + *map = offset; | ||
44 | *file = bs->file->bs; | ||
45 | - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | | ||
46 | - (sector_num << BDRV_SECTOR_BITS); | ||
47 | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; | ||
48 | } | 33 | } |
49 | 34 | ||
50 | qemu_co_mutex_lock(&s->lock); | 35 | bdrv_drain_invoke(bs, true); |
51 | 36 | - bdrv_drain_recurse(bs, true); | |
52 | - offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL); | 37 | + bdrv_drain_recurse(bs); |
53 | - start = offset; | 38 | } |
54 | - allocated = (offset != -1); | 39 | |
55 | + image_offset = get_image_offset(bs, offset, false, NULL); | 40 | void bdrv_drained_end(BlockDriverState *bs) |
56 | + allocated = (image_offset != -1); | 41 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
57 | *pnum = 0; | 42 | |
58 | ret = 0; | 43 | bdrv_parent_drained_end(bs); |
59 | 44 | bdrv_drain_invoke(bs, false); | |
60 | do { | 45 | - bdrv_drain_recurse(bs, false); |
61 | /* All sectors in a block are contiguous (without using the bitmap) */ | 46 | + bdrv_drain_recurse(bs); |
62 | - n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE) | 47 | aio_enable_external(bdrv_get_aio_context(bs)); |
63 | - - sector_num; | 48 | } |
64 | - n = MIN(n, nb_sectors); | 49 | |
65 | + n = ROUND_UP(offset + 1, s->block_size) - offset; | 50 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
66 | + n = MIN(n, bytes); | 51 | aio_context_acquire(aio_context); |
67 | 52 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | |
68 | *pnum += n; | 53 | if (aio_context == bdrv_get_aio_context(bs)) { |
69 | - sector_num += n; | 54 | - waited |= bdrv_drain_recurse(bs, true); |
70 | - nb_sectors -= n; | 55 | + waited |= bdrv_drain_recurse(bs); |
71 | + offset += n; | 56 | } |
72 | + bytes -= n; | 57 | } |
73 | /* *pnum can't be greater than one block for allocated | 58 | aio_context_release(aio_context); |
74 | * sectors since there is always a bitmap in between. */ | 59 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
75 | if (allocated) { | 60 | aio_enable_external(aio_context); |
76 | *file = bs->file->bs; | 61 | bdrv_parent_drained_end(bs); |
77 | - ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; | 62 | bdrv_drain_invoke(bs, false); |
78 | + *map = image_offset; | 63 | - bdrv_drain_recurse(bs, false); |
79 | + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | 64 | + bdrv_drain_recurse(bs); |
80 | break; | 65 | aio_context_release(aio_context); |
81 | } | 66 | } |
82 | - if (nb_sectors == 0) { | ||
83 | + if (bytes == 0) { | ||
84 | break; | ||
85 | } | ||
86 | - offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, | ||
87 | - NULL); | ||
88 | - } while (offset == -1); | ||
89 | + image_offset = get_image_offset(bs, offset, false, NULL); | ||
90 | + } while (image_offset == -1); | ||
91 | |||
92 | qemu_co_mutex_unlock(&s->lock); | ||
93 | return ret; | ||
94 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vpc = { | ||
95 | |||
96 | .bdrv_co_preadv = vpc_co_preadv, | ||
97 | .bdrv_co_pwritev = vpc_co_pwritev, | ||
98 | - .bdrv_co_get_block_status = vpc_co_get_block_status, | ||
99 | + .bdrv_co_block_status = vpc_co_block_status, | ||
100 | |||
101 | .bdrv_get_info = vpc_get_info, | ||
102 | 67 | ||
103 | -- | 68 | -- |
104 | 2.13.6 | 69 | 2.13.6 |
105 | 70 | ||
106 | 71 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | The device is drained, so there is no point in waiting for requests at |
---|---|---|---|
2 | the end of the drained section. Remove the bdrv_drain_recurse() calls | ||
3 | there. | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 5 | The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e |
4 | byte-based. Update the vdi driver accordingly. Note that the | 6 | in order to call the .bdrv_co_drain_end() driver callback. This is now |
5 | TODO is already covered (the block layer guarantees bounds of its | 7 | done by a separate bdrv_drain_invoke() call. |
6 | requests), and that we can remove the now-unused s->block_sectors. | ||
7 | 8 | ||
8 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | --- | 12 | --- |
13 | block/vdi.c | 33 +++++++++++++-------------------- | 13 | block/io.c | 2 -- |
14 | 1 file changed, 13 insertions(+), 20 deletions(-) | 14 | 1 file changed, 2 deletions(-) |
15 | 15 | ||
16 | diff --git a/block/vdi.c b/block/vdi.c | 16 | diff --git a/block/io.c b/block/io.c |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/vdi.c | 18 | --- a/block/io.c |
19 | +++ b/block/vdi.c | 19 | +++ b/block/io.c |
20 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 20 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
21 | uint32_t *bmap; | 21 | |
22 | /* Size of block (bytes). */ | 22 | bdrv_parent_drained_end(bs); |
23 | uint32_t block_size; | 23 | bdrv_drain_invoke(bs, false); |
24 | - /* Size of block (sectors). */ | 24 | - bdrv_drain_recurse(bs); |
25 | - uint32_t block_sectors; | 25 | aio_enable_external(bdrv_get_aio_context(bs)); |
26 | /* First sector of block map. */ | ||
27 | uint32_t bmap_sector; | ||
28 | /* VDI header (converted to host endianness). */ | ||
29 | @@ -XXX,XX +XXX,XX @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, | ||
30 | bs->total_sectors = header.disk_size / SECTOR_SIZE; | ||
31 | |||
32 | s->block_size = header.block_size; | ||
33 | - s->block_sectors = header.block_size / SECTOR_SIZE; | ||
34 | s->bmap_sector = header.offset_bmap / SECTOR_SIZE; | ||
35 | s->header = header; | ||
36 | |||
37 | @@ -XXX,XX +XXX,XX @@ static int vdi_reopen_prepare(BDRVReopenState *state, | ||
38 | return 0; | ||
39 | } | 26 | } |
40 | 27 | ||
41 | -static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, | 28 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
42 | - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) | 29 | aio_enable_external(aio_context); |
43 | +static int coroutine_fn vdi_co_block_status(BlockDriverState *bs, | 30 | bdrv_parent_drained_end(bs); |
44 | + bool want_zero, | 31 | bdrv_drain_invoke(bs, false); |
45 | + int64_t offset, int64_t bytes, | 32 | - bdrv_drain_recurse(bs); |
46 | + int64_t *pnum, int64_t *map, | 33 | aio_context_release(aio_context); |
47 | + BlockDriverState **file) | ||
48 | { | ||
49 | - /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ | ||
50 | BDRVVdiState *s = (BDRVVdiState *)bs->opaque; | ||
51 | - size_t bmap_index = sector_num / s->block_sectors; | ||
52 | - size_t sector_in_block = sector_num % s->block_sectors; | ||
53 | - int n_sectors = s->block_sectors - sector_in_block; | ||
54 | + size_t bmap_index = offset / s->block_size; | ||
55 | + size_t index_in_block = offset % s->block_size; | ||
56 | uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); | ||
57 | - uint64_t offset; | ||
58 | int result; | ||
59 | |||
60 | - logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum); | ||
61 | - if (n_sectors > nb_sectors) { | ||
62 | - n_sectors = nb_sectors; | ||
63 | - } | ||
64 | - *pnum = n_sectors; | ||
65 | + logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum); | ||
66 | + *pnum = MIN(s->block_size - index_in_block, bytes); | ||
67 | result = VDI_IS_ALLOCATED(bmap_entry); | ||
68 | if (!result) { | ||
69 | return 0; | ||
70 | } | 34 | } |
71 | 35 | ||
72 | - offset = s->header.offset_data + | ||
73 | - (uint64_t)bmap_entry * s->block_size + | ||
74 | - sector_in_block * SECTOR_SIZE; | ||
75 | + *map = s->header.offset_data + (uint64_t)bmap_entry * s->block_size + | ||
76 | + index_in_block; | ||
77 | *file = bs->file->bs; | ||
78 | - return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; | ||
79 | + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | ||
80 | } | ||
81 | |||
82 | static int coroutine_fn | ||
83 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vdi = { | ||
84 | .bdrv_child_perm = bdrv_format_default_perms, | ||
85 | .bdrv_create = vdi_create, | ||
86 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
87 | - .bdrv_co_get_block_status = vdi_co_get_block_status, | ||
88 | + .bdrv_co_block_status = vdi_co_block_status, | ||
89 | .bdrv_make_empty = vdi_make_empty, | ||
90 | |||
91 | .bdrv_co_preadv = vdi_co_preadv, | ||
92 | -- | 36 | -- |
93 | 2.13.6 | 37 | 2.13.6 |
94 | 38 | ||
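The vdi lookup performed by the new byte-based callback above is a two-level mapping: the guest byte offset selects a block-map entry, and the remainder within the block is added to that block's position in the image file. The arithmetic in isolation, mirroring vdi_co_block_status() (vdi_map_offset is an illustrative helper, not a function from the patch):

    #include <stdint.h>

    static uint64_t vdi_map_offset(uint64_t guest_offset, uint32_t block_size,
                                   uint64_t offset_data, uint32_t bmap_entry)
    {
        uint64_t index_in_block = guest_offset % block_size;

        /* data blocks start at offset_data; bmap_entry picks the block */
        return offset_data + (uint64_t)bmap_entry * block_size + index_in_block;
    }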
1 | From: Eric Blake <eblake@redhat.com> | 1 | Drain requests are propagated to child nodes, parent nodes and directly |
2 | to the AioContext. The order in which this happened was different | ||
3 | between all combinations of drain/drain_all and begin/end. | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 5 | The correct order is to keep children only drained when their parents |
4 | byte-based. Update the iscsi driver accordingly. In this case, | 6 | are also drained. This means that at the start of a drained section, the |
5 | it is handy to teach iscsi_co_block_status() to handle a NULL map | 7 | AioContext needs to be drained first, the parents second and only then |
6 | and file parameter, even though the block layer passes non-NULL | 8 | the children. The correct order for the end of a drained section is the |
7 | values, because we also call the function directly. For now, there | 9 | opposite. |
8 | are no optimizations done based on the want_zero flag. | ||
9 | 10 | ||
10 | We can also make the simplification of asserting that the block | 11 | This patch changes the three other functions to follow the example of |
11 | layer passed in aligned values. | 12 | bdrv_drained_begin(), which is the only one that got it right. |
12 | 13 | ||
13 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
14 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | --- | 16 | --- |
17 | block/iscsi.c | 69 ++++++++++++++++++++++++++++------------------------------- | 17 | block/io.c | 12 ++++++++---- |
18 | 1 file changed, 33 insertions(+), 36 deletions(-) | 18 | 1 file changed, 8 insertions(+), 4 deletions(-) |
19 | 19 | ||
20 | diff --git a/block/iscsi.c b/block/iscsi.c | 20 | diff --git a/block/io.c b/block/io.c |
21 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/iscsi.c | 22 | --- a/block/io.c |
23 | +++ b/block/iscsi.c | 23 | +++ b/block/io.c |
24 | @@ -XXX,XX +XXX,XX @@ out_unlock: | 24 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
25 | 25 | return; | |
26 | |||
27 | |||
28 | -static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, | ||
29 | - int64_t sector_num, | ||
30 | - int nb_sectors, int *pnum, | ||
31 | - BlockDriverState **file) | ||
32 | +static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, | ||
33 | + bool want_zero, int64_t offset, | ||
34 | + int64_t bytes, int64_t *pnum, | ||
35 | + int64_t *map, | ||
36 | + BlockDriverState **file) | ||
37 | { | ||
38 | IscsiLun *iscsilun = bs->opaque; | ||
39 | struct scsi_get_lba_status *lbas = NULL; | ||
40 | struct scsi_lba_status_descriptor *lbasd = NULL; | ||
41 | struct IscsiTask iTask; | ||
42 | uint64_t lba; | ||
43 | - int64_t ret; | ||
44 | + int ret; | ||
45 | |||
46 | iscsi_co_init_iscsitask(iscsilun, &iTask); | ||
47 | |||
48 | - if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { | ||
49 | - ret = -EINVAL; | ||
50 | - goto out; | ||
51 | - } | ||
52 | + assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size)); | ||
53 | |||
54 | /* default to all sectors allocated */ | ||
55 | - ret = BDRV_BLOCK_DATA; | ||
56 | - ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID; | ||
57 | - *pnum = nb_sectors; | ||
58 | + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | ||
59 | + if (map) { | ||
60 | + *map = offset; | ||
61 | + } | ||
62 | + *pnum = bytes; | ||
63 | |||
64 | /* LUN does not support logical block provisioning */ | ||
65 | if (!iscsilun->lbpme) { | ||
66 | goto out; | ||
67 | } | 26 | } |
68 | 27 | ||
69 | - lba = sector_qemu2lun(sector_num, iscsilun); | 28 | + /* Stop things in parent-to-child order */ |
70 | + lba = offset / iscsilun->block_size; | 29 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { |
71 | 30 | aio_disable_external(bdrv_get_aio_context(bs)); | |
72 | qemu_mutex_lock(&iscsilun->mutex); | 31 | bdrv_parent_drained_begin(bs); |
73 | retry: | 32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
74 | @@ -XXX,XX +XXX,XX @@ retry: | 33 | return; |
75 | |||
76 | lbasd = &lbas->descriptors[0]; | ||
77 | |||
78 | - if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) { | ||
79 | + if (lba != lbasd->lba) { | ||
80 | ret = -EIO; | ||
81 | goto out_unlock; | ||
82 | } | 34 | } |
83 | 35 | ||
84 | - *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun); | 36 | - bdrv_parent_drained_end(bs); |
85 | + *pnum = lbasd->num_blocks * iscsilun->block_size; | 37 | + /* Re-enable things in child-to-parent order */ |
86 | 38 | bdrv_drain_invoke(bs, false); | |
87 | if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || | 39 | + bdrv_parent_drained_end(bs); |
88 | lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { | 40 | aio_enable_external(bdrv_get_aio_context(bs)); |
89 | @@ -XXX,XX +XXX,XX @@ retry: | 41 | } |
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
44 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
45 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
46 | |||
47 | + /* Stop things in parent-to-child order */ | ||
48 | aio_context_acquire(aio_context); | ||
49 | - bdrv_parent_drained_begin(bs); | ||
50 | aio_disable_external(aio_context); | ||
51 | + bdrv_parent_drained_begin(bs); | ||
52 | bdrv_drain_invoke(bs, true); | ||
53 | aio_context_release(aio_context); | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
56 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
57 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
58 | |||
59 | + /* Re-enable things in child-to-parent order */ | ||
60 | aio_context_acquire(aio_context); | ||
61 | - aio_enable_external(aio_context); | ||
62 | - bdrv_parent_drained_end(bs); | ||
63 | bdrv_drain_invoke(bs, false); | ||
64 | + bdrv_parent_drained_end(bs); | ||
65 | + aio_enable_external(aio_context); | ||
66 | aio_context_release(aio_context); | ||
90 | } | 67 | } |
91 | 68 | ||
92 | if (ret & BDRV_BLOCK_ZERO) { | ||
93 | - iscsi_allocmap_set_unallocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
94 | - *pnum * BDRV_SECTOR_SIZE); | ||
95 | + iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum); | ||
96 | } else { | ||
97 | - iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
98 | - *pnum * BDRV_SECTOR_SIZE); | ||
99 | + iscsi_allocmap_set_allocated(iscsilun, offset, *pnum); | ||
100 | } | ||
101 | |||
102 | - if (*pnum > nb_sectors) { | ||
103 | - *pnum = nb_sectors; | ||
104 | + if (*pnum > bytes) { | ||
105 | + *pnum = bytes; | ||
106 | } | ||
107 | out_unlock: | ||
108 | qemu_mutex_unlock(&iscsilun->mutex); | ||
109 | @@ -XXX,XX +XXX,XX @@ out: | ||
110 | if (iTask.task != NULL) { | ||
111 | scsi_free_scsi_task(iTask.task); | ||
112 | } | ||
113 | - if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { | ||
114 | + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { | ||
115 | *file = bs; | ||
116 | } | ||
117 | return ret; | ||
118 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, | ||
119 | nb_sectors * BDRV_SECTOR_SIZE) && | ||
120 | !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
121 | nb_sectors * BDRV_SECTOR_SIZE)) { | ||
122 | - int pnum; | ||
123 | - BlockDriverState *file; | ||
124 | + int64_t pnum; | ||
125 | /* check the block status from the beginning of the cluster | ||
126 | * containing the start sector */ | ||
127 | - int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS; | ||
128 | - int head; | ||
129 | - int64_t ret; | ||
130 | - | ||
131 | - assert(cluster_sectors); | ||
132 | - head = sector_num % cluster_sectors; | ||
133 | - ret = iscsi_co_get_block_status(bs, sector_num - head, | ||
134 | - BDRV_REQUEST_MAX_SECTORS, &pnum, | ||
135 | - &file); | ||
136 | + int64_t head; | ||
137 | + int ret; | ||
138 | + | ||
139 | + assert(iscsilun->cluster_size); | ||
140 | + head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size; | ||
141 | + ret = iscsi_co_block_status(bs, true, | ||
142 | + sector_num * BDRV_SECTOR_SIZE - head, | ||
143 | + BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL); | ||
144 | if (ret < 0) { | ||
145 | return ret; | ||
146 | } | ||
147 | /* if the whole request falls into an unallocated area we can avoid | ||
148 | * reading and directly return zeroes instead */ | ||
149 | - if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors + head) { | ||
150 | + if (ret & BDRV_BLOCK_ZERO && | ||
151 | + pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) { | ||
152 | qemu_iovec_memset(iov, 0, 0x00, iov->size); | ||
153 | return 0; | ||
154 | } | ||
155 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = { | ||
156 | .bdrv_truncate = iscsi_truncate, | ||
157 | .bdrv_refresh_limits = iscsi_refresh_limits, | ||
158 | |||
159 | - .bdrv_co_get_block_status = iscsi_co_get_block_status, | ||
160 | + .bdrv_co_block_status = iscsi_co_block_status, | ||
161 | .bdrv_co_pdiscard = iscsi_co_pdiscard, | ||
162 | .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, | ||
163 | .bdrv_co_readv = iscsi_co_readv, | ||
164 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = { | ||
165 | .bdrv_truncate = iscsi_truncate, | ||
166 | .bdrv_refresh_limits = iscsi_refresh_limits, | ||
167 | |||
168 | - .bdrv_co_get_block_status = iscsi_co_get_block_status, | ||
169 | + .bdrv_co_block_status = iscsi_co_block_status, | ||
170 | .bdrv_co_pdiscard = iscsi_co_pdiscard, | ||
171 | .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, | ||
172 | .bdrv_co_readv = iscsi_co_readv, | ||
173 | -- | 69 | -- |
174 | 2.13.6 | 70 | 2.13.6 |
175 | 71 | ||
1 | From: Eric Blake <eblake@redhat.com> | 1 | Commit 15afd94a047 added code to acquire and release the AioContext in |
---|---|---|---|
2 | qemuio_command(). This means that the lock is taken twice now in the | ||
3 | call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for | ||
4 | any requests issued to nodes in a non-mainloop AioContext. | ||
2 | 5 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 6 | Dropping the first locking from hmp_qemu_io() fixes the problem. |
4 | byte-based. Update the sheepdog driver accordingly. | ||
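As a rough, compilable illustration of the sector-to-byte arithmetic this kind of conversion boils down to (assumed values only: SECTOR_SIZE stands in for BDRV_SECTOR_SIZE, and the 4 MiB object size is hypothetical):

    #include <inttypes.h>
    #include <stdio.h>

    #define SECTOR_SIZE 512                       /* stand-in for BDRV_SECTOR_SIZE */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* A request in the old sector-based form... */
        int64_t sector_num = 100, nb_sectors = 16;
        /* ...and the byte-based form used by .bdrv_co_block_status(). */
        int64_t offset = sector_num * SECTOR_SIZE;
        int64_t bytes  = nb_sectors * SECTOR_SIZE;

        uint32_t object_size = 4u << 20;          /* hypothetical 4 MiB objects */
        unsigned long start = offset / object_size;
        unsigned long end   = DIV_ROUND_UP(offset + bytes, object_size);

        /* The reported extent (*pnum) is expressed in bytes and clamped to the
         * request length; the real driver stops at the first object whose
         * allocation state differs instead of always using 'end'. */
        int64_t pnum = (int64_t)(end - start) * object_size;
        if (pnum > bytes) {
            pnum = bytes;
        }
        printf("offset=%" PRId64 " bytes=%" PRId64 " pnum=%" PRId64 "\n",
               offset, bytes, pnum);
        return 0;
    }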
5 | 7 | ||
6 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | 10 | --- |
12 | block/sheepdog.c | 26 +++++++++++++------------- | 11 | hmp.c | 6 ------ |
13 | 1 file changed, 13 insertions(+), 13 deletions(-) | 12 | 1 file changed, 6 deletions(-) |
14 | 13 | ||
15 | diff --git a/block/sheepdog.c b/block/sheepdog.c | 14 | diff --git a/hmp.c b/hmp.c |
16 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/sheepdog.c | 16 | --- a/hmp.c |
18 | +++ b/block/sheepdog.c | 17 | +++ b/hmp.c |
19 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, | 18 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
20 | return acb.ret; | ||
21 | } | ||
22 | |||
23 | -static coroutine_fn int64_t | ||
24 | -sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, | ||
25 | - int *pnum, BlockDriverState **file) | ||
26 | +static coroutine_fn int | ||
27 | +sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, | ||
28 | + int64_t bytes, int64_t *pnum, int64_t *map, | ||
29 | + BlockDriverState **file) | ||
30 | { | 19 | { |
31 | BDRVSheepdogState *s = bs->opaque; | 20 | BlockBackend *blk; |
32 | SheepdogInode *inode = &s->inode; | 21 | BlockBackend *local_blk = NULL; |
33 | uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); | 22 | - AioContext *aio_context; |
34 | - uint64_t offset = sector_num * BDRV_SECTOR_SIZE; | 23 | const char* device = qdict_get_str(qdict, "device"); |
35 | unsigned long start = offset / object_size, | 24 | const char* command = qdict_get_str(qdict, "command"); |
36 | - end = DIV_ROUND_UP((sector_num + nb_sectors) * | 25 | Error *err = NULL; |
37 | - BDRV_SECTOR_SIZE, object_size); | 26 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
38 | + end = DIV_ROUND_UP(offset + bytes, object_size); | ||
39 | unsigned long idx; | ||
40 | - int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; | ||
41 | + *map = offset; | ||
42 | + int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | ||
43 | |||
44 | for (idx = start; idx < end; idx++) { | ||
45 | if (inode->data_vdi_id[idx] == 0) { | ||
46 | @@ -XXX,XX +XXX,XX @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, | ||
47 | } | 27 | } |
48 | } | 28 | } |
49 | 29 | ||
50 | - *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE; | 30 | - aio_context = blk_get_aio_context(blk); |
51 | - if (*pnum > nb_sectors) { | 31 | - aio_context_acquire(aio_context); |
52 | - *pnum = nb_sectors; | 32 | - |
53 | + *pnum = (idx - start) * object_size; | 33 | /* |
54 | + if (*pnum > bytes) { | 34 | * Notably absent: Proper permission management. This is sad, but it seems |
55 | + *pnum = bytes; | 35 | * almost impossible to achieve without changing the semantics and thereby |
56 | } | 36 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
57 | if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { | 37 | */ |
58 | *file = bs; | 38 | qemuio_command(blk, command); |
59 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog = { | 39 | |
60 | .bdrv_co_writev = sd_co_writev, | 40 | - aio_context_release(aio_context); |
61 | .bdrv_co_flush_to_disk = sd_co_flush_to_disk, | 41 | - |
62 | .bdrv_co_pdiscard = sd_co_pdiscard, | 42 | fail: |
63 | - .bdrv_co_get_block_status = sd_co_get_block_status, | 43 | blk_unref(local_blk); |
64 | + .bdrv_co_block_status = sd_co_block_status, | 44 | hmp_handle_error(mon, &err); |
65 | |||
66 | .bdrv_snapshot_create = sd_snapshot_create, | ||
67 | .bdrv_snapshot_goto = sd_snapshot_goto, | ||
68 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_tcp = { | ||
69 | .bdrv_co_writev = sd_co_writev, | ||
70 | .bdrv_co_flush_to_disk = sd_co_flush_to_disk, | ||
71 | .bdrv_co_pdiscard = sd_co_pdiscard, | ||
72 | - .bdrv_co_get_block_status = sd_co_get_block_status, | ||
73 | + .bdrv_co_block_status = sd_co_block_status, | ||
74 | |||
75 | .bdrv_snapshot_create = sd_snapshot_create, | ||
76 | .bdrv_snapshot_goto = sd_snapshot_goto, | ||
77 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_unix = { | ||
78 | .bdrv_co_writev = sd_co_writev, | ||
79 | .bdrv_co_flush_to_disk = sd_co_flush_to_disk, | ||
80 | .bdrv_co_pdiscard = sd_co_pdiscard, | ||
81 | - .bdrv_co_get_block_status = sd_co_get_block_status, | ||
82 | + .bdrv_co_block_status = sd_co_block_status, | ||
83 | |||
84 | .bdrv_snapshot_create = sd_snapshot_create, | ||
85 | .bdrv_snapshot_goto = sd_snapshot_goto, | ||
86 | -- | 45 | -- |
87 | 2.13.6 | 46 | 2.13.6 |
88 | 47 | ||
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | The align_offset() function is equivalent to the ROUND_UP() macro so | 3 | Since bdrv_co_preadv does all necessary checks including |
4 | there's no need to use the former. The ROUND_UP() name is also a bit | 4 | reading after the end of the backing file, avoid duplication |
5 | more explicit. | 5 | of verification before the bdrv_co_preadv call. |
6 | 6 | ||
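The check that qcow2 no longer needs to duplicate — zero-padding the part of a read that extends past the end of the backing file — can be modelled by this standalone toy (illustrative only, not the QEMU implementation; the generic read path is what handles this for real after the patch):

    #include <inttypes.h>
    #include <stdio.h>
    #include <string.h>

    /* Copy what the backing file can provide and zero the rest. */
    static void read_with_eof_zeroes(const uint8_t *backing, int64_t backing_size,
                                     int64_t offset, int64_t bytes, uint8_t *buf)
    {
        int64_t from_backing = 0;
        if (offset < backing_size) {
            from_backing = backing_size - offset;
            if (from_backing > bytes) {
                from_backing = bytes;
            }
            memcpy(buf, backing + offset, from_backing);
        }
        memset(buf + from_backing, 0, bytes - from_backing);   /* zero the tail */
    }

    int main(void)
    {
        uint8_t backing[4] = { 1, 2, 3, 4 };
        uint8_t buf[8];
        read_with_eof_zeroes(backing, sizeof(backing), 2, sizeof(buf), buf);
        for (size_t i = 0; i < sizeof(buf); i++) {
            printf("%u ", buf[i]);
        }
        printf("\n");   /* prints: 3 4 0 0 0 0 0 0 */
        return 0;
    }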
7 | This patch uses ROUND_UP() instead of the slower QEMU_ALIGN_UP() | 7 | Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
8 | because align_offset() already requires that the second parameter is a | 8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
9 | power of two. | ||
10 | |||
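A tiny self-contained illustration of why the mask-based form is only valid for power-of-two alignments (align_offset below is copied from the removed helper; ALIGN_UP_DIV is a local stand-in for a division-based macro in the spirit of QEMU_ALIGN_UP):

    #include <assert.h>
    #include <stdint.h>

    /* The open-coded helper being removed; correct only when n is a power of two. */
    static int64_t align_offset(int64_t offset, int n)
    {
        return (offset + n - 1) & ~(n - 1);
    }

    /* Division-based rounding, valid for any d (at the cost of a real division). */
    #define ALIGN_UP_DIV(n, d) ((((n) + (d) - 1) / (d)) * (d))

    int main(void)
    {
        /* For power-of-two alignments the two forms agree... */
        assert(align_offset(1000, 512) == 1024);
        assert(ALIGN_UP_DIV(1000, 512) == 1024);
        assert(align_offset(1024, 512) == 1024);

        /* ...but the mask-based form goes wrong for other alignments, which is
         * why callers of ROUND_UP() must pass a power of two. */
        assert(align_offset(1000, 768) != ALIGN_UP_DIV(1000, 768));  /* 1024 vs 1536 */
        return 0;
    }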
11 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
12 | Reviewed-by: Eric Blake <eblake@redhat.com> | 9 | Reviewed-by: Eric Blake <eblake@redhat.com> |
13 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | Message-id: 20180215131008.5153-1-berto@igalia.com | ||
15 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
16 | --- | 11 | --- |
17 | block/qcow2.h | 6 ------ | 12 | block/qcow2.h | 3 --- |
18 | block/qcow2-bitmap.c | 4 ++-- | 13 | block/qcow2.c | 51 ++++++++------------------------------------------- |
19 | block/qcow2-cluster.c | 4 ++-- | 14 | 2 files changed, 8 insertions(+), 46 deletions(-) |
20 | block/qcow2-refcount.c | 4 ++-- | ||
21 | block/qcow2-snapshot.c | 10 +++++----- | ||
22 | block/qcow2.c | 14 +++++++------- | ||
23 | 6 files changed, 18 insertions(+), 24 deletions(-) | ||
24 | 15 | ||
25 | diff --git a/block/qcow2.h b/block/qcow2.h | 16 | diff --git a/block/qcow2.h b/block/qcow2.h |
26 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/block/qcow2.h | 18 | --- a/block/qcow2.h |
28 | +++ b/block/qcow2.h | 19 | +++ b/block/qcow2.h |
29 | @@ -XXX,XX +XXX,XX @@ static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset) | 20 | @@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) |
30 | return (offset >> s->cluster_bits) & (s->l2_slice_size - 1); | ||
31 | } | 21 | } |
32 | 22 | ||
33 | -static inline int64_t align_offset(int64_t offset, int n) | 23 | /* qcow2.c functions */ |
34 | -{ | 24 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
35 | - offset = (offset + n - 1) & ~(n - 1); | 25 | - int64_t sector_num, int nb_sectors); |
36 | - return offset; | ||
37 | -} | ||
38 | - | 26 | - |
39 | static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s) | 27 | int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, |
40 | { | 28 | int refcount_order, bool generous_increase, |
41 | return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); | 29 | uint64_t *refblock_count); |
42 | diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/block/qcow2-bitmap.c | ||
45 | +++ b/block/qcow2-bitmap.c | ||
46 | @@ -XXX,XX +XXX,XX @@ static inline void bitmap_dir_entry_to_be(Qcow2BitmapDirEntry *entry) | ||
47 | |||
48 | static inline int calc_dir_entry_size(size_t name_size, size_t extra_data_size) | ||
49 | { | ||
50 | - return align_offset(sizeof(Qcow2BitmapDirEntry) + | ||
51 | - name_size + extra_data_size, 8); | ||
52 | + int size = sizeof(Qcow2BitmapDirEntry) + name_size + extra_data_size; | ||
53 | + return ROUND_UP(size, 8); | ||
54 | } | ||
55 | |||
56 | static inline int dir_entry_size(Qcow2BitmapDirEntry *entry) | ||
57 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/block/qcow2-cluster.c | ||
60 | +++ b/block/qcow2-cluster.c | ||
61 | @@ -XXX,XX +XXX,XX @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, | ||
62 | |||
63 | new_l1_size2 = sizeof(uint64_t) * new_l1_size; | ||
64 | new_l1_table = qemu_try_blockalign(bs->file->bs, | ||
65 | - align_offset(new_l1_size2, 512)); | ||
66 | + ROUND_UP(new_l1_size2, 512)); | ||
67 | if (new_l1_table == NULL) { | ||
68 | return -ENOMEM; | ||
69 | } | ||
70 | - memset(new_l1_table, 0, align_offset(new_l1_size2, 512)); | ||
71 | + memset(new_l1_table, 0, ROUND_UP(new_l1_size2, 512)); | ||
72 | |||
73 | if (s->l1_size) { | ||
74 | memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); | ||
75 | diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/block/qcow2-refcount.c | ||
78 | +++ b/block/qcow2-refcount.c | ||
79 | @@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, | ||
80 | * l1_table_offset when it is the current s->l1_table_offset! Be careful | ||
81 | * when changing this! */ | ||
82 | if (l1_table_offset != s->l1_table_offset) { | ||
83 | - l1_table = g_try_malloc0(align_offset(l1_size2, 512)); | ||
84 | + l1_table = g_try_malloc0(ROUND_UP(l1_size2, 512)); | ||
85 | if (l1_size2 && l1_table == NULL) { | ||
86 | ret = -ENOMEM; | ||
87 | goto fail; | ||
88 | @@ -XXX,XX +XXX,XX @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, | ||
89 | } | ||
90 | |||
91 | /* align range to test to cluster boundaries */ | ||
92 | - size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size); | ||
93 | + size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size); | ||
94 | offset = start_of_cluster(s, offset); | ||
95 | |||
96 | if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) { | ||
97 | diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/block/qcow2-snapshot.c | ||
100 | +++ b/block/qcow2-snapshot.c | ||
101 | @@ -XXX,XX +XXX,XX @@ int qcow2_read_snapshots(BlockDriverState *bs) | ||
102 | |||
103 | for(i = 0; i < s->nb_snapshots; i++) { | ||
104 | /* Read statically sized part of the snapshot header */ | ||
105 | - offset = align_offset(offset, 8); | ||
106 | + offset = ROUND_UP(offset, 8); | ||
107 | ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); | ||
108 | if (ret < 0) { | ||
109 | goto fail; | ||
110 | @@ -XXX,XX +XXX,XX @@ static int qcow2_write_snapshots(BlockDriverState *bs) | ||
111 | offset = 0; | ||
112 | for(i = 0; i < s->nb_snapshots; i++) { | ||
113 | sn = s->snapshots + i; | ||
114 | - offset = align_offset(offset, 8); | ||
115 | + offset = ROUND_UP(offset, 8); | ||
116 | offset += sizeof(h); | ||
117 | offset += sizeof(extra); | ||
118 | offset += strlen(sn->id_str); | ||
119 | @@ -XXX,XX +XXX,XX @@ static int qcow2_write_snapshots(BlockDriverState *bs) | ||
120 | assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX); | ||
121 | h.id_str_size = cpu_to_be16(id_str_size); | ||
122 | h.name_size = cpu_to_be16(name_size); | ||
123 | - offset = align_offset(offset, 8); | ||
124 | + offset = ROUND_UP(offset, 8); | ||
125 | |||
126 | ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); | ||
127 | if (ret < 0) { | ||
128 | @@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) | ||
129 | /* The VM state isn't needed any more in the active L1 table; in fact, it | ||
130 | * hurts by causing expensive COW for the next snapshot. */ | ||
131 | qcow2_cluster_discard(bs, qcow2_vm_state_offset(s), | ||
132 | - align_offset(sn->vm_state_size, s->cluster_size), | ||
133 | + ROUND_UP(sn->vm_state_size, s->cluster_size), | ||
134 | QCOW2_DISCARD_NEVER, false); | ||
135 | |||
136 | #ifdef DEBUG_ALLOC | ||
137 | @@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, | ||
138 | } | ||
139 | new_l1_bytes = sn->l1_size * sizeof(uint64_t); | ||
140 | new_l1_table = qemu_try_blockalign(bs->file->bs, | ||
141 | - align_offset(new_l1_bytes, 512)); | ||
142 | + ROUND_UP(new_l1_bytes, 512)); | ||
143 | if (new_l1_table == NULL) { | ||
144 | return -ENOMEM; | ||
145 | } | ||
146 | diff --git a/block/qcow2.c b/block/qcow2.c | 30 | diff --git a/block/qcow2.c b/block/qcow2.c |
147 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
148 | --- a/block/qcow2.c | 32 | --- a/block/qcow2.c |
149 | +++ b/block/qcow2.c | 33 | +++ b/block/qcow2.c |
150 | @@ -XXX,XX +XXX,XX @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, | 34 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, |
151 | 35 | return status; | |
152 | if (s->l1_size > 0) { | 36 | } |
153 | s->l1_table = qemu_try_blockalign(bs->file->bs, | 37 | |
154 | - align_offset(s->l1_size * sizeof(uint64_t), 512)); | 38 | -/* handle reading after the end of the backing file */ |
155 | + ROUND_UP(s->l1_size * sizeof(uint64_t), 512)); | 39 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
156 | if (s->l1_table == NULL) { | 40 | - int64_t offset, int bytes) |
157 | error_setg(errp, "Could not allocate L1 table"); | 41 | -{ |
158 | ret = -ENOMEM; | 42 | - uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; |
159 | @@ -XXX,XX +XXX,XX @@ static int64_t qcow2_calc_prealloc_size(int64_t total_size, | 43 | - int n1; |
44 | - | ||
45 | - if ((offset + bytes) <= bs_size) { | ||
46 | - return bytes; | ||
47 | - } | ||
48 | - | ||
49 | - if (offset >= bs_size) { | ||
50 | - n1 = 0; | ||
51 | - } else { | ||
52 | - n1 = bs_size - offset; | ||
53 | - } | ||
54 | - | ||
55 | - qemu_iovec_memset(qiov, n1, 0, bytes - n1); | ||
56 | - | ||
57 | - return n1; | ||
58 | -} | ||
59 | - | ||
60 | static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
61 | uint64_t bytes, QEMUIOVector *qiov, | ||
62 | int flags) | ||
160 | { | 63 | { |
161 | int64_t meta_size = 0; | 64 | BDRVQcow2State *s = bs->opaque; |
162 | uint64_t nl1e, nl2e; | 65 | - int offset_in_cluster, n1; |
163 | - int64_t aligned_total_size = align_offset(total_size, cluster_size); | 66 | + int offset_in_cluster; |
164 | + int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); | 67 | int ret; |
165 | 68 | unsigned int cur_bytes; /* number of bytes in current iteration */ | |
166 | /* header: 1 cluster */ | 69 | uint64_t cluster_offset = 0; |
167 | meta_size += cluster_size; | 70 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, |
168 | 71 | case QCOW2_CLUSTER_UNALLOCATED: | |
169 | /* total size of L2 tables */ | 72 | |
170 | nl2e = aligned_total_size / cluster_size; | 73 | if (bs->backing) { |
171 | - nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); | 74 | - /* read from the base image */ |
172 | + nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); | 75 | - n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, |
173 | meta_size += nl2e * sizeof(uint64_t); | 76 | - offset, cur_bytes); |
174 | 77 | - if (n1 > 0) { | |
175 | /* total size of L1 tables */ | 78 | - QEMUIOVector local_qiov; |
176 | nl1e = nl2e * sizeof(uint64_t) / cluster_size; | 79 | - |
177 | - nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); | 80 | - qemu_iovec_init(&local_qiov, hd_qiov.niov); |
178 | + nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); | 81 | - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); |
179 | meta_size += nl1e * sizeof(uint64_t); | 82 | - |
180 | 83 | - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | |
181 | /* total size of refcount table and blocks */ | 84 | - qemu_co_mutex_unlock(&s->lock); |
182 | @@ -XXX,XX +XXX,XX @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, | 85 | - ret = bdrv_co_preadv(bs->backing, offset, n1, |
183 | has_backing_file = !!optstr; | 86 | - &local_qiov, 0); |
184 | g_free(optstr); | 87 | - qemu_co_mutex_lock(&s->lock); |
185 | 88 | - | |
186 | - virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), | 89 | - qemu_iovec_destroy(&local_qiov); |
187 | - cluster_size); | 90 | - |
188 | + virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); | 91 | - if (ret < 0) { |
189 | + virtual_size = ROUND_UP(virtual_size, cluster_size); | 92 | - goto fail; |
190 | 93 | - } | |
191 | /* Check that virtual disk size is valid */ | 94 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); |
192 | l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, | 95 | + qemu_co_mutex_unlock(&s->lock); |
193 | @@ -XXX,XX +XXX,XX @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, | 96 | + ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, |
194 | goto err; | 97 | + &hd_qiov, 0); |
195 | } | 98 | + qemu_co_mutex_lock(&s->lock); |
196 | 99 | + if (ret < 0) { | |
197 | - virtual_size = align_offset(ssize, cluster_size); | 100 | + goto fail; |
198 | + virtual_size = ROUND_UP(ssize, cluster_size); | 101 | } |
199 | 102 | } else { | |
200 | if (has_backing_file) { | 103 | /* Note: in this case, no need to wait */ |
201 | /* We don't how much of the backing chain is shared by the input | ||
202 | -- | 104 | -- |
203 | 2.13.6 | 105 | 2.13.6 |
204 | 106 | ||
1 | From: Max Reitz <mreitz@redhat.com> | 1 | Removing a quorum child node with x-blockdev-change results in a quorum |
---|---|---|---|
2 | driver state that cannot be recreated with create options because it | ||
3 | would require a list with gaps. This causes trouble in at least | ||
4 | .bdrv_refresh_filename(). | ||
2 | 5 | ||
3 | The issue: | 6 | Document this problem so that we won't accidentally mark the command |
7 | stable without having addressed it. | ||
4 | 8 | ||
5 | $ qemu-img resize -f qcow2 foo.qcow2 | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | qemu-img: Expecting one image file name | 10 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
7 | Try 'qemu-img --help' for more information | 11 | --- |
12 | qapi/block-core.json | 4 ++++ | ||
13 | 1 file changed, 4 insertions(+) | ||
8 | 14 | ||
9 | So we gave an image file name, but we omitted the length. qemu-img | 15 | diff --git a/qapi/block-core.json b/qapi/block-core.json |
10 | thinks the last argument is always the size and removes it immediately | ||
11 | from argv (by decrementing argc), and tries to verify that it is a valid | ||
12 | size only at a later point. | ||
13 | |||
14 | So we do not actually know whether that last argument we called "size" | ||
15 | is indeed a size or whether the user instead forgot to specify that size | ||
16 | but did give a file name. | ||
17 | |||
18 | Therefore, the error message should be more general. | ||
19 | |||
20 | Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1523458 | ||
21 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
22 | Message-id: 20180205162745.23650-1-mreitz@redhat.com | ||
23 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
24 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
25 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
26 | --- | ||
27 | qemu-img.c | 2 +- | ||
28 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
29 | |||
30 | diff --git a/qemu-img.c b/qemu-img.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/qemu-img.c | 17 | --- a/qapi/block-core.json |
33 | +++ b/qemu-img.c | 18 | +++ b/qapi/block-core.json |
34 | @@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv) | 19 | @@ -XXX,XX +XXX,XX @@ |
35 | } | 20 | # does not support all kinds of operations, all kinds of children, nor |
36 | } | 21 | # all block drivers. |
37 | if (optind != argc - 1) { | 22 | # |
38 | - error_exit("Expecting one image file name"); | 23 | +# FIXME Removing children from a quorum node means introducing gaps in the |
39 | + error_exit("Expecting image file name and size"); | 24 | +# child indices. This cannot be represented in the 'children' list of |
40 | } | 25 | +# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename(). |
41 | filename = argv[optind++]; | 26 | +# |
42 | 27 | # Warning: The data in a new quorum child MUST be consistent with that of | |
28 | # the rest of the array. | ||
29 | # | ||
43 | -- | 30 | -- |
44 | 2.13.6 | 31 | 2.13.6 |
45 | 32 | ||
1 | From: Eric Blake <eblake@redhat.com> | 1 | From: Doug Gale <doug16k@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | We are gradually converting to byte-based interfaces, as they are | 3 | Add trace output for commands, errors, and undefined behavior. |
4 | easier to reason about than sector-based. Convert all uses of | 4 | Add guest error log output for undefined behavior. |
5 | the cluster size in sectors, along with adding assertions that we | 5 | Report invalid undefined accesses to MMIO. |
6 | are not dividing by zero. | 6 | Annotate unlikely error checks with unlikely. |
7 | 7 | ||
8 | Improve some comment grammar while in the area. | 8 | Signed-off-by: Doug Gale <doug16k@gmail.com> |
9 | 9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | |
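For the divide-by-zero point, a self-contained sketch of the byte-based allocmap sizing the conversion switches to (the LUN geometry values are made up; the shape mirrors the DIV_ROUND_UP() call in the iscsi diff below):

    #include <assert.h>
    #include <inttypes.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* Hypothetical LUN geometry; the real values come from the target. */
        uint64_t num_blocks = 2097152;        /* 1 GiB worth of 512-byte blocks */
        int block_size = 512;
        int cluster_size = 65536;             /* 64 KiB, as reported by the LUN */

        /* The byte-based size divides by cluster_size, so assert that it is
         * set before using it -- this is what the new assertions guard. */
        assert(cluster_size);
        uint64_t allocmap_size = DIV_ROUND_UP(num_blocks * (uint64_t)block_size,
                                              (uint64_t)cluster_size);
        printf("allocmap covers %" PRIu64 " clusters\n", allocmap_size);
        return 0;
    }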
10 | Signed-off-by: Eric Blake <eblake@redhat.com> | 10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
11 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 12 | --- |
15 | block/iscsi.c | 56 +++++++++++++++++++++++++++++++++++--------------------- | 13 | hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++-------- |
16 | 1 file changed, 35 insertions(+), 21 deletions(-) | 14 | hw/block/trace-events | 93 ++++++++++++++ |
15 | 2 files changed, 390 insertions(+), 52 deletions(-) | ||
17 | 16 | ||
18 | diff --git a/block/iscsi.c b/block/iscsi.c | 17 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c |
19 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/iscsi.c | 19 | --- a/hw/block/nvme.c |
21 | +++ b/block/iscsi.c | 20 | +++ b/hw/block/nvme.c |
22 | @@ -XXX,XX +XXX,XX @@ typedef struct IscsiLun { | 21 | @@ -XXX,XX +XXX,XX @@ |
23 | unsigned long *allocmap; | 22 | #include "qapi/visitor.h" |
24 | unsigned long *allocmap_valid; | 23 | #include "sysemu/block-backend.h" |
25 | long allocmap_size; | 24 | |
26 | - int cluster_sectors; | 25 | +#include "qemu/log.h" |
27 | + int cluster_size; | 26 | +#include "trace.h" |
28 | bool use_16_for_rw; | 27 | #include "nvme.h" |
29 | bool write_protected; | 28 | |
30 | bool lbpme; | 29 | +#define NVME_GUEST_ERR(trace, fmt, ...) \ |
31 | @@ -XXX,XX +XXX,XX @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) | 30 | + do { \ |
31 | + (trace_##trace)(__VA_ARGS__); \ | ||
32 | + qemu_log_mask(LOG_GUEST_ERROR, #trace \ | ||
33 | + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ | ||
34 | + } while (0) | ||
35 | + | ||
36 | static void nvme_process_sq(void *opaque); | ||
37 | |||
38 | static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) | ||
39 | @@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) | ||
32 | { | 40 | { |
33 | iscsi_allocmap_free(iscsilun); | 41 | if (cq->irq_enabled) { |
34 | 42 | if (msix_enabled(&(n->parent_obj))) { | |
35 | + assert(iscsilun->cluster_size); | 43 | + trace_nvme_irq_msix(cq->vector); |
36 | iscsilun->allocmap_size = | 44 | msix_notify(&(n->parent_obj), cq->vector); |
37 | - DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun), | 45 | } else { |
38 | - iscsilun->cluster_sectors); | 46 | + trace_nvme_irq_pin(); |
39 | + DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size, | 47 | pci_irq_pulse(&n->parent_obj); |
40 | + iscsilun->cluster_size); | 48 | } |
41 | 49 | + } else { | |
42 | iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size); | 50 | + trace_nvme_irq_masked(); |
43 | if (!iscsilun->allocmap) { | 51 | } |
44 | @@ -XXX,XX +XXX,XX @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) | 52 | } |
45 | } | 53 | |
46 | 54 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | |
47 | if (open_flags & BDRV_O_NOCACHE) { | 55 | trans_len = MIN(len, trans_len); |
48 | - /* in case that cache.direct = on all allocmap entries are | 56 | int num_prps = (len >> n->page_bits) + 1; |
49 | + /* when cache.direct = on all allocmap entries are | 57 | |
50 | * treated as invalid to force a relookup of the block | 58 | - if (!prp1) { |
51 | * status on every read request */ | 59 | + if (unlikely(!prp1)) { |
52 | return 0; | 60 | + trace_nvme_err_invalid_prp(); |
53 | @@ -XXX,XX +XXX,XX @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, | 61 | return NVME_INVALID_FIELD | NVME_DNR; |
54 | int nb_sectors, bool allocated, bool valid) | 62 | } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && |
63 | prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
65 | } | ||
66 | len -= trans_len; | ||
67 | if (len) { | ||
68 | - if (!prp2) { | ||
69 | + if (unlikely(!prp2)) { | ||
70 | + trace_nvme_err_invalid_prp2_missing(); | ||
71 | goto unmap; | ||
72 | } | ||
73 | if (len > n->page_size) { | ||
74 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
75 | uint64_t prp_ent = le64_to_cpu(prp_list[i]); | ||
76 | |||
77 | if (i == n->max_prp_ents - 1 && len > n->page_size) { | ||
78 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
79 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
80 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
81 | goto unmap; | ||
82 | } | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
85 | prp_ent = le64_to_cpu(prp_list[i]); | ||
86 | } | ||
87 | |||
88 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
89 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
90 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
91 | goto unmap; | ||
92 | } | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
95 | i++; | ||
96 | } | ||
97 | } else { | ||
98 | - if (prp2 & (n->page_size - 1)) { | ||
99 | + if (unlikely(prp2 & (n->page_size - 1))) { | ||
100 | + trace_nvme_err_invalid_prp2_align(prp2); | ||
101 | goto unmap; | ||
102 | } | ||
103 | if (qsg->nsg) { | ||
104 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, | ||
105 | QEMUIOVector iov; | ||
106 | uint16_t status = NVME_SUCCESS; | ||
107 | |||
108 | + trace_nvme_dma_read(prp1, prp2); | ||
109 | + | ||
110 | if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { | ||
111 | return NVME_INVALID_FIELD | NVME_DNR; | ||
112 | } | ||
113 | if (qsg.nsg > 0) { | ||
114 | - if (dma_buf_read(ptr, len, &qsg)) { | ||
115 | + if (unlikely(dma_buf_read(ptr, len, &qsg))) { | ||
116 | + trace_nvme_err_invalid_dma(); | ||
117 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
118 | } | ||
119 | qemu_sglist_destroy(&qsg); | ||
120 | } else { | ||
121 | - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { | ||
122 | + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { | ||
123 | + trace_nvme_err_invalid_dma(); | ||
124 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
125 | } | ||
126 | qemu_iovec_destroy(&iov); | ||
127 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
128 | uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); | ||
129 | uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); | ||
130 | |||
131 | - if (slba + nlb > ns->id_ns.nsze) { | ||
132 | + if (unlikely(slba + nlb > ns->id_ns.nsze)) { | ||
133 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
134 | return NVME_LBA_RANGE | NVME_DNR; | ||
135 | } | ||
136 | |||
137 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
138 | int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; | ||
139 | enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; | ||
140 | |||
141 | - if ((slba + nlb) > ns->id_ns.nsze) { | ||
142 | + trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); | ||
143 | + | ||
144 | + if (unlikely((slba + nlb) > ns->id_ns.nsze)) { | ||
145 | block_acct_invalid(blk_get_stats(n->conf.blk), acct); | ||
146 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
147 | return NVME_LBA_RANGE | NVME_DNR; | ||
148 | } | ||
149 | |||
150 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
151 | NvmeNamespace *ns; | ||
152 | uint32_t nsid = le32_to_cpu(cmd->nsid); | ||
153 | |||
154 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
155 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
156 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
157 | return NVME_INVALID_NSID | NVME_DNR; | ||
158 | } | ||
159 | |||
160 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
161 | case NVME_CMD_READ: | ||
162 | return nvme_rw(n, ns, cmd, req); | ||
163 | default: | ||
164 | + trace_nvme_err_invalid_opc(cmd->opcode); | ||
165 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
166 | } | ||
167 | } | ||
168 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
169 | NvmeCQueue *cq; | ||
170 | uint16_t qid = le16_to_cpu(c->qid); | ||
171 | |||
172 | - if (!qid || nvme_check_sqid(n, qid)) { | ||
173 | + if (unlikely(!qid || nvme_check_sqid(n, qid))) { | ||
174 | + trace_nvme_err_invalid_del_sq(qid); | ||
175 | return NVME_INVALID_QID | NVME_DNR; | ||
176 | } | ||
177 | |||
178 | + trace_nvme_del_sq(qid); | ||
179 | + | ||
180 | sq = n->sq[qid]; | ||
181 | while (!QTAILQ_EMPTY(&sq->out_req_list)) { | ||
182 | req = QTAILQ_FIRST(&sq->out_req_list); | ||
183 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
184 | uint16_t qflags = le16_to_cpu(c->sq_flags); | ||
185 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
186 | |||
187 | - if (!cqid || nvme_check_cqid(n, cqid)) { | ||
188 | + trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); | ||
189 | + | ||
190 | + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { | ||
191 | + trace_nvme_err_invalid_create_sq_cqid(cqid); | ||
192 | return NVME_INVALID_CQID | NVME_DNR; | ||
193 | } | ||
194 | - if (!sqid || !nvme_check_sqid(n, sqid)) { | ||
195 | + if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { | ||
196 | + trace_nvme_err_invalid_create_sq_sqid(sqid); | ||
197 | return NVME_INVALID_QID | NVME_DNR; | ||
198 | } | ||
199 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
200 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
201 | + trace_nvme_err_invalid_create_sq_size(qsize); | ||
202 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
203 | } | ||
204 | - if (!prp1 || prp1 & (n->page_size - 1)) { | ||
205 | + if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { | ||
206 | + trace_nvme_err_invalid_create_sq_addr(prp1); | ||
207 | return NVME_INVALID_FIELD | NVME_DNR; | ||
208 | } | ||
209 | - if (!(NVME_SQ_FLAGS_PC(qflags))) { | ||
210 | + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { | ||
211 | + trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); | ||
212 | return NVME_INVALID_FIELD | NVME_DNR; | ||
213 | } | ||
214 | sq = g_malloc0(sizeof(*sq)); | ||
215 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
216 | NvmeCQueue *cq; | ||
217 | uint16_t qid = le16_to_cpu(c->qid); | ||
218 | |||
219 | - if (!qid || nvme_check_cqid(n, qid)) { | ||
220 | + if (unlikely(!qid || nvme_check_cqid(n, qid))) { | ||
221 | + trace_nvme_err_invalid_del_cq_cqid(qid); | ||
222 | return NVME_INVALID_CQID | NVME_DNR; | ||
223 | } | ||
224 | |||
225 | cq = n->cq[qid]; | ||
226 | - if (!QTAILQ_EMPTY(&cq->sq_list)) { | ||
227 | + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { | ||
228 | + trace_nvme_err_invalid_del_cq_notempty(qid); | ||
229 | return NVME_INVALID_QUEUE_DEL; | ||
230 | } | ||
231 | + trace_nvme_del_cq(qid); | ||
232 | nvme_free_cq(cq, n); | ||
233 | return NVME_SUCCESS; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
236 | uint16_t qflags = le16_to_cpu(c->cq_flags); | ||
237 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
238 | |||
239 | - if (!cqid || !nvme_check_cqid(n, cqid)) { | ||
240 | + trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, | ||
241 | + NVME_CQ_FLAGS_IEN(qflags) != 0); | ||
242 | + | ||
243 | + if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { | ||
244 | + trace_nvme_err_invalid_create_cq_cqid(cqid); | ||
245 | return NVME_INVALID_CQID | NVME_DNR; | ||
246 | } | ||
247 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
248 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
249 | + trace_nvme_err_invalid_create_cq_size(qsize); | ||
250 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
251 | } | ||
252 | - if (!prp1) { | ||
253 | + if (unlikely(!prp1)) { | ||
254 | + trace_nvme_err_invalid_create_cq_addr(prp1); | ||
255 | return NVME_INVALID_FIELD | NVME_DNR; | ||
256 | } | ||
257 | - if (vector > n->num_queues) { | ||
258 | + if (unlikely(vector > n->num_queues)) { | ||
259 | + trace_nvme_err_invalid_create_cq_vector(vector); | ||
260 | return NVME_INVALID_IRQ_VECTOR | NVME_DNR; | ||
261 | } | ||
262 | - if (!(NVME_CQ_FLAGS_PC(qflags))) { | ||
263 | + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { | ||
264 | + trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); | ||
265 | return NVME_INVALID_FIELD | NVME_DNR; | ||
266 | } | ||
267 | |||
268 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) | ||
269 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
270 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
271 | |||
272 | + trace_nvme_identify_ctrl(); | ||
273 | + | ||
274 | return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), | ||
275 | prp1, prp2); | ||
276 | } | ||
277 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) | ||
278 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
279 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
280 | |||
281 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
282 | + trace_nvme_identify_ns(nsid); | ||
283 | + | ||
284 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
285 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
286 | return NVME_INVALID_NSID | NVME_DNR; | ||
287 | } | ||
288 | |||
289 | ns = &n->namespaces[nsid - 1]; | ||
290 | + | ||
291 | return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), | ||
292 | prp1, prp2); | ||
293 | } | ||
294 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) | ||
295 | uint16_t ret; | ||
296 | int i, j = 0; | ||
297 | |||
298 | + trace_nvme_identify_nslist(min_nsid); | ||
299 | + | ||
300 | list = g_malloc0(data_len); | ||
301 | for (i = 0; i < n->num_namespaces; i++) { | ||
302 | if (i < min_nsid) { | ||
303 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) | ||
304 | case 0x02: | ||
305 | return nvme_identify_nslist(n, c); | ||
306 | default: | ||
307 | + trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); | ||
308 | return NVME_INVALID_FIELD | NVME_DNR; | ||
309 | } | ||
310 | } | ||
311 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
312 | switch (dw10) { | ||
313 | case NVME_VOLATILE_WRITE_CACHE: | ||
314 | result = blk_enable_write_cache(n->conf.blk); | ||
315 | + trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); | ||
316 | break; | ||
317 | case NVME_NUMBER_OF_QUEUES: | ||
318 | result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
319 | + trace_nvme_getfeat_numq(result); | ||
320 | break; | ||
321 | default: | ||
322 | + trace_nvme_err_invalid_getfeat(dw10); | ||
323 | return NVME_INVALID_FIELD | NVME_DNR; | ||
324 | } | ||
325 | |||
326 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
327 | blk_set_enable_write_cache(n->conf.blk, dw11 & 1); | ||
328 | break; | ||
329 | case NVME_NUMBER_OF_QUEUES: | ||
330 | + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, | ||
331 | + ((dw11 >> 16) & 0xFFFF) + 1, | ||
332 | + n->num_queues - 1, n->num_queues - 1); | ||
333 | req->cqe.result = | ||
334 | cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
335 | break; | ||
336 | default: | ||
337 | + trace_nvme_err_invalid_setfeat(dw10); | ||
338 | return NVME_INVALID_FIELD | NVME_DNR; | ||
339 | } | ||
340 | return NVME_SUCCESS; | ||
341 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
342 | case NVME_ADM_CMD_GET_FEATURES: | ||
343 | return nvme_get_feature(n, cmd, req); | ||
344 | default: | ||
345 | + trace_nvme_err_invalid_admin_opc(cmd->opcode); | ||
346 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
347 | } | ||
348 | } | ||
349 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
350 | uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; | ||
351 | uint32_t page_size = 1 << page_bits; | ||
352 | |||
353 | - if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || | ||
354 | - n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || | ||
355 | - NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || | ||
356 | - NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || | ||
357 | - NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || | ||
358 | - NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || | ||
359 | - NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || | ||
360 | - NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || | ||
361 | - !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { | ||
362 | + if (unlikely(n->cq[0])) { | ||
363 | + trace_nvme_err_startfail_cq(); | ||
364 | + return -1; | ||
365 | + } | ||
366 | + if (unlikely(n->sq[0])) { | ||
367 | + trace_nvme_err_startfail_sq(); | ||
368 | + return -1; | ||
369 | + } | ||
370 | + if (unlikely(!n->bar.asq)) { | ||
371 | + trace_nvme_err_startfail_nbarasq(); | ||
372 | + return -1; | ||
373 | + } | ||
374 | + if (unlikely(!n->bar.acq)) { | ||
375 | + trace_nvme_err_startfail_nbaracq(); | ||
376 | + return -1; | ||
377 | + } | ||
378 | + if (unlikely(n->bar.asq & (page_size - 1))) { | ||
379 | + trace_nvme_err_startfail_asq_misaligned(n->bar.asq); | ||
380 | + return -1; | ||
381 | + } | ||
382 | + if (unlikely(n->bar.acq & (page_size - 1))) { | ||
383 | + trace_nvme_err_startfail_acq_misaligned(n->bar.acq); | ||
384 | + return -1; | ||
385 | + } | ||
386 | + if (unlikely(NVME_CC_MPS(n->bar.cc) < | ||
387 | + NVME_CAP_MPSMIN(n->bar.cap))) { | ||
388 | + trace_nvme_err_startfail_page_too_small( | ||
389 | + NVME_CC_MPS(n->bar.cc), | ||
390 | + NVME_CAP_MPSMIN(n->bar.cap)); | ||
391 | + return -1; | ||
392 | + } | ||
393 | + if (unlikely(NVME_CC_MPS(n->bar.cc) > | ||
394 | + NVME_CAP_MPSMAX(n->bar.cap))) { | ||
395 | + trace_nvme_err_startfail_page_too_large( | ||
396 | + NVME_CC_MPS(n->bar.cc), | ||
397 | + NVME_CAP_MPSMAX(n->bar.cap)); | ||
398 | + return -1; | ||
399 | + } | ||
400 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) < | ||
401 | + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { | ||
402 | + trace_nvme_err_startfail_cqent_too_small( | ||
403 | + NVME_CC_IOCQES(n->bar.cc), | ||
404 | + NVME_CTRL_CQES_MIN(n->bar.cap)); | ||
405 | + return -1; | ||
406 | + } | ||
407 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) > | ||
408 | + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { | ||
409 | + trace_nvme_err_startfail_cqent_too_large( | ||
410 | + NVME_CC_IOCQES(n->bar.cc), | ||
411 | + NVME_CTRL_CQES_MAX(n->bar.cap)); | ||
412 | + return -1; | ||
413 | + } | ||
414 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) < | ||
415 | + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { | ||
416 | + trace_nvme_err_startfail_sqent_too_small( | ||
417 | + NVME_CC_IOSQES(n->bar.cc), | ||
418 | + NVME_CTRL_SQES_MIN(n->bar.cap)); | ||
419 | + return -1; | ||
420 | + } | ||
421 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) > | ||
422 | + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { | ||
423 | + trace_nvme_err_startfail_sqent_too_large( | ||
424 | + NVME_CC_IOSQES(n->bar.cc), | ||
425 | + NVME_CTRL_SQES_MAX(n->bar.cap)); | ||
426 | + return -1; | ||
427 | + } | ||
428 | + if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { | ||
429 | + trace_nvme_err_startfail_asqent_sz_zero(); | ||
430 | + return -1; | ||
431 | + } | ||
432 | + if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { | ||
433 | + trace_nvme_err_startfail_acqent_sz_zero(); | ||
434 | return -1; | ||
435 | } | ||
436 | |||
437 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
438 | static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
439 | unsigned size) | ||
55 | { | 440 | { |
56 | int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk; | 441 | + if (unlikely(offset & (sizeof(uint32_t) - 1))) { |
57 | + int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS; | 442 | + NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, |
58 | 443 | + "MMIO write not 32-bit aligned," | |
59 | if (iscsilun->allocmap == NULL) { | 444 | + " offset=0x%"PRIx64"", offset); |
445 | + /* should be ignored, fall through for now */ | ||
446 | + } | ||
447 | + | ||
448 | + if (unlikely(size < sizeof(uint32_t))) { | ||
449 | + NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, | ||
450 | + "MMIO write smaller than 32-bits," | ||
451 | + " offset=0x%"PRIx64", size=%u", | ||
452 | + offset, size); | ||
453 | + /* should be ignored, fall through for now */ | ||
454 | + } | ||
455 | + | ||
456 | switch (offset) { | ||
457 | - case 0xc: | ||
458 | + case 0xc: /* INTMS */ | ||
459 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
460 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
461 | + "undefined access to interrupt mask set" | ||
462 | + " when MSI-X is enabled"); | ||
463 | + /* should be ignored, fall through for now */ | ||
464 | + } | ||
465 | n->bar.intms |= data & 0xffffffff; | ||
466 | n->bar.intmc = n->bar.intms; | ||
467 | + trace_nvme_mmio_intm_set(data & 0xffffffff, | ||
468 | + n->bar.intmc); | ||
469 | break; | ||
470 | - case 0x10: | ||
471 | + case 0x10: /* INTMC */ | ||
472 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
473 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
474 | + "undefined access to interrupt mask clr" | ||
475 | + " when MSI-X is enabled"); | ||
476 | + /* should be ignored, fall through for now */ | ||
477 | + } | ||
478 | n->bar.intms &= ~(data & 0xffffffff); | ||
479 | n->bar.intmc = n->bar.intms; | ||
480 | + trace_nvme_mmio_intm_clr(data & 0xffffffff, | ||
481 | + n->bar.intmc); | ||
482 | break; | ||
483 | - case 0x14: | ||
484 | + case 0x14: /* CC */ | ||
485 | + trace_nvme_mmio_cfg(data & 0xffffffff); | ||
486 | /* Windows first sends data, then sends enable bit */ | ||
487 | if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && | ||
488 | !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) | ||
489 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
490 | |||
491 | if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { | ||
492 | n->bar.cc = data; | ||
493 | - if (nvme_start_ctrl(n)) { | ||
494 | + if (unlikely(nvme_start_ctrl(n))) { | ||
495 | + trace_nvme_err_startfail(); | ||
496 | n->bar.csts = NVME_CSTS_FAILED; | ||
497 | } else { | ||
498 | + trace_nvme_mmio_start_success(); | ||
499 | n->bar.csts = NVME_CSTS_READY; | ||
500 | } | ||
501 | } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { | ||
502 | + trace_nvme_mmio_stopped(); | ||
503 | nvme_clear_ctrl(n); | ||
504 | n->bar.csts &= ~NVME_CSTS_READY; | ||
505 | } | ||
506 | if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { | ||
507 | - nvme_clear_ctrl(n); | ||
508 | - n->bar.cc = data; | ||
509 | - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
510 | + trace_nvme_mmio_shutdown_set(); | ||
511 | + nvme_clear_ctrl(n); | ||
512 | + n->bar.cc = data; | ||
513 | + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
514 | } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { | ||
515 | - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
516 | - n->bar.cc = data; | ||
517 | + trace_nvme_mmio_shutdown_cleared(); | ||
518 | + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
519 | + n->bar.cc = data; | ||
520 | + } | ||
521 | + break; | ||
522 | + case 0x1C: /* CSTS */ | ||
523 | + if (data & (1 << 4)) { | ||
524 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, | ||
525 | + "attempted to W1C CSTS.NSSRO" | ||
526 | + " but CAP.NSSRS is zero (not supported)"); | ||
527 | + } else if (data != 0) { | ||
528 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, | ||
529 | + "attempted to set a read only bit" | ||
530 | + " of controller status"); | ||
531 | + } | ||
532 | + break; | ||
533 | + case 0x20: /* NSSR */ | ||
534 | + if (data == 0x4E564D65) { | ||
535 | + trace_nvme_ub_mmiowr_ssreset_unsupported(); | ||
536 | + } else { | ||
537 | + /* The spec says that writes of other values have no effect */ | ||
538 | + return; | ||
539 | } | ||
540 | break; | ||
541 | - case 0x24: | ||
542 | + case 0x24: /* AQA */ | ||
543 | n->bar.aqa = data & 0xffffffff; | ||
544 | + trace_nvme_mmio_aqattr(data & 0xffffffff); | ||
545 | break; | ||
546 | - case 0x28: | ||
547 | + case 0x28: /* ASQ */ | ||
548 | n->bar.asq = data; | ||
549 | + trace_nvme_mmio_asqaddr(data); | ||
550 | break; | ||
551 | - case 0x2c: | ||
552 | + case 0x2c: /* ASQ hi */ | ||
553 | n->bar.asq |= data << 32; | ||
554 | + trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); | ||
555 | break; | ||
556 | - case 0x30: | ||
557 | + case 0x30: /* ACQ */ | ||
558 | + trace_nvme_mmio_acqaddr(data); | ||
559 | n->bar.acq = data; | ||
560 | break; | ||
561 | - case 0x34: | ||
562 | + case 0x34: /* ACQ hi */ | ||
563 | n->bar.acq |= data << 32; | ||
564 | + trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); | ||
565 | break; | ||
566 | + case 0x38: /* CMBLOC */ | ||
567 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, | ||
568 | + "invalid write to reserved CMBLOC" | ||
569 | + " when CMBSZ is zero, ignored"); | ||
570 | + return; | ||
571 | + case 0x3C: /* CMBSZ */ | ||
572 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, | ||
573 | + "invalid write to read only CMBSZ, ignored"); | ||
574 | + return; | ||
575 | default: | ||
576 | + NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
577 | + "invalid MMIO write," | ||
578 | + " offset=0x%"PRIx64", data=%"PRIx64"", | ||
579 | + offset, data); | ||
580 | break; | ||
581 | } | ||
582 | } | ||
583 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
584 | uint8_t *ptr = (uint8_t *)&n->bar; | ||
585 | uint64_t val = 0; | ||
586 | |||
587 | + if (unlikely(addr & (sizeof(uint32_t) - 1))) { | ||
588 | + NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, | ||
589 | + "MMIO read not 32-bit aligned," | ||
590 | + " offset=0x%"PRIx64"", addr); | ||
591 | + /* should RAZ, fall through for now */ | ||
592 | + } else if (unlikely(size < sizeof(uint32_t))) { | ||
593 | + NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, | ||
594 | + "MMIO read smaller than 32-bits," | ||
595 | + " offset=0x%"PRIx64"", addr); | ||
596 | + /* should RAZ, fall through for now */ | ||
597 | + } | ||
598 | + | ||
599 | if (addr < sizeof(n->bar)) { | ||
600 | memcpy(&val, ptr + addr, size); | ||
601 | + } else { | ||
602 | + NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, | ||
603 | + "MMIO read beyond last register," | ||
604 | + " offset=0x%"PRIx64", returning 0", addr); | ||
605 | } | ||
606 | + | ||
607 | return val; | ||
608 | } | ||
609 | |||
610 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
611 | { | ||
612 | uint32_t qid; | ||
613 | |||
614 | - if (addr & ((1 << 2) - 1)) { | ||
615 | + if (unlikely(addr & ((1 << 2) - 1))) { | ||
616 | + NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, | ||
617 | + "doorbell write not 32-bit aligned," | ||
618 | + " offset=0x%"PRIx64", ignoring", addr); | ||
60 | return; | 619 | return; |
61 | } | 620 | } |
62 | /* expand to entirely contain all affected clusters */ | 621 | |
63 | - cl_num_expanded = sector_num / iscsilun->cluster_sectors; | 622 | if (((addr - 0x1000) >> 2) & 1) { |
64 | + assert(cluster_sectors); | 623 | + /* Completion queue doorbell write */ |
65 | + cl_num_expanded = sector_num / cluster_sectors; | 624 | + |
66 | nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors, | 625 | uint16_t new_head = val & 0xffff; |
67 | - iscsilun->cluster_sectors) - cl_num_expanded; | 626 | int start_sqs; |
68 | + cluster_sectors) - cl_num_expanded; | 627 | NvmeCQueue *cq; |
69 | /* shrink to touch only completely contained clusters */ | 628 | |
70 | - cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors); | 629 | qid = (addr - (0x1000 + (1 << 2))) >> 3; |
71 | - nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors | 630 | - if (nvme_check_cqid(n, qid)) { |
72 | + cl_num_shrunk = DIV_ROUND_UP(sector_num, cluster_sectors); | 631 | + if (unlikely(nvme_check_cqid(n, qid))) { |
73 | + nb_cls_shrunk = (sector_num + nb_sectors) / cluster_sectors | 632 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, |
74 | - cl_num_shrunk; | 633 | + "completion queue doorbell write" |
75 | if (allocated) { | 634 | + " for nonexistent queue," |
76 | bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded); | 635 | + " sqid=%"PRIu32", ignoring", qid); |
77 | @@ -XXX,XX +XXX,XX @@ iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num, | 636 | return; |
78 | if (iscsilun->allocmap == NULL) { | 637 | } |
79 | return true; | 638 | |
80 | } | 639 | cq = n->cq[qid]; |
81 | - size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors); | 640 | - if (new_head >= cq->size) { |
82 | + assert(iscsilun->cluster_size); | 641 | + if (unlikely(new_head >= cq->size)) { |
83 | + size = DIV_ROUND_UP(sector_num + nb_sectors, | 642 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, |
84 | + iscsilun->cluster_size >> BDRV_SECTOR_BITS); | 643 | + "completion queue doorbell write value" |
85 | return !(find_next_bit(iscsilun->allocmap, size, | 644 | + " beyond queue size, sqid=%"PRIu32"," |
86 | - sector_num / iscsilun->cluster_sectors) == size); | 645 | + " new_head=%"PRIu16", ignoring", |
87 | + sector_num * BDRV_SECTOR_SIZE / | 646 | + qid, new_head); |
88 | + iscsilun->cluster_size) == size); | 647 | return; |
89 | } | 648 | } |
90 | 649 | ||
91 | static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, | 650 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) |
92 | @@ -XXX,XX +XXX,XX @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, | 651 | nvme_isr_notify(n, cq); |
93 | if (iscsilun->allocmap_valid == NULL) { | 652 | } |
94 | return false; | 653 | } else { |
95 | } | 654 | + /* Submission queue doorbell write */ |
96 | - size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors); | 655 | + |
97 | + assert(iscsilun->cluster_size); | 656 | uint16_t new_tail = val & 0xffff; |
98 | + size = DIV_ROUND_UP(sector_num + nb_sectors, | 657 | NvmeSQueue *sq; |
99 | + iscsilun->cluster_size >> BDRV_SECTOR_BITS); | 658 | |
100 | return (find_next_zero_bit(iscsilun->allocmap_valid, size, | 659 | qid = (addr - 0x1000) >> 3; |
101 | - sector_num / iscsilun->cluster_sectors) == size); | 660 | - if (nvme_check_sqid(n, qid)) { |
102 | + sector_num * BDRV_SECTOR_SIZE / | 661 | + if (unlikely(nvme_check_sqid(n, qid))) { |
103 | + iscsilun->cluster_size) == size); | 662 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, |
104 | } | 663 | + "submission queue doorbell write" |
105 | 664 | + " for nonexistent queue," | |
106 | static int coroutine_fn | 665 | + " sqid=%"PRIu32", ignoring", qid); |
107 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, | 666 | return; |
108 | BlockDriverState *file; | 667 | } |
109 | /* check the block status from the beginning of the cluster | 668 | |
110 | * containing the start sector */ | 669 | sq = n->sq[qid]; |
111 | - int64_t ret = iscsi_co_get_block_status(bs, | 670 | - if (new_tail >= sq->size) { |
112 | - sector_num - sector_num % iscsilun->cluster_sectors, | 671 | + if (unlikely(new_tail >= sq->size)) { |
113 | - BDRV_REQUEST_MAX_SECTORS, &pnum, &file); | 672 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, |
114 | + int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS; | 673 | + "submission queue doorbell write value" |
115 | + int head; | 674 | + " beyond queue size, sqid=%"PRIu32"," |
116 | + int64_t ret; | 675 | + " new_tail=%"PRIu16", ignoring", |
117 | + | 676 | + qid, new_tail); |
118 | + assert(cluster_sectors); | 677 | return; |
119 | + head = sector_num % cluster_sectors; | 678 | } |
120 | + ret = iscsi_co_get_block_status(bs, sector_num - head, | 679 | |
121 | + BDRV_REQUEST_MAX_SECTORS, &pnum, | 680 | diff --git a/hw/block/trace-events b/hw/block/trace-events |
122 | + &file); | 681 | index XXXXXXX..XXXXXXX 100644 |
123 | if (ret < 0) { | 682 | --- a/hw/block/trace-events |
124 | return ret; | 683 | +++ b/hw/block/trace-events |
125 | } | 684 | @@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 |
126 | /* if the whole request falls into an unallocated area we can avoid | 685 | hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" |
127 | - * to read and directly return zeroes instead */ | 686 | hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" |
128 | - if (ret & BDRV_BLOCK_ZERO && | 687 | |
129 | - pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) { | 688 | +# hw/block/nvme.c |
130 | + * reading and directly return zeroes instead */ | 689 | +# nvme traces for successful events |
131 | + if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors + head) { | 690 | +nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" |
132 | qemu_iovec_memset(iov, 0, 0x00, iov->size); | 691 | +nvme_irq_pin(void) "pulsing IRQ pin" |
133 | return 0; | 692 | +nvme_irq_masked(void) "IRQ is masked" |
134 | } | 693 | +nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" |
135 | @@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, | 694 | +nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" |
136 | * reasonable size */ | 695 | +nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" |
137 | if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 && | 696 | +nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" |
138 | iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { | 697 | +nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" |
139 | - iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran * | 698 | +nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" |
140 | - iscsilun->block_size) >> BDRV_SECTOR_BITS; | 699 | +nvme_identify_ctrl(void) "identify controller" |
141 | + iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran * | 700 | +nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" |
142 | + iscsilun->block_size; | 701 | +nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" |
143 | if (iscsilun->lbprz) { | 702 | +nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s" |
144 | ret = iscsi_allocmap_init(iscsilun, bs->open_flags); | 703 | +nvme_getfeat_numq(int result) "get feature number of queues, result=%d" |
145 | } | 704 | +nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" |
146 | @@ -XXX,XX +XXX,XX @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) | 705 | +nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" |
147 | { | 706 | +nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" |
148 | IscsiLun *iscsilun = bs->opaque; | 707 | +nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" |
149 | bdi->unallocated_blocks_are_zero = iscsilun->lbprz; | 708 | +nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" |
150 | - bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE; | 709 | +nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" |
151 | + bdi->cluster_size = iscsilun->cluster_size; | 710 | +nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" |
152 | return 0; | 711 | +nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" |
153 | } | 712 | +nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" |
154 | 713 | +nvme_mmio_start_success(void) "setting controller enable bit succeeded" | |
714 | +nvme_mmio_stopped(void) "cleared controller enable bit" | ||
715 | +nvme_mmio_shutdown_set(void) "shutdown bit set" | ||
716 | +nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" | ||
717 | + | ||
718 | +# nvme traces for error conditions | ||
719 | +nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" | ||
720 | +nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" | ||
721 | +nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" | ||
722 | +nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" | ||
723 | +nvme_err_invalid_field(void) "invalid field" | ||
724 | +nvme_err_invalid_prp(void) "invalid PRP" | ||
725 | +nvme_err_invalid_sgl(void) "invalid SGL" | ||
726 | +nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" | ||
727 | +nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" | ||
728 | +nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" | ||
729 | +nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" | ||
730 | +nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sqid=%"PRIu16"" | ||
731 | +nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" | ||
732 | +nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" | ||
733 | +nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" | ||
734 | +nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" | ||
735 | +nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" | ||
736 | +nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" | ||
737 | +nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" | ||
738 | +nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" | ||
739 | +nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" | ||
740 | +nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" | ||
741 | +nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" | ||
742 | +nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" | ||
743 | +nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" | ||
744 | +nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" | ||
745 | +nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" | ||
746 | +nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" | ||
747 | +nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" | ||
748 | +nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" | ||
749 | +nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" | ||
750 | +nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" | ||
751 | +nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" | ||
752 | +nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" | ||
753 | +nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" | ||
754 | +nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" | ||
755 | +nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" | ||
756 | +nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" | ||
757 | +nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" | ||
758 | +nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" | ||
759 | +nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" | ||
760 | +nvme_err_startfail(void) "setting controller enable bit failed" | ||
761 | + | ||
762 | +# Traces for undefined behavior | ||
763 | +nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" | ||
764 | +nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" | ||
765 | +nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" | ||
766 | +nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" | ||
767 | +nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" | ||
768 | +nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" | ||
769 | +nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" | ||
770 | +nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | ||
771 | +nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" | ||
772 | +nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" | ||
773 | +nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" | ||
774 | +nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" | ||
775 | +nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" | ||
776 | +nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" | ||
777 | +nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
778 | +nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" | ||
779 | +nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_tail=%"PRIu16", ignoring" | ||
780 | + | ||
781 | # hw/block/xen_disk.c | ||
782 | xen_disk_alloc(char *name) "%s" | ||
783 | xen_disk_init(char *name) "%s" | ||
155 | -- | 784 | -- |
156 | 2.13.6 | 785 | 2.13.6 |
157 | 786 | ||
158 | 787 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch fixes several mistakes in the documentation of the | 3 | Management tools create overlays of running guests with qemu-img: |
4 | compressed cluster descriptor: | ||
5 | 4 | ||
6 | 1) the documentation claims that the cluster descriptor contains the | 5 | $ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2 |
7 | number of sectors used to store the compressed data, but what it | ||
8 | actually contains is the number of sectors *minus one* or, in other | ||
9 | words, the number of additional sectors after the first one. | ||
10 | 6 | ||
11 | 2) the width of the fields is incorrectly specified. The number of bits | 7 | but this doesn't work anymore due to image locking: |
12 | used by each field is | ||
13 | 8 | ||
14 | x = 62 - (cluster_bits - 8) for the offset field | 9 | qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock |
15 | y = (cluster_bits - 8) for the size field | 10 | Is another process using the image? |
16 | 11 | Could not open backing image to determine size. | |
17 | So the offset field's location is [0, x-1], not [0, x] as stated. | 12 | Use the force share option to allow this use case again. |
18 | |||
19 | 3) the size field does not contain the size of the compressed data, | ||
20 | but rather the number of sectors where that data is stored. The | ||
21 | compressed data starts at the exact point specified in the offset | ||
22 | field and ends when there's enough data to produce a cluster of | ||
23 | decompressed data. Both points can be in the middle of a sector, | ||
24 | allowing several compressed clusters to be stored next to one | ||
25 | another, sharing sectors if necessary. | ||
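(Worked example, added for illustration and not part of the original commit
message: with the default 64 KiB clusters, cluster_bits = 16, so the offset
field is x = 62 - 8 = 54 bits wide and occupies bits [0, 53], while the
sector-count field is y = 8 bits wide and occupies bits [54, 61].)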
26 | 13 | ||
27 | Cc: qemu-stable@nongnu.org | 14 | Cc: qemu-stable@nongnu.org |
28 | Signed-off-by: Alberto Garcia <berto@igalia.com> | 15 | Signed-off-by: Fam Zheng <famz@redhat.com> |
29 | Reviewed-by: Eric Blake <eblake@redhat.com> | 16 | Reviewed-by: Eric Blake <eblake@redhat.com> |
30 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
31 | --- | 18 | --- |
32 | docs/interop/qcow2.txt | 16 +++++++++++++--- | 19 | block.c | 3 ++- |
33 | 1 file changed, 13 insertions(+), 3 deletions(-) | 20 | 1 file changed, 2 insertions(+), 1 deletion(-) |
34 | 21 | ||
35 | diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt | 22 | diff --git a/block.c b/block.c |
36 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
37 | --- a/docs/interop/qcow2.txt | 24 | --- a/block.c |
38 | +++ b/docs/interop/qcow2.txt | 25 | +++ b/block.c |
39 | @@ -XXX,XX +XXX,XX @@ Standard Cluster Descriptor: | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt, |
40 | 27 | back_flags = flags; | |
41 | Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)): | 28 | back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); |
42 | 29 | ||
43 | - Bit 0 - x: Host cluster offset. This is usually _not_ aligned to a | 30 | + backing_options = qdict_new(); |
44 | - cluster boundary! | 31 | if (backing_fmt) { |
45 | + Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a | 32 | - backing_options = qdict_new(); |
46 | + cluster or sector boundary! | 33 | qdict_put_str(backing_options, "driver", backing_fmt); |
47 | 34 | } | |
48 | - x+1 - 61: Compressed size of the images in sectors of 512 bytes | 35 | + qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); |
49 | + x - 61: Number of additional 512-byte sectors used for the | 36 | |
50 | + compressed data, beyond the sector containing the offset | 37 | bs = bdrv_open(full_backing, NULL, backing_options, back_flags, |
51 | + in the previous field. Some of these sectors may reside | 38 | &local_err); |
52 | + in the next contiguous host cluster. | ||
53 | + | ||
54 | + Note that the compressed data does not necessarily occupy | ||
55 | + all of the bytes in the final sector; rather, decompression | ||
56 | + stops when it has produced a cluster of data. | ||
57 | + | ||
58 | + Another compressed cluster may map to the tail of the final | ||
59 | + sector used by this compressed cluster. | ||
60 | |||
61 | If a cluster is unallocated, read requests shall read the data from the backing | ||
62 | file (except if bit 0 in the Standard Cluster Descriptor is set). If there is | ||
63 | -- | 39 | -- |
64 | 2.13.6 | 40 | 2.13.6 |
65 | 41 | ||
66 | 42 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 3 | It's not working anymore since QEMU v1.3.0 - time to remove it now. |
4 | byte-based. Update the parallels driver accordingly. Note that | ||
5 | the internal function block_status() is still sector-based, because | ||
6 | it is still in use by other sector-based functions; but that's okay | ||
7 | because request_alignment is 512 as a result of those functions. | ||
8 | For now, no optimizations are added based on the mapping hint. | ||
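(Illustrative arithmetic, not part of the original commit message, assuming
the usual 512-byte sectors: a byte-based query at offset 65536 for 4096 bytes
reaches the internal sector-based helper as sector_num 128 and nb_sectors 8,
and its result of 8 sectors is scaled back to *pnum = 4096 bytes.)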
9 | 4 | ||
10 | Signed-off-by: Eric Blake <eblake@redhat.com> | 5 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
11 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 6 | Reviewed-by: John Snow <jsnow@redhat.com> |
12 | Reviewed-by: Fam Zheng <famz@redhat.com> | 7 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 9 | --- |
15 | block/parallels.c | 22 +++++++++++++++------- | 10 | blockdev.c | 11 ----------- |
16 | 1 file changed, 15 insertions(+), 7 deletions(-) | 11 | qemu-doc.texi | 6 ------ |
12 | 2 files changed, 17 deletions(-) | ||
17 | 13 | ||
18 | diff --git a/block/parallels.c b/block/parallels.c | 14 | diff --git a/blockdev.c b/blockdev.c |
19 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/parallels.c | 16 | --- a/blockdev.c |
21 | +++ b/block/parallels.c | 17 | +++ b/blockdev.c |
22 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) | 18 | @@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = { |
23 | } | 19 | .type = QEMU_OPT_STRING, |
24 | 20 | .help = "chs translation (auto, lba, none)", | |
25 | 21 | },{ | |
26 | -static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, | 22 | - .name = "boot", |
27 | - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) | 23 | - .type = QEMU_OPT_BOOL, |
28 | +static int coroutine_fn parallels_co_block_status(BlockDriverState *bs, | 24 | - .help = "(deprecated, ignored)", |
29 | + bool want_zero, | 25 | - },{ |
30 | + int64_t offset, | 26 | .name = "addr", |
31 | + int64_t bytes, | 27 | .type = QEMU_OPT_STRING, |
32 | + int64_t *pnum, | 28 | .help = "pci address (virtio only)", |
33 | + int64_t *map, | 29 | @@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) |
34 | + BlockDriverState **file) | 30 | goto fail; |
35 | { | ||
36 | BDRVParallelsState *s = bs->opaque; | ||
37 | - int64_t offset; | ||
38 | + int count; | ||
39 | |||
40 | + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); | ||
41 | qemu_co_mutex_lock(&s->lock); | ||
42 | - offset = block_status(s, sector_num, nb_sectors, pnum); | ||
43 | + offset = block_status(s, offset >> BDRV_SECTOR_BITS, | ||
44 | + bytes >> BDRV_SECTOR_BITS, &count); | ||
45 | qemu_co_mutex_unlock(&s->lock); | ||
46 | |||
47 | + *pnum = count * BDRV_SECTOR_SIZE; | ||
48 | if (offset < 0) { | ||
49 | return 0; | ||
50 | } | 31 | } |
51 | 32 | ||
52 | + *map = offset * BDRV_SECTOR_SIZE; | 33 | - /* Deprecated option boot=[on|off] */ |
53 | *file = bs->file->bs; | 34 | - if (qemu_opt_get(legacy_opts, "boot") != NULL) { |
54 | - return (offset << BDRV_SECTOR_BITS) | | 35 | - fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be " |
55 | - BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | 36 | - "ignored. Future versions will reject this parameter. Please " |
56 | + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | 37 | - "update your scripts.\n"); |
57 | } | 38 | - } |
58 | 39 | - | |
59 | static coroutine_fn int parallels_co_writev(BlockDriverState *bs, | 40 | /* Other deprecated options */ |
60 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { | 41 | if (!qtest_enabled()) { |
61 | .bdrv_open = parallels_open, | 42 | for (i = 0; i < ARRAY_SIZE(deprecated); i++) { |
62 | .bdrv_close = parallels_close, | 43 | diff --git a/qemu-doc.texi b/qemu-doc.texi |
63 | .bdrv_child_perm = bdrv_format_default_perms, | 44 | index XXXXXXX..XXXXXXX 100644 |
64 | - .bdrv_co_get_block_status = parallels_co_get_block_status, | 45 | --- a/qemu-doc.texi |
65 | + .bdrv_co_block_status = parallels_co_block_status, | 46 | +++ b/qemu-doc.texi |
66 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | 47 | @@ -XXX,XX +XXX,XX @@ deprecated. |
67 | .bdrv_co_flush_to_os = parallels_co_flush_to_os, | 48 | |
68 | .bdrv_co_readv = parallels_co_readv, | 49 | @section System emulator command line arguments |
50 | |||
51 | -@subsection -drive boot=on|off (since 1.3.0) | ||
52 | - | ||
53 | -The ``boot=on|off'' option to the ``-drive'' argument is | ||
54 | -ignored. Applications should use the ``bootindex=N'' parameter | ||
55 | -to set an absolute ordering between devices instead. | ||
56 | - | ||
57 | @subsection -tdf (since 1.3.0) | ||
58 | |||
59 | The ``-tdf'' argument is ignored. The behaviour implemented | ||
69 | -- | 60 | -- |
70 | 2.13.6 | 61 | 2.13.6 |
71 | 62 | ||
72 | 63 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | qcow2_create2() calls qemu_co_mutex_lock(). Only a coroutine_fn may | 3 | It's been marked as deprecated since QEMU v2.10.0, and so far nobody |
4 | call another coroutine_fn. In fact, qcow2_create2 is always called from | 4 | complained that we should keep it, so let's remove this legacy option |
5 | coroutine context. | 5 | now to simplify the code quite a bit. |
6 | 6 | ||
7 | Rename the function to add the "co" moniker and add coroutine_fn. | 7 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
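(Minimal sketch, not part of the patch, illustrating the rule above with
QEMU's coroutine API from "qemu/coroutine.h"; the function name is made up:

    static int coroutine_fn example_co_fn(CoMutex *lock)
    {
        qemu_co_mutex_lock(lock);   /* may yield, so coroutine context is required */
        /* ... critical section ... */
        qemu_co_mutex_unlock(lock);
        return 0;
    }

A plain function without the coroutine_fn marker must not call example_co_fn
directly; it would first have to create and enter a coroutine.)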
8 | 8 | Reviewed-by: John Snow <jsnow@redhat.com> | |
9 | Reported-by: Marc-André Lureau <marcandre.lureau@redhat.com> | 9 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Message-Id: <20170705102231.20711-3-stefanha@redhat.com> | ||
12 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
13 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 11 | --- |
16 | block/qcow2.c | 17 +++++++++-------- | 12 | vl.c | 86 ++------------------------------------------------------- |
17 | 1 file changed, 9 insertions(+), 8 deletions(-) | 13 | qemu-doc.texi | 8 ------ |
18 | 14 | qemu-options.hx | 19 ++----------- | |
19 | diff --git a/block/qcow2.c b/block/qcow2.c | 15 | 3 files changed, 4 insertions(+), 109 deletions(-) |
16 | |||
17 | diff --git a/vl.c b/vl.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/qcow2.c | 19 | --- a/vl.c |
22 | +++ b/block/qcow2.c | 20 | +++ b/vl.c |
23 | @@ -XXX,XX +XXX,XX @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, | 21 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
24 | return refcount_bits; | 22 | const char *boot_order = NULL; |
25 | } | 23 | const char *boot_once = NULL; |
26 | 24 | DisplayState *ds; | |
27 | -static int qcow2_create2(const char *filename, int64_t total_size, | 25 | - int cyls, heads, secs, translation; |
28 | - const char *backing_file, const char *backing_format, | 26 | QemuOpts *opts, *machine_opts; |
29 | - int flags, size_t cluster_size, PreallocMode prealloc, | 27 | - QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL; |
30 | - QemuOpts *opts, int version, int refcount_order, | 28 | + QemuOpts *icount_opts = NULL, *accel_opts = NULL; |
31 | - const char *encryptfmt, Error **errp) | 29 | QemuOptsList *olist; |
32 | +static int coroutine_fn | 30 | int optind; |
33 | +qcow2_co_create2(const char *filename, int64_t total_size, | 31 | const char *optarg; |
34 | + const char *backing_file, const char *backing_format, | 32 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
35 | + int flags, size_t cluster_size, PreallocMode prealloc, | 33 | |
36 | + QemuOpts *opts, int version, int refcount_order, | 34 | cpu_model = NULL; |
37 | + const char *encryptfmt, Error **errp) | 35 | snapshot = 0; |
38 | { | 36 | - cyls = heads = secs = 0; |
39 | QDict *options; | 37 | - translation = BIOS_ATA_TRANSLATION_AUTO; |
40 | 38 | ||
41 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt | 39 | nb_nics = 0; |
42 | 40 | ||
43 | refcount_order = ctz32(refcount_bits); | 41 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) |
44 | 42 | if (optind >= argc) | |
45 | - ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags, | 43 | break; |
46 | - cluster_size, prealloc, opts, version, refcount_order, | 44 | if (argv[optind][0] != '-') { |
47 | - encryptfmt, &local_err); | 45 | - hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); |
48 | + ret = qcow2_co_create2(filename, size, backing_file, backing_fmt, flags, | 46 | + drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); |
49 | + cluster_size, prealloc, opts, version, refcount_order, | 47 | } else { |
50 | + encryptfmt, &local_err); | 48 | const QEMUOption *popt; |
51 | error_propagate(errp, local_err); | 49 | |
52 | 50 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | |
53 | finish: | 51 | cpu_model = optarg; |
52 | break; | ||
53 | case QEMU_OPTION_hda: | ||
54 | - { | ||
55 | - char buf[256]; | ||
56 | - if (cyls == 0) | ||
57 | - snprintf(buf, sizeof(buf), "%s", HD_OPTS); | ||
58 | - else | ||
59 | - snprintf(buf, sizeof(buf), | ||
60 | - "%s,cyls=%d,heads=%d,secs=%d%s", | ||
61 | - HD_OPTS , cyls, heads, secs, | ||
62 | - translation == BIOS_ATA_TRANSLATION_LBA ? | ||
63 | - ",trans=lba" : | ||
64 | - translation == BIOS_ATA_TRANSLATION_NONE ? | ||
65 | - ",trans=none" : ""); | ||
66 | - drive_add(IF_DEFAULT, 0, optarg, buf); | ||
67 | - break; | ||
68 | - } | ||
69 | case QEMU_OPTION_hdb: | ||
70 | case QEMU_OPTION_hdc: | ||
71 | case QEMU_OPTION_hdd: | ||
72 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
73 | case QEMU_OPTION_snapshot: | ||
74 | snapshot = 1; | ||
75 | break; | ||
76 | - case QEMU_OPTION_hdachs: | ||
77 | - { | ||
78 | - const char *p; | ||
79 | - p = optarg; | ||
80 | - cyls = strtol(p, (char **)&p, 0); | ||
81 | - if (cyls < 1 || cyls > 16383) | ||
82 | - goto chs_fail; | ||
83 | - if (*p != ',') | ||
84 | - goto chs_fail; | ||
85 | - p++; | ||
86 | - heads = strtol(p, (char **)&p, 0); | ||
87 | - if (heads < 1 || heads > 16) | ||
88 | - goto chs_fail; | ||
89 | - if (*p != ',') | ||
90 | - goto chs_fail; | ||
91 | - p++; | ||
92 | - secs = strtol(p, (char **)&p, 0); | ||
93 | - if (secs < 1 || secs > 63) | ||
94 | - goto chs_fail; | ||
95 | - if (*p == ',') { | ||
96 | - p++; | ||
97 | - if (!strcmp(p, "large")) { | ||
98 | - translation = BIOS_ATA_TRANSLATION_LARGE; | ||
99 | - } else if (!strcmp(p, "rechs")) { | ||
100 | - translation = BIOS_ATA_TRANSLATION_RECHS; | ||
101 | - } else if (!strcmp(p, "none")) { | ||
102 | - translation = BIOS_ATA_TRANSLATION_NONE; | ||
103 | - } else if (!strcmp(p, "lba")) { | ||
104 | - translation = BIOS_ATA_TRANSLATION_LBA; | ||
105 | - } else if (!strcmp(p, "auto")) { | ||
106 | - translation = BIOS_ATA_TRANSLATION_AUTO; | ||
107 | - } else { | ||
108 | - goto chs_fail; | ||
109 | - } | ||
110 | - } else if (*p != '\0') { | ||
111 | - chs_fail: | ||
112 | - error_report("invalid physical CHS format"); | ||
113 | - exit(1); | ||
114 | - } | ||
115 | - if (hda_opts != NULL) { | ||
116 | - qemu_opt_set_number(hda_opts, "cyls", cyls, | ||
117 | - &error_abort); | ||
118 | - qemu_opt_set_number(hda_opts, "heads", heads, | ||
119 | - &error_abort); | ||
120 | - qemu_opt_set_number(hda_opts, "secs", secs, | ||
121 | - &error_abort); | ||
122 | - if (translation == BIOS_ATA_TRANSLATION_LARGE) { | ||
123 | - qemu_opt_set(hda_opts, "trans", "large", | ||
124 | - &error_abort); | ||
125 | - } else if (translation == BIOS_ATA_TRANSLATION_RECHS) { | ||
126 | - qemu_opt_set(hda_opts, "trans", "rechs", | ||
127 | - &error_abort); | ||
128 | - } else if (translation == BIOS_ATA_TRANSLATION_LBA) { | ||
129 | - qemu_opt_set(hda_opts, "trans", "lba", | ||
130 | - &error_abort); | ||
131 | - } else if (translation == BIOS_ATA_TRANSLATION_NONE) { | ||
132 | - qemu_opt_set(hda_opts, "trans", "none", | ||
133 | - &error_abort); | ||
134 | - } | ||
135 | - } | ||
136 | - } | ||
137 | - error_report("'-hdachs' is deprecated, please use '-device" | ||
138 | - " ide-hd,cyls=c,heads=h,secs=s,...' instead"); | ||
139 | - break; | ||
140 | case QEMU_OPTION_numa: | ||
141 | opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), | ||
142 | optarg, true); | ||
143 | diff --git a/qemu-doc.texi b/qemu-doc.texi | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/qemu-doc.texi | ||
146 | +++ b/qemu-doc.texi | ||
147 | @@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the | ||
148 | ``-object filter-dump'' argument which works in combination | ||
149 | with the modern ``-netdev`` backends instead. | ||
150 | |||
151 | -@subsection -hdachs (since 2.10.0) | ||
152 | - | ||
153 | -The ``-hdachs'' argument is now a synonym for setting | ||
154 | -the ``cyls'', ``heads'', ``secs'', and ``trans'' properties | ||
155 | -on the ``ide-hd'' device using the ``-device'' argument. | ||
156 | -The new syntax allows different settings to be provided | ||
157 | -per disk. | ||
158 | - | ||
159 | @subsection -usbdevice (since 2.10.0) | ||
160 | |||
161 | The ``-usbdevice DEV'' argument is now a synonym for setting | ||
162 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/qemu-options.hx | ||
165 | +++ b/qemu-options.hx | ||
166 | @@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type. | ||
167 | @item media=@var{media} | ||
168 | This option defines the type of the media: disk or cdrom. | ||
169 | @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}] | ||
170 | -These options have the same definition as they have in @option{-hdachs}. | ||
171 | -These parameters are deprecated, use the corresponding parameters | ||
172 | +Force disk physical geometry and the optional BIOS translation (trans=none or | ||
173 | +lba). These parameters are deprecated, use the corresponding parameters | ||
174 | of @code{-device} instead. | ||
175 | @item snapshot=@var{snapshot} | ||
176 | @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive | ||
177 | @@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force | ||
178 | the write back by pressing @key{C-a s} (@pxref{disk_images}). | ||
179 | ETEXI | ||
180 | |||
181 | -DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \ | ||
182 | - "-hdachs c,h,s[,t]\n" \ | ||
183 | - " force hard disk 0 physical geometry and the optional BIOS\n" \ | ||
184 | - " translation (t=none or lba) (usually QEMU can guess them)\n", | ||
185 | - QEMU_ARCH_ALL) | ||
186 | -STEXI | ||
187 | -@item -hdachs @var{c},@var{h},@var{s},[,@var{t}] | ||
188 | -@findex -hdachs | ||
189 | -Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <= | ||
190 | -@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS | ||
191 | -translation mode (@var{t}=none, lba or auto). Usually QEMU can guess | ||
192 | -all those parameters. This option is deprecated, please use | ||
193 | -@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead. | ||
194 | -ETEXI | ||
195 | - | ||
196 | DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, | ||
197 | "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" | ||
198 | " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n" | ||
54 | -- | 199 | -- |
55 | 2.13.6 | 200 | 2.13.6 |
56 | 201 | ||
57 | 202 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This reverts commit 4da97120d51a4383aa96d741a2b837f8c4bbcd0b. | 3 | Looks like we forgot to announce the deprecation of these options in |
4 | the corresponding chapter of the qemu-doc text, so let's do that now. | ||
4 | 5 | ||
5 | blk_aio_flush() now handles the blk->root == NULL case, so we no longer | 6 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
6 | need this workaround. | 7 | Reviewed-by: John Snow <jsnow@redhat.com> |
7 | 8 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | |
8 | Cc: John Snow <jsnow@redhat.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 10 | --- |
13 | hw/ide/core.c | 10 +--------- | 11 | qemu-doc.texi | 15 +++++++++++++++ |
14 | 1 file changed, 1 insertion(+), 9 deletions(-) | 12 | 1 file changed, 15 insertions(+) |
15 | 13 | ||
16 | diff --git a/hw/ide/core.c b/hw/ide/core.c | 14 | diff --git a/qemu-doc.texi b/qemu-doc.texi |
17 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/ide/core.c | 16 | --- a/qemu-doc.texi |
19 | +++ b/hw/ide/core.c | 17 | +++ b/qemu-doc.texi |
20 | @@ -XXX,XX +XXX,XX @@ static void ide_flush_cache(IDEState *s) | 18 | @@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU. |
21 | s->status |= BUSY_STAT; | 19 | The ``-drive if=scsi'' argument is replaced by the the |
22 | ide_set_retry(s); | 20 | ``-device BUS-TYPE'' argument combined with ``-drive if=none''. |
23 | block_acct_start(blk_get_stats(s->blk), &s->acct, 0, BLOCK_ACCT_FLUSH); | 21 | |
24 | - | 22 | +@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0) |
25 | - if (blk_bs(s->blk)) { | 23 | + |
26 | - s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s); | 24 | +The drive geometry arguments are replaced by the the geometry arguments |
27 | - } else { | 25 | +that can be specified with the ``-device'' parameter. |
28 | - /* XXX blk_aio_flush() crashes when blk_bs(blk) is NULL, remove this | 26 | + |
29 | - * temporary workaround when blk_aio_*() functions handle NULL blk_bs. | 27 | +@subsection -drive serial=... (since 2.10.0) |
30 | - */ | 28 | + |
31 | - ide_flush_cb(s, 0); | 29 | +The drive serial argument is replaced by the the serial argument |
32 | - } | 30 | +that can be specified with the ``-device'' parameter. |
33 | + s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s); | 31 | + |
34 | } | 32 | +@subsection -drive addr=... (since 2.10.0) |
35 | 33 | + | |
36 | static void ide_cfata_metadata_inquiry(IDEState *s) | 34 | +The drive addr argument is replaced by the the addr argument |
35 | +that can be specified with the ``-device'' parameter. | ||
36 | + | ||
37 | @subsection -net dump (since 2.10.0) | ||
38 | |||
39 | The ``--net dump'' argument is now replaced with the | ||
37 | -- | 40 | -- |
38 | 2.13.6 | 41 | 2.13.6 |
39 | 42 | ||
40 | 43 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 3 | Signed-off-by: Fam Zheng <famz@redhat.com> |
4 | byte-based. Update the generic helpers, and all passthrough clients | ||
5 | (blkdebug, commit, mirror, throttle) accordingly. | ||
6 | |||
7 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
9 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 5 | --- |
12 | include/block/block_int.h | 28 ++++++++++++++++------------ | 6 | include/block/block_int.h | 1 - |
13 | block/blkdebug.c | 20 +++++++++++--------- | 7 | block/io.c | 18 ------------------ |
14 | block/commit.c | 2 +- | 8 | 2 files changed, 19 deletions(-) |
15 | block/io.c | 36 ++++++++++++++++++++---------------- | ||
16 | block/mirror.c | 2 +- | ||
17 | block/throttle.c | 2 +- | ||
18 | 6 files changed, 50 insertions(+), 40 deletions(-) | ||
19 | 9 | ||
20 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 10 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
21 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/include/block/block_int.h | 12 | --- a/include/block/block_int.h |
23 | +++ b/include/block/block_int.h | 13 | +++ b/include/block/block_int.h |
24 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | 14 | @@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk); |
25 | uint64_t *nperm, uint64_t *nshared); | 15 | bool blk_dev_is_medium_locked(BlockBackend *blk); |
26 | 16 | ||
27 | /* | 17 | void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); |
28 | - * Default implementation for drivers to pass bdrv_co_get_block_status() to | 18 | -bool bdrv_requests_pending(BlockDriverState *bs); |
29 | + * Default implementation for drivers to pass bdrv_co_block_status() to | 19 | |
30 | * their file. | 20 | void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); |
31 | */ | 21 | void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); |
32 | -int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs, | ||
33 | - int64_t sector_num, | ||
34 | - int nb_sectors, | ||
35 | - int *pnum, | ||
36 | - BlockDriverState **file); | ||
37 | +int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, | ||
38 | + bool want_zero, | ||
39 | + int64_t offset, | ||
40 | + int64_t bytes, | ||
41 | + int64_t *pnum, | ||
42 | + int64_t *map, | ||
43 | + BlockDriverState **file); | ||
44 | /* | ||
45 | - * Default implementation for drivers to pass bdrv_co_get_block_status() to | ||
46 | + * Default implementation for drivers to pass bdrv_co_block_status() to | ||
47 | * their backing file. | ||
48 | */ | ||
49 | -int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, | ||
50 | - int64_t sector_num, | ||
51 | - int nb_sectors, | ||
52 | - int *pnum, | ||
53 | - BlockDriverState **file); | ||
54 | +int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs, | ||
55 | + bool want_zero, | ||
56 | + int64_t offset, | ||
57 | + int64_t bytes, | ||
58 | + int64_t *pnum, | ||
59 | + int64_t *map, | ||
60 | + BlockDriverState **file); | ||
61 | const char *bdrv_get_parent_name(const BlockDriverState *bs); | ||
62 | void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); | ||
63 | bool blk_dev_has_removable_media(BlockBackend *blk); | ||
64 | diff --git a/block/blkdebug.c b/block/blkdebug.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/block/blkdebug.c | ||
67 | +++ b/block/blkdebug.c | ||
68 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, | ||
69 | return bdrv_co_pdiscard(bs->file->bs, offset, bytes); | ||
70 | } | ||
71 | |||
72 | -static int64_t coroutine_fn blkdebug_co_get_block_status( | ||
73 | - BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, | ||
74 | - BlockDriverState **file) | ||
75 | +static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs, | ||
76 | + bool want_zero, | ||
77 | + int64_t offset, | ||
78 | + int64_t bytes, | ||
79 | + int64_t *pnum, | ||
80 | + int64_t *map, | ||
81 | + BlockDriverState **file) | ||
82 | { | ||
83 | - assert(QEMU_IS_ALIGNED(sector_num | nb_sectors, | ||
84 | - DIV_ROUND_UP(bs->bl.request_alignment, | ||
85 | - BDRV_SECTOR_SIZE))); | ||
86 | - return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors, | ||
87 | - pnum, file); | ||
88 | + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); | ||
89 | + return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes, | ||
90 | + pnum, map, file); | ||
91 | } | ||
92 | |||
93 | static void blkdebug_close(BlockDriverState *bs) | ||
94 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_blkdebug = { | ||
95 | .bdrv_co_flush_to_disk = blkdebug_co_flush, | ||
96 | .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, | ||
97 | .bdrv_co_pdiscard = blkdebug_co_pdiscard, | ||
98 | - .bdrv_co_get_block_status = blkdebug_co_get_block_status, | ||
99 | + .bdrv_co_block_status = blkdebug_co_block_status, | ||
100 | |||
101 | .bdrv_debug_event = blkdebug_debug_event, | ||
102 | .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, | ||
103 | diff --git a/block/commit.c b/block/commit.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/block/commit.c | ||
106 | +++ b/block/commit.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, | ||
108 | static BlockDriver bdrv_commit_top = { | ||
109 | .format_name = "commit_top", | ||
110 | .bdrv_co_preadv = bdrv_commit_top_preadv, | ||
111 | - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing, | ||
112 | + .bdrv_co_block_status = bdrv_co_block_status_from_backing, | ||
113 | .bdrv_refresh_filename = bdrv_commit_top_refresh_filename, | ||
114 | .bdrv_close = bdrv_commit_top_close, | ||
115 | .bdrv_child_perm = bdrv_commit_top_child_perm, | ||
116 | diff --git a/block/io.c b/block/io.c | 22 | diff --git a/block/io.c b/block/io.c |
117 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
118 | --- a/block/io.c | 24 | --- a/block/io.c |
119 | +++ b/block/io.c | 25 | +++ b/block/io.c |
120 | @@ -XXX,XX +XXX,XX @@ typedef struct BdrvCoBlockStatusData { | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) |
121 | bool done; | 27 | assert(old >= 1); |
122 | } BdrvCoBlockStatusData; | ||
123 | |||
124 | -int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs, | ||
125 | - int64_t sector_num, | ||
126 | - int nb_sectors, | ||
127 | - int *pnum, | ||
128 | - BlockDriverState **file) | ||
129 | +int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, | ||
130 | + bool want_zero, | ||
131 | + int64_t offset, | ||
132 | + int64_t bytes, | ||
133 | + int64_t *pnum, | ||
134 | + int64_t *map, | ||
135 | + BlockDriverState **file) | ||
136 | { | ||
137 | assert(bs->file && bs->file->bs); | ||
138 | - *pnum = nb_sectors; | ||
139 | + *pnum = bytes; | ||
140 | + *map = offset; | ||
141 | *file = bs->file->bs; | ||
142 | - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | | ||
143 | - (sector_num << BDRV_SECTOR_BITS); | ||
144 | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; | ||
145 | } | 28 | } |
146 | 29 | ||
147 | -int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, | 30 | -/* Check if any requests are in-flight (including throttled requests) */ |
148 | - int64_t sector_num, | 31 | -bool bdrv_requests_pending(BlockDriverState *bs) |
149 | - int nb_sectors, | 32 | -{ |
150 | - int *pnum, | 33 | - BdrvChild *child; |
151 | - BlockDriverState **file) | 34 | - |
152 | +int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs, | 35 | - if (atomic_read(&bs->in_flight)) { |
153 | + bool want_zero, | 36 | - return true; |
154 | + int64_t offset, | 37 | - } |
155 | + int64_t bytes, | 38 | - |
156 | + int64_t *pnum, | 39 | - QLIST_FOREACH(child, &bs->children, next) { |
157 | + int64_t *map, | 40 | - if (bdrv_requests_pending(child->bs)) { |
158 | + BlockDriverState **file) | 41 | - return true; |
159 | { | 42 | - } |
160 | assert(bs->backing && bs->backing->bs); | 43 | - } |
161 | - *pnum = nb_sectors; | 44 | - |
162 | + *pnum = bytes; | 45 | - return false; |
163 | + *map = offset; | 46 | -} |
164 | *file = bs->backing->bs; | 47 | - |
165 | - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | | 48 | typedef struct { |
166 | - (sector_num << BDRV_SECTOR_BITS); | 49 | Coroutine *co; |
167 | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; | 50 | BlockDriverState *bs; |
168 | } | ||
169 | |||
170 | /* | ||
171 | diff --git a/block/mirror.c b/block/mirror.c | ||
172 | index XXXXXXX..XXXXXXX 100644 | ||
173 | --- a/block/mirror.c | ||
174 | +++ b/block/mirror.c | ||
175 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_mirror_top = { | ||
176 | .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, | ||
177 | .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, | ||
178 | .bdrv_co_flush = bdrv_mirror_top_flush, | ||
179 | - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing, | ||
180 | + .bdrv_co_block_status = bdrv_co_block_status_from_backing, | ||
181 | .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, | ||
182 | .bdrv_close = bdrv_mirror_top_close, | ||
183 | .bdrv_child_perm = bdrv_mirror_top_child_perm, | ||
184 | diff --git a/block/throttle.c b/block/throttle.c | ||
185 | index XXXXXXX..XXXXXXX 100644 | ||
186 | --- a/block/throttle.c | ||
187 | +++ b/block/throttle.c | ||
188 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_throttle = { | ||
189 | .bdrv_reopen_prepare = throttle_reopen_prepare, | ||
190 | .bdrv_reopen_commit = throttle_reopen_commit, | ||
191 | .bdrv_reopen_abort = throttle_reopen_abort, | ||
192 | - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_file, | ||
193 | + .bdrv_co_block_status = bdrv_co_block_status_from_file, | ||
194 | |||
195 | .bdrv_co_drain_begin = throttle_co_drain_begin, | ||
196 | .bdrv_co_drain_end = throttle_co_drain_end, | ||
197 | -- | 51 | -- |
198 | 2.13.6 | 52 | 2.13.6 |
199 | 53 | ||
200 | 54 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
---|---|---|---|
2 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
3 | --- | ||
4 | block/io.c | 6 ++++++ | ||
5 | 1 file changed, 6 insertions(+) | ||
2 | 6 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Now that all drivers have been updated to provide the | ||
5 | byte-based .bdrv_co_block_status(), we can delete the sector-based | ||
6 | interface. | ||
7 | |||
8 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | --- | ||
13 | include/block/block_int.h | 3 --- | ||
14 | block/io.c | 50 ++++++++++------------------------------------- | ||
15 | 2 files changed, 10 insertions(+), 43 deletions(-) | ||
16 | |||
17 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/block/block_int.h | ||
20 | +++ b/include/block/block_int.h | ||
21 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
22 | * as well as non-NULL pnum, map, and file; in turn, the driver | ||
23 | * must return an error or set pnum to an aligned non-zero value. | ||
24 | */ | ||
25 | - int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, | ||
26 | - int64_t sector_num, int nb_sectors, int *pnum, | ||
27 | - BlockDriverState **file); | ||
28 | int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, | ||
29 | bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, | ||
30 | int64_t *map, BlockDriverState **file); | ||
31 | diff --git a/block/io.c b/block/io.c | 7 | diff --git a/block/io.c b/block/io.c |
32 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/block/io.c | 9 | --- a/block/io.c |
34 | +++ b/block/io.c | 10 | +++ b/block/io.c |
35 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, | 11 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
36 | 12 | BdrvNextIterator it; | |
37 | /* Must be non-NULL or bdrv_getlength() would have failed */ | 13 | GSList *aio_ctxs = NULL, *ctx; |
38 | assert(bs->drv); | 14 | |
39 | - if (!bs->drv->bdrv_co_get_block_status && !bs->drv->bdrv_co_block_status) { | 15 | + /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread |
40 | + if (!bs->drv->bdrv_co_block_status) { | 16 | + * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on |
41 | *pnum = bytes; | 17 | + * nodes in several different AioContexts, so make sure we're in the main |
42 | ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; | 18 | + * context. */ |
43 | if (offset + bytes == total_size) { | 19 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); |
44 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, | 20 | + |
45 | 21 | block_job_pause_all(); | |
46 | /* Round out to request_alignment boundaries */ | 22 | |
47 | align = bs->bl.request_alignment; | 23 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
48 | - if (bs->drv->bdrv_co_get_block_status && align < BDRV_SECTOR_SIZE) { | ||
49 | - align = BDRV_SECTOR_SIZE; | ||
50 | - } | ||
51 | aligned_offset = QEMU_ALIGN_DOWN(offset, align); | ||
52 | aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; | ||
53 | |||
54 | - if (bs->drv->bdrv_co_get_block_status) { | ||
55 | - int count; /* sectors */ | ||
56 | - int64_t longret; | ||
57 | - | ||
58 | - assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes, | ||
59 | - BDRV_SECTOR_SIZE)); | ||
60 | - /* | ||
61 | - * The contract allows us to return pnum smaller than bytes, even | ||
62 | - * if the next query would see the same status; we truncate the | ||
63 | - * request to avoid overflowing the driver's 32-bit interface. | ||
64 | - */ | ||
65 | - longret = bs->drv->bdrv_co_get_block_status( | ||
66 | - bs, aligned_offset >> BDRV_SECTOR_BITS, | ||
67 | - MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count, | ||
68 | - &local_file); | ||
69 | - if (longret < 0) { | ||
70 | - assert(INT_MIN <= longret); | ||
71 | - ret = longret; | ||
72 | - goto out; | ||
73 | - } | ||
74 | - if (longret & BDRV_BLOCK_OFFSET_VALID) { | ||
75 | - local_map = longret & BDRV_BLOCK_OFFSET_MASK; | ||
76 | - } | ||
77 | - ret = longret & ~BDRV_BLOCK_OFFSET_MASK; | ||
78 | - *pnum = count * BDRV_SECTOR_SIZE; | ||
79 | - } else { | ||
80 | - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, | ||
81 | - aligned_bytes, pnum, &local_map, | ||
82 | - &local_file); | ||
83 | - if (ret < 0) { | ||
84 | - *pnum = 0; | ||
85 | - goto out; | ||
86 | - } | ||
87 | - assert(*pnum); /* The block driver must make progress */ | ||
88 | + ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, | ||
89 | + aligned_bytes, pnum, &local_map, | ||
90 | + &local_file); | ||
91 | + if (ret < 0) { | ||
92 | + *pnum = 0; | ||
93 | + goto out; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | - * The driver's result must be a multiple of request_alignment. | ||
98 | + * The driver's result must be a non-zero multiple of request_alignment. | ||
99 | * Clamp pnum and adjust map to original request. | ||
100 | */ | ||
101 | - assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset); | ||
102 | + assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) && | ||
103 | + align > offset - aligned_offset); | ||
104 | *pnum -= offset - aligned_offset; | ||
105 | if (*pnum > bytes) { | ||
106 | *pnum = bytes; | ||
107 | -- | 24 | -- |
108 | 2.13.6 | 25 | 2.13.6 |
109 | 26 | ||
110 | 27 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively |
---|---|---|---|
2 | and also doesn't notify other parent nodes of children, which both means | ||
3 | that the child nodes are not actually drained, and bdrv_drained_begin() | ||
4 | is providing useful functionality only on a single node. | ||
2 | 5 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 6 | To keep things consistent, we also shouldn't call the block driver |
4 | byte-based. Update the vmdk driver accordingly. Drop the | 7 | callbacks recursively. |
5 | now-unused vmdk_find_index_in_cluster(). | ||
6 | 8 | ||
7 | Also, fix a pre-existing bug: if find_extent() fails (unlikely, | 9 | A proper recursive drain version that provides an actually working |
8 | since the block layer did a bounds check), then we must return a | 10 | drained section for child nodes will be introduced later. |
9 | failure, rather than 0. | ||
10 | 11 | ||
11 | Signed-off-by: Eric Blake <eblake@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
13 | Reviewed-by: Fam Zheng <famz@redhat.com> | 13 | Reviewed-by: Fam Zheng <famz@redhat.com> |
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
15 | --- | 14 | --- |
16 | block/vmdk.c | 38 ++++++++++++++------------------------ | 15 | block/io.c | 16 +++++++++------- |
17 | 1 file changed, 14 insertions(+), 24 deletions(-) | 16 | 1 file changed, 9 insertions(+), 7 deletions(-) |
18 | 17 | ||
19 | diff --git a/block/vmdk.c b/block/vmdk.c | 18 | diff --git a/block/io.c b/block/io.c |
20 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/vmdk.c | 20 | --- a/block/io.c |
22 | +++ b/block/vmdk.c | 21 | +++ b/block/io.c |
23 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, | 22 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
24 | return extent_relative_offset % cluster_size; | ||
25 | } | 23 | } |
26 | 24 | ||
27 | -static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, | 25 | /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ |
28 | - int64_t sector_num) | 26 | -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
29 | -{ | 27 | +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive) |
30 | - uint64_t offset; | ||
31 | - offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE); | ||
32 | - return offset / BDRV_SECTOR_SIZE; | ||
33 | -} | ||
34 | - | ||
35 | -static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, | ||
36 | - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) | ||
37 | +static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs, | ||
38 | + bool want_zero, | ||
39 | + int64_t offset, int64_t bytes, | ||
40 | + int64_t *pnum, int64_t *map, | ||
41 | + BlockDriverState **file) | ||
42 | { | 28 | { |
43 | BDRVVmdkState *s = bs->opaque; | 29 | BdrvChild *child, *tmp; |
44 | int64_t index_in_cluster, n, ret; | 30 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; |
45 | - uint64_t offset; | 31 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
46 | + uint64_t cluster_offset; | 32 | bdrv_coroutine_enter(bs, data.co); |
47 | VmdkExtent *extent; | 33 | BDRV_POLL_WHILE(bs, !data.done); |
48 | 34 | ||
49 | - extent = find_extent(s, sector_num, NULL); | 35 | - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { |
50 | + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL); | 36 | - bdrv_drain_invoke(child->bs, begin); |
51 | if (!extent) { | 37 | + if (recursive) { |
52 | - return 0; | 38 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { |
53 | + return -EIO; | 39 | + bdrv_drain_invoke(child->bs, begin, true); |
40 | + } | ||
54 | } | 41 | } |
55 | qemu_co_mutex_lock(&s->lock); | 42 | } |
56 | - ret = get_cluster_offset(bs, extent, NULL, | 43 | |
57 | - sector_num * 512, false, &offset, | 44 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
58 | + ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset, | 45 | bdrv_parent_drained_begin(bs); |
59 | 0, 0); | ||
60 | qemu_co_mutex_unlock(&s->lock); | ||
61 | |||
62 | - index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); | ||
63 | + index_in_cluster = vmdk_find_offset_in_cluster(extent, offset); | ||
64 | switch (ret) { | ||
65 | case VMDK_ERROR: | ||
66 | ret = -EIO; | ||
67 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, | ||
68 | ret = BDRV_BLOCK_DATA; | ||
69 | if (!extent->compressed) { | ||
70 | ret |= BDRV_BLOCK_OFFSET_VALID; | ||
71 | - ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS)) | ||
72 | - & BDRV_BLOCK_OFFSET_MASK; | ||
73 | + *map = cluster_offset + index_in_cluster; | ||
74 | } | ||
75 | *file = extent->file->bs; | ||
76 | break; | ||
77 | } | 46 | } |
78 | 47 | ||
79 | - n = extent->cluster_sectors - index_in_cluster; | 48 | - bdrv_drain_invoke(bs, true); |
80 | - if (n > nb_sectors) { | 49 | + bdrv_drain_invoke(bs, true, false); |
81 | - n = nb_sectors; | 50 | bdrv_drain_recurse(bs); |
82 | - } | ||
83 | - *pnum = n; | ||
84 | + n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster; | ||
85 | + *pnum = MIN(n, bytes); | ||
86 | return ret; | ||
87 | } | 51 | } |
88 | 52 | ||
89 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vmdk = { | 53 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
90 | .bdrv_close = vmdk_close, | 54 | } |
91 | .bdrv_create = vmdk_create, | 55 | |
92 | .bdrv_co_flush_to_disk = vmdk_co_flush, | 56 | /* Re-enable things in child-to-parent order */ |
93 | - .bdrv_co_get_block_status = vmdk_co_get_block_status, | 57 | - bdrv_drain_invoke(bs, false); |
94 | + .bdrv_co_block_status = vmdk_co_block_status, | 58 | + bdrv_drain_invoke(bs, false, false); |
95 | .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, | 59 | bdrv_parent_drained_end(bs); |
96 | .bdrv_has_zero_init = vmdk_has_zero_init, | 60 | aio_enable_external(bdrv_get_aio_context(bs)); |
97 | .bdrv_get_specific_info = vmdk_get_specific_info, | 61 | } |
62 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
63 | aio_context_acquire(aio_context); | ||
64 | aio_disable_external(aio_context); | ||
65 | bdrv_parent_drained_begin(bs); | ||
66 | - bdrv_drain_invoke(bs, true); | ||
67 | + bdrv_drain_invoke(bs, true, true); | ||
68 | aio_context_release(aio_context); | ||
69 | |||
70 | if (!g_slist_find(aio_ctxs, aio_context)) { | ||
71 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
72 | |||
73 | /* Re-enable things in child-to-parent order */ | ||
74 | aio_context_acquire(aio_context); | ||
75 | - bdrv_drain_invoke(bs, false); | ||
76 | + bdrv_drain_invoke(bs, false, true); | ||
77 | bdrv_parent_drained_end(bs); | ||
78 | aio_enable_external(aio_context); | ||
79 | aio_context_release(aio_context); | ||
98 | -- | 80 | -- |
99 | 2.13.6 | 81 | 2.13.6 |
100 | 82 | ||
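A toy model of the recursive/non-recursive split that the bdrv_drain_invoke() change above introduces. The types and names below are invented for illustration and are not QEMU APIs; the point is only that the per-node callback always runs on the given node and the children are walked just when the caller asks for it.

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_CHILDREN 4

    typedef struct Node {
        const char *name;
        int drain_count;                  /* how often the "driver callback" ran */
        struct Node *children[MAX_CHILDREN];
        int nb_children;
    } Node;

    /* Stand-in for a driver's drain begin/end callback. */
    static void drain_cb(Node *n, bool begin)
    {
        n->drain_count += begin ? 1 : -1;
    }

    /* Mirrors the shape of a drain_invoke(node, begin, recursive) helper. */
    static void drain_invoke(Node *n, bool begin, bool recursive)
    {
        drain_cb(n, begin);
        if (recursive) {
            for (int i = 0; i < n->nb_children; i++) {
                drain_invoke(n->children[i], begin, recursive);
            }
        }
    }

    int main(void)
    {
        Node child = { .name = "child" };
        Node parent = { .name = "parent", .children = { &child }, .nb_children = 1 };

        drain_invoke(&parent, true, false);   /* single-node drain */
        printf("%s=%d %s=%d\n", parent.name, parent.drain_count,
               child.name, child.drain_count);    /* parent=1 child=0 */

        drain_invoke(&parent, true, true);    /* recursive (drain_all-like) drain */
        printf("%s=%d %s=%d\n", parent.name, parent.drain_count,
               child.name, child.drain_count);    /* parent=2 child=1 */
        return 0;
    }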
1 | From: Eric Blake <eblake@redhat.com> | 1 | The existing test is for bdrv_drain_all_begin/end() only. Generalise the |
---|---|---|---|
2 | test case so that it can be run for the other variants as well. At the | ||
3 | moment this is only bdrv_drain_begin/end(), but in a while, we'll add | ||
4 | another one. | ||
2 | 5 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 6 | Also, add a backing file to the test node to test whether the operations |
4 | byte-based. Update the vvfat driver accordingly. Note that we | 7 | work recursively. |
5 | can rely on the block driver having already clamped limits to our | ||
6 | block size, and simplify accordingly. | ||
7 | 8 | ||
8 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 10 | --- |
13 | block/vvfat.c | 16 +++++++--------- | 11 | tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++----- |
14 | 1 file changed, 7 insertions(+), 9 deletions(-) | 12 | 1 file changed, 62 insertions(+), 7 deletions(-) |
15 | 13 | ||
16 | diff --git a/block/vvfat.c b/block/vvfat.c | 14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
17 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/vvfat.c | 16 | --- a/tests/test-bdrv-drain.c |
19 | +++ b/block/vvfat.c | 17 | +++ b/tests/test-bdrv-drain.c |
20 | @@ -XXX,XX +XXX,XX @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | 18 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = { |
21 | return ret; | 19 | |
20 | .bdrv_co_drain_begin = bdrv_test_co_drain_begin, | ||
21 | .bdrv_co_drain_end = bdrv_test_co_drain_end, | ||
22 | + | ||
23 | + .bdrv_child_perm = bdrv_format_default_perms, | ||
24 | }; | ||
25 | |||
26 | static void aio_ret_cb(void *opaque, int ret) | ||
27 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) | ||
28 | *aio_ret = ret; | ||
22 | } | 29 | } |
23 | 30 | ||
24 | -static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, | 31 | -static void test_drv_cb_drain_all(void) |
25 | - int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) | 32 | +enum drain_type { |
26 | +static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, | 33 | + BDRV_DRAIN_ALL, |
27 | + bool want_zero, int64_t offset, | 34 | + BDRV_DRAIN, |
28 | + int64_t bytes, int64_t *n, | 35 | +}; |
29 | + int64_t *map, | 36 | + |
30 | + BlockDriverState **file) | 37 | +static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
38 | +{ | ||
39 | + switch (drain_type) { | ||
40 | + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; | ||
41 | + case BDRV_DRAIN: bdrv_drained_begin(bs); break; | ||
42 | + default: g_assert_not_reached(); | ||
43 | + } | ||
44 | +} | ||
45 | + | ||
46 | +static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) | ||
47 | +{ | ||
48 | + switch (drain_type) { | ||
49 | + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; | ||
50 | + case BDRV_DRAIN: bdrv_drained_end(bs); break; | ||
51 | + default: g_assert_not_reached(); | ||
52 | + } | ||
53 | +} | ||
54 | + | ||
55 | +static void test_drv_cb_common(enum drain_type drain_type, bool recursive) | ||
31 | { | 56 | { |
32 | - *n = bs->total_sectors - sector_num; | 57 | BlockBackend *blk; |
33 | - if (*n > nb_sectors) { | 58 | - BlockDriverState *bs; |
34 | - *n = nb_sectors; | 59 | - BDRVTestState *s; |
35 | - } else if (*n < 0) { | 60 | + BlockDriverState *bs, *backing; |
36 | - return 0; | 61 | + BDRVTestState *s, *backing_s; |
37 | - } | 62 | BlockAIOCB *acb; |
38 | + *n = bytes; | 63 | int aio_ret; |
39 | return BDRV_BLOCK_DATA; | 64 | |
65 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) | ||
66 | s = bs->opaque; | ||
67 | blk_insert_bs(blk, bs, &error_abort); | ||
68 | |||
69 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
70 | + backing_s = backing->opaque; | ||
71 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
72 | + | ||
73 | /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ | ||
74 | g_assert_cmpint(s->drain_count, ==, 0); | ||
75 | - bdrv_drain_all_begin(); | ||
76 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
77 | + | ||
78 | + do_drain_begin(drain_type, bs); | ||
79 | + | ||
80 | g_assert_cmpint(s->drain_count, ==, 1); | ||
81 | - bdrv_drain_all_end(); | ||
82 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); | ||
83 | + | ||
84 | + do_drain_end(drain_type, bs); | ||
85 | + | ||
86 | g_assert_cmpint(s->drain_count, ==, 0); | ||
87 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
88 | |||
89 | /* Now do the same while a request is pending */ | ||
90 | aio_ret = -EINPROGRESS; | ||
91 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) | ||
92 | g_assert_cmpint(aio_ret, ==, -EINPROGRESS); | ||
93 | |||
94 | g_assert_cmpint(s->drain_count, ==, 0); | ||
95 | - bdrv_drain_all_begin(); | ||
96 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
97 | + | ||
98 | + do_drain_begin(drain_type, bs); | ||
99 | + | ||
100 | g_assert_cmpint(aio_ret, ==, 0); | ||
101 | g_assert_cmpint(s->drain_count, ==, 1); | ||
102 | - bdrv_drain_all_end(); | ||
103 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); | ||
104 | + | ||
105 | + do_drain_end(drain_type, bs); | ||
106 | + | ||
107 | g_assert_cmpint(s->drain_count, ==, 0); | ||
108 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
109 | |||
110 | + bdrv_unref(backing); | ||
111 | bdrv_unref(bs); | ||
112 | blk_unref(blk); | ||
40 | } | 113 | } |
41 | 114 | ||
42 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vvfat = { | 115 | +static void test_drv_cb_drain_all(void) |
43 | 116 | +{ | |
44 | .bdrv_co_preadv = vvfat_co_preadv, | 117 | + test_drv_cb_common(BDRV_DRAIN_ALL, true); |
45 | .bdrv_co_pwritev = vvfat_co_pwritev, | 118 | +} |
46 | - .bdrv_co_get_block_status = vvfat_co_get_block_status, | 119 | + |
47 | + .bdrv_co_block_status = vvfat_co_block_status, | 120 | +static void test_drv_cb_drain(void) |
48 | }; | 121 | +{ |
49 | 122 | + test_drv_cb_common(BDRV_DRAIN, false); | |
50 | static void bdrv_vvfat_init(void) | 123 | +} |
124 | + | ||
125 | int main(int argc, char **argv) | ||
126 | { | ||
127 | bdrv_init(); | ||
128 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
129 | g_test_init(&argc, &argv, NULL); | ||
130 | |||
131 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | ||
132 | + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); | ||
133 | |||
134 | return g_test_run(); | ||
135 | } | ||
51 | -- | 136 | -- |
52 | 2.13.6 | 137 | 2.13.6 |
53 | 138 | ||
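The reworked test above uses a small pattern worth isolating: hide each drain variant behind a dispatcher and run one common body per variant, checking that the backing node is reached only by the recursive variant. Below is a self-contained sketch of that pattern using plain assert() instead of the glib harness; the types are stand-ins, not the test's real ones.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stand-in for the nodes used by the test. */
    typedef struct {
        int drain_count;
    } TestNode;

    enum drain_type { DRAIN_ALL, DRAIN_ONE };

    static void drain_begin(enum drain_type t, TestNode *bs, TestNode *backing)
    {
        bs->drain_count++;
        if (t == DRAIN_ALL) {            /* the recursive/"all" variant */
            backing->drain_count++;
        }
    }

    static void drain_end(enum drain_type t, TestNode *bs, TestNode *backing)
    {
        bs->drain_count--;
        if (t == DRAIN_ALL) {
            backing->drain_count--;
        }
    }

    /* One common body per variant, in the spirit of test_drv_cb_common(). */
    static void test_common(enum drain_type t, bool recursive)
    {
        TestNode bs = {0}, backing = {0};

        drain_begin(t, &bs, &backing);
        assert(bs.drain_count == 1);
        assert(backing.drain_count == !!recursive);

        drain_end(t, &bs, &backing);
        assert(bs.drain_count == 0);
        assert(backing.drain_count == 0);
    }

    int main(void)
    {
        test_common(DRAIN_ALL, true);
        test_common(DRAIN_ONE, false);
        printf("both variants behaved as expected\n");
        return 0;
    }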
1 | From: Anton Nefedov <anton.nefedov@virtuozzo.com> | 1 | This is currently only working correctly for bdrv_drain(), not for |
---|---|---|---|
2 | bdrv_drain_all(). Leave a comment for the drain_all case; we'll address |
3 | it later. | ||
2 | 4 | ||
3 | This new test case only makes sense for qcow2 while iotest 033 is generic; | ||
4 | however, it matches the test purpose perfectly, and 033 already contains the | ||
5 | do_test() tricks for passing the alignment, which wouldn't look nice | ||
6 | duplicated in other tests or moved to the common code. | ||
7 | |||
8 | Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 6 | --- |
11 | tests/qemu-iotests/033 | 29 +++++++++++++++++++++++++++++ | 7 | tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ |
12 | tests/qemu-iotests/033.out | 13 +++++++++++++ | 8 | 1 file changed, 45 insertions(+) |
13 | 2 files changed, 42 insertions(+) | ||
14 | 9 | ||
15 | diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033 | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
16 | index XXXXXXX..XXXXXXX 100755 | 11 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tests/qemu-iotests/033 | 12 | --- a/tests/test-bdrv-drain.c |
18 | +++ b/tests/qemu-iotests/033 | 13 | +++ b/tests/test-bdrv-drain.c |
19 | @@ -XXX,XX +XXX,XX @@ do_test() | 14 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) |
20 | } | $QEMU_IO $IO_EXTRA_ARGS | 15 | test_drv_cb_common(BDRV_DRAIN, false); |
21 | } | 16 | } |
22 | 17 | ||
23 | +echo | 18 | +static void test_quiesce_common(enum drain_type drain_type, bool recursive) |
24 | +echo "=== Test aligned and misaligned write zeroes operations ===" | 19 | +{ |
20 | + BlockBackend *blk; | ||
21 | + BlockDriverState *bs, *backing; | ||
25 | + | 22 | + |
26 | for write_zero_cmd in "write -z" "aio_write -z"; do | 23 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
27 | for align in 512 4k; do | 24 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, |
28 | echo | 25 | + &error_abort); |
29 | @@ -XXX,XX +XXX,XX @@ for align in 512 4k; do | 26 | + blk_insert_bs(blk, bs, &error_abort); |
30 | done | ||
31 | done | ||
32 | |||
33 | + | 27 | + |
34 | +# Trigger truncate that would shrink qcow2 L1 table, which is done by | 28 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
35 | +# clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes() | 29 | + bdrv_set_backing_hd(bs, backing, &error_abort); |
36 | + | 30 | + |
37 | +echo | 31 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); |
38 | +echo "=== Test misaligned write zeroes via truncate ===" | 32 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); |
39 | +echo | ||
40 | + | 33 | + |
41 | +# any size will do, but the smaller the size the smaller the required image | 34 | + do_drain_begin(drain_type, bs); |
42 | +CLUSTER_SIZE=$((4 * 1024)) | ||
43 | +L2_COVERAGE=$(($CLUSTER_SIZE * $CLUSTER_SIZE / 8)) | ||
44 | +_make_test_img $(($L2_COVERAGE * 2)) | ||
45 | + | 35 | + |
46 | +do_test 512 "write -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io | 36 | + g_assert_cmpint(bs->quiesce_counter, ==, 1); |
47 | +# next L2 table | 37 | + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); |
48 | +do_test 512 "write -P 1 $L2_COVERAGE 0x200" "$TEST_IMG" | _filter_qemu_io | ||
49 | + | 38 | + |
50 | +# only interested in qcow2 here; also other formats might respond with | 39 | + do_drain_end(drain_type, bs); |
51 | +# "not supported" error message | ||
52 | +if [ $IMGFMT = "qcow2" ]; then | ||
53 | + do_test 512 "truncate $L2_COVERAGE" "$TEST_IMG" | _filter_qemu_io | ||
54 | +fi | ||
55 | + | 40 | + |
56 | +do_test 512 "read -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io | 41 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); |
42 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
57 | + | 43 | + |
58 | # success, all done | 44 | + bdrv_unref(backing); |
59 | +echo | 45 | + bdrv_unref(bs); |
60 | echo "*** done" | 46 | + blk_unref(blk); |
61 | rm -f $seq.full | 47 | +} |
62 | status=0 | ||
63 | diff --git a/tests/qemu-iotests/033.out b/tests/qemu-iotests/033.out | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/tests/qemu-iotests/033.out | ||
66 | +++ b/tests/qemu-iotests/033.out | ||
67 | @@ -XXX,XX +XXX,XX @@ | ||
68 | QA output created by 033 | ||
69 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 | ||
70 | |||
71 | +=== Test aligned and misaligned write zeroes operations === | ||
72 | + | 48 | + |
73 | == preparing image == | 49 | +static void test_quiesce_drain_all(void) |
74 | wrote 1024/1024 bytes at offset 512 | 50 | +{ |
75 | 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 51 | + // XXX drain_all doesn't quiesce |
76 | @@ -XXX,XX +XXX,XX @@ read 512/512 bytes at offset 512 | 52 | + //test_quiesce_common(BDRV_DRAIN_ALL, true); |
77 | read 3072/3072 bytes at offset 1024 | 53 | +} |
78 | 3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
79 | |||
80 | + | 54 | + |
81 | +=== Test misaligned write zeroes via truncate === | 55 | +static void test_quiesce_drain(void) |
56 | +{ | ||
57 | + test_quiesce_common(BDRV_DRAIN, false); | ||
58 | +} | ||
82 | + | 59 | + |
83 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 | 60 | int main(int argc, char **argv) |
84 | +wrote 512/512 bytes at offset 0 | 61 | { |
85 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 62 | bdrv_init(); |
86 | +wrote 512/512 bytes at offset 2097152 | 63 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
87 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 64 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
88 | +read 512/512 bytes at offset 0 | 65 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); |
89 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 66 | |
67 | + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
68 | + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
90 | + | 69 | + |
91 | *** done | 70 | return g_test_run(); |
71 | } | ||
92 | -- | 72 | -- |
93 | 2.13.6 | 73 | 2.13.6 |
94 | 74 | ||
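The size arithmetic behind the new 033 case above, spelled out: with 8-byte table entries, one qcow2 L2 table maps cluster_size/8 clusters, i.e. cluster_size squared over 8 bytes of guest data, and dropping an L1 entry on truncate is an 8-byte write-zeroes request well below sector alignment. A short C sketch of those numbers, with the qcow2 layout simplified to the parts the test relies on:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t entry_size   = 8;          /* qcow2 L1/L2 entries are 8 bytes wide */
        const uint64_t cluster_size = 4 * 1024;   /* CLUSTER_SIZE chosen by the test */

        /* One L2 table fills one cluster with 8-byte entries, each mapping a cluster. */
        uint64_t entries_per_l2 = cluster_size / entry_size;
        uint64_t l2_coverage    = entries_per_l2 * cluster_size;

        printf("one L2 table covers %" PRIu64 " bytes of guest data\n", l2_coverage);

        /* An image twice that size needs a second L1 entry; truncating back to
         * l2_coverage frees it, and the driver clears the freed entry with an
         * 8-byte write-zeroes request, far smaller than a 512-byte sector. */
        printf("the truncate path zeroes %" PRIu64 " bytes\n", entry_size);
        return 0;
    }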
1 | From: Eric Blake <eblake@redhat.com> | 1 | Block jobs already paused themselves when their main BlockBackend |
---|---|---|---|
2 | entered a drained section. This is not good enough: we also want to |
3 | pause a block job, and it must not submit new requests, if for example |
4 | the mirror target node is to be drained. |
2 | 5 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 6 | This implements .drained_begin/end callbacks in child_job in order to |
4 | byte-based. Update the gluster driver accordingly. | 7 | consider all block nodes related to the job, and removes the |
8 | BlockBackend callbacks, which are now unnecessary because the root of the |
9 | job's main BlockBackend is always referenced with a child_job, too. |
5 | 10 | ||
6 | In want_zero mode, we continue to report fine-grained hole | ||
7 | information (the caller wants as much mapping detail as possible); | ||
8 | but when not in that mode, the caller prefers larger *pnum and | ||
9 | merely cares about what offsets are allocated at this layer, rather | ||
10 | than where the holes live. Since holes still read as zeroes at | ||
11 | this layer (rather than deferring to a backing layer), we can take | ||
12 | the shortcut of skipping find_allocation(), and merely state that | ||
13 | all bytes are allocated. | ||
14 | |||
15 | We can also drop redundant bounds checks that are already | ||
16 | guaranteed by the block layer. | ||
17 | |||
18 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
19 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
20 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
22 | --- | 12 | --- |
23 | block/gluster.c | 70 ++++++++++++++++++++++++++++----------------------------- | 13 | blockjob.c | 22 +++++++++------------- |
24 | 1 file changed, 34 insertions(+), 36 deletions(-) | 14 | 1 file changed, 9 insertions(+), 13 deletions(-) |
25 | 15 | ||
26 | diff --git a/block/gluster.c b/block/gluster.c | 16 | diff --git a/blockjob.c b/blockjob.c |
27 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/block/gluster.c | 18 | --- a/blockjob.c |
29 | +++ b/block/gluster.c | 19 | +++ b/blockjob.c |
30 | @@ -XXX,XX +XXX,XX @@ exit: | 20 | @@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c) |
21 | job->id); | ||
31 | } | 22 | } |
32 | 23 | ||
33 | /* | 24 | -static const BdrvChildRole child_job = { |
34 | - * Returns the allocation status of the specified sectors. | 25 | - .get_parent_desc = child_job_get_parent_desc, |
35 | + * Returns the allocation status of the specified offset. | 26 | - .stay_at_node = true, |
36 | * | 27 | -}; |
37 | - * If 'sector_num' is beyond the end of the disk image the return value is 0 | 28 | - |
38 | - * and 'pnum' is set to 0. | 29 | -static void block_job_drained_begin(void *opaque) |
39 | + * The block layer guarantees 'offset' and 'bytes' are within bounds. | 30 | +static void child_job_drained_begin(BdrvChild *c) |
40 | * | ||
41 | - * 'pnum' is set to the number of sectors (including and immediately following | ||
42 | - * the specified sector) that are known to be in the same | ||
43 | + * 'pnum' is set to the number of bytes (including and immediately following | ||
44 | + * the specified offset) that are known to be in the same | ||
45 | * allocated/unallocated state. | ||
46 | * | ||
47 | - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes | ||
48 | - * beyond the end of the disk image it will be clamped. | ||
49 | + * 'bytes' is the max value 'pnum' should be set to. | ||
50 | * | ||
51 | - * (Based on raw_co_get_block_status() from file-posix.c.) | ||
52 | + * (Based on raw_co_block_status() from file-posix.c.) | ||
53 | */ | ||
54 | -static int64_t coroutine_fn qemu_gluster_co_get_block_status( | ||
55 | - BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, | ||
56 | - BlockDriverState **file) | ||
57 | +static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, | ||
58 | + bool want_zero, | ||
59 | + int64_t offset, | ||
60 | + int64_t bytes, | ||
61 | + int64_t *pnum, | ||
62 | + int64_t *map, | ||
63 | + BlockDriverState **file) | ||
64 | { | 31 | { |
65 | BDRVGlusterState *s = bs->opaque; | 32 | - BlockJob *job = opaque; |
66 | - off_t start, data = 0, hole = 0; | 33 | + BlockJob *job = c->opaque; |
67 | - int64_t total_size; | 34 | block_job_pause(job); |
68 | + off_t data = 0, hole = 0; | ||
69 | int ret = -EINVAL; | ||
70 | |||
71 | if (!s->fd) { | ||
72 | return ret; | ||
73 | } | ||
74 | |||
75 | - start = sector_num * BDRV_SECTOR_SIZE; | ||
76 | - total_size = bdrv_getlength(bs); | ||
77 | - if (total_size < 0) { | ||
78 | - return total_size; | ||
79 | - } else if (start >= total_size) { | ||
80 | - *pnum = 0; | ||
81 | - return 0; | ||
82 | - } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { | ||
83 | - nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); | ||
84 | + if (!want_zero) { | ||
85 | + *pnum = bytes; | ||
86 | + *map = offset; | ||
87 | + *file = bs; | ||
88 | + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | ||
89 | } | ||
90 | |||
91 | - ret = find_allocation(bs, start, &data, &hole); | ||
92 | + ret = find_allocation(bs, offset, &data, &hole); | ||
93 | if (ret == -ENXIO) { | ||
94 | /* Trailing hole */ | ||
95 | - *pnum = nb_sectors; | ||
96 | + *pnum = bytes; | ||
97 | ret = BDRV_BLOCK_ZERO; | ||
98 | } else if (ret < 0) { | ||
99 | /* No info available, so pretend there are no holes */ | ||
100 | - *pnum = nb_sectors; | ||
101 | + *pnum = bytes; | ||
102 | ret = BDRV_BLOCK_DATA; | ||
103 | - } else if (data == start) { | ||
104 | - /* On a data extent, compute sectors to the end of the extent, | ||
105 | + } else if (data == offset) { | ||
106 | + /* On a data extent, compute bytes to the end of the extent, | ||
107 | * possibly including a partial sector at EOF. */ | ||
108 | - *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); | ||
109 | + *pnum = MIN(bytes, hole - offset); | ||
110 | ret = BDRV_BLOCK_DATA; | ||
111 | } else { | ||
112 | - /* On a hole, compute sectors to the beginning of the next extent. */ | ||
113 | - assert(hole == start); | ||
114 | - *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); | ||
115 | + /* On a hole, compute bytes to the beginning of the next extent. */ | ||
116 | + assert(hole == offset); | ||
117 | + *pnum = MIN(bytes, data - offset); | ||
118 | ret = BDRV_BLOCK_ZERO; | ||
119 | } | ||
120 | |||
121 | + *map = offset; | ||
122 | *file = bs; | ||
123 | |||
124 | - return ret | BDRV_BLOCK_OFFSET_VALID | start; | ||
125 | + return ret | BDRV_BLOCK_OFFSET_VALID; | ||
126 | } | 35 | } |
127 | 36 | ||
128 | 37 | -static void block_job_drained_end(void *opaque) | |
129 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster = { | 38 | +static void child_job_drained_end(BdrvChild *c) |
130 | #ifdef CONFIG_GLUSTERFS_ZEROFILL | 39 | { |
131 | .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, | 40 | - BlockJob *job = opaque; |
132 | #endif | 41 | + BlockJob *job = c->opaque; |
133 | - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, | 42 | block_job_resume(job); |
134 | + .bdrv_co_block_status = qemu_gluster_co_block_status, | 43 | } |
135 | .create_opts = &qemu_gluster_create_opts, | 44 | |
45 | -static const BlockDevOps block_job_dev_ops = { | ||
46 | - .drained_begin = block_job_drained_begin, | ||
47 | - .drained_end = block_job_drained_end, | ||
48 | +static const BdrvChildRole child_job = { | ||
49 | + .get_parent_desc = child_job_get_parent_desc, | ||
50 | + .drained_begin = child_job_drained_begin, | ||
51 | + .drained_end = child_job_drained_end, | ||
52 | + .stay_at_node = true, | ||
136 | }; | 53 | }; |
137 | 54 | ||
138 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_tcp = { | 55 | void block_job_remove_all_bdrv(BlockJob *job) |
139 | #ifdef CONFIG_GLUSTERFS_ZEROFILL | 56 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, |
140 | .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, | 57 | block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); |
141 | #endif | 58 | bs->job = job; |
142 | - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, | 59 | |
143 | + .bdrv_co_block_status = qemu_gluster_co_block_status, | 60 | - blk_set_dev_ops(blk, &block_job_dev_ops, job); |
144 | .create_opts = &qemu_gluster_create_opts, | 61 | bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); |
145 | }; | 62 | |
146 | 63 | QLIST_INSERT_HEAD(&block_jobs, job, job_list); | |
147 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_unix = { | ||
148 | #ifdef CONFIG_GLUSTERFS_ZEROFILL | ||
149 | .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, | ||
150 | #endif | ||
151 | - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, | ||
152 | + .bdrv_co_block_status = qemu_gluster_co_block_status, | ||
153 | .create_opts = &qemu_gluster_create_opts, | ||
154 | }; | ||
155 | |||
156 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_rdma = { | ||
157 | #ifdef CONFIG_GLUSTERFS_ZEROFILL | ||
158 | .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, | ||
159 | #endif | ||
160 | - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, | ||
161 | + .bdrv_co_block_status = qemu_gluster_co_block_status, | ||
162 | .create_opts = &qemu_gluster_create_opts, | ||
163 | }; | ||
164 | |||
165 | -- | 64 | -- |
166 | 2.13.6 | 65 | 2.13.6 |
167 | 66 | ||
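The data/hole decision that the gluster conversion above keeps (and that the file-posix conversion later in the series shares) can be modelled on its own: given the next data offset and the next hole offset at or after the query point, report DATA or ZERO and how far that answer extends. The status values and helper below are invented for illustration; they are not QEMU's constants.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BLK_DATA 1
    #define BLK_ZERO 2

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /*
     * offset/bytes: what the caller asked about
     * data/hole:    next data and next hole boundary at or after offset,
     *               e.g. from lseek(SEEK_DATA)/lseek(SEEK_HOLE)
     * pnum:         out, how many bytes share the reported status
     */
    static int classify(int64_t offset, int64_t bytes,
                        int64_t data, int64_t hole, int64_t *pnum)
    {
        if (data == offset) {
            /* Sitting on data: the answer extends to the next hole. */
            *pnum = MIN(bytes, hole - offset);
            return BLK_DATA;
        }
        /* Otherwise we sit on a hole that extends to the next data. */
        *pnum = MIN(bytes, data - offset);
        return BLK_ZERO;
    }

    int main(void)
    {
        int64_t pnum;
        int st;

        /* Data starts at 0, the first hole starts at 65536. */
        st = classify(0, 1 << 20, 0, 65536, &pnum);
        printf("%s for %" PRId64 " bytes\n", st == BLK_DATA ? "DATA" : "ZERO", pnum);

        /* At 65536 the next data is at 131072, so this range is a hole. */
        st = classify(65536, 1 << 20, 131072, 65536, &pnum);
        printf("%s for %" PRId64 " bytes\n", st == BLK_DATA ? "DATA" : "ZERO", pnum);
        return 0;
    }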
1 | From: Eric Blake <eblake@redhat.com> | 1 | Block jobs must be paused if any of the involved nodes are drained. |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Update the qed driver accordingly, taking the opportunity | ||
5 | to inline qed_is_allocated_cb() into its lone caller (the callback | ||
6 | used to be important, until we switched qed to coroutines). There is | ||
7 | no intent to optimize based on the want_zero flag for this format. | ||
8 | |||
9 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
10 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 4 | --- |
14 | block/qed.c | 76 +++++++++++++++++++------------------------------------------ | 5 | tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ |
15 | 1 file changed, 24 insertions(+), 52 deletions(-) | 6 | 1 file changed, 121 insertions(+) |
16 | 7 | ||
17 | diff --git a/block/qed.c b/block/qed.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
18 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/qed.c | 10 | --- a/tests/test-bdrv-drain.c |
20 | +++ b/block/qed.c | 11 | +++ b/tests/test-bdrv-drain.c |
21 | @@ -XXX,XX +XXX,XX @@ finish: | 12 | @@ -XXX,XX +XXX,XX @@ |
22 | return ret; | 13 | |
14 | #include "qemu/osdep.h" | ||
15 | #include "block/block.h" | ||
16 | +#include "block/blockjob_int.h" | ||
17 | #include "sysemu/block-backend.h" | ||
18 | #include "qapi/error.h" | ||
19 | |||
20 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) | ||
21 | test_quiesce_common(BDRV_DRAIN, false); | ||
23 | } | 22 | } |
24 | 23 | ||
25 | -typedef struct { | 24 | + |
26 | - BlockDriverState *bs; | 25 | +typedef struct TestBlockJob { |
27 | - Coroutine *co; | 26 | + BlockJob common; |
28 | - uint64_t pos; | 27 | + bool should_complete; |
29 | - int64_t status; | 28 | +} TestBlockJob; |
30 | - int *pnum; | 29 | + |
31 | - BlockDriverState **file; | 30 | +static void test_job_completed(BlockJob *job, void *opaque) |
32 | -} QEDIsAllocatedCB; | 31 | +{ |
33 | - | 32 | + block_job_completed(job, 0); |
34 | -/* Called with table_lock held. */ | 33 | +} |
35 | -static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) | 34 | + |
36 | +static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs, | 35 | +static void coroutine_fn test_job_start(void *opaque) |
37 | + bool want_zero, | 36 | +{ |
38 | + int64_t pos, int64_t bytes, | 37 | + TestBlockJob *s = opaque; |
39 | + int64_t *pnum, int64_t *map, | 38 | + |
40 | + BlockDriverState **file) | 39 | + while (!s->should_complete) { |
41 | { | 40 | + block_job_sleep_ns(&s->common, 100000); |
42 | - QEDIsAllocatedCB *cb = opaque; | 41 | + } |
43 | - BDRVQEDState *s = cb->bs->opaque; | 42 | + |
44 | - *cb->pnum = len / BDRV_SECTOR_SIZE; | 43 | + block_job_defer_to_main_loop(&s->common, test_job_completed, NULL); |
45 | + BDRVQEDState *s = bs->opaque; | 44 | +} |
46 | + size_t len = MIN(bytes, SIZE_MAX); | 45 | + |
47 | + int status; | 46 | +static void test_job_complete(BlockJob *job, Error **errp) |
48 | + QEDRequest request = { .l2_table = NULL }; | 47 | +{ |
49 | + uint64_t offset; | 48 | + TestBlockJob *s = container_of(job, TestBlockJob, common); |
49 | + s->should_complete = true; | ||
50 | +} | ||
51 | + | ||
52 | +BlockJobDriver test_job_driver = { | ||
53 | + .instance_size = sizeof(TestBlockJob), | ||
54 | + .start = test_job_start, | ||
55 | + .complete = test_job_complete, | ||
56 | +}; | ||
57 | + | ||
58 | +static void test_blockjob_common(enum drain_type drain_type) | ||
59 | +{ | ||
60 | + BlockBackend *blk_src, *blk_target; | ||
61 | + BlockDriverState *src, *target; | ||
62 | + BlockJob *job; | ||
50 | + int ret; | 63 | + int ret; |
51 | + | 64 | + |
52 | + qemu_co_mutex_lock(&s->table_lock); | 65 | + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, |
53 | + ret = qed_find_cluster(s, &request, pos, &len, &offset); | 66 | + &error_abort); |
67 | + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
68 | + blk_insert_bs(blk_src, src, &error_abort); | ||
54 | + | 69 | + |
55 | + *pnum = len; | 70 | + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, |
56 | switch (ret) { | 71 | + &error_abort); |
57 | case QED_CLUSTER_FOUND: | 72 | + blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
58 | - offset |= qed_offset_into_cluster(s, cb->pos); | 73 | + blk_insert_bs(blk_target, target, &error_abort); |
59 | - cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; | 74 | + |
60 | - *cb->file = cb->bs->file->bs; | 75 | + job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0, |
61 | + *map = offset | qed_offset_into_cluster(s, pos); | 76 | + 0, NULL, NULL, &error_abort); |
62 | + status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | 77 | + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); |
63 | + *file = bs->file->bs; | 78 | + block_job_start(job); |
64 | break; | 79 | + |
65 | case QED_CLUSTER_ZERO: | 80 | + g_assert_cmpint(job->pause_count, ==, 0); |
66 | - cb->status = BDRV_BLOCK_ZERO; | 81 | + g_assert_false(job->paused); |
67 | + status = BDRV_BLOCK_ZERO; | 82 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ |
68 | break; | 83 | + |
69 | case QED_CLUSTER_L2: | 84 | + do_drain_begin(drain_type, src); |
70 | case QED_CLUSTER_L1: | 85 | + |
71 | - cb->status = 0; | 86 | + if (drain_type == BDRV_DRAIN_ALL) { |
72 | + status = 0; | 87 | + /* bdrv_drain_all() drains both src and target, and involves an |
73 | break; | 88 | + * additional block_job_pause_all() */ |
74 | default: | 89 | + g_assert_cmpint(job->pause_count, ==, 3); |
75 | assert(ret < 0); | 90 | + } else { |
76 | - cb->status = ret; | 91 | + g_assert_cmpint(job->pause_count, ==, 1); |
77 | + status = ret; | 92 | + } |
78 | break; | 93 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ |
79 | } | 94 | + /* g_assert_true(job->paused); */ |
80 | 95 | + g_assert_false(job->busy); /* The job is paused */ | |
81 | - if (cb->co) { | 96 | + |
82 | - aio_co_wake(cb->co); | 97 | + do_drain_end(drain_type, src); |
83 | - } | 98 | + |
84 | -} | 99 | + g_assert_cmpint(job->pause_count, ==, 0); |
85 | - | 100 | + g_assert_false(job->paused); |
86 | -static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, | 101 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ |
87 | - int64_t sector_num, | 102 | + |
88 | - int nb_sectors, int *pnum, | 103 | + do_drain_begin(drain_type, target); |
89 | - BlockDriverState **file) | 104 | + |
90 | -{ | 105 | + if (drain_type == BDRV_DRAIN_ALL) { |
91 | - BDRVQEDState *s = bs->opaque; | 106 | + /* bdrv_drain_all() drains both src and target, and involves an |
92 | - size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; | 107 | + * additional block_job_pause_all() */ |
93 | - QEDIsAllocatedCB cb = { | 108 | + g_assert_cmpint(job->pause_count, ==, 3); |
94 | - .bs = bs, | 109 | + } else { |
95 | - .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE, | 110 | + g_assert_cmpint(job->pause_count, ==, 1); |
96 | - .status = BDRV_BLOCK_OFFSET_MASK, | 111 | + } |
97 | - .pnum = pnum, | 112 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ |
98 | - .file = file, | 113 | + /* g_assert_true(job->paused); */ |
99 | - }; | 114 | + g_assert_false(job->busy); /* The job is paused */ |
100 | - QEDRequest request = { .l2_table = NULL }; | 115 | + |
101 | - uint64_t offset; | 116 | + do_drain_end(drain_type, target); |
102 | - int ret; | 117 | + |
103 | - | 118 | + g_assert_cmpint(job->pause_count, ==, 0); |
104 | - qemu_co_mutex_lock(&s->table_lock); | 119 | + g_assert_false(job->paused); |
105 | - ret = qed_find_cluster(s, &request, cb.pos, &len, &offset); | 120 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ |
106 | - qed_is_allocated_cb(&cb, ret, offset, len); | 121 | + |
107 | - | 122 | + ret = block_job_complete_sync(job, &error_abort); |
108 | - /* The callback was invoked immediately */ | 123 | + g_assert_cmpint(ret, ==, 0); |
109 | - assert(cb.status != BDRV_BLOCK_OFFSET_MASK); | 124 | + |
110 | - | 125 | + blk_unref(blk_src); |
111 | qed_unref_l2_cache_entry(request.l2_table); | 126 | + blk_unref(blk_target); |
112 | qemu_co_mutex_unlock(&s->table_lock); | 127 | + bdrv_unref(src); |
113 | 128 | + bdrv_unref(target); | |
114 | - return cb.status; | 129 | +} |
115 | + return status; | 130 | + |
131 | +static void test_blockjob_drain_all(void) | ||
132 | +{ | ||
133 | + test_blockjob_common(BDRV_DRAIN_ALL); | ||
134 | +} | ||
135 | + | ||
136 | +static void test_blockjob_drain(void) | ||
137 | +{ | ||
138 | + test_blockjob_common(BDRV_DRAIN); | ||
139 | +} | ||
140 | + | ||
141 | int main(int argc, char **argv) | ||
142 | { | ||
143 | bdrv_init(); | ||
144 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
145 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
146 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
147 | |||
148 | + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
149 | + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
150 | + | ||
151 | return g_test_run(); | ||
116 | } | 152 | } |
117 | |||
118 | static BDRVQEDState *acb_to_s(QEDAIOCB *acb) | ||
119 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = { | ||
120 | .bdrv_child_perm = bdrv_format_default_perms, | ||
121 | .bdrv_create = bdrv_qed_create, | ||
122 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
123 | - .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, | ||
124 | + .bdrv_co_block_status = bdrv_qed_co_block_status, | ||
125 | .bdrv_co_readv = bdrv_qed_co_readv, | ||
126 | .bdrv_co_writev = bdrv_qed_co_writev, | ||
127 | .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, | ||
128 | -- | 153 | -- |
129 | 2.13.6 | 154 | 2.13.6 |
130 | 155 | ||
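The pause bookkeeping exercised by the block-job drain test above reduces to a counter: each drained parent pauses the job once and each drained_end resumes it once, so draining both the source and the target keeps the job paused until both sections have ended. A minimal sketch with invented types, without QEMU's job infrastructure:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        int pause_count;
    } Job;

    /* What a per-child drained_begin/drained_end callback boils down to. */
    static void job_pause(Job *job)  { job->pause_count++; }
    static void job_resume(Job *job) { assert(job->pause_count > 0); job->pause_count--; }

    static bool job_paused(const Job *job) { return job->pause_count > 0; }

    int main(void)
    {
        Job job = {0};

        /* A drain_all-style operation reaches both nodes the job is attached to. */
        job_pause(&job);               /* source drained        */
        job_pause(&job);               /* target drained        */
        assert(job.pause_count == 2 && job_paused(&job));

        job_resume(&job);              /* target drained_end    */
        assert(job_paused(&job));      /* still paused: source section is open */

        job_resume(&job);              /* source drained_end    */
        assert(!job_paused(&job));

        printf("pause/resume balanced\n");
        return 0;
    }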
1 | From: Anton Nefedov <anton.nefedov@virtuozzo.com> | 1 | Block jobs are already paused using the BdrvChildRole drain callbacks, |
---|---|---|---|
2 | so we don't need an additional block_job_pause_all() call. | ||
2 | 3 | ||
3 | The normal bdrv_co_pwritev() use is either | ||
4 | - BDRV_REQ_ZERO_WRITE clear and iovector provided | ||
5 | - BDRV_REQ_ZERO_WRITE set and iovector == NULL | ||
6 | |||
7 | while | ||
8 | - the flag clear and iovector == NULL is an assertion failure | ||
9 | in bdrv_co_do_zero_pwritev() | ||
10 | - the flag set and iovector provided is in fact allowed | ||
11 | (the flag prevails and zeroes are written) | ||
12 | |||
13 | However the alignment logic does not support the latter case so the padding | ||
14 | areas get overwritten with zeroes. | ||
15 | |||
16 | Currently, general functions like bdrv_rw_co() do provide iovector | ||
17 | regardless of flags. So, keep it supported and use bdrv_co_do_zero_pwritev() | ||
18 | alignment for it which also makes the code a bit more obvious anyway. | ||
19 | |||
20 | Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com> | ||
21 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
22 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
23 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
24 | --- | 5 | --- |
25 | block/io.c | 2 +- | 6 | block/io.c | 4 ---- |
26 | 1 file changed, 1 insertion(+), 1 deletion(-) | 7 | tests/test-bdrv-drain.c | 10 ++++------ |
8 | 2 files changed, 4 insertions(+), 10 deletions(-) | ||
27 | 9 | ||
28 | diff --git a/block/io.c b/block/io.c | 10 | diff --git a/block/io.c b/block/io.c |
29 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/block/io.c | 12 | --- a/block/io.c |
31 | +++ b/block/io.c | 13 | +++ b/block/io.c |
32 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | 14 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
33 | */ | 15 | * context. */ |
34 | tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); | 16 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); |
35 | 17 | ||
36 | - if (!qiov) { | 18 | - block_job_pause_all(); |
37 | + if (flags & BDRV_REQ_ZERO_WRITE) { | 19 | - |
38 | ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); | 20 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
39 | goto out; | 21 | AioContext *aio_context = bdrv_get_aio_context(bs); |
22 | |||
23 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
24 | aio_enable_external(aio_context); | ||
25 | aio_context_release(aio_context); | ||
26 | } | ||
27 | - | ||
28 | - block_job_resume_all(); | ||
29 | } | ||
30 | |||
31 | void bdrv_drain_all(void) | ||
32 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/tests/test-bdrv-drain.c | ||
35 | +++ b/tests/test-bdrv-drain.c | ||
36 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
37 | do_drain_begin(drain_type, src); | ||
38 | |||
39 | if (drain_type == BDRV_DRAIN_ALL) { | ||
40 | - /* bdrv_drain_all() drains both src and target, and involves an | ||
41 | - * additional block_job_pause_all() */ | ||
42 | - g_assert_cmpint(job->pause_count, ==, 3); | ||
43 | + /* bdrv_drain_all() drains both src and target */ | ||
44 | + g_assert_cmpint(job->pause_count, ==, 2); | ||
45 | } else { | ||
46 | g_assert_cmpint(job->pause_count, ==, 1); | ||
47 | } | ||
48 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) | ||
49 | do_drain_begin(drain_type, target); | ||
50 | |||
51 | if (drain_type == BDRV_DRAIN_ALL) { | ||
52 | - /* bdrv_drain_all() drains both src and target, and involves an | ||
53 | - * additional block_job_pause_all() */ | ||
54 | - g_assert_cmpint(job->pause_count, ==, 3); | ||
55 | + /* bdrv_drain_all() drains both src and target */ | ||
56 | + g_assert_cmpint(job->pause_count, ==, 2); | ||
57 | } else { | ||
58 | g_assert_cmpint(job->pause_count, ==, 1); | ||
40 | } | 59 | } |
41 | -- | 60 | -- |
42 | 2.13.6 | 61 | 2.13.6 |
43 | 62 | ||
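The one-line io.c fix above changes the dispatch condition from 'no iovector was given' to 'the zero flag is set'. A hedged sketch of why that matters, using an invented flag name and an in-memory disk: when the flag is set, any buffer the caller happened to provide must be ignored and zeroes written instead.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define REQ_ZERO_WRITE 0x1

    static uint8_t disk[16];

    static void write_zeroes(int64_t off, int64_t len)
    {
        memset(&disk[off], 0, len);
    }

    static void write_buf(int64_t off, const uint8_t *buf, int64_t len)
    {
        memcpy(&disk[off], buf, len);
    }

    /* Dispatch on the flag, not on whether a buffer happens to be present. */
    static void do_write(int64_t off, const uint8_t *buf, int64_t len, unsigned flags)
    {
        if (flags & REQ_ZERO_WRITE) {
            write_zeroes(off, len);   /* the flag prevails; buf (if any) is ignored */
        } else {
            write_buf(off, buf, len);
        }
    }

    int main(void)
    {
        uint8_t pattern[4] = { 0xaa, 0xaa, 0xaa, 0xaa };

        memset(disk, 0xff, sizeof(disk));
        do_write(4, pattern, 4, REQ_ZERO_WRITE);   /* buffer provided *and* flag set */

        printf("disk[4..7] = %02x %02x %02x %02x (zeroes, not the pattern)\n",
               disk[4], disk[5], disk[6], disk[7]);
        return 0;
    }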
1 | From: Eric Blake <eblake@redhat.com> | 1 | bdrv_do_drained_begin() restricts the call of parent callbacks and |
---|---|---|---|
2 | aio_disable_external() to the outermost drain section, but the block | ||
3 | driver callbacks are always called. bdrv_do_drained_end() must match | ||
4 | this behaviour, otherwise nodes stay drained even if begin/end calls | ||
5 | were balanced. | ||
2 | 6 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Update the file protocol driver accordingly. | ||
5 | |||
6 | In want_zero mode, we continue to report fine-grained hole | ||
7 | information (the caller wants as much mapping detail as possible); | ||
8 | but when not in that mode, the caller prefers larger *pnum and | ||
9 | merely cares about what offsets are allocated at this layer, rather | ||
10 | than where the holes live. Since holes still read as zeroes at | ||
11 | this layer (rather than deferring to a backing layer), we can take | ||
12 | the shortcut of skipping lseek(), and merely state that all bytes | ||
13 | are allocated. | ||
14 | |||
15 | We can also drop redundant bounds checks that are already | ||
16 | guaranteed by the block layer. | ||
17 | |||
18 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
19 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
20 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
22 | --- | 8 | --- |
23 | block/file-posix.c | 64 +++++++++++++++++++++++++----------------------------- | 9 | block/io.c | 12 +++++++----- |
24 | 1 file changed, 30 insertions(+), 34 deletions(-) | 10 | 1 file changed, 7 insertions(+), 5 deletions(-) |
25 | 11 | ||
26 | diff --git a/block/file-posix.c b/block/file-posix.c | 12 | diff --git a/block/io.c b/block/io.c |
27 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/block/file-posix.c | 14 | --- a/block/io.c |
29 | +++ b/block/file-posix.c | 15 | +++ b/block/io.c |
30 | @@ -XXX,XX +XXX,XX @@ static int find_allocation(BlockDriverState *bs, off_t start, | 16 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
17 | |||
18 | void bdrv_drained_end(BlockDriverState *bs) | ||
19 | { | ||
20 | + int old_quiesce_counter; | ||
21 | + | ||
22 | if (qemu_in_coroutine()) { | ||
23 | bdrv_co_yield_to_drain(bs, false); | ||
24 | return; | ||
25 | } | ||
26 | assert(bs->quiesce_counter > 0); | ||
27 | - if (atomic_fetch_dec(&bs->quiesce_counter) > 1) { | ||
28 | - return; | ||
29 | - } | ||
30 | + old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); | ||
31 | |||
32 | /* Re-enable things in child-to-parent order */ | ||
33 | bdrv_drain_invoke(bs, false, false); | ||
34 | - bdrv_parent_drained_end(bs); | ||
35 | - aio_enable_external(bdrv_get_aio_context(bs)); | ||
36 | + if (old_quiesce_counter == 1) { | ||
37 | + bdrv_parent_drained_end(bs); | ||
38 | + aio_enable_external(bdrv_get_aio_context(bs)); | ||
39 | + } | ||
31 | } | 40 | } |
32 | 41 | ||
33 | /* | 42 | /* |
34 | - * Returns the allocation status of the specified sectors. | ||
35 | + * Returns the allocation status of the specified offset. | ||
36 | * | ||
37 | - * If 'sector_num' is beyond the end of the disk image the return value is 0 | ||
38 | - * and 'pnum' is set to 0. | ||
39 | + * The block layer guarantees 'offset' and 'bytes' are within bounds. | ||
40 | * | ||
41 | - * 'pnum' is set to the number of sectors (including and immediately following | ||
42 | - * the specified sector) that are known to be in the same | ||
43 | + * 'pnum' is set to the number of bytes (including and immediately following | ||
44 | + * the specified offset) that are known to be in the same | ||
45 | * allocated/unallocated state. | ||
46 | * | ||
47 | - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes | ||
48 | - * beyond the end of the disk image it will be clamped. | ||
49 | + * 'bytes' is the max value 'pnum' should be set to. | ||
50 | */ | ||
51 | -static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, | ||
52 | - int64_t sector_num, | ||
53 | - int nb_sectors, int *pnum, | ||
54 | - BlockDriverState **file) | ||
55 | -{ | ||
56 | - off_t start, data = 0, hole = 0; | ||
57 | - int64_t total_size; | ||
58 | +static int coroutine_fn raw_co_block_status(BlockDriverState *bs, | ||
59 | + bool want_zero, | ||
60 | + int64_t offset, | ||
61 | + int64_t bytes, int64_t *pnum, | ||
62 | + int64_t *map, | ||
63 | + BlockDriverState **file) | ||
64 | +{ | ||
65 | + off_t data = 0, hole = 0; | ||
66 | int ret; | ||
67 | |||
68 | ret = fd_open(bs); | ||
69 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | - start = sector_num * BDRV_SECTOR_SIZE; | ||
74 | - total_size = bdrv_getlength(bs); | ||
75 | - if (total_size < 0) { | ||
76 | - return total_size; | ||
77 | - } else if (start >= total_size) { | ||
78 | - *pnum = 0; | ||
79 | - return 0; | ||
80 | - } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { | ||
81 | - nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); | ||
82 | + if (!want_zero) { | ||
83 | + *pnum = bytes; | ||
84 | + *map = offset; | ||
85 | + *file = bs; | ||
86 | + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | ||
87 | } | ||
88 | |||
89 | - ret = find_allocation(bs, start, &data, &hole); | ||
90 | + ret = find_allocation(bs, offset, &data, &hole); | ||
91 | if (ret == -ENXIO) { | ||
92 | /* Trailing hole */ | ||
93 | - *pnum = nb_sectors; | ||
94 | + *pnum = bytes; | ||
95 | ret = BDRV_BLOCK_ZERO; | ||
96 | } else if (ret < 0) { | ||
97 | /* No info available, so pretend there are no holes */ | ||
98 | - *pnum = nb_sectors; | ||
99 | + *pnum = bytes; | ||
100 | ret = BDRV_BLOCK_DATA; | ||
101 | - } else if (data == start) { | ||
102 | - /* On a data extent, compute sectors to the end of the extent, | ||
103 | + } else if (data == offset) { | ||
104 | + /* On a data extent, compute bytes to the end of the extent, | ||
105 | * possibly including a partial sector at EOF. */ | ||
106 | - *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); | ||
107 | + *pnum = MIN(bytes, hole - offset); | ||
108 | ret = BDRV_BLOCK_DATA; | ||
109 | } else { | ||
110 | - /* On a hole, compute sectors to the beginning of the next extent. */ | ||
111 | - assert(hole == start); | ||
112 | - *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); | ||
113 | + /* On a hole, compute bytes to the beginning of the next extent. */ | ||
114 | + assert(hole == offset); | ||
115 | + *pnum = MIN(bytes, data - offset); | ||
116 | ret = BDRV_BLOCK_ZERO; | ||
117 | } | ||
118 | + *map = offset; | ||
119 | *file = bs; | ||
120 | - return ret | BDRV_BLOCK_OFFSET_VALID | start; | ||
121 | + return ret | BDRV_BLOCK_OFFSET_VALID; | ||
122 | } | ||
123 | |||
124 | static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, | ||
125 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
126 | .bdrv_close = raw_close, | ||
127 | .bdrv_create = raw_create, | ||
128 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
129 | - .bdrv_co_get_block_status = raw_co_get_block_status, | ||
130 | + .bdrv_co_block_status = raw_co_block_status, | ||
131 | .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, | ||
132 | |||
133 | .bdrv_co_preadv = raw_co_preadv, | ||
134 | -- | 43 | -- |
135 | 2.13.6 | 44 | 2.13.6 |
136 | 45 | ||
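For readers who want to poke at the hole probing that raw_co_block_status() above builds on, the program below calls lseek(SEEK_DATA/SEEK_HOLE) directly. It assumes Linux (or another platform defining those whence values) and a filesystem that reports holes; elsewhere it simply prints the error it gets, so treat it as a probe rather than a portability claim.

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <inttypes.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        if (argc != 2) {
            fprintf(stderr, "usage: %s <file>\n", argv[0]);
            return 1;
        }

        int fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        off_t start = 0;
        off_t data = lseek(fd, start, SEEK_DATA);   /* next data at or after start */
        if (data < 0) {
            printf("no data after %jd: %s\n", (intmax_t)start,
                   errno == ENXIO ? "trailing hole" : strerror(errno));
            close(fd);
            return 0;
        }
        off_t hole = lseek(fd, data, SEEK_HOLE);    /* next hole after that data */

        printf("data extent starts at %jd, next hole at %jd\n",
               (intmax_t)data, (intmax_t)hole);
        close(fd);
        return 0;
    }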
1 | From: Eric Blake <eblake@redhat.com> | ||
---|---|---|---|
2 | |||
3 | Commit bdd6a90 has a bug: drivers should never directly set | ||
4 | BDRV_BLOCK_ALLOCATED, but only io.c should do that (as needed). | ||
5 | Instead, drivers should report BDRV_BLOCK_DATA if they know that | ||
6 | data comes from this BDS. | ||
7 | |||
8 | But let's look at the bigger picture: semantically, the nvme | ||
9 | driver is similar to the nbd, null, and raw drivers (no backing | ||
10 | file, all data comes from this BDS). But while two of those | ||
11 | other drivers have to supply the callback (null because it can | ||
12 | special-case BDRV_BLOCK_ZERO, raw because it can special-case | ||
13 | a different offset), in this case the block layer defaults are | ||
14 | good enough without the callback at all (similar to nbd). | ||
15 | |||
16 | So, fix the bug by deletion ;) | ||
17 | |||
18 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 2 | --- |
21 | block/nvme.c | 14 -------------- | 3 | tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ |
22 | 1 file changed, 14 deletions(-) | 4 | 1 file changed, 57 insertions(+) |
23 | 5 | ||
24 | diff --git a/block/nvme.c b/block/nvme.c | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
25 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/block/nvme.c | 8 | --- a/tests/test-bdrv-drain.c |
27 | +++ b/block/nvme.c | 9 | +++ b/tests/test-bdrv-drain.c |
28 | @@ -XXX,XX +XXX,XX @@ static int nvme_reopen_prepare(BDRVReopenState *reopen_state, | 10 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
29 | return 0; | 11 | enum drain_type { |
12 | BDRV_DRAIN_ALL, | ||
13 | BDRV_DRAIN, | ||
14 | + DRAIN_TYPE_MAX, | ||
15 | }; | ||
16 | |||
17 | static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) | ||
18 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) | ||
19 | test_quiesce_common(BDRV_DRAIN, false); | ||
30 | } | 20 | } |
31 | 21 | ||
32 | -static int64_t coroutine_fn nvme_co_get_block_status(BlockDriverState *bs, | 22 | +static void test_nested(void) |
33 | - int64_t sector_num, | 23 | +{ |
34 | - int nb_sectors, int *pnum, | 24 | + BlockBackend *blk; |
35 | - BlockDriverState **file) | 25 | + BlockDriverState *bs, *backing; |
36 | -{ | 26 | + BDRVTestState *s, *backing_s; |
37 | - *pnum = nb_sectors; | 27 | + enum drain_type outer, inner; |
38 | - *file = bs; | 28 | + |
39 | - | 29 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
40 | - return BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_OFFSET_VALID | | 30 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, |
41 | - (sector_num << BDRV_SECTOR_BITS); | 31 | + &error_abort); |
42 | -} | 32 | + s = bs->opaque; |
43 | - | 33 | + blk_insert_bs(blk, bs, &error_abort); |
44 | static void nvme_refresh_filename(BlockDriverState *bs, QDict *opts) | 34 | + |
45 | { | 35 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
46 | QINCREF(opts); | 36 | + backing_s = backing->opaque; |
47 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = { | 37 | + bdrv_set_backing_hd(bs, backing, &error_abort); |
48 | .bdrv_co_flush_to_disk = nvme_co_flush, | 38 | + |
49 | .bdrv_reopen_prepare = nvme_reopen_prepare, | 39 | + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { |
50 | 40 | + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { | |
51 | - .bdrv_co_get_block_status = nvme_co_get_block_status, | 41 | + /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ |
52 | - | 42 | + int bs_quiesce = (outer != BDRV_DRAIN_ALL) + |
53 | .bdrv_refresh_filename = nvme_refresh_filename, | 43 | + (inner != BDRV_DRAIN_ALL); |
54 | .bdrv_refresh_limits = nvme_refresh_limits, | 44 | + int backing_quiesce = 0; |
45 | + int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
46 | + (inner != BDRV_DRAIN); | ||
47 | + | ||
48 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
49 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
50 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
51 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
52 | + | ||
53 | + do_drain_begin(outer, bs); | ||
54 | + do_drain_begin(inner, bs); | ||
55 | + | ||
56 | + g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); | ||
57 | + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); | ||
58 | + g_assert_cmpint(s->drain_count, ==, 2); | ||
59 | + g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); | ||
60 | + | ||
61 | + do_drain_end(inner, bs); | ||
62 | + do_drain_end(outer, bs); | ||
63 | + | ||
64 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
65 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
66 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
67 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
68 | + } | ||
69 | + } | ||
70 | + | ||
71 | + bdrv_unref(backing); | ||
72 | + bdrv_unref(bs); | ||
73 | + blk_unref(blk); | ||
74 | +} | ||
75 | + | ||
76 | |||
77 | typedef struct TestBlockJob { | ||
78 | BlockJob common; | ||
79 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
80 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
81 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
82 | |||
83 | + g_test_add_func("/bdrv-drain/nested", test_nested); | ||
84 | + | ||
85 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
86 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
55 | 87 | ||
56 | -- | 88 | -- |
57 | 2.13.6 | 89 | 2.13.6 |
58 | 90 | ||
59 | 91 | diff view generated by jsdifflib |
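The nesting behaviour that test_nested() above asserts can be summarised in a few lines: every bdrv_drained_begin() on a node bumps its quiesce_counter, every matching bdrv_drained_end() drops it again, and external I/O is re-enabled only once the counter is back to zero. A minimal illustrative sketch (the comments are editorial, not taken from the patches):

    bdrv_drained_begin(bs);    /* bs->quiesce_counter: 0 -> 1, requests quiesced */
    bdrv_drained_begin(bs);    /* bs->quiesce_counter: 1 -> 2, still quiesced    */

    /* ... work that must not run concurrently with guest I/O ... */

    bdrv_drained_end(bs);      /* bs->quiesce_counter: 2 -> 1, still quiesced    */
    bdrv_drained_end(bs);      /* bs->quiesce_counter: 1 -> 0, I/O resumes       */

As the XXX comment in the test notes, bdrv_drain_all_begin()/bdrv_drain_all_end() currently nest the same way but do not increase quiesce_counter.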
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | This is in preparation for subtree drains, i.e. drained sections that |
---|---|---|---|
2 | 2 | affect not only a single node, but recursively all child nodes, too. | |
3 | BlockDriverState has the BDRV_POLL_WHILE() macro to wait on event loop | 3 | |
4 | activity while a condition evaluates to true. This is used to implement | 4 | Calling the parent callbacks for drain is pointless when we just came |
5 | synchronous operations where it acts as a condvar between the IOThread | 5 | from that parent node recursively and leads to multiple increases of |
6 | running the operation and the main loop waiting for the operation. It | 6 | bs->quiesce_counter in a single drain call. Don't do it. |
7 | can also be called from the thread that owns the AioContext and in that | 7 | |
8 | case it's just a nested event loop. | 8 | In order for this to work correctly, the parent callback must be called |
9 | 9 | for every bdrv_drained_begin/end() call, not only for the outermost one: | |
10 | BlockBackend needs this behavior but doesn't always have a | 10 | |
11 | BlockDriverState it can use. This patch extracts BDRV_POLL_WHILE() into | 11 | If we have a node N with two parents A and B, recursive draining of A |
12 | the AioWait abstraction, which can be used with AioContext and isn't | 12 | should cause the quiesce_counter of B to increase because its child N is |
13 | tied to BlockDriverState anymore. | 13 | drained independently of B. If now B is recursively drained, too, A must |
14 | 14 | increase its quiesce_counter because N is drained independently of A | |
15 | This feature could be built directly into AioContext but then all users | 15 | only now, even if N is going from quiesce_counter 1 to 2. |
16 | would kick the event loop even if they signal different conditions. | 16 | |
17 | Imagine an AioContext with many BlockDriverStates: each time a request | ||
18 | completes, any waiter would wake up and re-check its condition. It's | ||
19 | nicer to keep a separate AioWait object for each condition instead. | ||
20 | |||
21 | Please see "block/aio-wait.h" for details on the API. | ||
22 | |||
23 | The name AIO_WAIT_WHILE() avoids the confusion between AIO_POLL_WHILE() | ||
24 | and AioContext polling. | ||
25 | |||
26 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
27 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
28 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
29 | --- | 18 | --- |
30 | include/block/aio-wait.h | 116 ++++++++++++++++++++++++++++++++++++++++++++++ | 19 | include/block/block.h | 4 ++-- |
31 | include/block/block.h | 40 +++------------- | 20 | block.c | 13 +++++++++---- |
32 | include/block/block_int.h | 7 ++- | 21 | block/io.c | 47 ++++++++++++++++++++++++++++++++++------------- |
33 | block.c | 5 ++ | 22 | 3 files changed, 45 insertions(+), 19 deletions(-) |
34 | block/io.c | 10 +--- | 23 | |
35 | util/aio-wait.c | 40 ++++++++++++++++ | ||
36 | util/Makefile.objs | 2 +- | ||
37 | 7 files changed, 174 insertions(+), 46 deletions(-) | ||
38 | create mode 100644 include/block/aio-wait.h | ||
39 | create mode 100644 util/aio-wait.c | ||
40 | |||
41 | diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h | ||
42 | new file mode 100644 | ||
43 | index XXXXXXX..XXXXXXX | ||
44 | --- /dev/null | ||
45 | +++ b/include/block/aio-wait.h | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | +/* | ||
48 | + * AioContext wait support | ||
49 | + * | ||
50 | + * Copyright (C) 2018 Red Hat, Inc. | ||
51 | + * | ||
52 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
53 | + * of this software and associated documentation files (the "Software"), to deal | ||
54 | + * in the Software without restriction, including without limitation the rights | ||
55 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
56 | + * copies of the Software, and to permit persons to whom the Software is | ||
57 | + * furnished to do so, subject to the following conditions: | ||
58 | + * | ||
59 | + * The above copyright notice and this permission notice shall be included in | ||
60 | + * all copies or substantial portions of the Software. | ||
61 | + * | ||
62 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
63 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
64 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
65 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
66 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
67 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
68 | + * THE SOFTWARE. | ||
69 | + */ | ||
70 | + | ||
71 | +#ifndef QEMU_AIO_WAIT_H | ||
72 | +#define QEMU_AIO_WAIT_H | ||
73 | + | ||
74 | +#include "block/aio.h" | ||
75 | + | ||
76 | +/** | ||
77 | + * AioWait: | ||
78 | + * | ||
79 | + * An object that facilitates synchronous waiting on a condition. The main | ||
80 | + * loop can wait on an operation running in an IOThread as follows: | ||
81 | + * | ||
82 | + * AioWait *wait = ...; | ||
83 | + * AioContext *ctx = ...; | ||
84 | + * MyWork work = { .done = false }; | ||
85 | + * schedule_my_work_in_iothread(ctx, &work); | ||
86 | + * AIO_WAIT_WHILE(wait, ctx, !work.done); | ||
87 | + * | ||
88 | + * The IOThread must call aio_wait_kick() to notify the main loop when | ||
89 | + * work.done changes: | ||
90 | + * | ||
91 | + * static void do_work(...) | ||
92 | + * { | ||
93 | + * ... | ||
94 | + * work.done = true; | ||
95 | + * aio_wait_kick(wait); | ||
96 | + * } | ||
97 | + */ | ||
98 | +typedef struct { | ||
99 | + /* Is the main loop waiting for a kick? Accessed with atomic ops. */ | ||
100 | + bool need_kick; | ||
101 | +} AioWait; | ||
102 | + | ||
103 | +/** | ||
104 | + * AIO_WAIT_WHILE: | ||
105 | + * @wait: the aio wait object | ||
106 | + * @ctx: the aio context | ||
107 | + * @cond: wait while this conditional expression is true | ||
108 | + * | ||
109 | + * Wait while a condition is true. Use this to implement synchronous | ||
110 | + * operations that require event loop activity. | ||
111 | + * | ||
112 | + * The caller must be sure that something calls aio_wait_kick() when the value | ||
113 | + * of @cond might have changed. | ||
114 | + * | ||
115 | + * The caller's thread must be the IOThread that owns @ctx or the main loop | ||
116 | + * thread (with @ctx acquired exactly once). This function cannot be used to | ||
117 | + * wait on conditions between two IOThreads since that could lead to deadlock, | ||
118 | + * go via the main loop instead. | ||
119 | + */ | ||
120 | +#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ | ||
121 | + bool waited_ = false; \ | ||
122 | + bool busy_ = true; \ | ||
123 | + AioWait *wait_ = (wait); \ | ||
124 | + AioContext *ctx_ = (ctx); \ | ||
125 | + if (in_aio_context_home_thread(ctx_)) { \ | ||
126 | + while ((cond) || busy_) { \ | ||
127 | + busy_ = aio_poll(ctx_, (cond)); \ | ||
128 | + waited_ |= !!(cond) | busy_; \ | ||
129 | + } \ | ||
130 | + } else { \ | ||
131 | + assert(qemu_get_current_aio_context() == \ | ||
132 | + qemu_get_aio_context()); \ | ||
133 | + assert(!wait_->need_kick); \ | ||
134 | + /* Set wait_->need_kick before evaluating cond. */ \ | ||
135 | + atomic_mb_set(&wait_->need_kick, true); \ | ||
136 | + while (busy_) { \ | ||
137 | + if ((cond)) { \ | ||
138 | + waited_ = busy_ = true; \ | ||
139 | + aio_context_release(ctx_); \ | ||
140 | + aio_poll(qemu_get_aio_context(), true); \ | ||
141 | + aio_context_acquire(ctx_); \ | ||
142 | + } else { \ | ||
143 | + busy_ = aio_poll(ctx_, false); \ | ||
144 | + waited_ |= busy_; \ | ||
145 | + } \ | ||
146 | + } \ | ||
147 | + atomic_set(&wait_->need_kick, false); \ | ||
148 | + } \ | ||
149 | + waited_; }) | ||
150 | + | ||
151 | +/** | ||
152 | + * aio_wait_kick: | ||
153 | + * @wait: the aio wait object that should re-evaluate its condition | ||
154 | + * | ||
155 | + * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During | ||
156 | + * synchronous operations performed in an IOThread, the main thread lets the | ||
157 | + * IOThread's event loop run, waiting for the operation to complete. A | ||
158 | + * aio_wait_kick() call will wake up the main thread. | ||
159 | + */ | ||
160 | +void aio_wait_kick(AioWait *wait); | ||
161 | + | ||
162 | +#endif /* QEMU_AIO_WAIT */ | ||
163 | diff --git a/include/block/block.h b/include/block/block.h | 24 | diff --git a/include/block/block.h b/include/block/block.h |
164 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
165 | --- a/include/block/block.h | 26 | --- a/include/block/block.h |
166 | +++ b/include/block/block.h | 27 | +++ b/include/block/block.h |
167 | @@ -XXX,XX +XXX,XX @@ | 28 | @@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs); |
168 | #define BLOCK_H | 29 | * Begin a quiesced section of all users of @bs. This is part of |
169 | 30 | * bdrv_drained_begin. | |
170 | #include "block/aio.h" | 31 | */ |
171 | +#include "block/aio-wait.h" | 32 | -void bdrv_parent_drained_begin(BlockDriverState *bs); |
172 | #include "qapi-types.h" | 33 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); |
173 | #include "qemu/iov.h" | 34 | |
174 | #include "qemu/coroutine.h" | 35 | /** |
175 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void); | 36 | * bdrv_parent_drained_end: |
176 | void bdrv_drain_all_end(void); | 37 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs); |
177 | void bdrv_drain_all(void); | 38 | * End a quiesced section of all users of @bs. This is part of |
178 | 39 | * bdrv_drained_end. | |
179 | +/* Returns NULL when bs == NULL */ | 40 | */ |
180 | +AioWait *bdrv_get_aio_wait(BlockDriverState *bs); | 41 | -void bdrv_parent_drained_end(BlockDriverState *bs); |
181 | + | 42 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
182 | #define BDRV_POLL_WHILE(bs, cond) ({ \ | 43 | |
183 | - bool waited_ = false; \ | 44 | /** |
184 | - bool busy_ = true; \ | 45 | * bdrv_drained_begin: |
185 | BlockDriverState *bs_ = (bs); \ | ||
186 | - AioContext *ctx_ = bdrv_get_aio_context(bs_); \ | ||
187 | - if (in_aio_context_home_thread(ctx_)) { \ | ||
188 | - while ((cond) || busy_) { \ | ||
189 | - busy_ = aio_poll(ctx_, (cond)); \ | ||
190 | - waited_ |= !!(cond) | busy_; \ | ||
191 | - } \ | ||
192 | - } else { \ | ||
193 | - assert(qemu_get_current_aio_context() == \ | ||
194 | - qemu_get_aio_context()); \ | ||
195 | - /* Ask bdrv_dec_in_flight to wake up the main \ | ||
196 | - * QEMU AioContext. Extra I/O threads never take \ | ||
197 | - * other I/O threads' AioContexts (see for example \ | ||
198 | - * block_job_defer_to_main_loop for how to do it). \ | ||
199 | - */ \ | ||
200 | - assert(!bs_->wakeup); \ | ||
201 | - /* Set bs->wakeup before evaluating cond. */ \ | ||
202 | - atomic_mb_set(&bs_->wakeup, true); \ | ||
203 | - while (busy_) { \ | ||
204 | - if ((cond)) { \ | ||
205 | - waited_ = busy_ = true; \ | ||
206 | - aio_context_release(ctx_); \ | ||
207 | - aio_poll(qemu_get_aio_context(), true); \ | ||
208 | - aio_context_acquire(ctx_); \ | ||
209 | - } else { \ | ||
210 | - busy_ = aio_poll(ctx_, false); \ | ||
211 | - waited_ |= busy_; \ | ||
212 | - } \ | ||
213 | - } \ | ||
214 | - atomic_set(&bs_->wakeup, false); \ | ||
215 | - } \ | ||
216 | - waited_; }) | ||
217 | + AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \ | ||
218 | + bdrv_get_aio_context(bs_), \ | ||
219 | + cond); }) | ||
220 | |||
221 | int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); | ||
222 | int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); | ||
223 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
224 | index XXXXXXX..XXXXXXX 100644 | ||
225 | --- a/include/block/block_int.h | ||
226 | +++ b/include/block/block_int.h | ||
227 | @@ -XXX,XX +XXX,XX @@ | ||
228 | |||
229 | #include "block/accounting.h" | ||
230 | #include "block/block.h" | ||
231 | +#include "block/aio-wait.h" | ||
232 | #include "qemu/queue.h" | ||
233 | #include "qemu/coroutine.h" | ||
234 | #include "qemu/stats64.h" | ||
235 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
236 | unsigned int in_flight; | ||
237 | unsigned int serialising_in_flight; | ||
238 | |||
239 | - /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic | ||
240 | - * ops. | ||
241 | - */ | ||
242 | - bool wakeup; | ||
243 | + /* Kicked to signal main loop when a request completes. */ | ||
244 | + AioWait wait; | ||
245 | |||
246 | /* counter for nested bdrv_io_plug. | ||
247 | * Accessed with atomic ops. | ||
248 | diff --git a/block.c b/block.c | 46 | diff --git a/block.c b/block.c |
249 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
250 | --- a/block.c | 48 | --- a/block.c |
251 | +++ b/block.c | 49 | +++ b/block.c |
252 | @@ -XXX,XX +XXX,XX @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs) | 50 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
253 | return bs->aio_context; | 51 | BlockDriverState *new_bs) |
254 | } | 52 | { |
255 | 53 | BlockDriverState *old_bs = child->bs; | |
256 | +AioWait *bdrv_get_aio_wait(BlockDriverState *bs) | 54 | + int i; |
257 | +{ | 55 | |
258 | + return bs ? &bs->wait : NULL; | 56 | if (old_bs && new_bs) { |
259 | +} | 57 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); |
260 | + | 58 | } |
261 | void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) | 59 | if (old_bs) { |
262 | { | 60 | if (old_bs->quiesce_counter && child->role->drained_end) { |
263 | aio_co_enter(bdrv_get_aio_context(bs), co); | 61 | - child->role->drained_end(child); |
62 | + for (i = 0; i < old_bs->quiesce_counter; i++) { | ||
63 | + child->role->drained_end(child); | ||
64 | + } | ||
65 | } | ||
66 | if (child->role->detach) { | ||
67 | child->role->detach(child); | ||
68 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
69 | if (new_bs) { | ||
70 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); | ||
71 | if (new_bs->quiesce_counter && child->role->drained_begin) { | ||
72 | - child->role->drained_begin(child); | ||
73 | + for (i = 0; i < new_bs->quiesce_counter; i++) { | ||
74 | + child->role->drained_begin(child); | ||
75 | + } | ||
76 | } | ||
77 | |||
78 | if (child->role->attach) { | ||
79 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | ||
80 | AioContext *ctx = bdrv_get_aio_context(bs); | ||
81 | |||
82 | aio_disable_external(ctx); | ||
83 | - bdrv_parent_drained_begin(bs); | ||
84 | + bdrv_parent_drained_begin(bs, NULL); | ||
85 | bdrv_drain(bs); /* ensure there are no in-flight requests */ | ||
86 | |||
87 | while (aio_poll(ctx, false)) { | ||
88 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | ||
89 | */ | ||
90 | aio_context_acquire(new_context); | ||
91 | bdrv_attach_aio_context(bs, new_context); | ||
92 | - bdrv_parent_drained_end(bs); | ||
93 | + bdrv_parent_drained_end(bs, NULL); | ||
94 | aio_enable_external(ctx); | ||
95 | aio_context_release(new_context); | ||
96 | } | ||
264 | diff --git a/block/io.c b/block/io.c | 97 | diff --git a/block/io.c b/block/io.c |
265 | index XXXXXXX..XXXXXXX 100644 | 98 | index XXXXXXX..XXXXXXX 100644 |
266 | --- a/block/io.c | 99 | --- a/block/io.c |
267 | +++ b/block/io.c | 100 | +++ b/block/io.c |
268 | @@ -XXX,XX +XXX,XX @@ | 101 | @@ -XXX,XX +XXX,XX @@ |
269 | #include "qemu/osdep.h" | 102 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, |
270 | #include "trace.h" | 103 | int64_t offset, int bytes, BdrvRequestFlags flags); |
271 | #include "sysemu/block-backend.h" | 104 | |
272 | +#include "block/aio-wait.h" | 105 | -void bdrv_parent_drained_begin(BlockDriverState *bs) |
273 | #include "block/blockjob.h" | 106 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) |
274 | #include "block/blockjob_int.h" | 107 | { |
275 | #include "block/block_int.h" | 108 | BdrvChild *c, *next; |
276 | @@ -XXX,XX +XXX,XX @@ void bdrv_inc_in_flight(BlockDriverState *bs) | 109 | |
277 | atomic_inc(&bs->in_flight); | 110 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { |
278 | } | 111 | + if (c == ignore) { |
279 | 112 | + continue; | |
280 | -static void dummy_bh_cb(void *opaque) | 113 | + } |
281 | -{ | 114 | if (c->role->drained_begin) { |
282 | -} | 115 | c->role->drained_begin(c); |
283 | - | 116 | } |
284 | void bdrv_wakeup(BlockDriverState *bs) | 117 | } |
285 | { | 118 | } |
286 | - /* The barrier (or an atomic op) is in the caller. */ | 119 | |
287 | - if (atomic_read(&bs->wakeup)) { | 120 | -void bdrv_parent_drained_end(BlockDriverState *bs) |
288 | - aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); | 121 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) |
289 | - } | 122 | { |
290 | + aio_wait_kick(bdrv_get_aio_wait(bs)); | 123 | BdrvChild *c, *next; |
291 | } | 124 | |
292 | 125 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | |
293 | void bdrv_dec_in_flight(BlockDriverState *bs) | 126 | + if (c == ignore) { |
294 | diff --git a/util/aio-wait.c b/util/aio-wait.c | 127 | + continue; |
295 | new file mode 100644 | 128 | + } |
296 | index XXXXXXX..XXXXXXX | 129 | if (c->role->drained_end) { |
297 | --- /dev/null | 130 | c->role->drained_end(c); |
298 | +++ b/util/aio-wait.c | 131 | } |
299 | @@ -XXX,XX +XXX,XX @@ | 132 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
300 | +/* | 133 | BlockDriverState *bs; |
301 | + * AioContext wait support | 134 | bool done; |
302 | + * | 135 | bool begin; |
303 | + * Copyright (C) 2018 Red Hat, Inc. | 136 | + BdrvChild *parent; |
304 | + * | 137 | } BdrvCoDrainData; |
305 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | 138 | |
306 | + * of this software and associated documentation files (the "Software"), to deal | 139 | static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
307 | + * in the Software without restriction, including without limitation the rights | 140 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) |
308 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 141 | return waited; |
309 | + * copies of the Software, and to permit persons to whom the Software is | 142 | } |
310 | + * furnished to do so, subject to the following conditions: | 143 | |
311 | + * | 144 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); |
312 | + * The above copyright notice and this permission notice shall be included in | 145 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); |
313 | + * all copies or substantial portions of the Software. | ||
314 | + * | ||
315 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
316 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
317 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
318 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
319 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
320 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
321 | + * THE SOFTWARE. | ||
322 | + */ | ||
323 | + | 146 | + |
324 | +#include "qemu/osdep.h" | 147 | static void bdrv_co_drain_bh_cb(void *opaque) |
325 | +#include "qemu/main-loop.h" | 148 | { |
326 | +#include "block/aio-wait.h" | 149 | BdrvCoDrainData *data = opaque; |
327 | + | 150 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
328 | +static void dummy_bh_cb(void *opaque) | 151 | |
152 | bdrv_dec_in_flight(bs); | ||
153 | if (data->begin) { | ||
154 | - bdrv_drained_begin(bs); | ||
155 | + bdrv_do_drained_begin(bs, data->parent); | ||
156 | } else { | ||
157 | - bdrv_drained_end(bs); | ||
158 | + bdrv_do_drained_end(bs, data->parent); | ||
159 | } | ||
160 | |||
161 | data->done = true; | ||
162 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
163 | } | ||
164 | |||
165 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
166 | - bool begin) | ||
167 | + bool begin, BdrvChild *parent) | ||
168 | { | ||
169 | BdrvCoDrainData data; | ||
170 | |||
171 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
172 | .bs = bs, | ||
173 | .done = false, | ||
174 | .begin = begin, | ||
175 | + .parent = parent, | ||
176 | }; | ||
177 | bdrv_inc_in_flight(bs); | ||
178 | aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), | ||
179 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
180 | assert(data.done); | ||
181 | } | ||
182 | |||
183 | -void bdrv_drained_begin(BlockDriverState *bs) | ||
184 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | ||
185 | { | ||
186 | if (qemu_in_coroutine()) { | ||
187 | - bdrv_co_yield_to_drain(bs, true); | ||
188 | + bdrv_co_yield_to_drain(bs, true, parent); | ||
189 | return; | ||
190 | } | ||
191 | |||
192 | /* Stop things in parent-to-child order */ | ||
193 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { | ||
194 | aio_disable_external(bdrv_get_aio_context(bs)); | ||
195 | - bdrv_parent_drained_begin(bs); | ||
196 | } | ||
197 | |||
198 | + bdrv_parent_drained_begin(bs, parent); | ||
199 | bdrv_drain_invoke(bs, true, false); | ||
200 | bdrv_drain_recurse(bs); | ||
201 | } | ||
202 | |||
203 | -void bdrv_drained_end(BlockDriverState *bs) | ||
204 | +void bdrv_drained_begin(BlockDriverState *bs) | ||
329 | +{ | 205 | +{ |
330 | + /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */ | 206 | + bdrv_do_drained_begin(bs, NULL); |
331 | +} | 207 | +} |
332 | + | 208 | + |
333 | +void aio_wait_kick(AioWait *wait) | 209 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) |
210 | { | ||
211 | int old_quiesce_counter; | ||
212 | |||
213 | if (qemu_in_coroutine()) { | ||
214 | - bdrv_co_yield_to_drain(bs, false); | ||
215 | + bdrv_co_yield_to_drain(bs, false, parent); | ||
216 | return; | ||
217 | } | ||
218 | assert(bs->quiesce_counter > 0); | ||
219 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
220 | |||
221 | /* Re-enable things in child-to-parent order */ | ||
222 | bdrv_drain_invoke(bs, false, false); | ||
223 | + bdrv_parent_drained_end(bs, parent); | ||
224 | if (old_quiesce_counter == 1) { | ||
225 | - bdrv_parent_drained_end(bs); | ||
226 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
227 | } | ||
228 | } | ||
229 | |||
230 | +void bdrv_drained_end(BlockDriverState *bs) | ||
334 | +{ | 231 | +{ |
335 | + /* The barrier (or an atomic op) is in the caller. */ | 232 | + bdrv_do_drained_end(bs, NULL); |
336 | + if (atomic_read(&wait->need_kick)) { | ||
337 | + aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); | ||
338 | + } | ||
339 | +} | 233 | +} |
340 | diff --git a/util/Makefile.objs b/util/Makefile.objs | 234 | + |
341 | index XXXXXXX..XXXXXXX 100644 | 235 | /* |
342 | --- a/util/Makefile.objs | 236 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
343 | +++ b/util/Makefile.objs | 237 | * and suspend block driver's internal I/O until next request arrives. |
344 | @@ -XXX,XX +XXX,XX @@ | 238 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
345 | util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o | 239 | /* Stop things in parent-to-child order */ |
346 | util-obj-y += bufferiszero.o | 240 | aio_context_acquire(aio_context); |
347 | util-obj-y += lockcnt.o | 241 | aio_disable_external(aio_context); |
348 | -util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o | 242 | - bdrv_parent_drained_begin(bs); |
349 | +util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o | 243 | + bdrv_parent_drained_begin(bs, NULL); |
350 | util-obj-y += main-loop.o iohandler.o | 244 | bdrv_drain_invoke(bs, true, true); |
351 | util-obj-$(CONFIG_POSIX) += aio-posix.o | 245 | aio_context_release(aio_context); |
352 | util-obj-$(CONFIG_POSIX) += compatfd.o | 246 | |
247 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
248 | /* Re-enable things in child-to-parent order */ | ||
249 | aio_context_acquire(aio_context); | ||
250 | bdrv_drain_invoke(bs, false, true); | ||
251 | - bdrv_parent_drained_end(bs); | ||
252 | + bdrv_parent_drained_end(bs, NULL); | ||
253 | aio_enable_external(aio_context); | ||
254 | aio_context_release(aio_context); | ||
255 | } | ||
353 | -- | 256 | -- |
354 | 2.13.6 | 257 | 2.13.6 |
355 | 258 | ||
356 | 259 | diff view generated by jsdifflib |
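For readers skimming the AioWait patch in the left column, the calling convention documented in the new include/block/aio-wait.h comment condenses to the sketch below. MyWork and schedule_my_work_in_iothread() are placeholder names from that header comment, my_work_done() and wait_for_my_work() are invented here, and embedding the AioWait in the work structure is just one possible arrangement rather than a requirement of the API.

    typedef struct {
        AioWait wait;   /* object the main loop waits on                    */
        bool done;      /* written by the IOThread, polled by the main loop */
    } MyWork;

    /* IOThread side: flag completion, then wake the main loop if it waits. */
    static void my_work_done(MyWork *work)
    {
        work->done = true;
        aio_wait_kick(&work->wait);
    }

    /* Main loop side: start the operation, then wait for the kick. */
    static void wait_for_my_work(AioContext *ctx)
    {
        MyWork work = { .done = false };

        schedule_my_work_in_iothread(ctx, &work);   /* placeholder */
        AIO_WAIT_WHILE(&work.wait, ctx, !work.done);
    }

BDRV_POLL_WHILE() is now merely a wrapper that feeds bs->wait and bdrv_get_aio_context(bs) into this macro.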
1 | From: Eric Blake <eblake@redhat.com> | 1 | bdrv_drained_begin() waits for the completion of requests in the whole |
---|---|---|---|
2 | subtree, but it only actually keeps its immediate bs parameter quiesced | ||
3 | until bdrv_drained_end(). | ||
2 | 4 | ||
3 | We are gradually converting to byte-based interfaces, as they are | 5 | Add a version that keeps the whole subtree drained. As of this commit, |
4 | easier to reason about than sector-based. Convert all uses of | 6 | graph changes cannot be allowed during a subtree drained section, but |
5 | the allocmap (no semantic change). Callers that already had bytes | 7 | this will be fixed soon. |
6 | available are simpler, and callers that now scale to bytes will be | ||
7 | easier to switch to byte-based in the future. | ||
8 | 8 | ||
9 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
10 | Acked-by: Paolo Bonzini <pbonzini@redhat.com> | ||
11 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 10 | --- |
14 | block/iscsi.c | 90 +++++++++++++++++++++++++++++------------------------------ | 11 | include/block/block.h | 13 +++++++++++++ |
15 | 1 file changed, 44 insertions(+), 46 deletions(-) | 12 | block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++----------- |
13 | 2 files changed, 56 insertions(+), 11 deletions(-) | ||
16 | 14 | ||
17 | diff --git a/block/iscsi.c b/block/iscsi.c | 15 | diff --git a/include/block/block.h b/include/block/block.h |
18 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/iscsi.c | 17 | --- a/include/block/block.h |
20 | +++ b/block/iscsi.c | 18 | +++ b/include/block/block.h |
21 | @@ -XXX,XX +XXX,XX @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) | 19 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
20 | void bdrv_drained_begin(BlockDriverState *bs); | ||
21 | |||
22 | /** | ||
23 | + * Like bdrv_drained_begin, but recursively begins a quiesced section for | ||
24 | + * exclusive access to all child nodes as well. | ||
25 | + * | ||
26 | + * Graph changes are not allowed during a subtree drain section. | ||
27 | + */ | ||
28 | +void bdrv_subtree_drained_begin(BlockDriverState *bs); | ||
29 | + | ||
30 | +/** | ||
31 | * bdrv_drained_end: | ||
32 | * | ||
33 | * End a quiescent section started by bdrv_drained_begin(). | ||
34 | */ | ||
35 | void bdrv_drained_end(BlockDriverState *bs); | ||
36 | |||
37 | +/** | ||
38 | + * End a quiescent section started by bdrv_subtree_drained_begin(). | ||
39 | + */ | ||
40 | +void bdrv_subtree_drained_end(BlockDriverState *bs); | ||
41 | + | ||
42 | void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, | ||
43 | Error **errp); | ||
44 | void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); | ||
45 | diff --git a/block/io.c b/block/io.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/block/io.c | ||
48 | +++ b/block/io.c | ||
49 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
50 | BlockDriverState *bs; | ||
51 | bool done; | ||
52 | bool begin; | ||
53 | + bool recursive; | ||
54 | BdrvChild *parent; | ||
55 | } BdrvCoDrainData; | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) | ||
58 | return waited; | ||
22 | } | 59 | } |
23 | 60 | ||
24 | static void | 61 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); |
25 | -iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, | 62 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); |
26 | - int nb_sectors, bool allocated, bool valid) | 63 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
27 | +iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset, | 64 | + BdrvChild *parent); |
28 | + int64_t bytes, bool allocated, bool valid) | 65 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, |
66 | + BdrvChild *parent); | ||
67 | |||
68 | static void bdrv_co_drain_bh_cb(void *opaque) | ||
29 | { | 69 | { |
30 | int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk; | 70 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
31 | - int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS; | 71 | |
32 | 72 | bdrv_dec_in_flight(bs); | |
33 | if (iscsilun->allocmap == NULL) { | 73 | if (data->begin) { |
74 | - bdrv_do_drained_begin(bs, data->parent); | ||
75 | + bdrv_do_drained_begin(bs, data->recursive, data->parent); | ||
76 | } else { | ||
77 | - bdrv_do_drained_end(bs, data->parent); | ||
78 | + bdrv_do_drained_end(bs, data->recursive, data->parent); | ||
79 | } | ||
80 | |||
81 | data->done = true; | ||
82 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
83 | } | ||
84 | |||
85 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
86 | - bool begin, BdrvChild *parent) | ||
87 | + bool begin, bool recursive, | ||
88 | + BdrvChild *parent) | ||
89 | { | ||
90 | BdrvCoDrainData data; | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
93 | .bs = bs, | ||
94 | .done = false, | ||
95 | .begin = begin, | ||
96 | + .recursive = recursive, | ||
97 | .parent = parent, | ||
98 | }; | ||
99 | bdrv_inc_in_flight(bs); | ||
100 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
101 | assert(data.done); | ||
102 | } | ||
103 | |||
104 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | ||
105 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
106 | + BdrvChild *parent) | ||
107 | { | ||
108 | + BdrvChild *child, *next; | ||
109 | + | ||
110 | if (qemu_in_coroutine()) { | ||
111 | - bdrv_co_yield_to_drain(bs, true, parent); | ||
112 | + bdrv_co_yield_to_drain(bs, true, recursive, parent); | ||
34 | return; | 113 | return; |
35 | } | 114 | } |
36 | /* expand to entirely contain all affected clusters */ | 115 | |
37 | - assert(cluster_sectors); | 116 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) |
38 | - cl_num_expanded = sector_num / cluster_sectors; | 117 | bdrv_parent_drained_begin(bs, parent); |
39 | - nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors, | 118 | bdrv_drain_invoke(bs, true, false); |
40 | - cluster_sectors) - cl_num_expanded; | 119 | bdrv_drain_recurse(bs); |
41 | + assert(iscsilun->cluster_size); | 120 | + |
42 | + cl_num_expanded = offset / iscsilun->cluster_size; | 121 | + if (recursive) { |
43 | + nb_cls_expanded = DIV_ROUND_UP(offset + bytes, | 122 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { |
44 | + iscsilun->cluster_size) - cl_num_expanded; | 123 | + bdrv_do_drained_begin(child->bs, true, child); |
45 | /* shrink to touch only completely contained clusters */ | 124 | + } |
46 | - cl_num_shrunk = DIV_ROUND_UP(sector_num, cluster_sectors); | 125 | + } |
47 | - nb_cls_shrunk = (sector_num + nb_sectors) / cluster_sectors | ||
48 | - - cl_num_shrunk; | ||
49 | + cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size); | ||
50 | + nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk; | ||
51 | if (allocated) { | ||
52 | bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded); | ||
53 | } else { | ||
54 | @@ -XXX,XX +XXX,XX @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, | ||
55 | } | 126 | } |
56 | 127 | ||
57 | static void | 128 | void bdrv_drained_begin(BlockDriverState *bs) |
58 | -iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num, | ||
59 | - int nb_sectors) | ||
60 | +iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset, | ||
61 | + int64_t bytes) | ||
62 | { | 129 | { |
63 | - iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true); | 130 | - bdrv_do_drained_begin(bs, NULL); |
64 | + iscsi_allocmap_update(iscsilun, offset, bytes, true, true); | 131 | + bdrv_do_drained_begin(bs, false, NULL); |
132 | +} | ||
133 | + | ||
134 | +void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
135 | +{ | ||
136 | + bdrv_do_drained_begin(bs, true, NULL); | ||
65 | } | 137 | } |
66 | 138 | ||
67 | static void | 139 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) |
68 | -iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num, | 140 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, |
69 | - int nb_sectors) | 141 | + BdrvChild *parent) |
70 | +iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset, | ||
71 | + int64_t bytes) | ||
72 | { | 142 | { |
73 | /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update | 143 | + BdrvChild *child, *next; |
74 | * is ignored, so this will in effect be an iscsi_allocmap_set_invalid. | 144 | int old_quiesce_counter; |
75 | */ | 145 | |
76 | - iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true); | 146 | if (qemu_in_coroutine()) { |
77 | + iscsi_allocmap_update(iscsilun, offset, bytes, false, true); | 147 | - bdrv_co_yield_to_drain(bs, false, parent); |
148 | + bdrv_co_yield_to_drain(bs, false, recursive, parent); | ||
149 | return; | ||
150 | } | ||
151 | assert(bs->quiesce_counter > 0); | ||
152 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
153 | if (old_quiesce_counter == 1) { | ||
154 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
155 | } | ||
156 | + | ||
157 | + if (recursive) { | ||
158 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
159 | + bdrv_do_drained_end(child->bs, true, child); | ||
160 | + } | ||
161 | + } | ||
78 | } | 162 | } |
79 | 163 | ||
80 | -static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num, | 164 | void bdrv_drained_end(BlockDriverState *bs) |
81 | - int nb_sectors) | ||
82 | +static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset, | ||
83 | + int64_t bytes) | ||
84 | { | 165 | { |
85 | - iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false); | 166 | - bdrv_do_drained_end(bs, NULL); |
86 | + iscsi_allocmap_update(iscsilun, offset, bytes, false, false); | 167 | + bdrv_do_drained_end(bs, false, NULL); |
168 | +} | ||
169 | + | ||
170 | +void bdrv_subtree_drained_end(BlockDriverState *bs) | ||
171 | +{ | ||
172 | + bdrv_do_drained_end(bs, true, NULL); | ||
87 | } | 173 | } |
88 | 174 | ||
89 | static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) | 175 | /* |
90 | @@ -XXX,XX +XXX,XX @@ static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) | ||
91 | } | ||
92 | |||
93 | static inline bool | ||
94 | -iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num, | ||
95 | - int nb_sectors) | ||
96 | +iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset, | ||
97 | + int64_t bytes) | ||
98 | { | ||
99 | unsigned long size; | ||
100 | if (iscsilun->allocmap == NULL) { | ||
101 | return true; | ||
102 | } | ||
103 | assert(iscsilun->cluster_size); | ||
104 | - size = DIV_ROUND_UP(sector_num + nb_sectors, | ||
105 | - iscsilun->cluster_size >> BDRV_SECTOR_BITS); | ||
106 | + size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); | ||
107 | return !(find_next_bit(iscsilun->allocmap, size, | ||
108 | - sector_num * BDRV_SECTOR_SIZE / | ||
109 | - iscsilun->cluster_size) == size); | ||
110 | + offset / iscsilun->cluster_size) == size); | ||
111 | } | ||
112 | |||
113 | static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, | ||
114 | - int64_t sector_num, int nb_sectors) | ||
115 | + int64_t offset, int64_t bytes) | ||
116 | { | ||
117 | unsigned long size; | ||
118 | if (iscsilun->allocmap_valid == NULL) { | ||
119 | return false; | ||
120 | } | ||
121 | assert(iscsilun->cluster_size); | ||
122 | - size = DIV_ROUND_UP(sector_num + nb_sectors, | ||
123 | - iscsilun->cluster_size >> BDRV_SECTOR_BITS); | ||
124 | + size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); | ||
125 | return (find_next_zero_bit(iscsilun->allocmap_valid, size, | ||
126 | - sector_num * BDRV_SECTOR_SIZE / | ||
127 | - iscsilun->cluster_size) == size); | ||
128 | + offset / iscsilun->cluster_size) == size); | ||
129 | } | ||
130 | |||
131 | static int coroutine_fn | ||
132 | @@ -XXX,XX +XXX,XX @@ retry: | ||
133 | } | ||
134 | |||
135 | if (iTask.status != SCSI_STATUS_GOOD) { | ||
136 | - iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors); | ||
137 | + iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
138 | + nb_sectors * BDRV_SECTOR_SIZE); | ||
139 | error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba, | ||
140 | iTask.err_str); | ||
141 | r = iTask.err_code; | ||
142 | goto out_unlock; | ||
143 | } | ||
144 | |||
145 | - iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors); | ||
146 | + iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
147 | + nb_sectors * BDRV_SECTOR_SIZE); | ||
148 | |||
149 | out_unlock: | ||
150 | qemu_mutex_unlock(&iscsilun->mutex); | ||
151 | @@ -XXX,XX +XXX,XX @@ retry: | ||
152 | } | ||
153 | |||
154 | if (ret & BDRV_BLOCK_ZERO) { | ||
155 | - iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum); | ||
156 | + iscsi_allocmap_set_unallocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
157 | + *pnum * BDRV_SECTOR_SIZE); | ||
158 | } else { | ||
159 | - iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum); | ||
160 | + iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
161 | + *pnum * BDRV_SECTOR_SIZE); | ||
162 | } | ||
163 | |||
164 | if (*pnum > nb_sectors) { | ||
165 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, | ||
166 | /* if cache.direct is off and we have a valid entry in our allocation map | ||
167 | * we can skip checking the block status and directly return zeroes if | ||
168 | * the request falls within an unallocated area */ | ||
169 | - if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) && | ||
170 | - !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) { | ||
171 | + if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
172 | + nb_sectors * BDRV_SECTOR_SIZE) && | ||
173 | + !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
174 | + nb_sectors * BDRV_SECTOR_SIZE)) { | ||
175 | qemu_iovec_memset(iov, 0, 0x00, iov->size); | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | if (nb_sectors >= ISCSI_CHECKALLOC_THRES && | ||
180 | - !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) && | ||
181 | - !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) { | ||
182 | + !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
183 | + nb_sectors * BDRV_SECTOR_SIZE) && | ||
184 | + !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, | ||
185 | + nb_sectors * BDRV_SECTOR_SIZE)) { | ||
186 | int pnum; | ||
187 | BlockDriverState *file; | ||
188 | /* check the block status from the beginning of the cluster | ||
189 | @@ -XXX,XX +XXX,XX @@ retry: | ||
190 | goto retry; | ||
191 | } | ||
192 | |||
193 | - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, | ||
194 | - bytes >> BDRV_SECTOR_BITS); | ||
195 | + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); | ||
196 | |||
197 | if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { | ||
198 | /* the target might fail with a check condition if it | ||
199 | @@ -XXX,XX +XXX,XX @@ retry: | ||
200 | } | ||
201 | |||
202 | if (iTask.status != SCSI_STATUS_GOOD) { | ||
203 | - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, | ||
204 | - bytes >> BDRV_SECTOR_BITS); | ||
205 | + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); | ||
206 | error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s", | ||
207 | lba, iTask.err_str); | ||
208 | r = iTask.err_code; | ||
209 | @@ -XXX,XX +XXX,XX @@ retry: | ||
210 | } | ||
211 | |||
212 | if (flags & BDRV_REQ_MAY_UNMAP) { | ||
213 | - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, | ||
214 | - bytes >> BDRV_SECTOR_BITS); | ||
215 | + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); | ||
216 | } else { | ||
217 | - iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS, | ||
218 | - bytes >> BDRV_SECTOR_BITS); | ||
219 | + iscsi_allocmap_set_allocated(iscsilun, offset, bytes); | ||
220 | } | ||
221 | |||
222 | out_unlock: | ||
223 | -- | 176 | -- |
224 | 2.13.6 | 177 | 2.13.6 |
225 | 178 | ||
226 | 179 | diff view generated by jsdifflib |
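The allocmap conversion in the left column rounds a byte range to cluster boundaries in two directions: outwards ("expand to entirely contain all affected clusters"), which the visible hunk uses when setting bits to mark clusters allocated, and inwards ("shrink to touch only completely contained clusters"), which bounds what may safely be cleared. A self-contained restatement of that arithmetic follows; the helper names are invented for illustration, and the patch open-codes the same expressions against iscsilun->cluster_size.

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Smallest cluster range that fully covers [offset, offset + bytes). */
    static void clusters_covering(int64_t offset, int64_t bytes,
                                  int64_t cluster_size,
                                  int64_t *first, int64_t *count)
    {
        *first = offset / cluster_size;
        *count = DIV_ROUND_UP(offset + bytes, cluster_size) - *first;
    }

    /* Largest cluster range fully contained in [offset, offset + bytes).
     * *count can come out as zero or negative when the range spans no
     * complete cluster, so callers must check it before use. */
    static void clusters_contained(int64_t offset, int64_t bytes,
                                   int64_t cluster_size,
                                   int64_t *first, int64_t *count)
    {
        *first = DIV_ROUND_UP(offset, cluster_size);
        *count = (offset + bytes) / cluster_size - *first;
    }

Call sites that still think in sectors simply scale with BDRV_SECTOR_SIZE at the boundary, as the read and write paths in the hunk do.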
1 | From: Eric Blake <eblake@redhat.com> | 1 | Add a subtree drain version to the existing test cases. |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Update the null driver accordingly. | ||
5 | |||
6 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 4 | --- |
11 | block/null.c | 23 ++++++++++++----------- | 5 | tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++- |
12 | 1 file changed, 12 insertions(+), 11 deletions(-) | 6 | 1 file changed, 26 insertions(+), 1 deletion(-) |
13 | 7 | ||
14 | diff --git a/block/null.c b/block/null.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
15 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/null.c | 10 | --- a/tests/test-bdrv-drain.c |
17 | +++ b/block/null.c | 11 | +++ b/tests/test-bdrv-drain.c |
18 | @@ -XXX,XX +XXX,XX @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, | 12 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
19 | return 0; | 13 | enum drain_type { |
14 | BDRV_DRAIN_ALL, | ||
15 | BDRV_DRAIN, | ||
16 | + BDRV_SUBTREE_DRAIN, | ||
17 | DRAIN_TYPE_MAX, | ||
18 | }; | ||
19 | |||
20 | @@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) | ||
21 | switch (drain_type) { | ||
22 | case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; | ||
23 | case BDRV_DRAIN: bdrv_drained_begin(bs); break; | ||
24 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; | ||
25 | default: g_assert_not_reached(); | ||
26 | } | ||
20 | } | 27 | } |
21 | 28 | @@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) | |
22 | -static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs, | 29 | switch (drain_type) { |
23 | - int64_t sector_num, | 30 | case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; |
24 | - int nb_sectors, int *pnum, | 31 | case BDRV_DRAIN: bdrv_drained_end(bs); break; |
25 | - BlockDriverState **file) | 32 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; |
26 | +static int coroutine_fn null_co_block_status(BlockDriverState *bs, | 33 | default: g_assert_not_reached(); |
27 | + bool want_zero, int64_t offset, | 34 | } |
28 | + int64_t bytes, int64_t *pnum, | 35 | } |
29 | + int64_t *map, | 36 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) |
30 | + BlockDriverState **file) | 37 | test_drv_cb_common(BDRV_DRAIN, false); |
38 | } | ||
39 | |||
40 | +static void test_drv_cb_drain_subtree(void) | ||
41 | +{ | ||
42 | + test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); | ||
43 | +} | ||
44 | + | ||
45 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) | ||
31 | { | 46 | { |
32 | BDRVNullState *s = bs->opaque; | 47 | BlockBackend *blk; |
33 | - off_t start = sector_num * BDRV_SECTOR_SIZE; | 48 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) |
34 | + int ret = BDRV_BLOCK_OFFSET_VALID; | 49 | test_quiesce_common(BDRV_DRAIN, false); |
35 | |||
36 | - *pnum = nb_sectors; | ||
37 | + *pnum = bytes; | ||
38 | + *map = offset; | ||
39 | *file = bs; | ||
40 | |||
41 | if (s->read_zeroes) { | ||
42 | - return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO; | ||
43 | - } else { | ||
44 | - return BDRV_BLOCK_OFFSET_VALID | start; | ||
45 | + ret |= BDRV_BLOCK_ZERO; | ||
46 | } | ||
47 | + return ret; | ||
48 | } | 50 | } |
49 | 51 | ||
50 | static void null_refresh_filename(BlockDriverState *bs, QDict *opts) | 52 | +static void test_quiesce_drain_subtree(void) |
51 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_null_co = { | 53 | +{ |
52 | .bdrv_co_flush_to_disk = null_co_flush, | 54 | + test_quiesce_common(BDRV_SUBTREE_DRAIN, true); |
53 | .bdrv_reopen_prepare = null_reopen_prepare, | 55 | +} |
54 | 56 | + | |
55 | - .bdrv_co_get_block_status = null_co_get_block_status, | 57 | static void test_nested(void) |
56 | + .bdrv_co_block_status = null_co_block_status, | 58 | { |
57 | 59 | BlockBackend *blk; | |
58 | .bdrv_refresh_filename = null_refresh_filename, | 60 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
59 | }; | 61 | /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ |
60 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_null_aio = { | 62 | int bs_quiesce = (outer != BDRV_DRAIN_ALL) + |
61 | .bdrv_aio_flush = null_aio_flush, | 63 | (inner != BDRV_DRAIN_ALL); |
62 | .bdrv_reopen_prepare = null_reopen_prepare, | 64 | - int backing_quiesce = 0; |
63 | 65 | + int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + | |
64 | - .bdrv_co_get_block_status = null_co_get_block_status, | 66 | + (inner == BDRV_SUBTREE_DRAIN); |
65 | + .bdrv_co_block_status = null_co_block_status, | 67 | int backing_cb_cnt = (outer != BDRV_DRAIN) + |
66 | 68 | (inner != BDRV_DRAIN); | |
67 | .bdrv_refresh_filename = null_refresh_filename, | 69 | |
68 | }; | 70 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void) |
71 | test_blockjob_common(BDRV_DRAIN); | ||
72 | } | ||
73 | |||
74 | +static void test_blockjob_drain_subtree(void) | ||
75 | +{ | ||
76 | + test_blockjob_common(BDRV_SUBTREE_DRAIN); | ||
77 | +} | ||
78 | + | ||
79 | int main(int argc, char **argv) | ||
80 | { | ||
81 | bdrv_init(); | ||
82 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
83 | |||
84 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | ||
85 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); | ||
86 | + g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", | ||
87 | + test_drv_cb_drain_subtree); | ||
88 | |||
89 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
90 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
91 | + g_test_add_func("/bdrv-drain/quiesce/drain_subtree", | ||
92 | + test_quiesce_drain_subtree); | ||
93 | |||
94 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
95 | |||
96 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
97 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
98 | + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", | ||
99 | + test_blockjob_drain_subtree); | ||
100 | |||
101 | return g_test_run(); | ||
102 | } | ||
69 | -- | 103 | -- |
70 | 2.13.6 | 104 | 2.13.6 |
71 | 105 | ||
72 | 106 | diff view generated by jsdifflib |
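The null-driver change in the left column is representative of the whole .bdrv_co_block_status() conversion series: the input range and *pnum become byte-based, the host offset moves out of the return value into the new *map output, and a want_zero flag tells drivers whether spending effort on zero detection is worthwhile (null leaves it unused, and the qcow patch states the same intent). A schematic before/after with the driver-specific logic stripped out; the demo_* names are hypothetical and this is a sketch, not a drop-in driver.

    /* Old, sector-based callback shape (being removed by this series): */
    static int64_t coroutine_fn demo_co_get_block_status(BlockDriverState *bs,
                                                         int64_t sector_num,
                                                         int nb_sectors, int *pnum,
                                                         BlockDriverState **file)
    {
        *pnum = nb_sectors;
        *file = bs;
        return BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_OFFSET_VALID |
               (sector_num << BDRV_SECTOR_BITS);
    }

    /* New, byte-based callback shape (being added by this series): */
    static int coroutine_fn demo_co_block_status(BlockDriverState *bs,
                                                 bool want_zero, int64_t offset,
                                                 int64_t bytes, int64_t *pnum,
                                                 int64_t *map,
                                                 BlockDriverState **file)
    {
        *pnum = bytes;
        *map = offset;
        *file = bs;
        return BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_OFFSET_VALID;
    }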
1 | From: Eric Blake <eblake@redhat.com> | 1 | If bdrv_do_drained_begin/end() are called in coroutine context, they |
---|---|---|---|
2 | first use a BH to get out of the coroutine context. Call some existing | ||
3 | tests again from a coroutine to cover this code path. | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Update the raw driver accordingly. | ||
5 | |||
6 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 6 | --- |
11 | block/raw-format.c | 16 ++++++++-------- | 7 | tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ |
12 | 1 file changed, 8 insertions(+), 8 deletions(-) | 8 | 1 file changed, 59 insertions(+) |
13 | 9 | ||
14 | diff --git a/block/raw-format.c b/block/raw-format.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
15 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/raw-format.c | 12 | --- a/tests/test-bdrv-drain.c |
17 | +++ b/block/raw-format.c | 13 | +++ b/tests/test-bdrv-drain.c |
18 | @@ -XXX,XX +XXX,XX @@ fail: | 14 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
19 | return ret; | 15 | *aio_ret = ret; |
20 | } | 16 | } |
21 | 17 | ||
22 | -static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, | 18 | +typedef struct CallInCoroutineData { |
23 | - int64_t sector_num, | 19 | + void (*entry)(void); |
24 | - int nb_sectors, int *pnum, | 20 | + bool done; |
25 | +static int coroutine_fn raw_co_block_status(BlockDriverState *bs, | 21 | +} CallInCoroutineData; |
26 | + bool want_zero, int64_t offset, | 22 | + |
27 | + int64_t bytes, int64_t *pnum, | 23 | +static coroutine_fn void call_in_coroutine_entry(void *opaque) |
28 | + int64_t *map, | 24 | +{ |
29 | BlockDriverState **file) | 25 | + CallInCoroutineData *data = opaque; |
26 | + | ||
27 | + data->entry(); | ||
28 | + data->done = true; | ||
29 | +} | ||
30 | + | ||
31 | +static void call_in_coroutine(void (*entry)(void)) | ||
32 | +{ | ||
33 | + Coroutine *co; | ||
34 | + CallInCoroutineData data = { | ||
35 | + .entry = entry, | ||
36 | + .done = false, | ||
37 | + }; | ||
38 | + | ||
39 | + co = qemu_coroutine_create(call_in_coroutine_entry, &data); | ||
40 | + qemu_coroutine_enter(co); | ||
41 | + while (!data.done) { | ||
42 | + aio_poll(qemu_get_aio_context(), true); | ||
43 | + } | ||
44 | +} | ||
45 | + | ||
46 | enum drain_type { | ||
47 | BDRV_DRAIN_ALL, | ||
48 | BDRV_DRAIN, | ||
49 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void) | ||
50 | test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); | ||
51 | } | ||
52 | |||
53 | +static void test_drv_cb_co_drain(void) | ||
54 | +{ | ||
55 | + call_in_coroutine(test_drv_cb_drain); | ||
56 | +} | ||
57 | + | ||
58 | +static void test_drv_cb_co_drain_subtree(void) | ||
59 | +{ | ||
60 | + call_in_coroutine(test_drv_cb_drain_subtree); | ||
61 | +} | ||
62 | + | ||
63 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) | ||
30 | { | 64 | { |
31 | BDRVRawState *s = bs->opaque; | 65 | BlockBackend *blk; |
32 | - *pnum = nb_sectors; | 66 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void) |
33 | + *pnum = bytes; | 67 | test_quiesce_common(BDRV_SUBTREE_DRAIN, true); |
34 | *file = bs->file->bs; | ||
35 | - sector_num += s->offset / BDRV_SECTOR_SIZE; | ||
36 | - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | | ||
37 | - (sector_num << BDRV_SECTOR_BITS); | ||
38 | + *map = offset + s->offset; | ||
39 | + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; | ||
40 | } | 68 | } |
41 | 69 | ||
42 | static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, | 70 | +static void test_quiesce_co_drain(void) |
43 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = { | 71 | +{ |
44 | .bdrv_co_pwritev = &raw_co_pwritev, | 72 | + call_in_coroutine(test_quiesce_drain); |
45 | .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, | 73 | +} |
46 | .bdrv_co_pdiscard = &raw_co_pdiscard, | 74 | + |
47 | - .bdrv_co_get_block_status = &raw_co_get_block_status, | 75 | +static void test_quiesce_co_drain_subtree(void) |
48 | + .bdrv_co_block_status = &raw_co_block_status, | 76 | +{ |
49 | .bdrv_truncate = &raw_truncate, | 77 | + call_in_coroutine(test_quiesce_drain_subtree); |
50 | .bdrv_getlength = &raw_getlength, | 78 | +} |
51 | .has_variable_length = true, | 79 | + |
80 | static void test_nested(void) | ||
81 | { | ||
82 | BlockBackend *blk; | ||
83 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
84 | g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", | ||
85 | test_drv_cb_drain_subtree); | ||
86 | |||
87 | + // XXX bdrv_drain_all() doesn't work in coroutine context | ||
88 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); | ||
89 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", | ||
90 | + test_drv_cb_co_drain_subtree); | ||
91 | + | ||
92 | + | ||
93 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
94 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
95 | g_test_add_func("/bdrv-drain/quiesce/drain_subtree", | ||
96 | test_quiesce_drain_subtree); | ||
97 | |||
98 | + // XXX bdrv_drain_all() doesn't work in coroutine context | ||
99 | + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); | ||
100 | + g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", | ||
101 | + test_quiesce_co_drain_subtree); | ||
102 | + | ||
103 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
104 | |||
105 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
52 | -- | 106 | -- |
53 | 2.13.6 | 107 | 2.13.6 |
54 | 108 | ||
55 | 109 | diff view generated by jsdifflib |
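The call_in_coroutine() helper added by the test patch in the right column captures a reusable idiom: synchronous code can run a coroutine_fn to completion by creating and entering the coroutine, then polling the main AioContext until the coroutine sets a completion flag. Stripped of the test specifics (the names below are invented for illustration), the pattern looks roughly like this:

    typedef struct {
        void (*fn)(void);
        bool done;
    } RunData;

    static coroutine_fn void run_entry(void *opaque)
    {
        RunData *d = opaque;

        d->fn();            /* runs in coroutine context */
        d->done = true;
    }

    static void run_in_coroutine(void (*fn)(void))
    {
        RunData d = { .fn = fn, .done = false };
        Coroutine *co = qemu_coroutine_create(run_entry, &d);

        qemu_coroutine_enter(co);
        while (!d.done) {
            aio_poll(qemu_get_aio_context(), true);   /* drive BHs and I/O */
        }
    }

This is exactly the path that exercises the bottom-half detour described in the commit message: a drain requested from coroutine context is bounced out via a BH, and the coroutine resumes once the drain has been carried out.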
1 | From: Eric Blake <eblake@redhat.com> | 1 | Test that drain sections are correctly propagated through the graph. |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Update the qcow driver accordingly. There is no | ||
5 | intent to optimize based on the want_zero flag for this format. | ||
6 | |||
7 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
9 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 4 | --- |
12 | block/qcow.c | 27 ++++++++++++++++----------- | 5 | tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ |
13 | 1 file changed, 16 insertions(+), 11 deletions(-) | 6 | 1 file changed, 74 insertions(+) |
14 | 7 | ||
15 | diff --git a/block/qcow.c b/block/qcow.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
16 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/qcow.c | 10 | --- a/tests/test-bdrv-drain.c |
18 | +++ b/block/qcow.c | 11 | +++ b/tests/test-bdrv-drain.c |
19 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs, | 12 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
20 | return 1; | 13 | blk_unref(blk); |
21 | } | 14 | } |
22 | 15 | ||
23 | -static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, | 16 | +static void test_multiparent(void) |
24 | - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) | 17 | +{ |
25 | +static int coroutine_fn qcow_co_block_status(BlockDriverState *bs, | 18 | + BlockBackend *blk_a, *blk_b; |
26 | + bool want_zero, | 19 | + BlockDriverState *bs_a, *bs_b, *backing; |
27 | + int64_t offset, int64_t bytes, | 20 | + BDRVTestState *a_s, *b_s, *backing_s; |
28 | + int64_t *pnum, int64_t *map, | 21 | + |
29 | + BlockDriverState **file) | 22 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
30 | { | 23 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, |
31 | BDRVQcowState *s = bs->opaque; | 24 | + &error_abort); |
32 | - int index_in_cluster, n, ret; | 25 | + a_s = bs_a->opaque; |
33 | + int index_in_cluster, ret; | 26 | + blk_insert_bs(blk_a, bs_a, &error_abort); |
34 | + int64_t n; | 27 | + |
35 | uint64_t cluster_offset; | 28 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
36 | 29 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, | |
37 | qemu_co_mutex_lock(&s->lock); | 30 | + &error_abort); |
38 | - ret = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, &cluster_offset); | 31 | + b_s = bs_b->opaque; |
39 | + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); | 32 | + blk_insert_bs(blk_b, bs_b, &error_abort); |
40 | qemu_co_mutex_unlock(&s->lock); | 33 | + |
41 | if (ret < 0) { | 34 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
42 | return ret; | 35 | + backing_s = backing->opaque; |
43 | } | 36 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); |
44 | - index_in_cluster = sector_num & (s->cluster_sectors - 1); | 37 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); |
45 | - n = s->cluster_sectors - index_in_cluster; | 38 | + |
46 | - if (n > nb_sectors) | 39 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); |
47 | - n = nb_sectors; | 40 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); |
48 | + index_in_cluster = offset & (s->cluster_size - 1); | 41 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); |
49 | + n = s->cluster_size - index_in_cluster; | 42 | + g_assert_cmpint(a_s->drain_count, ==, 0); |
50 | + if (n > bytes) { | 43 | + g_assert_cmpint(b_s->drain_count, ==, 0); |
51 | + n = bytes; | 44 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
52 | + } | 45 | + |
53 | *pnum = n; | 46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
54 | if (!cluster_offset) { | 47 | + |
55 | return 0; | 48 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); |
56 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, | 49 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); |
57 | if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) { | 50 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); |
58 | return BDRV_BLOCK_DATA; | 51 | + g_assert_cmpint(a_s->drain_count, ==, 1); |
59 | } | 52 | + g_assert_cmpint(b_s->drain_count, ==, 1); |
60 | - cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); | 53 | + g_assert_cmpint(backing_s->drain_count, ==, 1); |
61 | + *map = cluster_offset | index_in_cluster; | 54 | + |
62 | *file = bs->file->bs; | 55 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); |
63 | - return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset; | 56 | + |
64 | + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | 57 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 2); |
65 | } | 58 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); |
66 | 59 | + g_assert_cmpint(backing->quiesce_counter, ==, 2); | |
67 | static int decompress_buffer(uint8_t *out_buf, int out_buf_size, | 60 | + g_assert_cmpint(a_s->drain_count, ==, 2); |
68 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qcow = { | 61 | + g_assert_cmpint(b_s->drain_count, ==, 2); |
69 | 62 | + g_assert_cmpint(backing_s->drain_count, ==, 2); | |
70 | .bdrv_co_readv = qcow_co_readv, | 63 | + |
71 | .bdrv_co_writev = qcow_co_writev, | 64 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); |
72 | - .bdrv_co_get_block_status = qcow_co_get_block_status, | 65 | + |
73 | + .bdrv_co_block_status = qcow_co_block_status, | 66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); |
74 | 67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | |
75 | .bdrv_make_empty = qcow_make_empty, | 68 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); |
76 | .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed, | 69 | + g_assert_cmpint(a_s->drain_count, ==, 1); |
70 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
74 | + | ||
75 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
76 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
77 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
78 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
79 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
80 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
81 | + | ||
82 | + bdrv_unref(backing); | ||
83 | + bdrv_unref(bs_a); | ||
84 | + bdrv_unref(bs_b); | ||
85 | + blk_unref(blk_a); | ||
86 | + blk_unref(blk_b); | ||
87 | +} | ||
88 | + | ||
89 | |||
90 | typedef struct TestBlockJob { | ||
91 | BlockJob common; | ||
92 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
93 | test_quiesce_co_drain_subtree); | ||
94 | |||
95 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
96 | + g_test_add_func("/bdrv-drain/multiparent", test_multiparent); | ||
97 | |||
98 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
99 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
77 | -- | 100 | -- |
78 | 2.13.6 | 101 | 2.13.6 |
79 | 102 | ||
80 | 103 | diff view generated by jsdifflib |
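The core of the qcow conversion above is that the clamping arithmetic now works in bytes instead of 512-byte sectors: a request is cut off at the end of the cluster that contains its starting offset. A standalone sketch of just that arithmetic (the function name and the numbers are made up for illustration; this is not code from the driver):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Clamp a byte-based request to the end of the cluster containing
     * 'offset'. 'cluster_size' must be a power of two, as qcow enforces. */
    static int64_t bytes_to_cluster_end(int64_t offset, int64_t bytes,
                                        int64_t cluster_size)
    {
        int64_t index_in_cluster = offset & (cluster_size - 1);
        int64_t n = cluster_size - index_in_cluster;

        return n > bytes ? bytes : n;
    }

    int main(void)
    {
        /* 64 KiB clusters: a 200000-byte request starting 4 KiB into a
         * cluster only has 61440 bytes left in that cluster. */
        assert(bytes_to_cluster_end(4096, 200000, 65536) == 61440);
        /* A request that already fits in the cluster is unchanged. */
        assert(bytes_to_cluster_end(0, 512, 65536) == 512);
        printf("ok\n");
        return 0;
    }

The old code did the same thing with sector_num and cluster_sectors; the patch only changes the unit, plus reporting the host offset through the new *map parameter instead of OR-ing it into the return value.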
1 | From: Eric Blake <eblake@redhat.com> | 1 | We need to remember how many of the drain sections a node is in |
---|---|---|---|
2 | 2 | were recursive (i.e. subtree drain rather than node drain), so that they | |
3 | We are gradually moving away from sector-based interfaces, towards | 3 | can be correctly applied when children are added or removed during the |
4 | byte-based. Now that the block layer exposes byte-based allocation, | 4 | drained section. |
5 | it's time to tackle the drivers. Add a new callback that operates | 5 | |
6 | on boundaries as small as a byte. Subsequent patches will then update | 6 | With this change, it is safe to modify the graph even inside a |
7 | individual drivers, then finally remove .bdrv_co_get_block_status(). | 7 | bdrv_subtree_drained_begin/end() section. |
8 | 8 | ||
9 | The new code also passes through the 'want_zero' hint, which will | ||
10 | allow subsequent patches to further optimize callers that only care | ||
11 | about how much of the image is allocated (want_zero is false), | ||
12 | rather than full details about runs of zeroes and which offsets the | ||
13 | allocation actually maps to (want_zero is true). As part of this | ||
14 | effort, fix another part of the documentation: the claim in commit | ||
15 | 4c41cb4 that BDRV_BLOCK_ALLOCATED is short for 'DATA || ZERO' is a | ||
16 | lie at the block layer (see commit e88ae2264), even though it is | ||
17 | how the bit is computed from the driver layer. After all, there | ||
18 | are intentional cases where we return ZERO but not ALLOCATED at | ||
19 | the block layer, when we know that a read sees zero because the | ||
20 | backing file is too short. Note that the driver interface is thus | ||
21 | slightly different than the public interface with regards to which | ||
22 | bits will be set, and what guarantees are provided on input. | ||
23 | |||
24 | We also add an assertion that any driver using the new callback will | ||
25 | make progress (the only time pnum will be 0 is if the block layer | ||
26 | already handled an out-of-bounds request, or if there is an error); | ||
27 | the old driver interface did not provide this guarantee, which | ||
28 | could lead to infinite loops in drastic corner-case failures. | ||
29 | |||
30 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
31 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
32 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
33 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
34 | --- | 10 | --- |
35 | include/block/block.h | 14 +++++++------- | 11 | include/block/block.h | 2 -- |
36 | include/block/block_int.h | 20 +++++++++++++++----- | 12 | include/block/block_int.h | 5 +++++ |
37 | block/io.c | 28 +++++++++++++++++++--------- | 13 | block.c | 32 +++++++++++++++++++++++++++++--- |
38 | 3 files changed, 41 insertions(+), 21 deletions(-) | 14 | block/io.c | 28 ++++++++++++++++++++++++---- |
15 | 4 files changed, 58 insertions(+), 9 deletions(-) | ||
39 | 16 | ||
40 | diff --git a/include/block/block.h b/include/block/block.h | 17 | diff --git a/include/block/block.h b/include/block/block.h |
41 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/include/block/block.h | 19 | --- a/include/block/block.h |
43 | +++ b/include/block/block.h | 20 | +++ b/include/block/block.h |
44 | @@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry { | 21 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs); |
45 | * BDRV_BLOCK_ZERO: offset reads as zero | 22 | /** |
46 | * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data | 23 | * Like bdrv_drained_begin, but recursively begins a quiesced section for |
47 | * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this | 24 | * exclusive access to all child nodes as well. |
48 | - * layer (short for DATA || ZERO), set by block layer | 25 | - * |
49 | - * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer | 26 | - * Graph changes are not allowed during a subtree drain section. |
50 | + * layer rather than any backing, set by block layer | 27 | */ |
51 | + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this | 28 | void bdrv_subtree_drained_begin(BlockDriverState *bs); |
52 | + * layer, set by block layer | 29 | |
53 | * | ||
54 | * Internal flag: | ||
55 | * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request | ||
56 | * that the block layer recompute the answer from the returned | ||
57 | * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. | ||
58 | * | ||
59 | - * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of | ||
60 | - * the return value (old interface) or the entire map parameter (new | ||
61 | - * interface) represent the offset in the returned BDS that is allocated for | ||
62 | - * the corresponding raw data. However, whether that offset actually | ||
63 | - * contains data also depends on BDRV_BLOCK_DATA, as follows: | ||
64 | + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the | ||
65 | + * host offset within the returned BDS that is allocated for the | ||
66 | + * corresponding raw guest data. However, whether that offset | ||
67 | + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: | ||
68 | * | ||
69 | * DATA ZERO OFFSET_VALID | ||
70 | * t t t sectors read as zero, returned file is zero at offset | ||
71 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 30 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
72 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
73 | --- a/include/block/block_int.h | 32 | --- a/include/block/block_int.h |
74 | +++ b/include/block/block_int.h | 33 | +++ b/include/block/block_int.h |
75 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | 34 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { |
76 | /* | 35 | |
77 | * Building block for bdrv_block_status[_above] and | 36 | /* Accessed with atomic ops. */ |
78 | * bdrv_is_allocated[_above]. The driver should answer only | 37 | int quiesce_counter; |
79 | - * according to the current layer, and should not set | 38 | + int recursive_quiesce_counter; |
80 | - * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h | 39 | + |
81 | - * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block | 40 | unsigned int write_gen; /* Current data generation */ |
82 | - * layer guarantees input aligned to request_alignment, as well as | 41 | |
83 | - * non-NULL pnum and file. | 42 | /* Protected by reqs_lock. */ |
84 | + * according to the current layer, and should only need to set | 43 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, |
85 | + * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, | 44 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, |
86 | + * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing | 45 | BdrvRequestFlags flags); |
87 | + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See | 46 | |
88 | + * block.h for the overall meaning of the bits. As a hint, the | 47 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); |
89 | + * flag want_zero is true if the caller cares more about precise | 48 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); |
90 | + * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for | 49 | + |
91 | + * overall allocation (favor larger *pnum, perhaps by reporting | 50 | int get_tmp_filename(char *filename, int size); |
92 | + * _DATA instead of _ZERO). The block layer guarantees input | 51 | BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, |
93 | + * clamped to bdrv_getlength() and aligned to request_alignment, | 52 | const char *filename); |
94 | + * as well as non-NULL pnum, map, and file; in turn, the driver | 53 | diff --git a/block.c b/block.c |
95 | + * must return an error or set pnum to an aligned non-zero value. | 54 | index XXXXXXX..XXXXXXX 100644 |
96 | */ | 55 | --- a/block.c |
97 | int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, | 56 | +++ b/block.c |
98 | int64_t sector_num, int nb_sectors, int *pnum, | 57 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child) |
99 | BlockDriverState **file); | 58 | bdrv_drained_end(bs); |
100 | + int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, | 59 | } |
101 | + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, | 60 | |
102 | + int64_t *map, BlockDriverState **file); | 61 | +static void bdrv_child_cb_attach(BdrvChild *child) |
103 | 62 | +{ | |
104 | /* | 63 | + BlockDriverState *bs = child->opaque; |
105 | * Invalidate any cached meta-data. | 64 | + bdrv_apply_subtree_drain(child, bs); |
65 | +} | ||
66 | + | ||
67 | +static void bdrv_child_cb_detach(BdrvChild *child) | ||
68 | +{ | ||
69 | + BlockDriverState *bs = child->opaque; | ||
70 | + bdrv_unapply_subtree_drain(child, bs); | ||
71 | +} | ||
72 | + | ||
73 | static int bdrv_child_cb_inactivate(BdrvChild *child) | ||
74 | { | ||
75 | BlockDriverState *bs = child->opaque; | ||
76 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = { | ||
77 | .inherit_options = bdrv_inherited_options, | ||
78 | .drained_begin = bdrv_child_cb_drained_begin, | ||
79 | .drained_end = bdrv_child_cb_drained_end, | ||
80 | + .attach = bdrv_child_cb_attach, | ||
81 | + .detach = bdrv_child_cb_detach, | ||
82 | .inactivate = bdrv_child_cb_inactivate, | ||
83 | }; | ||
84 | |||
85 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = { | ||
86 | .inherit_options = bdrv_inherited_fmt_options, | ||
87 | .drained_begin = bdrv_child_cb_drained_begin, | ||
88 | .drained_end = bdrv_child_cb_drained_end, | ||
89 | + .attach = bdrv_child_cb_attach, | ||
90 | + .detach = bdrv_child_cb_detach, | ||
91 | .inactivate = bdrv_child_cb_inactivate, | ||
92 | }; | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c) | ||
95 | parent->backing_blocker); | ||
96 | bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, | ||
97 | parent->backing_blocker); | ||
98 | + | ||
99 | + bdrv_child_cb_attach(c); | ||
100 | } | ||
101 | |||
102 | static void bdrv_backing_detach(BdrvChild *c) | ||
103 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c) | ||
104 | bdrv_op_unblock_all(c->bs, parent->backing_blocker); | ||
105 | error_free(parent->backing_blocker); | ||
106 | parent->backing_blocker = NULL; | ||
107 | + | ||
108 | + bdrv_child_cb_detach(c); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
113 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | ||
114 | } | ||
115 | if (old_bs) { | ||
116 | + /* Detach first so that the recursive drain sections coming from @child | ||
117 | + * are already gone and we only end the drain sections that came from | ||
118 | + * elsewhere. */ | ||
119 | + if (child->role->detach) { | ||
120 | + child->role->detach(child); | ||
121 | + } | ||
122 | if (old_bs->quiesce_counter && child->role->drained_end) { | ||
123 | for (i = 0; i < old_bs->quiesce_counter; i++) { | ||
124 | child->role->drained_end(child); | ||
125 | } | ||
126 | } | ||
127 | - if (child->role->detach) { | ||
128 | - child->role->detach(child); | ||
129 | - } | ||
130 | QLIST_REMOVE(child, next_parent); | ||
131 | } | ||
132 | |||
133 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
134 | } | ||
135 | } | ||
136 | |||
137 | + /* Attach only after starting new drained sections, so that recursive | ||
138 | + * drain sections coming from @child don't get an extra .drained_begin | ||
139 | + * callback. */ | ||
140 | if (child->role->attach) { | ||
141 | child->role->attach(child); | ||
142 | } | ||
106 | diff --git a/block/io.c b/block/io.c | 143 | diff --git a/block/io.c b/block/io.c |
107 | index XXXXXXX..XXXXXXX 100644 | 144 | index XXXXXXX..XXXXXXX 100644 |
108 | --- a/block/io.c | 145 | --- a/block/io.c |
109 | +++ b/block/io.c | 146 | +++ b/block/io.c |
110 | @@ -XXX,XX +XXX,XX @@ int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, | 147 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
111 | * Drivers not implementing the functionality are assumed to not support | 148 | assert(data.done); |
112 | * backing files, hence all their sectors are reported as allocated. | 149 | } |
113 | * | 150 | |
114 | - * If 'want_zero' is true, the caller is querying for mapping purposes, | 151 | -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
115 | - * and the result should include BDRV_BLOCK_OFFSET_VALID and | 152 | - BdrvChild *parent) |
116 | - * BDRV_BLOCK_ZERO where possible; otherwise, the result may omit those | 153 | +void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
117 | - * bits particularly if it allows for a larger value in 'pnum'. | 154 | + BdrvChild *parent) |
118 | + * If 'want_zero' is true, the caller is querying for mapping | 155 | { |
119 | + * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and | 156 | BdrvChild *child, *next; |
120 | + * _ZERO where possible; otherwise, the result favors larger 'pnum', | 157 | |
121 | + * with a focus on accurate BDRV_BLOCK_ALLOCATED. | 158 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
122 | * | 159 | bdrv_drain_recurse(bs); |
123 | * If 'offset' is beyond the end of the disk image the return value is | 160 | |
124 | * BDRV_BLOCK_EOF and 'pnum' is set to 0. | 161 | if (recursive) { |
125 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, | 162 | + bs->recursive_quiesce_counter++; |
126 | 163 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | |
127 | /* Must be non-NULL or bdrv_getlength() would have failed */ | 164 | bdrv_do_drained_begin(child->bs, true, child); |
128 | assert(bs->drv); | 165 | } |
129 | - if (!bs->drv->bdrv_co_get_block_status) { | 166 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) |
130 | + if (!bs->drv->bdrv_co_get_block_status && !bs->drv->bdrv_co_block_status) { | 167 | bdrv_do_drained_begin(bs, true, NULL); |
131 | *pnum = bytes; | 168 | } |
132 | ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; | 169 | |
133 | if (offset + bytes == total_size) { | 170 | -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, |
134 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, | 171 | - BdrvChild *parent) |
135 | bdrv_inc_in_flight(bs); | 172 | +void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, |
136 | 173 | + BdrvChild *parent) | |
137 | /* Round out to request_alignment boundaries */ | 174 | { |
138 | - /* TODO: until we have a byte-based driver callback, we also have to | 175 | BdrvChild *child, *next; |
139 | - * round out to sectors, even if that is bigger than request_alignment */ | 176 | int old_quiesce_counter; |
140 | - align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE); | 177 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, |
141 | + align = bs->bl.request_alignment; | 178 | } |
142 | + if (bs->drv->bdrv_co_get_block_status && align < BDRV_SECTOR_SIZE) { | 179 | |
143 | + align = BDRV_SECTOR_SIZE; | 180 | if (recursive) { |
181 | + bs->recursive_quiesce_counter--; | ||
182 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
183 | bdrv_do_drained_end(child->bs, true, child); | ||
184 | } | ||
185 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs) | ||
186 | bdrv_do_drained_end(bs, true, NULL); | ||
187 | } | ||
188 | |||
189 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) | ||
190 | +{ | ||
191 | + int i; | ||
192 | + | ||
193 | + for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { | ||
194 | + bdrv_do_drained_begin(child->bs, true, child); | ||
144 | + } | 195 | + } |
145 | aligned_offset = QEMU_ALIGN_DOWN(offset, align); | 196 | +} |
146 | aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; | 197 | + |
147 | 198 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) | |
148 | - { | 199 | +{ |
149 | + if (bs->drv->bdrv_co_get_block_status) { | 200 | + int i; |
150 | int count; /* sectors */ | 201 | + |
151 | int64_t longret; | 202 | + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { |
152 | 203 | + bdrv_do_drained_end(child->bs, true, child); | |
153 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, | 204 | + } |
154 | } | 205 | +} |
155 | ret = longret & ~BDRV_BLOCK_OFFSET_MASK; | 206 | + |
156 | *pnum = count * BDRV_SECTOR_SIZE; | 207 | /* |
157 | + } else { | 208 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
158 | + ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, | 209 | * and suspend block driver's internal I/O until next request arrives. |
159 | + aligned_bytes, pnum, &local_map, | ||
160 | + &local_file); | ||
161 | + if (ret < 0) { | ||
162 | + *pnum = 0; | ||
163 | + goto out; | ||
164 | + } | ||
165 | + assert(*pnum); /* The block driver must make progress */ | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | -- | 210 | -- |
170 | 2.13.6 | 211 | 2.13.6 |
171 | 212 | ||
172 | 213 | diff view generated by jsdifflib |
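Pulling the documented contract together, a driver-side implementation for an invented driver that maps guest offsets 1:1 onto its bs->file child could look like the sketch below. It assumes QEMU's block_int.h; the driver name is hypothetical and this is not code from the series, just the shape the commit message asks for (describe only the current layer, fill in *pnum/*map/*file, and always make progress):

    static int coroutine_fn example_co_block_status(BlockDriverState *bs,
                                                    bool want_zero,
                                                    int64_t offset,
                                                    int64_t bytes,
                                                    int64_t *pnum,
                                                    int64_t *map,
                                                    BlockDriverState **file)
    {
        /* The block layer guarantees offset/bytes are clamped to the image
         * size and aligned to request_alignment, so the whole range can be
         * reported at once. */
        *pnum = bytes;          /* non-zero on success: the progress rule */
        *map = offset;          /* host offset backing this guest data */
        *file = bs->file->bs;   /* which BDS that offset refers to */

        /* Report only this layer; BDRV_BLOCK_ALLOCATED is computed by the
         * block layer, and want_zero is only a hint and may be ignored. */
        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    }

A pure passthrough filter could instead return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID and let the block layer recompute the answer from the returned *file, as the block.h comment in the patch describes.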
1 | From: Eric Blake <eblake@redhat.com> | ||
---|---|---|---|
2 | |||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Update the qcow2 driver accordingly. | ||
5 | |||
6 | For now, we are ignoring the 'want_zero' hint. However, it should | ||
7 | be relatively straightforward to honor the hint as a way to return | ||
8 | larger *pnum values when we have consecutive clusters with the same | ||
9 | data/zero status but which differ only in having non-consecutive | ||
10 | mappings. | ||
11 | |||
12 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
13 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
14 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
16 | --- | 2 | --- |
17 | block/qcow2.c | 24 +++++++++++++----------- | 3 | tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ |
18 | 1 file changed, 13 insertions(+), 11 deletions(-) | 4 | 1 file changed, 80 insertions(+) |
19 | 5 | ||
20 | diff --git a/block/qcow2.c b/block/qcow2.c | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
21 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/qcow2.c | 8 | --- a/tests/test-bdrv-drain.c |
23 | +++ b/block/qcow2.c | 9 | +++ b/tests/test-bdrv-drain.c |
24 | @@ -XXX,XX +XXX,XX @@ static void qcow2_join_options(QDict *options, QDict *old_options) | 10 | @@ -XXX,XX +XXX,XX @@ static void test_multiparent(void) |
25 | } | 11 | blk_unref(blk_b); |
26 | } | 12 | } |
27 | 13 | ||
28 | -static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, | 14 | +static void test_graph_change(void) |
29 | - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) | 15 | +{ |
30 | +static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, | 16 | + BlockBackend *blk_a, *blk_b; |
31 | + bool want_zero, | 17 | + BlockDriverState *bs_a, *bs_b, *backing; |
32 | + int64_t offset, int64_t count, | 18 | + BDRVTestState *a_s, *b_s, *backing_s; |
33 | + int64_t *pnum, int64_t *map, | 19 | + |
34 | + BlockDriverState **file) | 20 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
35 | { | 21 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, |
36 | BDRVQcow2State *s = bs->opaque; | 22 | + &error_abort); |
37 | uint64_t cluster_offset; | 23 | + a_s = bs_a->opaque; |
38 | int index_in_cluster, ret; | 24 | + blk_insert_bs(blk_a, bs_a, &error_abort); |
39 | unsigned int bytes; | 25 | + |
40 | - int64_t status = 0; | 26 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
41 | + int status = 0; | 27 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, |
42 | 28 | + &error_abort); | |
43 | - bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE); | 29 | + b_s = bs_b->opaque; |
44 | + bytes = MIN(INT_MAX, count); | 30 | + blk_insert_bs(blk_b, bs_b, &error_abort); |
45 | qemu_co_mutex_lock(&s->lock); | 31 | + |
46 | - ret = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &bytes, | 32 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); |
47 | - &cluster_offset); | 33 | + backing_s = backing->opaque; |
48 | + ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); | 34 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); |
49 | qemu_co_mutex_unlock(&s->lock); | 35 | + |
50 | if (ret < 0) { | 36 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); |
51 | return ret; | 37 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); |
52 | } | 38 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); |
53 | 39 | + g_assert_cmpint(a_s->drain_count, ==, 0); | |
54 | - *pnum = bytes >> BDRV_SECTOR_BITS; | 40 | + g_assert_cmpint(b_s->drain_count, ==, 0); |
55 | + *pnum = bytes; | 41 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
56 | 42 | + | |
57 | if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && | 43 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
58 | !s->crypto) { | 44 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
59 | - index_in_cluster = sector_num & (s->cluster_sectors - 1); | 45 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
60 | - cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); | 46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); |
61 | + index_in_cluster = offset & (s->cluster_size - 1); | 47 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); |
62 | + *map = cluster_offset | index_in_cluster; | 48 | + |
63 | *file = bs->file->bs; | 49 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); |
64 | - status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; | 50 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); |
65 | + status |= BDRV_BLOCK_OFFSET_VALID; | 51 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); |
66 | } | 52 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); |
67 | if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { | 53 | + g_assert_cmpint(a_s->drain_count, ==, 5); |
68 | status |= BDRV_BLOCK_ZERO; | 54 | + g_assert_cmpint(b_s->drain_count, ==, 5); |
69 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | 55 | + g_assert_cmpint(backing_s->drain_count, ==, 5); |
70 | .bdrv_child_perm = bdrv_format_default_perms, | 56 | + |
71 | .bdrv_create = qcow2_create, | 57 | + bdrv_set_backing_hd(bs_b, NULL, &error_abort); |
72 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | 58 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 3); |
73 | - .bdrv_co_get_block_status = qcow2_co_get_block_status, | 59 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); |
74 | + .bdrv_co_block_status = qcow2_co_block_status, | 60 | + g_assert_cmpint(backing->quiesce_counter, ==, 3); |
75 | 61 | + g_assert_cmpint(a_s->drain_count, ==, 3); | |
76 | .bdrv_co_preadv = qcow2_co_preadv, | 62 | + g_assert_cmpint(b_s->drain_count, ==, 2); |
77 | .bdrv_co_pwritev = qcow2_co_pwritev, | 63 | + g_assert_cmpint(backing_s->drain_count, ==, 3); |
64 | + | ||
65 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 5); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 5); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
74 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
75 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
76 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
77 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
78 | + | ||
79 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
80 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
81 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
82 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
83 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
84 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
85 | + | ||
86 | + bdrv_unref(backing); | ||
87 | + bdrv_unref(bs_a); | ||
88 | + bdrv_unref(bs_b); | ||
89 | + blk_unref(blk_a); | ||
90 | + blk_unref(blk_b); | ||
91 | +} | ||
92 | + | ||
93 | |||
94 | typedef struct TestBlockJob { | ||
95 | BlockJob common; | ||
96 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
97 | |||
98 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
99 | g_test_add_func("/bdrv-drain/multiparent", test_multiparent); | ||
100 | + g_test_add_func("/bdrv-drain/graph-change", test_graph_change); | ||
101 | |||
102 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
103 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
78 | -- | 104 | -- |
79 | 2.13.6 | 105 | 2.13.6 |
80 | 106 | ||
81 | 107 | diff view generated by jsdifflib |
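The optimisation the qcow2 commit message defers is this: when want_zero is false the caller does not care about host offsets, so consecutive clusters sharing a status can be merged into one larger *pnum even if their mappings are scattered. A standalone toy version of that idea (struct cluster_info, CLUSTER_SIZE and coalesce() are invented, not qcow2's real L2 metadata):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CLUSTER_SIZE 65536

    struct cluster_info {
        unsigned status;        /* think DATA vs ZERO */
        int64_t host_offset;    /* where the cluster lives in the file */
    };

    /* How many consecutive clusters starting at 'i' fit into one reply.
     * With want_zero the host mapping must stay contiguous; without it,
     * only the status has to match, so *pnum can grow. */
    static size_t coalesce(const struct cluster_info *c, size_t n, size_t i,
                           bool want_zero)
    {
        size_t j = i + 1;

        while (j < n && c[j].status == c[i].status) {
            if (want_zero &&
                c[j].host_offset != c[j - 1].host_offset + CLUSTER_SIZE) {
                break;
            }
            j++;
        }
        return j - i;
    }

    int main(void)
    {
        /* Three DATA clusters; the third is allocated somewhere else. */
        struct cluster_info c[] = {
            { 1, 0 * CLUSTER_SIZE },
            { 1, 1 * CLUSTER_SIZE },
            { 1, 9 * CLUSTER_SIZE },
        };

        printf("want_zero=true:  %zu clusters\n", coalesce(c, 3, 0, true));
        printf("want_zero=false: %zu clusters\n", coalesce(c, 3, 0, false));
        return 0;
    }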
Deleted patch | |||
---|---|---|---|
1 | From: Eric Blake <eblake@redhat.com> | ||
2 | 1 | ||
3 | Rework the debug define so that we always get -Wformat checking, | ||
4 | even when debugging is disabled. | ||
5 | |||
6 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Stefan Weil <sw@weilnetz.de> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | --- | ||
13 | block/vdi.c | 12 +++++++++--- | ||
14 | 1 file changed, 9 insertions(+), 3 deletions(-) | ||
15 | |||
16 | diff --git a/block/vdi.c b/block/vdi.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/block/vdi.c | ||
19 | +++ b/block/vdi.c | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | #define DEFAULT_CLUSTER_SIZE (1 * MiB) | ||
22 | |||
23 | #if defined(CONFIG_VDI_DEBUG) | ||
24 | -#define logout(fmt, ...) \ | ||
25 | - fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__) | ||
26 | +#define VDI_DEBUG 1 | ||
27 | #else | ||
28 | -#define logout(fmt, ...) ((void)0) | ||
29 | +#define VDI_DEBUG 0 | ||
30 | #endif | ||
31 | |||
32 | +#define logout(fmt, ...) \ | ||
33 | + do { \ | ||
34 | + if (VDI_DEBUG) { \ | ||
35 | + fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \ | ||
36 | + } \ | ||
37 | + } while (0) | ||
38 | + | ||
39 | /* Image signature. */ | ||
40 | #define VDI_SIGNATURE 0xbeda107f | ||
41 | |||
42 | -- | ||
43 | 2.13.6 | ||
44 | |||
45 | diff view generated by jsdifflib |
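The pattern in the vdi patch above is worth noting on its own: route the debug call through "if (DEBUG_CONSTANT)" instead of compiling it out, so the compiler's -Wformat checking always sees the format string while the optimiser still drops the call in non-debug builds. A standalone version (all names invented; like the QEMU code it relies on the GNU ##__VA_ARGS__ extension):

    #include <stdio.h>

    #ifdef CONFIG_EXAMPLE_DEBUG
    #define EXAMPLE_DEBUG 1
    #else
    #define EXAMPLE_DEBUG 0
    #endif

    #define debug_log(fmt, ...) \
        do { \
            if (EXAMPLE_DEBUG) { \
                fprintf(stderr, "example\t%-24s" fmt, __func__, ##__VA_ARGS__); \
            } \
        } while (0)

    int main(void)
    {
        int sectors = 16;

        /* With the old '#define logout(...) ((void)0)' style a mismatched
         * format specifier here would only be diagnosed in debug builds;
         * with this pattern gcc/clang warn about it in every build. */
        debug_log("opened image with %d sectors\n", sectors);
        return 0;
    }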
Deleted patch | |||
---|---|---|---|
1 | From: Alberto Garcia <berto@igalia.com> | ||
2 | 1 | ||
3 | This patch updates docs/qcow2-cache.txt explaining how to use the new | ||
4 | l2-cache-entry-size parameter. | ||
5 | |||
6 | Here's a more detailed technical description of this feature: | ||
7 | |||
8 | https://lists.gnu.org/archive/html/qemu-block/2017-09/msg00635.html | ||
9 | |||
10 | And here are some performance numbers: | ||
11 | |||
12 | https://lists.gnu.org/archive/html/qemu-block/2017-12/msg00507.html | ||
13 | |||
14 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
15 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
17 | --- | ||
18 | docs/qcow2-cache.txt | 46 +++++++++++++++++++++++++++++++++++++++++++--- | ||
19 | 1 file changed, 43 insertions(+), 3 deletions(-) | ||
20 | |||
21 | diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/docs/qcow2-cache.txt | ||
24 | +++ b/docs/qcow2-cache.txt | ||
25 | @@ -XXX,XX +XXX,XX @@ | ||
26 | qcow2 L2/refcount cache configuration | ||
27 | ===================================== | ||
28 | -Copyright (C) 2015 Igalia, S.L. | ||
29 | +Copyright (C) 2015, 2018 Igalia, S.L. | ||
30 | Author: Alberto Garcia <berto@igalia.com> | ||
31 | |||
32 | This work is licensed under the terms of the GNU GPL, version 2 or | ||
33 | @@ -XXX,XX +XXX,XX @@ There are three options available, and all of them take bytes: | ||
34 | |||
35 | There are two things that need to be taken into account: | ||
36 | |||
37 | - - Both caches must have a size that is a multiple of the cluster | ||
38 | - size. | ||
39 | + - Both caches must have a size that is a multiple of the cluster size | ||
40 | + (or the cache entry size: see "Using smaller cache entries" below). | ||
41 | |||
42 | - If you only set one of the options above, QEMU will automatically | ||
43 | adjust the others so that the L2 cache is 4 times bigger than the | ||
44 | @@ -XXX,XX +XXX,XX @@ much less often than the L2 cache, so it's perfectly reasonable to | ||
45 | keep it small. | ||
46 | |||
47 | |||
48 | +Using smaller cache entries | ||
49 | +--------------------------- | ||
50 | +The qcow2 L2 cache stores complete tables by default. This means that | ||
51 | +if QEMU needs an entry from an L2 table then the whole table is read | ||
52 | +from disk and is kept in the cache. If the cache is full then a | ||
53 | +complete table needs to be evicted first. | ||
54 | + | ||
55 | +This can be inefficient with large cluster sizes since it results in | ||
56 | +more disk I/O and wastes more cache memory. | ||
57 | + | ||
58 | +Since QEMU 2.12 you can change the size of the L2 cache entry and make | ||
59 | +it smaller than the cluster size. This can be configured using the | ||
60 | +"l2-cache-entry-size" parameter: | ||
61 | + | ||
62 | + -drive file=hd.qcow2,l2-cache-size=2097152,l2-cache-entry-size=4096 | ||
63 | + | ||
64 | +Some things to take into account: | ||
65 | + | ||
66 | + - The L2 cache entry size has the same restrictions as the cluster | ||
67 | + size (power of two, at least 512 bytes). | ||
68 | + | ||
69 | + - Smaller entry sizes generally improve the cache efficiency and make | ||
70 | + disk I/O faster. This is particularly true with solid state drives | ||
71 | + so it's a good idea to reduce the entry size in those cases. With | ||
72 | + rotating hard drives the situation is a bit more complicated so you | ||
73 | + should test it first and stay with the default size if unsure. | ||
74 | + | ||
75 | + - Try different entry sizes to see which one gives faster performance | ||
76 | + in your case. The block size of the host filesystem is generally a | ||
77 | + good default (usually 4096 bytes in the case of ext4). | ||
78 | + | ||
79 | + - Only the L2 cache can be configured this way. The refcount cache | ||
80 | + always uses the cluster size as the entry size. | ||
81 | + | ||
82 | + - If the L2 cache is big enough to hold all of the image's L2 tables | ||
83 | + (as explained in the "Choosing the right cache sizes" section | ||
84 | + earlier in this document) then none of this is necessary and you | ||
85 | + can omit the "l2-cache-entry-size" parameter altogether. | ||
86 | + | ||
87 | + | ||
88 | Reducing the memory usage | ||
89 | ------------------------- | ||
90 | It is possible to clean unused cache entries in order to reduce the | ||
91 | -- | ||
92 | 2.13.6 | ||
93 | |||
94 | diff view generated by jsdifflib |
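For a rough feel of the numbers behind the example command line, assuming the usual qcow2 layout where each L2 table entry is 8 bytes and maps one cluster: with 64 KiB clusters, a 4096-byte cache entry holds 512 L2 entries and therefore covers 512 * 64 KiB = 32 MiB of guest data, and the 2 MiB L2 cache from the example holds 512 such entries, i.e. roughly 16 GiB of coverage in total. Full 64 KiB entries give the same 16 GiB total (32 entries of 512 MiB each), just in much coarser pieces, which is why smaller entries tend to waste less cache memory and cause less I/O per miss when the guest's accesses are scattered.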
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | Since commit bde70715, base is the only node that is reopened in |
---|---|---|---|
2 | commit_start(). This means that the code, which still involves an | ||
3 | explicit BlockReopenQueue, can now be simplified by using bdrv_reopen(). | ||
2 | 4 | ||
3 | The name aio_context_in_iothread() is misleading because it also returns | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | true when called on the main AioContext from the main loop thread, which | 6 | Reviewed-by: Fam Zheng <famz@redhat.com> |
5 | is not an IOThread. | 7 | --- |
8 | block/commit.c | 8 +------- | ||
9 | 1 file changed, 1 insertion(+), 7 deletions(-) | ||
6 | 10 | ||
7 | This patch renames it to in_aio_context_home_thread() and expands the | 11 | diff --git a/block/commit.c b/block/commit.c |
8 | doc comment to make the semantics clearer. | ||
9 | |||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | --- | ||
14 | include/block/aio.h | 7 +++++-- | ||
15 | include/block/block.h | 2 +- | ||
16 | 2 files changed, 6 insertions(+), 3 deletions(-) | ||
17 | |||
18 | diff --git a/include/block/aio.h b/include/block/aio.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/include/block/aio.h | 13 | --- a/block/commit.c |
21 | +++ b/include/block/aio.h | 14 | +++ b/block/commit.c |
22 | @@ -XXX,XX +XXX,XX @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); | 15 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
23 | AioContext *qemu_get_current_aio_context(void); | 16 | const char *filter_node_name, Error **errp) |
24 | |||
25 | /** | ||
26 | + * in_aio_context_home_thread: | ||
27 | * @ctx: the aio context | ||
28 | * | ||
29 | - * Return whether we are running in the I/O thread that manages @ctx. | ||
30 | + * Return whether we are running in the thread that normally runs @ctx. Note | ||
31 | + * that acquiring/releasing ctx does not affect the outcome, each AioContext | ||
32 | + * still only has one home thread that is responsible for running it. | ||
33 | */ | ||
34 | -static inline bool aio_context_in_iothread(AioContext *ctx) | ||
35 | +static inline bool in_aio_context_home_thread(AioContext *ctx) | ||
36 | { | 17 | { |
37 | return ctx == qemu_get_current_aio_context(); | 18 | CommitBlockJob *s; |
38 | } | 19 | - BlockReopenQueue *reopen_queue = NULL; |
39 | diff --git a/include/block/block.h b/include/block/block.h | 20 | int orig_base_flags; |
40 | index XXXXXXX..XXXXXXX 100644 | 21 | BlockDriverState *iter; |
41 | --- a/include/block/block.h | 22 | BlockDriverState *commit_top_bs = NULL; |
42 | +++ b/include/block/block.h | 23 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
43 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all(void); | 24 | /* convert base to r/w, if necessary */ |
44 | bool busy_ = true; \ | 25 | orig_base_flags = bdrv_get_flags(base); |
45 | BlockDriverState *bs_ = (bs); \ | 26 | if (!(orig_base_flags & BDRV_O_RDWR)) { |
46 | AioContext *ctx_ = bdrv_get_aio_context(bs_); \ | 27 | - reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, |
47 | - if (aio_context_in_iothread(ctx_)) { \ | 28 | - orig_base_flags | BDRV_O_RDWR); |
48 | + if (in_aio_context_home_thread(ctx_)) { \ | 29 | - } |
49 | while ((cond) || busy_) { \ | 30 | - |
50 | busy_ = aio_poll(ctx_, (cond)); \ | 31 | - if (reopen_queue) { |
51 | waited_ |= !!(cond) | busy_; \ | 32 | - bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); |
33 | + bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err); | ||
34 | if (local_err != NULL) { | ||
35 | error_propagate(errp, local_err); | ||
36 | goto fail; | ||
52 | -- | 37 | -- |
53 | 2.13.6 | 38 | 2.13.6 |
54 | 39 | ||
55 | 40 | diff view generated by jsdifflib |
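The simplified commit_start() above reduces to a small pattern: flip the base node to read-write for the duration of the operation, then restore the original flags. A sketch of that pattern, using the bdrv_get_flags()/bdrv_reopen() signatures visible in the patch context; the wrapper itself is invented for illustration and is not part of the series:

    /* Illustration only: make 'base' writable around some operation and put
     * its original flags back afterwards, roughly as the commit job does on
     * completion. */
    static int with_writable_base(BlockDriverState *base, Error **errp)
    {
        int orig_flags = bdrv_get_flags(base);
        int ret;

        if (!(orig_flags & BDRV_O_RDWR)) {
            ret = bdrv_reopen(base, orig_flags | BDRV_O_RDWR, errp);
            if (ret < 0) {
                return ret;
            }
        }

        /* ... write to 'base' here ... */

        if (!(orig_flags & BDRV_O_RDWR)) {
            /* Best effort when switching back to read-only. */
            bdrv_reopen(base, orig_flags, NULL);
        }
        return 0;
    }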
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | The bdrv_reopen*() implementation doesn't like it if the graph is |
---|---|---|---|
2 | changed between queuing nodes for reopen and actually reopening them | ||
3 | (one of the reasons is that queuing can be recursive). | ||
2 | 4 | ||
3 | BlockDriver->bdrv_create() has been called from coroutine context since | 5 | So instead of draining the device only in bdrv_reopen_multiple(), |
4 | commit 5b7e1542cfa41a281af9629d31cef03704d976e6 ("block: make | 6 | require that callers already drained all affected nodes, and assert this |
5 | bdrv_create adopt coroutine"). | 7 | in bdrv_reopen_queue(). |
6 | 8 | ||
7 | Make this explicit by renaming to .bdrv_co_create_opts() and add the | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | coroutine_fn annotation. This makes it obvious to block driver authors | 10 | Reviewed-by: Fam Zheng <famz@redhat.com> |
9 | that they may yield, use CoMutex, or other coroutine_fn APIs. | 11 | --- |
10 | bdrv_co_create is reserved for the QAPI-based version that Kevin is | 12 | block.c | 23 ++++++++++++++++------- |
11 | working on. | 13 | block/replication.c | 6 ++++++ |
14 | qemu-io-cmds.c | 3 +++ | ||
15 | 3 files changed, 25 insertions(+), 7 deletions(-) | ||
12 | 16 | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Message-Id: <20170705102231.20711-2-stefanha@redhat.com> | ||
15 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
16 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | --- | ||
19 | include/block/block_int.h | 3 ++- | ||
20 | block.c | 4 ++-- | ||
21 | block/crypto.c | 8 ++++---- | ||
22 | block/file-posix.c | 15 ++++++++------- | ||
23 | block/file-win32.c | 5 +++-- | ||
24 | block/gluster.c | 13 +++++++------ | ||
25 | block/iscsi.c | 7 ++++--- | ||
26 | block/nfs.c | 5 +++-- | ||
27 | block/parallels.c | 6 ++++-- | ||
28 | block/qcow.c | 5 +++-- | ||
29 | block/qcow2.c | 5 +++-- | ||
30 | block/qed.c | 6 ++++-- | ||
31 | block/raw-format.c | 5 +++-- | ||
32 | block/rbd.c | 6 ++++-- | ||
33 | block/sheepdog.c | 10 +++++----- | ||
34 | block/ssh.c | 5 +++-- | ||
35 | block/vdi.c | 5 +++-- | ||
36 | block/vhdx.c | 5 +++-- | ||
37 | block/vmdk.c | 5 +++-- | ||
38 | block/vpc.c | 5 +++-- | ||
39 | 20 files changed, 74 insertions(+), 54 deletions(-) | ||
40 | |||
41 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/include/block/block_int.h | ||
44 | +++ b/include/block/block_int.h | ||
45 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
46 | int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, | ||
47 | Error **errp); | ||
48 | void (*bdrv_close)(BlockDriverState *bs); | ||
49 | - int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp); | ||
50 | + int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts, | ||
51 | + Error **errp); | ||
52 | int (*bdrv_make_empty)(BlockDriverState *bs); | ||
53 | |||
54 | void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options); | ||
55 | diff --git a/block.c b/block.c | 17 | diff --git a/block.c b/block.c |
56 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
57 | --- a/block.c | 19 | --- a/block.c |
58 | +++ b/block.c | 20 | +++ b/block.c |
59 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) | 21 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, |
60 | CreateCo *cco = opaque; | 22 | * returns a pointer to bs_queue, which is either the newly allocated |
61 | assert(cco->drv); | 23 | * bs_queue, or the existing bs_queue being used. |
62 | 24 | * | |
63 | - ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); | 25 | + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). |
64 | + ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); | 26 | */ |
65 | error_propagate(&cco->err, local_err); | 27 | static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, |
66 | cco->ret = ret; | 28 | BlockDriverState *bs, |
67 | } | 29 | @@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, |
68 | @@ -XXX,XX +XXX,XX @@ int bdrv_create(BlockDriver *drv, const char* filename, | 30 | BdrvChild *child; |
69 | .err = NULL, | 31 | QDict *old_options, *explicit_options; |
70 | }; | 32 | |
71 | 33 | + /* Make sure that the caller remembered to use a drained section. This is | |
72 | - if (!drv->bdrv_create) { | 34 | + * important to avoid graph changes between the recursive queuing here and |
73 | + if (!drv->bdrv_co_create_opts) { | 35 | + * bdrv_reopen_multiple(). */ |
74 | error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); | 36 | + assert(bs->quiesce_counter > 0); |
75 | ret = -ENOTSUP; | 37 | + |
76 | goto out; | 38 | if (bs_queue == NULL) { |
77 | diff --git a/block/crypto.c b/block/crypto.c | 39 | bs_queue = g_new0(BlockReopenQueue, 1); |
78 | index XXXXXXX..XXXXXXX 100644 | 40 | QSIMPLEQ_INIT(bs_queue); |
79 | --- a/block/crypto.c | 41 | @@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, |
80 | +++ b/block/crypto.c | 42 | * If all devices prepare successfully, then the changes are committed |
81 | @@ -XXX,XX +XXX,XX @@ static int block_crypto_open_luks(BlockDriverState *bs, | 43 | * to all devices. |
82 | bs, options, flags, errp); | 44 | * |
83 | } | 45 | + * All affected nodes must be drained between bdrv_reopen_queue() and |
84 | 46 | + * bdrv_reopen_multiple(). | |
85 | -static int block_crypto_create_luks(const char *filename, | 47 | */ |
86 | - QemuOpts *opts, | 48 | int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) |
87 | - Error **errp) | ||
88 | +static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, | ||
89 | + QemuOpts *opts, | ||
90 | + Error **errp) | ||
91 | { | 49 | { |
92 | return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, | 50 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er |
93 | filename, opts, errp); | 51 | |
94 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_crypto_luks = { | 52 | assert(bs_queue != NULL); |
95 | .bdrv_open = block_crypto_open_luks, | 53 | |
96 | .bdrv_close = block_crypto_close, | 54 | - aio_context_release(ctx); |
97 | .bdrv_child_perm = bdrv_format_default_perms, | 55 | - bdrv_drain_all_begin(); |
98 | - .bdrv_create = block_crypto_create_luks, | 56 | - aio_context_acquire(ctx); |
99 | + .bdrv_co_create_opts = block_crypto_co_create_opts_luks, | 57 | - |
100 | .bdrv_truncate = block_crypto_truncate, | 58 | QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { |
101 | .create_opts = &block_crypto_create_opts_luks, | 59 | + assert(bs_entry->state.bs->quiesce_counter > 0); |
102 | 60 | if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { | |
103 | diff --git a/block/file-posix.c b/block/file-posix.c | 61 | error_propagate(errp, local_err); |
104 | index XXXXXXX..XXXXXXX 100644 | 62 | goto cleanup; |
105 | --- a/block/file-posix.c | 63 | @@ -XXX,XX +XXX,XX @@ cleanup: |
106 | +++ b/block/file-posix.c | ||
107 | @@ -XXX,XX +XXX,XX @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) | ||
108 | return (int64_t)st.st_blocks * 512; | ||
109 | } | ||
110 | |||
111 | -static int raw_create(const char *filename, QemuOpts *opts, Error **errp) | ||
112 | +static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, | ||
113 | + Error **errp) | ||
114 | { | ||
115 | int fd; | ||
116 | int result = 0; | ||
117 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
118 | .bdrv_reopen_commit = raw_reopen_commit, | ||
119 | .bdrv_reopen_abort = raw_reopen_abort, | ||
120 | .bdrv_close = raw_close, | ||
121 | - .bdrv_create = raw_create, | ||
122 | + .bdrv_co_create_opts = raw_co_create_opts, | ||
123 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
124 | .bdrv_co_block_status = raw_co_block_status, | ||
125 | .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, | ||
126 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, | ||
127 | return -ENOTSUP; | ||
128 | } | ||
129 | |||
130 | -static int hdev_create(const char *filename, QemuOpts *opts, | ||
131 | - Error **errp) | ||
132 | +static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, | ||
133 | + Error **errp) | ||
134 | { | ||
135 | int fd; | ||
136 | int ret = 0; | ||
137 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = { | ||
138 | .bdrv_reopen_prepare = raw_reopen_prepare, | ||
139 | .bdrv_reopen_commit = raw_reopen_commit, | ||
140 | .bdrv_reopen_abort = raw_reopen_abort, | ||
141 | - .bdrv_create = hdev_create, | ||
142 | + .bdrv_co_create_opts = hdev_co_create_opts, | ||
143 | .create_opts = &raw_create_opts, | ||
144 | .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, | ||
145 | |||
146 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
147 | .bdrv_reopen_prepare = raw_reopen_prepare, | ||
148 | .bdrv_reopen_commit = raw_reopen_commit, | ||
149 | .bdrv_reopen_abort = raw_reopen_abort, | ||
150 | - .bdrv_create = hdev_create, | ||
151 | + .bdrv_co_create_opts = hdev_co_create_opts, | ||
152 | .create_opts = &raw_create_opts, | ||
153 | |||
154 | |||
155 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
156 | .bdrv_reopen_prepare = raw_reopen_prepare, | ||
157 | .bdrv_reopen_commit = raw_reopen_commit, | ||
158 | .bdrv_reopen_abort = raw_reopen_abort, | ||
159 | - .bdrv_create = hdev_create, | ||
160 | + .bdrv_co_create_opts = hdev_co_create_opts, | ||
161 | .create_opts = &raw_create_opts, | ||
162 | |||
163 | .bdrv_co_preadv = raw_co_preadv, | ||
164 | diff --git a/block/file-win32.c b/block/file-win32.c | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/block/file-win32.c | ||
167 | +++ b/block/file-win32.c | ||
168 | @@ -XXX,XX +XXX,XX @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) | ||
169 | return st.st_size; | ||
170 | } | ||
171 | |||
172 | -static int raw_create(const char *filename, QemuOpts *opts, Error **errp) | ||
173 | +static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, | ||
174 | + Error **errp) | ||
175 | { | ||
176 | int fd; | ||
177 | int64_t total_size = 0; | ||
178 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
179 | .bdrv_file_open = raw_open, | ||
180 | .bdrv_refresh_limits = raw_probe_alignment, | ||
181 | .bdrv_close = raw_close, | ||
182 | - .bdrv_create = raw_create, | ||
183 | + .bdrv_co_create_opts = raw_co_create_opts, | ||
184 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
185 | |||
186 | .bdrv_aio_readv = raw_aio_readv, | ||
187 | diff --git a/block/gluster.c b/block/gluster.c | ||
188 | index XXXXXXX..XXXXXXX 100644 | ||
189 | --- a/block/gluster.c | ||
190 | +++ b/block/gluster.c | ||
191 | @@ -XXX,XX +XXX,XX @@ static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset, | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | -static int qemu_gluster_create(const char *filename, | ||
196 | - QemuOpts *opts, Error **errp) | ||
197 | +static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, | ||
198 | + QemuOpts *opts, | ||
199 | + Error **errp) | ||
200 | { | ||
201 | BlockdevOptionsGluster *gconf; | ||
202 | struct glfs *glfs; | ||
203 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster = { | ||
204 | .bdrv_reopen_commit = qemu_gluster_reopen_commit, | ||
205 | .bdrv_reopen_abort = qemu_gluster_reopen_abort, | ||
206 | .bdrv_close = qemu_gluster_close, | ||
207 | - .bdrv_create = qemu_gluster_create, | ||
208 | + .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
209 | .bdrv_getlength = qemu_gluster_getlength, | ||
210 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
211 | .bdrv_truncate = qemu_gluster_truncate, | ||
212 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_tcp = { | ||
213 | .bdrv_reopen_commit = qemu_gluster_reopen_commit, | ||
214 | .bdrv_reopen_abort = qemu_gluster_reopen_abort, | ||
215 | .bdrv_close = qemu_gluster_close, | ||
216 | - .bdrv_create = qemu_gluster_create, | ||
217 | + .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
218 | .bdrv_getlength = qemu_gluster_getlength, | ||
219 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
220 | .bdrv_truncate = qemu_gluster_truncate, | ||
221 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_unix = { | ||
222 | .bdrv_reopen_commit = qemu_gluster_reopen_commit, | ||
223 | .bdrv_reopen_abort = qemu_gluster_reopen_abort, | ||
224 | .bdrv_close = qemu_gluster_close, | ||
225 | - .bdrv_create = qemu_gluster_create, | ||
226 | + .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
227 | .bdrv_getlength = qemu_gluster_getlength, | ||
228 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
229 | .bdrv_truncate = qemu_gluster_truncate, | ||
230 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_rdma = { | ||
231 | .bdrv_reopen_commit = qemu_gluster_reopen_commit, | ||
232 | .bdrv_reopen_abort = qemu_gluster_reopen_abort, | ||
233 | .bdrv_close = qemu_gluster_close, | ||
234 | - .bdrv_create = qemu_gluster_create, | ||
235 | + .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
236 | .bdrv_getlength = qemu_gluster_getlength, | ||
237 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
238 | .bdrv_truncate = qemu_gluster_truncate, | ||
239 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
240 | index XXXXXXX..XXXXXXX 100644 | ||
241 | --- a/block/iscsi.c | ||
242 | +++ b/block/iscsi.c | ||
243 | @@ -XXX,XX +XXX,XX @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset, | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | -static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp) | ||
248 | +static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, | ||
249 | + Error **errp) | ||
250 | { | ||
251 | int ret = 0; | ||
252 | int64_t total_size = 0; | ||
253 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = { | ||
254 | .bdrv_parse_filename = iscsi_parse_filename, | ||
255 | .bdrv_file_open = iscsi_open, | ||
256 | .bdrv_close = iscsi_close, | ||
257 | - .bdrv_create = iscsi_create, | ||
258 | + .bdrv_co_create_opts = iscsi_co_create_opts, | ||
259 | .create_opts = &iscsi_create_opts, | ||
260 | .bdrv_reopen_prepare = iscsi_reopen_prepare, | ||
261 | .bdrv_reopen_commit = iscsi_reopen_commit, | ||
262 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = { | ||
263 | .bdrv_parse_filename = iscsi_parse_filename, | ||
264 | .bdrv_file_open = iscsi_open, | ||
265 | .bdrv_close = iscsi_close, | ||
266 | - .bdrv_create = iscsi_create, | ||
267 | + .bdrv_co_create_opts = iscsi_co_create_opts, | ||
268 | .create_opts = &iscsi_create_opts, | ||
269 | .bdrv_reopen_prepare = iscsi_reopen_prepare, | ||
270 | .bdrv_reopen_commit = iscsi_reopen_commit, | ||
271 | diff --git a/block/nfs.c b/block/nfs.c | ||
272 | index XXXXXXX..XXXXXXX 100644 | ||
273 | --- a/block/nfs.c | ||
274 | +++ b/block/nfs.c | ||
275 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList nfs_create_opts = { | ||
276 | } | 64 | } |
277 | }; | 65 | g_free(bs_queue); |
278 | 66 | ||
279 | -static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) | 67 | - bdrv_drain_all_end(); |
280 | +static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, | 68 | - |
281 | + Error **errp) | ||
282 | { | ||
283 | int64_t ret, total_size; | ||
284 | NFSClient *client = g_new0(NFSClient, 1); | ||
285 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nfs = { | ||
286 | |||
287 | .bdrv_file_open = nfs_file_open, | ||
288 | .bdrv_close = nfs_file_close, | ||
289 | - .bdrv_create = nfs_file_create, | ||
290 | + .bdrv_co_create_opts = nfs_file_co_create_opts, | ||
291 | .bdrv_reopen_prepare = nfs_reopen_prepare, | ||
292 | |||
293 | .bdrv_co_preadv = nfs_co_preadv, | ||
294 | diff --git a/block/parallels.c b/block/parallels.c | ||
295 | index XXXXXXX..XXXXXXX 100644 | ||
296 | --- a/block/parallels.c | ||
297 | +++ b/block/parallels.c | ||
298 | @@ -XXX,XX +XXX,XX @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
299 | } | ||
300 | |||
301 | |||
302 | -static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) | ||
303 | +static int coroutine_fn parallels_co_create_opts(const char *filename, | ||
304 | + QemuOpts *opts, | ||
305 | + Error **errp) | ||
306 | { | ||
307 | int64_t total_size, cl_size; | ||
308 | uint8_t tmp[BDRV_SECTOR_SIZE]; | ||
309 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { | ||
310 | .bdrv_co_readv = parallels_co_readv, | ||
311 | .bdrv_co_writev = parallels_co_writev, | ||
312 | .supports_backing = true, | ||
313 | - .bdrv_create = parallels_create, | ||
314 | + .bdrv_co_create_opts = parallels_co_create_opts, | ||
315 | .bdrv_check = parallels_check, | ||
316 | .create_opts = &parallels_create_opts, | ||
317 | }; | ||
318 | diff --git a/block/qcow.c b/block/qcow.c | ||
319 | index XXXXXXX..XXXXXXX 100644 | ||
320 | --- a/block/qcow.c | ||
321 | +++ b/block/qcow.c | ||
322 | @@ -XXX,XX +XXX,XX @@ static void qcow_close(BlockDriverState *bs) | ||
323 | error_free(s->migration_blocker); | ||
324 | } | ||
325 | |||
326 | -static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) | ||
327 | +static int coroutine_fn qcow_co_create_opts(const char *filename, QemuOpts *opts, | ||
328 | + Error **errp) | ||
329 | { | ||
330 | int header_size, backing_filename_len, l1_size, shift, i; | ||
331 | QCowHeader header; | ||
332 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qcow = { | ||
333 | .bdrv_close = qcow_close, | ||
334 | .bdrv_child_perm = bdrv_format_default_perms, | ||
335 | .bdrv_reopen_prepare = qcow_reopen_prepare, | ||
336 | - .bdrv_create = qcow_create, | ||
337 | + .bdrv_co_create_opts = qcow_co_create_opts, | ||
338 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
339 | .supports_backing = true, | ||
340 | |||
341 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
342 | index XXXXXXX..XXXXXXX 100644 | ||
343 | --- a/block/qcow2.c | ||
344 | +++ b/block/qcow2.c | ||
345 | @@ -XXX,XX +XXX,XX @@ out: | ||
346 | return ret; | 69 | return ret; |
347 | } | 70 | } |
348 | 71 | ||
349 | -static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) | 72 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) |
350 | +static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, | ||
351 | + Error **errp) | ||
352 | { | 73 | { |
353 | char *backing_file = NULL; | 74 | int ret = -1; |
354 | char *backing_fmt = NULL; | 75 | Error *local_err = NULL; |
355 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | 76 | - BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); |
356 | .bdrv_reopen_abort = qcow2_reopen_abort, | 77 | + BlockReopenQueue *queue; |
357 | .bdrv_join_options = qcow2_join_options, | 78 | |
358 | .bdrv_child_perm = bdrv_format_default_perms, | 79 | + bdrv_subtree_drained_begin(bs); |
359 | - .bdrv_create = qcow2_create, | 80 | + |
360 | + .bdrv_co_create_opts = qcow2_co_create_opts, | 81 | + queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); |
361 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | 82 | ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); |
362 | .bdrv_co_block_status = qcow2_co_block_status, | 83 | if (local_err != NULL) { |
363 | 84 | error_propagate(errp, local_err); | |
364 | diff --git a/block/qed.c b/block/qed.c | 85 | } |
365 | index XXXXXXX..XXXXXXX 100644 | 86 | + |
366 | --- a/block/qed.c | 87 | + bdrv_subtree_drained_end(bs); |
367 | +++ b/block/qed.c | 88 | + |
368 | @@ -XXX,XX +XXX,XX @@ out: | ||
369 | return ret; | 89 | return ret; |
370 | } | 90 | } |
371 | 91 | ||
372 | -static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp) | 92 | diff --git a/block/replication.c b/block/replication.c |
373 | +static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, | ||
374 | + QemuOpts *opts, | ||
375 | + Error **errp) | ||
376 | { | ||
377 | uint64_t image_size = 0; | ||
378 | uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE; | ||
379 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = { | ||
380 | .bdrv_close = bdrv_qed_close, | ||
381 | .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, | ||
382 | .bdrv_child_perm = bdrv_format_default_perms, | ||
383 | - .bdrv_create = bdrv_qed_create, | ||
384 | + .bdrv_co_create_opts = bdrv_qed_co_create_opts, | ||
385 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
386 | .bdrv_co_block_status = bdrv_qed_co_block_status, | ||
387 | .bdrv_co_readv = bdrv_qed_co_readv, | ||
388 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
389 | index XXXXXXX..XXXXXXX 100644 | 93 | index XXXXXXX..XXXXXXX 100644 |
390 | --- a/block/raw-format.c | 94 | --- a/block/replication.c |
391 | +++ b/block/raw-format.c | 95 | +++ b/block/replication.c |
392 | @@ -XXX,XX +XXX,XX @@ static int raw_has_zero_init(BlockDriverState *bs) | 96 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, |
393 | return bdrv_has_zero_init(bs->file->bs); | 97 | new_secondary_flags = s->orig_secondary_flags; |
98 | } | ||
99 | |||
100 | + bdrv_subtree_drained_begin(s->hidden_disk->bs); | ||
101 | + bdrv_subtree_drained_begin(s->secondary_disk->bs); | ||
102 | + | ||
103 | if (orig_hidden_flags != new_hidden_flags) { | ||
104 | reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, | ||
105 | new_hidden_flags); | ||
106 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, | ||
107 | reopen_queue, &local_err); | ||
108 | error_propagate(errp, local_err); | ||
109 | } | ||
110 | + | ||
111 | + bdrv_subtree_drained_end(s->hidden_disk->bs); | ||
112 | + bdrv_subtree_drained_end(s->secondary_disk->bs); | ||
394 | } | 113 | } |
395 | 114 | ||
396 | -static int raw_create(const char *filename, QemuOpts *opts, Error **errp) | 115 | static void backup_job_cleanup(BlockDriverState *bs) |
397 | +static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, | 116 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c |
398 | + Error **errp) | ||
399 | { | ||
400 | return bdrv_create_file(filename, opts, errp); | ||
401 | } | ||
402 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = { | ||
403 | .bdrv_open = &raw_open, | ||
404 | .bdrv_close = &raw_close, | ||
405 | .bdrv_child_perm = bdrv_filter_default_perms, | ||
406 | - .bdrv_create = &raw_create, | ||
407 | + .bdrv_co_create_opts = &raw_co_create_opts, | ||
408 | .bdrv_co_preadv = &raw_co_preadv, | ||
409 | .bdrv_co_pwritev = &raw_co_pwritev, | ||
410 | .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, | ||
411 | diff --git a/block/rbd.c b/block/rbd.c | ||
412 | index XXXXXXX..XXXXXXX 100644 | 117 | index XXXXXXX..XXXXXXX 100644 |
413 | --- a/block/rbd.c | 118 | --- a/qemu-io-cmds.c |
414 | +++ b/block/rbd.c | 119 | +++ b/qemu-io-cmds.c |
415 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = { | 120 | @@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) |
416 | }, | 121 | opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; |
417 | }; | 122 | qemu_opts_reset(&reopen_opts); |
418 | 123 | ||
419 | -static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) | 124 | + bdrv_subtree_drained_begin(bs); |
420 | +static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, | 125 | brq = bdrv_reopen_queue(NULL, bs, opts, flags); |
421 | + QemuOpts *opts, | 126 | bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); |
422 | + Error **errp) | 127 | + bdrv_subtree_drained_end(bs); |
423 | { | 128 | + |
424 | Error *local_err = NULL; | 129 | if (local_err) { |
425 | int64_t bytes = 0; | 130 | error_report_err(local_err); |
426 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = { | 131 | } else { |
427 | .bdrv_file_open = qemu_rbd_open, | ||
428 | .bdrv_close = qemu_rbd_close, | ||
429 | .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, | ||
430 | - .bdrv_create = qemu_rbd_create, | ||
431 | + .bdrv_co_create_opts = qemu_rbd_co_create_opts, | ||
432 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
433 | .bdrv_get_info = qemu_rbd_getinfo, | ||
434 | .create_opts = &qemu_rbd_create_opts, | ||
435 | diff --git a/block/sheepdog.c b/block/sheepdog.c | ||
436 | index XXXXXXX..XXXXXXX 100644 | ||
437 | --- a/block/sheepdog.c | ||
438 | +++ b/block/sheepdog.c | ||
439 | @@ -XXX,XX +XXX,XX @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) | ||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | -static int sd_create(const char *filename, QemuOpts *opts, | ||
444 | - Error **errp) | ||
445 | +static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, | ||
446 | + Error **errp) | ||
447 | { | ||
448 | Error *err = NULL; | ||
449 | int ret = 0; | ||
450 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog = { | ||
451 | .bdrv_reopen_commit = sd_reopen_commit, | ||
452 | .bdrv_reopen_abort = sd_reopen_abort, | ||
453 | .bdrv_close = sd_close, | ||
454 | - .bdrv_create = sd_create, | ||
455 | + .bdrv_co_create_opts = sd_co_create_opts, | ||
456 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
457 | .bdrv_getlength = sd_getlength, | ||
458 | .bdrv_get_allocated_file_size = sd_get_allocated_file_size, | ||
459 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_tcp = { | ||
460 | .bdrv_reopen_commit = sd_reopen_commit, | ||
461 | .bdrv_reopen_abort = sd_reopen_abort, | ||
462 | .bdrv_close = sd_close, | ||
463 | - .bdrv_create = sd_create, | ||
464 | + .bdrv_co_create_opts = sd_co_create_opts, | ||
465 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
466 | .bdrv_getlength = sd_getlength, | ||
467 | .bdrv_get_allocated_file_size = sd_get_allocated_file_size, | ||
468 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_unix = { | ||
469 | .bdrv_reopen_commit = sd_reopen_commit, | ||
470 | .bdrv_reopen_abort = sd_reopen_abort, | ||
471 | .bdrv_close = sd_close, | ||
472 | - .bdrv_create = sd_create, | ||
473 | + .bdrv_co_create_opts = sd_co_create_opts, | ||
474 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
475 | .bdrv_getlength = sd_getlength, | ||
476 | .bdrv_get_allocated_file_size = sd_get_allocated_file_size, | ||
477 | diff --git a/block/ssh.c b/block/ssh.c | ||
478 | index XXXXXXX..XXXXXXX 100644 | ||
479 | --- a/block/ssh.c | ||
480 | +++ b/block/ssh.c | ||
481 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList ssh_create_opts = { | ||
482 | } | ||
483 | }; | ||
484 | |||
485 | -static int ssh_create(const char *filename, QemuOpts *opts, Error **errp) | ||
486 | +static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, | ||
487 | + Error **errp) | ||
488 | { | ||
489 | int r, ret; | ||
490 | int64_t total_size = 0; | ||
491 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_ssh = { | ||
492 | .instance_size = sizeof(BDRVSSHState), | ||
493 | .bdrv_parse_filename = ssh_parse_filename, | ||
494 | .bdrv_file_open = ssh_file_open, | ||
495 | - .bdrv_create = ssh_create, | ||
496 | + .bdrv_co_create_opts = ssh_co_create_opts, | ||
497 | .bdrv_close = ssh_close, | ||
498 | .bdrv_has_zero_init = ssh_has_zero_init, | ||
499 | .bdrv_co_readv = ssh_co_readv, | ||
500 | diff --git a/block/vdi.c b/block/vdi.c | ||
501 | index XXXXXXX..XXXXXXX 100644 | ||
502 | --- a/block/vdi.c | ||
503 | +++ b/block/vdi.c | ||
504 | @@ -XXX,XX +XXX,XX @@ nonallocating_write: | ||
505 | return ret; | ||
506 | } | ||
507 | |||
508 | -static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) | ||
509 | +static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, | ||
510 | + Error **errp) | ||
511 | { | ||
512 | int ret = 0; | ||
513 | uint64_t bytes = 0; | ||
514 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vdi = { | ||
515 | .bdrv_close = vdi_close, | ||
516 | .bdrv_reopen_prepare = vdi_reopen_prepare, | ||
517 | .bdrv_child_perm = bdrv_format_default_perms, | ||
518 | - .bdrv_create = vdi_create, | ||
519 | + .bdrv_co_create_opts = vdi_co_create_opts, | ||
520 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
521 | .bdrv_co_block_status = vdi_co_block_status, | ||
522 | .bdrv_make_empty = vdi_make_empty, | ||
523 | diff --git a/block/vhdx.c b/block/vhdx.c | ||
524 | index XXXXXXX..XXXXXXX 100644 | ||
525 | --- a/block/vhdx.c | ||
526 | +++ b/block/vhdx.c | ||
527 | @@ -XXX,XX +XXX,XX @@ exit: | ||
528 | * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. | ||
529 | * 1MB | ||
530 | */ | ||
531 | -static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) | ||
532 | +static int coroutine_fn vhdx_co_create_opts(const char *filename, QemuOpts *opts, | ||
533 | + Error **errp) | ||
534 | { | ||
535 | int ret = 0; | ||
536 | uint64_t image_size = (uint64_t) 2 * GiB; | ||
537 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vhdx = { | ||
538 | .bdrv_child_perm = bdrv_format_default_perms, | ||
539 | .bdrv_co_readv = vhdx_co_readv, | ||
540 | .bdrv_co_writev = vhdx_co_writev, | ||
541 | - .bdrv_create = vhdx_create, | ||
542 | + .bdrv_co_create_opts = vhdx_co_create_opts, | ||
543 | .bdrv_get_info = vhdx_get_info, | ||
544 | .bdrv_check = vhdx_check, | ||
545 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
546 | diff --git a/block/vmdk.c b/block/vmdk.c | ||
547 | index XXXXXXX..XXXXXXX 100644 | ||
548 | --- a/block/vmdk.c | ||
549 | +++ b/block/vmdk.c | ||
550 | @@ -XXX,XX +XXX,XX @@ static int filename_decompose(const char *filename, char *path, char *prefix, | ||
551 | return VMDK_OK; | ||
552 | } | ||
553 | |||
554 | -static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) | ||
555 | +static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, | ||
556 | + Error **errp) | ||
557 | { | ||
558 | int idx = 0; | ||
559 | BlockBackend *new_blk = NULL; | ||
560 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vmdk = { | ||
561 | .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, | ||
562 | .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes, | ||
563 | .bdrv_close = vmdk_close, | ||
564 | - .bdrv_create = vmdk_create, | ||
565 | + .bdrv_co_create_opts = vmdk_co_create_opts, | ||
566 | .bdrv_co_flush_to_disk = vmdk_co_flush, | ||
567 | .bdrv_co_block_status = vmdk_co_block_status, | ||
568 | .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, | ||
569 | diff --git a/block/vpc.c b/block/vpc.c | ||
570 | index XXXXXXX..XXXXXXX 100644 | ||
571 | --- a/block/vpc.c | ||
572 | +++ b/block/vpc.c | ||
573 | @@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, | ||
574 | return ret; | ||
575 | } | ||
576 | |||
577 | -static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) | ||
578 | +static int coroutine_fn vpc_co_create_opts(const char *filename, QemuOpts *opts, | ||
579 | + Error **errp) | ||
580 | { | ||
581 | uint8_t buf[1024]; | ||
582 | VHDFooter *footer = (VHDFooter *) buf; | ||
583 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vpc = { | ||
584 | .bdrv_close = vpc_close, | ||
585 | .bdrv_reopen_prepare = vpc_reopen_prepare, | ||
586 | .bdrv_child_perm = bdrv_format_default_perms, | ||
587 | - .bdrv_create = vpc_create, | ||
588 | + .bdrv_co_create_opts = vpc_co_create_opts, | ||
589 | |||
590 | .bdrv_co_preadv = vpc_co_preadv, | ||
591 | .bdrv_co_pwritev = vpc_co_pwritev, | ||
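Every hunk above applies the same mechanical change: the driver's synchronous create callback is renamed to *_co_create_opts, marked coroutine_fn, and wired into its BlockDriver table through .bdrv_co_create_opts instead of .bdrv_create. As a minimal sketch (not part of the patch; the driver name "mydrv" is hypothetical), the converted shape of a trivial driver that simply delegates to the protocol layer, as raw-format does above, looks roughly like this:

    /* Illustrative sketch only, not taken from the series: a hypothetical
     * "mydrv" driver after conversion to the coroutine-based callback.
     * BlockDriver, QemuOpts and bdrv_create_file() are the existing
     * block-layer interfaces already used in the hunks above. */
    #include "qemu/osdep.h"
    #include "block/block_int.h"

    /* The create callback now runs in coroutine context, so it is marked
     * coroutine_fn and follows the *_co_create_opts naming convention. */
    static int coroutine_fn mydrv_co_create_opts(const char *filename,
                                                 QemuOpts *opts, Error **errp)
    {
        /* Like raw-format above, just delegate to the protocol layer. */
        return bdrv_create_file(filename, opts, errp);
    }

    static BlockDriver bdrv_mydrv = {
        .format_name         = "mydrv",
        /* .bdrv_create is replaced by the coroutine version: */
        .bdrv_co_create_opts = mydrv_co_create_opts,
    };

The callback bodies themselves are left untouched by this conversion; only the signature gains coroutine_fn and the table entry moves to the new field, which lets the block layer invoke image creation from coroutine context.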
592 | -- | 132 | -- |
593 | 2.13.6 | 133 | 2.13.6 |
594 | 134 | ||