1 | The following changes since commit 609ef9f451759151d0bfe7c3843410ab94d68f18: | 1 | The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/berrange/tags/qio-next-pull-request' into staging (2018-06-28 17:53:31 +0100) | 3 | Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to 583c99d39368526dfb57a715b04a6ceea27dbe1e: | 9 | for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8: |
10 | 10 | ||
11 | block: Remove unused sector-based vectored I/O (2018-06-29 14:20:56 +0200) | 11 | block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches: | 14 | Block layer patches |
15 | |||
16 | - Make truncate operations asynchronous (so that preallocation in | ||
17 | blockdev-create doesn't block the main loop any more) | ||
18 | - usb-storage: Add rerror/werror properties | ||
19 | - nvme: Add num_queues property | ||
20 | - qemu-img convert: Copy offloading fixes (including data corruption fix) | ||
21 | - qcow2: Fix cluster leak on temporary write error | ||
22 | - Use byte-based functions instead of bdrv_co_readv/writev() | ||
23 | - Various small fixes and cleanups | ||
24 | 15 | ||
25 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
26 | Eric Blake (8): | 17 | Doug Gale (1): |
27 | parallels: Switch to byte-based calls | 18 | nvme: Add tracing |
28 | qcow: Switch get_cluster_offset to be byte-based | ||
29 | qcow: Switch qcow_co_readv to byte-based calls | ||
30 | qcow: Switch qcow_co_writev to byte-based calls | ||
31 | qcow: Switch to a byte-based driver | ||
32 | replication: Switch to byte-based calls | ||
33 | vhdx: Switch to byte-based calls | ||
34 | block: Remove unused sector-based vectored I/O | ||
35 | 19 | ||
36 | Fam Zheng (5): | 20 | Edgar Kaziakhmedov (1): |
37 | qcow2: Remove dead check on !ret | 21 | qcow2: get rid of qcow2_backing_read1 routine |
38 | block: Move request tracking to children in copy offloading | ||
39 | qcow2: Fix src_offset in copy offloading | ||
40 | iscsi: Don't blindly use designator length in response for memcpy | ||
41 | file-posix: Fix EINTR handling | ||
42 | 22 | ||
43 | Kevin Wolf (12): | 23 | Fam Zheng (2): |
44 | qapi/job: The next release will be 3.0 | 24 | block: Open backing image in force share mode for size probe |
45 | usb-storage: Add rerror/werror properties | 25 | block: Remove unused bdrv_requests_pending |
46 | qcow2: Fix qcow2_truncate() error return value | ||
47 | block: Convert .bdrv_truncate callback to coroutine_fn | ||
48 | qcow2: Remove coroutine trampoline for preallocate_co() | ||
49 | block: Move bdrv_truncate() implementation to io.c | ||
50 | block: Use tracked request for truncate | ||
51 | file-posix: Make .bdrv_co_truncate asynchronous | ||
52 | qemu-iotests: Update 026.out.nocache reference output | ||
53 | qcow2: Free allocated clusters on write error | ||
54 | qemu-iotests: Test qcow2 not leaking clusters on write error | ||
55 | file-posix: Implement co versions of discard/flush | ||
56 | 26 | ||
57 | Markus Armbruster (3): | 27 | John Snow (1): |
58 | block-qdict: Pacify Coverity after commit f1b34a248e9 | 28 | iotests: fix 197 for vpc |
59 | block/crypto: Pacify Coverity after commit f853465aacb | ||
60 | block/crypto: Simplify block_crypto_{open,create}_opts_init() | ||
61 | 29 | ||
62 | Weiping Zhang (1): | 30 | Kevin Wolf (27): |
63 | hw/block/nvme: add optional parameter num_queues for nvme device | 31 | block: Formats don't need CONSISTENT_READ with NO_IO |
32 | block: Make bdrv_drain_invoke() recursive | ||
33 | block: Call .drain_begin only once in bdrv_drain_all_begin() | ||
34 | test-bdrv-drain: Test BlockDriver callbacks for drain | ||
35 | block: bdrv_drain_recurse(): Remove unused begin parameter | ||
36 | block: Don't wait for requests in bdrv_drain*_end() | ||
37 | block: Unify order in drain functions | ||
38 | block: Don't acquire AioContext in hmp_qemu_io() | ||
39 | block: Document that x-blockdev-change breaks quorum children list | ||
40 | block: Assert drain_all is only called from main AioContext | ||
41 | block: Make bdrv_drain() driver callbacks non-recursive | ||
42 | test-bdrv-drain: Test callback for bdrv_drain | ||
43 | test-bdrv-drain: Test bs->quiesce_counter | ||
44 | blockjob: Pause job on draining any job BDS | ||
45 | test-bdrv-drain: Test drain vs. block jobs | ||
46 | block: Don't block_job_pause_all() in bdrv_drain_all() | ||
47 | block: Nested drain_end must still call callbacks | ||
48 | test-bdrv-drain: Test nested drain sections | ||
49 | block: Don't notify parents in drain call chain | ||
50 | block: Add bdrv_subtree_drained_begin/end() | ||
51 | test-bdrv-drain: Tests for bdrv_subtree_drain | ||
52 | test-bdrv-drain: Test behaviour in coroutine context | ||
53 | test-bdrv-drain: Recursive draining with multiple parents | ||
54 | block: Allow graph changes in subtree drained section | ||
55 | test-bdrv-drain: Test graph changes in drained section | ||
56 | commit: Simplify reopen of base | ||
57 | block: Keep nodes drained between reopen_queue/multiple | ||
64 | 58 | ||
65 | qapi/job.json | 18 +- | 59 | Thomas Huth (3): |
66 | block/crypto.h | 8 +- | 60 | block: Remove the obsolete -drive boot=on|off parameter |
67 | block/qcow2.h | 1 + | 61 | block: Remove the deprecated -hdachs option |
68 | include/block/block.h | 8 +- | 62 | block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter |
69 | include/block/block_int.h | 7 +- | ||
70 | include/block/raw-aio.h | 4 +- | ||
71 | include/hw/scsi/scsi.h | 2 + | ||
72 | block.c | 64 +------ | ||
73 | block/copy-on-read.c | 8 +- | ||
74 | block/crypto.c | 112 +++-------- | ||
75 | block/file-posix.c | 367 +++++++++++++++++++------------------ | ||
76 | block/file-win32.c | 6 +- | ||
77 | block/gluster.c | 14 +- | ||
78 | block/io.c | 219 +++++++++++++++------- | ||
79 | block/iscsi.c | 10 +- | ||
80 | block/nfs.c | 7 +- | ||
81 | block/parallels.c | 16 +- | ||
82 | block/qcow.c | 135 +++++++------- | ||
83 | block/qcow2-cluster.c | 11 ++ | ||
84 | block/qcow2.c | 140 ++++++-------- | ||
85 | block/qed.c | 8 +- | ||
86 | block/raw-format.c | 8 +- | ||
87 | block/rbd.c | 8 +- | ||
88 | block/replication.c | 14 +- | ||
89 | block/sheepdog.c | 12 +- | ||
90 | block/ssh.c | 6 +- | ||
91 | block/vhdx.c | 12 +- | ||
92 | hw/block/nvme.c | 5 +- | ||
93 | hw/scsi/scsi-bus.c | 11 +- | ||
94 | hw/usb/dev-storage.c | 2 + | ||
95 | qobject/block-qdict.c | 16 +- | ||
96 | tests/qemu-iotests/026 | 17 ++ | ||
97 | tests/qemu-iotests/026.out | 8 + | ||
98 | tests/qemu-iotests/026.out.nocache | 14 +- | ||
99 | tests/qemu-iotests/063 | 9 + | ||
100 | tests/qemu-iotests/063.out | 12 ++ | ||
101 | 36 files changed, 685 insertions(+), 634 deletions(-) | ||
102 | 63 | ||
64 | qapi/block-core.json | 4 + | ||
65 | block/qcow2.h | 3 - | ||
66 | include/block/block.h | 15 +- | ||
67 | include/block/block_int.h | 6 +- | ||
68 | block.c | 75 ++++- | ||
69 | block/commit.c | 8 +- | ||
70 | block/io.c | 164 +++++++--- | ||
71 | block/qcow2.c | 51 +-- | ||
72 | block/replication.c | 6 + | ||
73 | blockdev.c | 11 - | ||
74 | blockjob.c | 22 +- | ||
75 | hmp.c | 6 - | ||
76 | hw/block/nvme.c | 349 +++++++++++++++++---- | ||
77 | qemu-io-cmds.c | 3 + | ||
78 | tests/test-bdrv-drain.c | 651 +++++++++++++++++++++++++++++++++++++++ | ||
79 | vl.c | 86 +----- | ||
80 | hw/block/trace-events | 93 ++++++ | ||
81 | qemu-doc.texi | 29 +- | ||
82 | qemu-options.hx | 19 +- | ||
83 | tests/Makefile.include | 2 + | ||
84 | tests/qemu-iotests/197 | 4 + | ||
85 | tests/qemu-iotests/common.filter | 3 +- | ||
86 | 22 files changed, 1294 insertions(+), 316 deletions(-) | ||
87 | create mode 100644 tests/test-bdrv-drain.c | ||
88 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently | ||
2 | in use as a mirror target. It is not enough for image formats, though, | ||
3 | as these still unconditionally request BLK_PERM_CONSISTENT_READ. | ||
1 | 4 | ||
5 | As this permission is geared towards whether the guest-visible data is | ||
6 | consistent, and has no impact on whether the metadata is sane, and | ||
7 | 'qemu-img info' does not read guest-visible data (except for the raw | ||
8 | format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there | ||
9 | is not going to be any guest I/O performed, regardless of image format. | ||
10 | |||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | --- | ||
13 | block.c | 6 +++++- | ||
14 | 1 file changed, 5 insertions(+), 1 deletion(-) | ||
15 | |||
16 | diff --git a/block.c b/block.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/block.c | ||
19 | +++ b/block.c | ||
20 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | ||
21 | assert(role == &child_backing || role == &child_file); | ||
22 | |||
23 | if (!backing) { | ||
24 | + int flags = bdrv_reopen_get_flags(reopen_queue, bs); | ||
25 | + | ||
26 | /* Apart from the modifications below, the same permissions are | ||
27 | * forwarded and left alone as for filters */ | ||
28 | bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, | ||
29 | @@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, | ||
30 | |||
31 | /* bs->file always needs to be consistent because of the metadata. We | ||
32 | * can never allow other users to resize or write to it. */ | ||
33 | - perm |= BLK_PERM_CONSISTENT_READ; | ||
34 | + if (!(flags & BDRV_O_NO_IO)) { | ||
35 | + perm |= BLK_PERM_CONSISTENT_READ; | ||
36 | + } | ||
37 | shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); | ||
38 | } else { | ||
39 | /* We want consistent read from backing files if the parent needs it. | ||
40 | -- | ||
41 | 2.13.6 | ||
42 | |||
43 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: John Snow <jsnow@redhat.com> | ||
1 | 2 | ||
3 | VPC has some difficulty creating geometries of particular size. | ||
4 | However, we can indeed force it to use a literal one, so let's | ||
5 | do that for the sake of test 197, which is testing some specific | ||
6 | offsets. | ||
7 | |||
8 | Signed-off-by: John Snow <jsnow@redhat.com> | ||
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | Reviewed-by: Lukáš Doktor <ldoktor@redhat.com> | ||
13 | --- | ||
14 | tests/qemu-iotests/197 | 4 ++++ | ||
15 | tests/qemu-iotests/common.filter | 3 ++- | ||
16 | 2 files changed, 6 insertions(+), 1 deletion(-) | ||
17 | |||
18 | diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 | ||
19 | index XXXXXXX..XXXXXXX 100755 | ||
20 | --- a/tests/qemu-iotests/197 | ||
21 | +++ b/tests/qemu-iotests/197 | ||
22 | @@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ===' | ||
23 | echo | ||
24 | |||
25 | # Prep the images | ||
26 | +# VPC rounds image sizes to a specific geometry, force a specific size. | ||
27 | +if [ "$IMGFMT" = "vpc" ]; then | ||
28 | + IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size") | ||
29 | +fi | ||
30 | _make_test_img 4G | ||
31 | $QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io | ||
32 | IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \ | ||
33 | diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/tests/qemu-iotests/common.filter | ||
36 | +++ b/tests/qemu-iotests/common.filter | ||
37 | @@ -XXX,XX +XXX,XX @@ _filter_img_create() | ||
38 | -e "s# log_size=[0-9]\\+##g" \ | ||
39 | -e "s# refcount_bits=[0-9]\\+##g" \ | ||
40 | -e "s# key-secret=[a-zA-Z0-9]\\+##g" \ | ||
41 | - -e "s# iter-time=[0-9]\\+##g" | ||
42 | + -e "s# iter-time=[0-9]\\+##g" \ | ||
43 | + -e "s# force_size=\\(on\\|off\\)##g" | ||
44 | } | ||
45 | |||
46 | _filter_img_info() | ||
47 | -- | ||
48 | 2.13.6 | ||
49 | |||
50 | diff view generated by jsdifflib |
1 | This moves the code to resize an image file to the thread pool to avoid | 1 | This change separates bdrv_drain_invoke(), which calls the BlockDriver |
---|---|---|---|
2 | blocking. | 2 | drain callbacks, from bdrv_drain_recurse(). Instead, the function |
3 | performs its own recursion now. | ||
3 | 4 | ||
4 | Creating large images with preallocation with blockdev-create is now | 5 | One reason for this is that bdrv_drain_recurse() can be called multiple |
5 | actually a background job instead of blocking the monitor (and most | 6 | times by bdrv_drain_all_begin(), but the callbacks may only be called |
6 | other things) until the preallocation has completed. | 7 | once. The separation is necessary to fix this bug. |
7 | 8 | ||
9 | The other reason is that we intend to go to a model where we call all | ||
10 | driver callbacks first, and only then start polling. This is not fully | ||
11 | achieved yet with this patch, as bdrv_drain_invoke() contains a | ||
12 | BDRV_POLL_WHILE() loop for the block driver callbacks, which can still | ||
13 | call callbacks for any unrelated event. It's a step in this direction | ||
14 | anyway. | ||
15 | |||
16 | Cc: qemu-stable@nongnu.org | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | --- | 19 | --- |
11 | include/block/raw-aio.h | 4 +- | 20 | block/io.c | 14 +++++++++++--- |
12 | block/file-posix.c | 266 +++++++++++++++++++++++++++--------------------- | 21 | 1 file changed, 11 insertions(+), 3 deletions(-) |
13 | 2 files changed, 154 insertions(+), 116 deletions(-) | ||
14 | 22 | ||
15 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | 23 | diff --git a/block/io.c b/block/io.c |
16 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/block/raw-aio.h | 25 | --- a/block/io.c |
18 | +++ b/include/block/raw-aio.h | 26 | +++ b/block/io.c |
19 | @@ -XXX,XX +XXX,XX @@ | 27 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
20 | #define QEMU_AIO_DISCARD 0x0010 | 28 | bdrv_wakeup(bs); |
21 | #define QEMU_AIO_WRITE_ZEROES 0x0020 | ||
22 | #define QEMU_AIO_COPY_RANGE 0x0040 | ||
23 | +#define QEMU_AIO_TRUNCATE 0x0080 | ||
24 | #define QEMU_AIO_TYPE_MASK \ | ||
25 | (QEMU_AIO_READ | \ | ||
26 | QEMU_AIO_WRITE | \ | ||
27 | @@ -XXX,XX +XXX,XX @@ | ||
28 | QEMU_AIO_FLUSH | \ | ||
29 | QEMU_AIO_DISCARD | \ | ||
30 | QEMU_AIO_WRITE_ZEROES | \ | ||
31 | - QEMU_AIO_COPY_RANGE) | ||
32 | + QEMU_AIO_COPY_RANGE | \ | ||
33 | + QEMU_AIO_TRUNCATE) | ||
34 | |||
35 | /* AIO flags */ | ||
36 | #define QEMU_AIO_MISALIGNED 0x1000 | ||
37 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/block/file-posix.c | ||
40 | +++ b/block/file-posix.c | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct RawPosixAIOData { | ||
42 | #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ | ||
43 | off_t aio_offset; | ||
44 | int aio_type; | ||
45 | - int aio_fd2; | ||
46 | - off_t aio_offset2; | ||
47 | + union { | ||
48 | + struct { | ||
49 | + int aio_fd2; | ||
50 | + off_t aio_offset2; | ||
51 | + }; | ||
52 | + struct { | ||
53 | + PreallocMode prealloc; | ||
54 | + Error **errp; | ||
55 | + }; | ||
56 | + }; | ||
57 | } RawPosixAIOData; | ||
58 | |||
59 | #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) | ||
60 | @@ -XXX,XX +XXX,XX @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) | ||
61 | return ret; | ||
62 | } | 29 | } |
63 | 30 | ||
64 | +static int handle_aiocb_truncate(RawPosixAIOData *aiocb) | 31 | +/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ |
65 | +{ | 32 | static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
66 | + int result = 0; | 33 | { |
67 | + int64_t current_length = 0; | 34 | + BdrvChild *child, *tmp; |
68 | + char *buf = NULL; | 35 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; |
69 | + struct stat st; | 36 | |
70 | + int fd = aiocb->aio_fildes; | 37 | if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || |
71 | + int64_t offset = aiocb->aio_offset; | 38 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
72 | + Error **errp = aiocb->errp; | 39 | data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data); |
40 | bdrv_coroutine_enter(bs, data.co); | ||
41 | BDRV_POLL_WHILE(bs, !data.done); | ||
73 | + | 42 | + |
74 | + if (fstat(fd, &st) < 0) { | 43 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { |
75 | + result = -errno; | 44 | + bdrv_drain_invoke(child->bs, begin); |
76 | + error_setg_errno(errp, -result, "Could not stat file"); | ||
77 | + return result; | ||
78 | + } | 45 | + } |
79 | + | 46 | } |
80 | + current_length = st.st_size; | 47 | |
81 | + if (current_length > offset && aiocb->prealloc != PREALLOC_MODE_OFF) { | 48 | static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
82 | + error_setg(errp, "Cannot use preallocation for shrinking files"); | 49 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
83 | + return -ENOTSUP; | 50 | BdrvChild *child, *tmp; |
84 | + } | 51 | bool waited; |
85 | + | 52 | |
86 | + switch (aiocb->prealloc) { | 53 | - /* Ensure any pending metadata writes are submitted to bs->file. */ |
87 | +#ifdef CONFIG_POSIX_FALLOCATE | 54 | - bdrv_drain_invoke(bs, begin); |
88 | + case PREALLOC_MODE_FALLOC: | ||
89 | + /* | ||
90 | + * Truncating before posix_fallocate() makes it about twice slower on | ||
91 | + * file systems that do not support fallocate(), trying to check if a | ||
92 | + * block is allocated before allocating it, so don't do that here. | ||
93 | + */ | ||
94 | + if (offset != current_length) { | ||
95 | + result = -posix_fallocate(fd, current_length, | ||
96 | + offset - current_length); | ||
97 | + if (result != 0) { | ||
98 | + /* posix_fallocate() doesn't set errno. */ | ||
99 | + error_setg_errno(errp, -result, | ||
100 | + "Could not preallocate new data"); | ||
101 | + } | ||
102 | + } else { | ||
103 | + result = 0; | ||
104 | + } | ||
105 | + goto out; | ||
106 | +#endif | ||
107 | + case PREALLOC_MODE_FULL: | ||
108 | + { | ||
109 | + int64_t num = 0, left = offset - current_length; | ||
110 | + off_t seek_result; | ||
111 | + | ||
112 | + /* | ||
113 | + * Knowing the final size from the beginning could allow the file | ||
114 | + * system driver to do less allocations and possibly avoid | ||
115 | + * fragmentation of the file. | ||
116 | + */ | ||
117 | + if (ftruncate(fd, offset) != 0) { | ||
118 | + result = -errno; | ||
119 | + error_setg_errno(errp, -result, "Could not resize file"); | ||
120 | + goto out; | ||
121 | + } | ||
122 | + | ||
123 | + buf = g_malloc0(65536); | ||
124 | + | ||
125 | + seek_result = lseek(fd, current_length, SEEK_SET); | ||
126 | + if (seek_result < 0) { | ||
127 | + result = -errno; | ||
128 | + error_setg_errno(errp, -result, | ||
129 | + "Failed to seek to the old end of file"); | ||
130 | + goto out; | ||
131 | + } | ||
132 | + | ||
133 | + while (left > 0) { | ||
134 | + num = MIN(left, 65536); | ||
135 | + result = write(fd, buf, num); | ||
136 | + if (result < 0) { | ||
137 | + result = -errno; | ||
138 | + error_setg_errno(errp, -result, | ||
139 | + "Could not write zeros for preallocation"); | ||
140 | + goto out; | ||
141 | + } | ||
142 | + left -= result; | ||
143 | + } | ||
144 | + if (result >= 0) { | ||
145 | + result = fsync(fd); | ||
146 | + if (result < 0) { | ||
147 | + result = -errno; | ||
148 | + error_setg_errno(errp, -result, | ||
149 | + "Could not flush file to disk"); | ||
150 | + goto out; | ||
151 | + } | ||
152 | + } | ||
153 | + goto out; | ||
154 | + } | ||
155 | + case PREALLOC_MODE_OFF: | ||
156 | + if (ftruncate(fd, offset) != 0) { | ||
157 | + result = -errno; | ||
158 | + error_setg_errno(errp, -result, "Could not resize file"); | ||
159 | + } | ||
160 | + return result; | ||
161 | + default: | ||
162 | + result = -ENOTSUP; | ||
163 | + error_setg(errp, "Unsupported preallocation mode: %s", | ||
164 | + PreallocMode_str(aiocb->prealloc)); | ||
165 | + return result; | ||
166 | + } | ||
167 | + | ||
168 | +out: | ||
169 | + if (result < 0) { | ||
170 | + if (ftruncate(fd, current_length) < 0) { | ||
171 | + error_report("Failed to restore old file length: %s", | ||
172 | + strerror(errno)); | ||
173 | + } | ||
174 | + } | ||
175 | + | ||
176 | + g_free(buf); | ||
177 | + return result; | ||
178 | +} | ||
179 | + | ||
180 | static int aio_worker(void *arg) | ||
181 | { | ||
182 | RawPosixAIOData *aiocb = arg; | ||
183 | @@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg) | ||
184 | case QEMU_AIO_COPY_RANGE: | ||
185 | ret = handle_aiocb_copy_range(aiocb); | ||
186 | break; | ||
187 | + case QEMU_AIO_TRUNCATE: | ||
188 | + ret = handle_aiocb_truncate(aiocb); | ||
189 | + break; | ||
190 | default: | ||
191 | fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); | ||
192 | ret = -EINVAL; | ||
193 | @@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs) | ||
194 | * | ||
195 | * Returns: 0 on success, -errno on failure. | ||
196 | */ | ||
197 | -static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc, | ||
198 | - Error **errp) | ||
199 | +static int coroutine_fn | ||
200 | +raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, | ||
201 | + PreallocMode prealloc, Error **errp) | ||
202 | { | ||
203 | - int result = 0; | ||
204 | - int64_t current_length = 0; | ||
205 | - char *buf = NULL; | ||
206 | - struct stat st; | ||
207 | - | 55 | - |
208 | - if (fstat(fd, &st) < 0) { | 56 | /* Wait for drained requests to finish */ |
209 | - result = -errno; | 57 | waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); |
210 | - error_setg_errno(errp, -result, "Could not stat file"); | 58 | |
211 | - return result; | 59 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
212 | - } | 60 | bdrv_parent_drained_begin(bs); |
213 | - | 61 | } |
214 | - current_length = st.st_size; | 62 | |
215 | - if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { | 63 | + bdrv_drain_invoke(bs, true); |
216 | - error_setg(errp, "Cannot use preallocation for shrinking files"); | 64 | bdrv_drain_recurse(bs, true); |
217 | - return -ENOTSUP; | ||
218 | - } | ||
219 | - | ||
220 | - switch (prealloc) { | ||
221 | -#ifdef CONFIG_POSIX_FALLOCATE | ||
222 | - case PREALLOC_MODE_FALLOC: | ||
223 | - /* | ||
224 | - * Truncating before posix_fallocate() makes it about twice slower on | ||
225 | - * file systems that do not support fallocate(), trying to check if a | ||
226 | - * block is allocated before allocating it, so don't do that here. | ||
227 | - */ | ||
228 | - if (offset != current_length) { | ||
229 | - result = -posix_fallocate(fd, current_length, offset - current_length); | ||
230 | - if (result != 0) { | ||
231 | - /* posix_fallocate() doesn't set errno. */ | ||
232 | - error_setg_errno(errp, -result, | ||
233 | - "Could not preallocate new data"); | ||
234 | - } | ||
235 | - } else { | ||
236 | - result = 0; | ||
237 | - } | ||
238 | - goto out; | ||
239 | -#endif | ||
240 | - case PREALLOC_MODE_FULL: | ||
241 | - { | ||
242 | - int64_t num = 0, left = offset - current_length; | ||
243 | - off_t seek_result; | ||
244 | - | ||
245 | - /* | ||
246 | - * Knowing the final size from the beginning could allow the file | ||
247 | - * system driver to do less allocations and possibly avoid | ||
248 | - * fragmentation of the file. | ||
249 | - */ | ||
250 | - if (ftruncate(fd, offset) != 0) { | ||
251 | - result = -errno; | ||
252 | - error_setg_errno(errp, -result, "Could not resize file"); | ||
253 | - goto out; | ||
254 | - } | ||
255 | - | ||
256 | - buf = g_malloc0(65536); | ||
257 | - | ||
258 | - seek_result = lseek(fd, current_length, SEEK_SET); | ||
259 | - if (seek_result < 0) { | ||
260 | - result = -errno; | ||
261 | - error_setg_errno(errp, -result, | ||
262 | - "Failed to seek to the old end of file"); | ||
263 | - goto out; | ||
264 | - } | ||
265 | - | ||
266 | - while (left > 0) { | ||
267 | - num = MIN(left, 65536); | ||
268 | - result = write(fd, buf, num); | ||
269 | - if (result < 0) { | ||
270 | - result = -errno; | ||
271 | - error_setg_errno(errp, -result, | ||
272 | - "Could not write zeros for preallocation"); | ||
273 | - goto out; | ||
274 | - } | ||
275 | - left -= result; | ||
276 | - } | ||
277 | - if (result >= 0) { | ||
278 | - result = fsync(fd); | ||
279 | - if (result < 0) { | ||
280 | - result = -errno; | ||
281 | - error_setg_errno(errp, -result, | ||
282 | - "Could not flush file to disk"); | ||
283 | - goto out; | ||
284 | - } | ||
285 | - } | ||
286 | - goto out; | ||
287 | - } | ||
288 | - case PREALLOC_MODE_OFF: | ||
289 | - if (ftruncate(fd, offset) != 0) { | ||
290 | - result = -errno; | ||
291 | - error_setg_errno(errp, -result, "Could not resize file"); | ||
292 | - } | ||
293 | - return result; | ||
294 | - default: | ||
295 | - result = -ENOTSUP; | ||
296 | - error_setg(errp, "Unsupported preallocation mode: %s", | ||
297 | - PreallocMode_str(prealloc)); | ||
298 | - return result; | ||
299 | - } | ||
300 | + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); | ||
301 | + ThreadPool *pool; | ||
302 | |||
303 | -out: | ||
304 | - if (result < 0) { | ||
305 | - if (ftruncate(fd, current_length) < 0) { | ||
306 | - error_report("Failed to restore old file length: %s", | ||
307 | - strerror(errno)); | ||
308 | - } | ||
309 | - } | ||
310 | + *acb = (RawPosixAIOData) { | ||
311 | + .bs = bs, | ||
312 | + .aio_fildes = fd, | ||
313 | + .aio_type = QEMU_AIO_TRUNCATE, | ||
314 | + .aio_offset = offset, | ||
315 | + .prealloc = prealloc, | ||
316 | + .errp = errp, | ||
317 | + }; | ||
318 | |||
319 | - g_free(buf); | ||
320 | - return result; | ||
321 | + /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ | ||
322 | + pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); | ||
323 | + return thread_pool_submit_co(pool, aio_worker, acb); | ||
324 | } | 65 | } |
325 | 66 | ||
326 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | 67 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
327 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
328 | } | 68 | } |
329 | 69 | ||
330 | if (S_ISREG(st.st_mode)) { | 70 | bdrv_parent_drained_end(bs); |
331 | - return raw_regular_truncate(s->fd, offset, prealloc, errp); | 71 | + bdrv_drain_invoke(bs, false); |
332 | + return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); | 72 | bdrv_drain_recurse(bs, false); |
333 | } | 73 | aio_enable_external(bdrv_get_aio_context(bs)); |
334 | |||
335 | if (prealloc != PREALLOC_MODE_OFF) { | ||
336 | @@ -XXX,XX +XXX,XX @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) | ||
337 | return (int64_t)st.st_blocks * 512; | ||
338 | } | 74 | } |
339 | 75 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | |
340 | -static int raw_co_create(BlockdevCreateOptions *options, Error **errp) | 76 | aio_context_acquire(aio_context); |
341 | +static int coroutine_fn | 77 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
342 | +raw_co_create(BlockdevCreateOptions *options, Error **errp) | 78 | if (aio_context == bdrv_get_aio_context(bs)) { |
343 | { | 79 | + /* FIXME Calling this multiple times is wrong */ |
344 | BlockdevCreateOptionsFile *file_opts; | 80 | + bdrv_drain_invoke(bs, true); |
345 | int fd; | 81 | waited |= bdrv_drain_recurse(bs, true); |
346 | @@ -XXX,XX +XXX,XX @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) | 82 | } |
347 | } | 83 | } |
348 | 84 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | |
349 | /* Clear the file by truncating it to 0 */ | 85 | aio_context_acquire(aio_context); |
350 | - result = raw_regular_truncate(fd, 0, PREALLOC_MODE_OFF, errp); | 86 | aio_enable_external(aio_context); |
351 | + result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); | 87 | bdrv_parent_drained_end(bs); |
352 | if (result < 0) { | 88 | + bdrv_drain_invoke(bs, false); |
353 | goto out_close; | 89 | bdrv_drain_recurse(bs, false); |
354 | } | 90 | aio_context_release(aio_context); |
355 | @@ -XXX,XX +XXX,XX @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) | ||
356 | |||
357 | /* Resize and potentially preallocate the file to the desired | ||
358 | * final size */ | ||
359 | - result = raw_regular_truncate(fd, file_opts->size, file_opts->preallocation, | ||
360 | - errp); | ||
361 | + result = raw_regular_truncate(NULL, fd, file_opts->size, | ||
362 | + file_opts->preallocation, errp); | ||
363 | if (result < 0) { | ||
364 | goto out_close; | ||
365 | } | 91 | } |
366 | -- | 92 | -- |
367 | 2.13.6 | 93 | 2.13.6 |
368 | 94 | ||
369 | 95 | diff view generated by jsdifflib |
1 | If qcow2_alloc_clusters_at() returns an error, we do need to negate it | 1 | bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver |
---|---|---|---|
2 | to get back the positive errno code for error_setg_errno(), but we still | 2 | callback inside its polling loop. This means that how many times it got |
3 | need to return the negative error code. | 3 | called for each node depended on how long it had to poll the event loop. |
4 | 4 | ||
5 | Fixes: 772d1f973f87269f6a4a4ea4b880680f3779bbdf | 5 | This is obviously not right and results in nodes that stay drained even |
6 | after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per | ||
7 | node. | ||
8 | |||
9 | Fix bdrv_drain_all_begin() to call the callback only once, too. | ||
10 | |||
11 | Cc: qemu-stable@nongnu.org | ||
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
8 | --- | 14 | --- |
9 | block/qcow2.c | 2 +- | 15 | block/io.c | 3 +-- |
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | 16 | 1 file changed, 1 insertion(+), 2 deletions(-) |
11 | 17 | ||
12 | diff --git a/block/qcow2.c b/block/qcow2.c | 18 | diff --git a/block/io.c b/block/io.c |
13 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/block/qcow2.c | 20 | --- a/block/io.c |
15 | +++ b/block/qcow2.c | 21 | +++ b/block/io.c |
16 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | 22 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
17 | if (clusters_allocated < 0) { | 23 | aio_context_acquire(aio_context); |
18 | error_setg_errno(errp, -clusters_allocated, | 24 | bdrv_parent_drained_begin(bs); |
19 | "Failed to allocate data clusters"); | 25 | aio_disable_external(aio_context); |
20 | - return -clusters_allocated; | 26 | + bdrv_drain_invoke(bs, true); |
21 | + return clusters_allocated; | 27 | aio_context_release(aio_context); |
22 | } | 28 | |
23 | 29 | if (!g_slist_find(aio_ctxs, aio_context)) { | |
24 | assert(clusters_allocated == nb_new_data_clusters); | 30 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
31 | aio_context_acquire(aio_context); | ||
32 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
33 | if (aio_context == bdrv_get_aio_context(bs)) { | ||
34 | - /* FIXME Calling this multiple times is wrong */ | ||
35 | - bdrv_drain_invoke(bs, true); | ||
36 | waited |= bdrv_drain_recurse(bs, true); | ||
37 | } | ||
38 | } | ||
25 | -- | 39 | -- |
26 | 2.13.6 | 40 | 2.13.6 |
27 | 41 | ||
28 | 42 | diff view generated by jsdifflib |
1 | Commit 51f63ec7d tried to change all references to 2.13 into 3.0, but | 1 | This adds a test case that the BlockDriver callbacks for drain are |
---|---|---|---|
2 | it failed to achieve this because it was not properly rebased on top of | 2 | called in bdrv_drain_all_begin/end(), and that both of them are called |
3 | the series introducing qapi/job.json. Change the references now. | 3 | exactly once. |
4 | 4 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | 6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
7 | Reviewed-by: Eric Blake <eblake@redhat.com> | 7 | Reviewed-by: Eric Blake <eblake@redhat.com> |
8 | --- | 8 | --- |
9 | qapi/job.json | 18 +++++++++--------- | 9 | tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ |
10 | 1 file changed, 9 insertions(+), 9 deletions(-) | 10 | tests/Makefile.include | 2 + |
11 | 2 files changed, 139 insertions(+) | ||
12 | create mode 100644 tests/test-bdrv-drain.c | ||
11 | 13 | ||
12 | diff --git a/qapi/job.json b/qapi/job.json | 14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
15 | new file mode 100644 | ||
16 | index XXXXXXX..XXXXXXX | ||
17 | --- /dev/null | ||
18 | +++ b/tests/test-bdrv-drain.c | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | +/* | ||
21 | + * Block node draining tests | ||
22 | + * | ||
23 | + * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> | ||
24 | + * | ||
25 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
26 | + * of this software and associated documentation files (the "Software"), to deal | ||
27 | + * in the Software without restriction, including without limitation the rights | ||
28 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
29 | + * copies of the Software, and to permit persons to whom the Software is | ||
30 | + * furnished to do so, subject to the following conditions: | ||
31 | + * | ||
32 | + * The above copyright notice and this permission notice shall be included in | ||
33 | + * all copies or substantial portions of the Software. | ||
34 | + * | ||
35 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
36 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
37 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
38 | + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
39 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
40 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
41 | + * THE SOFTWARE. | ||
42 | + */ | ||
43 | + | ||
44 | +#include "qemu/osdep.h" | ||
45 | +#include "block/block.h" | ||
46 | +#include "sysemu/block-backend.h" | ||
47 | +#include "qapi/error.h" | ||
48 | + | ||
49 | +typedef struct BDRVTestState { | ||
50 | + int drain_count; | ||
51 | +} BDRVTestState; | ||
52 | + | ||
53 | +static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) | ||
54 | +{ | ||
55 | + BDRVTestState *s = bs->opaque; | ||
56 | + s->drain_count++; | ||
57 | +} | ||
58 | + | ||
59 | +static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) | ||
60 | +{ | ||
61 | + BDRVTestState *s = bs->opaque; | ||
62 | + s->drain_count--; | ||
63 | +} | ||
64 | + | ||
65 | +static void bdrv_test_close(BlockDriverState *bs) | ||
66 | +{ | ||
67 | + BDRVTestState *s = bs->opaque; | ||
68 | + g_assert_cmpint(s->drain_count, >, 0); | ||
69 | +} | ||
70 | + | ||
71 | +static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, | ||
72 | + uint64_t offset, uint64_t bytes, | ||
73 | + QEMUIOVector *qiov, int flags) | ||
74 | +{ | ||
75 | + /* We want this request to stay until the polling loop in drain waits for | ||
76 | + * it to complete. We need to sleep a while as bdrv_drain_invoke() comes | ||
77 | + * first and polls its result, too, but it shouldn't accidentally complete | ||
78 | + * this request yet. */ | ||
79 | + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); | ||
80 | + | ||
81 | + return 0; | ||
82 | +} | ||
83 | + | ||
84 | +static BlockDriver bdrv_test = { | ||
85 | + .format_name = "test", | ||
86 | + .instance_size = sizeof(BDRVTestState), | ||
87 | + | ||
88 | + .bdrv_close = bdrv_test_close, | ||
89 | + .bdrv_co_preadv = bdrv_test_co_preadv, | ||
90 | + | ||
91 | + .bdrv_co_drain_begin = bdrv_test_co_drain_begin, | ||
92 | + .bdrv_co_drain_end = bdrv_test_co_drain_end, | ||
93 | +}; | ||
94 | + | ||
95 | +static void aio_ret_cb(void *opaque, int ret) | ||
96 | +{ | ||
97 | + int *aio_ret = opaque; | ||
98 | + *aio_ret = ret; | ||
99 | +} | ||
100 | + | ||
101 | +static void test_drv_cb_drain_all(void) | ||
102 | +{ | ||
103 | + BlockBackend *blk; | ||
104 | + BlockDriverState *bs; | ||
105 | + BDRVTestState *s; | ||
106 | + BlockAIOCB *acb; | ||
107 | + int aio_ret; | ||
108 | + | ||
109 | + QEMUIOVector qiov; | ||
110 | + struct iovec iov = { | ||
111 | + .iov_base = NULL, | ||
112 | + .iov_len = 0, | ||
113 | + }; | ||
114 | + qemu_iovec_init_external(&qiov, &iov, 1); | ||
115 | + | ||
116 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
117 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, | ||
118 | + &error_abort); | ||
119 | + s = bs->opaque; | ||
120 | + blk_insert_bs(blk, bs, &error_abort); | ||
121 | + | ||
122 | + /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ | ||
123 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
124 | + bdrv_drain_all_begin(); | ||
125 | + g_assert_cmpint(s->drain_count, ==, 1); | ||
126 | + bdrv_drain_all_end(); | ||
127 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
128 | + | ||
129 | + /* Now do the same while a request is pending */ | ||
130 | + aio_ret = -EINPROGRESS; | ||
131 | + acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); | ||
132 | + g_assert(acb != NULL); | ||
133 | + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); | ||
134 | + | ||
135 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
136 | + bdrv_drain_all_begin(); | ||
137 | + g_assert_cmpint(aio_ret, ==, 0); | ||
138 | + g_assert_cmpint(s->drain_count, ==, 1); | ||
139 | + bdrv_drain_all_end(); | ||
140 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
141 | + | ||
142 | + bdrv_unref(bs); | ||
143 | + blk_unref(blk); | ||
144 | +} | ||
145 | + | ||
146 | +int main(int argc, char **argv) | ||
147 | +{ | ||
148 | + bdrv_init(); | ||
149 | + qemu_init_main_loop(&error_abort); | ||
150 | + | ||
151 | + g_test_init(&argc, &argv, NULL); | ||
152 | + | ||
153 | + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | ||
154 | + | ||
155 | + return g_test_run(); | ||
156 | +} | ||
157 | diff --git a/tests/Makefile.include b/tests/Makefile.include | ||
13 | index XXXXXXX..XXXXXXX 100644 | 158 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/qapi/job.json | 159 | --- a/tests/Makefile.include |
15 | +++ b/qapi/job.json | 160 | +++ b/tests/Makefile.include |
16 | @@ -XXX,XX +XXX,XX @@ | 161 | @@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c |
17 | # @id: The job identifier | 162 | gcov-files-test-hbitmap-y = util/hbitmap.c |
18 | # @status: The new job status | 163 | check-unit-y += tests/test-hbitmap$(EXESUF) |
19 | # | 164 | gcov-files-test-hbitmap-y = blockjob.c |
20 | -# Since: 2.13 | 165 | +check-unit-y += tests/test-bdrv-drain$(EXESUF) |
21 | +# Since: 3.0 | 166 | check-unit-y += tests/test-blockjob$(EXESUF) |
22 | ## | 167 | check-unit-y += tests/test-blockjob-txn$(EXESUF) |
23 | { 'event': 'JOB_STATUS_CHANGE', | 168 | check-unit-y += tests/test-x86-cpuid$(EXESUF) |
24 | 'data': { 'id': 'str', | 169 | @@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y) |
25 | @@ -XXX,XX +XXX,XX @@ | 170 | tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y) |
26 | # | 171 | tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y) |
27 | # @id: The job identifier. | 172 | tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) |
28 | # | 173 | +tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y) |
29 | -# Since: 2.13 | 174 | tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) |
30 | +# Since: 3.0 | 175 | tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) |
31 | ## | 176 | tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) |
32 | { 'command': 'job-pause', 'data': { 'id': 'str' } } | ||
33 | |||
34 | @@ -XXX,XX +XXX,XX @@ | ||
35 | # | ||
36 | # @id : The job identifier. | ||
37 | # | ||
38 | -# Since: 2.13 | ||
39 | +# Since: 3.0 | ||
40 | ## | ||
41 | { 'command': 'job-resume', 'data': { 'id': 'str' } } | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ | ||
44 | # | ||
45 | # @id: The job identifier. | ||
46 | # | ||
47 | -# Since: 2.13 | ||
48 | +# Since: 3.0 | ||
49 | ## | ||
50 | { 'command': 'job-cancel', 'data': { 'id': 'str' } } | ||
51 | |||
52 | @@ -XXX,XX +XXX,XX @@ | ||
53 | # | ||
54 | # @id: The job identifier. | ||
55 | # | ||
56 | -# Since: 2.13 | ||
57 | +# Since: 3.0 | ||
58 | ## | ||
59 | { 'command': 'job-complete', 'data': { 'id': 'str' } } | ||
60 | |||
61 | @@ -XXX,XX +XXX,XX @@ | ||
62 | # | ||
63 | # @id: The job identifier. | ||
64 | # | ||
65 | -# Since: 2.13 | ||
66 | +# Since: 3.0 | ||
67 | ## | ||
68 | { 'command': 'job-dismiss', 'data': { 'id': 'str' } } | ||
69 | |||
70 | @@ -XXX,XX +XXX,XX @@ | ||
71 | # @id: The identifier of any job in the transaction, or of a job that is not | ||
72 | # part of any transaction. | ||
73 | # | ||
74 | -# Since: 2.13 | ||
75 | +# Since: 3.0 | ||
76 | ## | ||
77 | { 'command': 'job-finalize', 'data': { 'id': 'str' } } | ||
78 | |||
79 | @@ -XXX,XX +XXX,XX @@ | ||
80 | # the reason for the job failure. It should not be parsed | ||
81 | # by applications. | ||
82 | # | ||
83 | -# Since: 2.13 | ||
84 | +# Since: 3.0 | ||
85 | ## | ||
86 | { 'struct': 'JobInfo', | ||
87 | 'data': { 'id': 'str', 'type': 'JobType', 'status': 'JobStatus', | ||
88 | @@ -XXX,XX +XXX,XX @@ | ||
89 | # | ||
90 | # Returns: a list with a @JobInfo for each active job | ||
91 | # | ||
92 | -# Since: 2.13 | ||
93 | +# Since: 3.0 | ||
94 | ## | ||
95 | { 'command': 'query-jobs', 'returns': ['JobInfo'] } | ||
96 | -- | 177 | -- |
97 | 2.13.6 | 178 | 2.13.6 |
98 | 179 | ||
99 | 180 | diff view generated by jsdifflib |
1 | When growing an image, block drivers (especially protocol drivers) may | 1 | Now that the bdrv_drain_invoke() calls are pulled up to the callers of |
---|---|---|---|
2 | initialise the newly added area. I/O requests to the same area need to | 2 | bdrv_drain_recurse(), the 'begin' parameter isn't needed any more. |
3 | wait for this initialisation to be completed so that data writes don't | ||
4 | get overwritten and reads don't read uninitialised data. | ||
5 | |||
6 | To avoid overhead in the fast I/O path by adding new locking in the | ||
7 | protocol drivers and to restrict the impact to requests that actually | ||
8 | touch the new area, reuse the existing tracked request infrastructure in | ||
9 | block/io.c and mark all discard requests as serialising. | ||
10 | |||
11 | With this change, it is safe for protocol drivers to make | ||
12 | .bdrv_co_truncate actually asynchronous. | ||
13 | 3 | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | --- | 6 | --- |
17 | include/block/block_int.h | 1 + | 7 | block/io.c | 12 ++++++------ |
18 | block/io.c | 25 +++++++++++++++++++++++++ | 8 | 1 file changed, 6 insertions(+), 6 deletions(-) |
19 | 2 files changed, 26 insertions(+) | ||
20 | 9 | ||
21 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/include/block/block_int.h | ||
24 | +++ b/include/block/block_int.h | ||
25 | @@ -XXX,XX +XXX,XX @@ enum BdrvTrackedRequestType { | ||
26 | BDRV_TRACKED_READ, | ||
27 | BDRV_TRACKED_WRITE, | ||
28 | BDRV_TRACKED_DISCARD, | ||
29 | + BDRV_TRACKED_TRUNCATE, | ||
30 | }; | ||
31 | |||
32 | typedef struct BdrvTrackedRequest { | ||
33 | diff --git a/block/io.c b/block/io.c | 10 | diff --git a/block/io.c b/block/io.c |
34 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/block/io.c | 12 | --- a/block/io.c |
36 | +++ b/block/io.c | 13 | +++ b/block/io.c |
37 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | 14 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
15 | } | ||
16 | } | ||
17 | |||
18 | -static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) | ||
19 | +static bool bdrv_drain_recurse(BlockDriverState *bs) | ||
38 | { | 20 | { |
39 | BlockDriverState *bs = child->bs; | 21 | BdrvChild *child, *tmp; |
40 | BlockDriver *drv = bs->drv; | 22 | bool waited; |
41 | + BdrvTrackedRequest req; | 23 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) |
42 | + int64_t old_size, new_bytes; | 24 | */ |
43 | int ret; | 25 | bdrv_ref(bs); |
44 | 26 | } | |
45 | assert(child->perm & BLK_PERM_RESIZE); | 27 | - waited |= bdrv_drain_recurse(bs, begin); |
46 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | 28 | + waited |= bdrv_drain_recurse(bs); |
47 | return -EINVAL; | 29 | if (in_main_loop) { |
30 | bdrv_unref(bs); | ||
31 | } | ||
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) | ||
48 | } | 33 | } |
49 | 34 | ||
50 | + old_size = bdrv_getlength(bs); | 35 | bdrv_drain_invoke(bs, true); |
51 | + if (old_size < 0) { | 36 | - bdrv_drain_recurse(bs, true); |
52 | + error_setg_errno(errp, -old_size, "Failed to get old image size"); | 37 | + bdrv_drain_recurse(bs); |
53 | + return old_size; | ||
54 | + } | ||
55 | + | ||
56 | + if (offset > old_size) { | ||
57 | + new_bytes = offset - old_size; | ||
58 | + } else { | ||
59 | + new_bytes = 0; | ||
60 | + } | ||
61 | + | ||
62 | bdrv_inc_in_flight(bs); | ||
63 | + tracked_request_begin(&req, bs, offset, new_bytes, BDRV_TRACKED_TRUNCATE); | ||
64 | + | ||
65 | + /* If we are growing the image and potentially using preallocation for the | ||
66 | + * new area, we need to make sure that no write requests are made to it | ||
67 | + * concurrently or they might be overwritten by preallocation. */ | ||
68 | + if (new_bytes) { | ||
69 | + mark_request_serialising(&req, 1); | ||
70 | + wait_serialising_requests(&req); | ||
71 | + } | ||
72 | |||
73 | if (!drv->bdrv_co_truncate) { | ||
74 | if (bs->file && drv->is_filter) { | ||
75 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | ||
76 | atomic_inc(&bs->write_gen); | ||
77 | |||
78 | out: | ||
79 | + tracked_request_end(&req); | ||
80 | bdrv_dec_in_flight(bs); | ||
81 | + | ||
82 | return ret; | ||
83 | } | 38 | } |
39 | |||
40 | void bdrv_drained_end(BlockDriverState *bs) | ||
41 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
42 | |||
43 | bdrv_parent_drained_end(bs); | ||
44 | bdrv_drain_invoke(bs, false); | ||
45 | - bdrv_drain_recurse(bs, false); | ||
46 | + bdrv_drain_recurse(bs); | ||
47 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
48 | } | ||
49 | |||
50 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
51 | aio_context_acquire(aio_context); | ||
52 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
53 | if (aio_context == bdrv_get_aio_context(bs)) { | ||
54 | - waited |= bdrv_drain_recurse(bs, true); | ||
55 | + waited |= bdrv_drain_recurse(bs); | ||
56 | } | ||
57 | } | ||
58 | aio_context_release(aio_context); | ||
59 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
60 | aio_enable_external(aio_context); | ||
61 | bdrv_parent_drained_end(bs); | ||
62 | bdrv_drain_invoke(bs, false); | ||
63 | - bdrv_drain_recurse(bs, false); | ||
64 | + bdrv_drain_recurse(bs); | ||
65 | aio_context_release(aio_context); | ||
66 | } | ||
84 | 67 | ||
85 | -- | 68 | -- |
86 | 2.13.6 | 69 | 2.13.6 |
87 | 70 | ||
88 | 71 | diff view generated by jsdifflib |
1 | Commit abf754fe406 updated 026.out, but forgot to also update | 1 | The device is drained, so there is no point in waiting for requests at |
---|---|---|---|
2 | 026.out.nocache. | 2 | the end of the drained section. Remove the bdrv_drain_recurse() calls |
3 | there. | ||
4 | |||
5 | The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e | ||
6 | in order to call the .bdrv_co_drain_end() driver callback. This is now | ||
7 | done by a separate bdrv_drain_invoke() call. | ||
3 | 8 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Reviewed-by: Max Reitz <mreitz@redhat.com> | 10 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> |
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | --- | 12 | --- |
7 | tests/qemu-iotests/026.out.nocache | 6 +++--- | 13 | block/io.c | 2 -- |
8 | 1 file changed, 3 insertions(+), 3 deletions(-) | 14 | 1 file changed, 2 deletions(-) |
9 | 15 | ||
10 | diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache | 16 | diff --git a/block/io.c b/block/io.c |
11 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tests/qemu-iotests/026.out.nocache | 18 | --- a/block/io.c |
13 | +++ b/tests/qemu-iotests/026.out.nocache | 19 | +++ b/block/io.c |
14 | @@ -XXX,XX +XXX,XX @@ Failed to flush the L2 table cache: No space left on device | 20 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
15 | Failed to flush the refcount block cache: No space left on device | 21 | |
16 | write failed: No space left on device | 22 | bdrv_parent_drained_end(bs); |
17 | 23 | bdrv_drain_invoke(bs, false); | |
18 | -11 leaked clusters were found on the image. | 24 | - bdrv_drain_recurse(bs); |
19 | +10 leaked clusters were found on the image. | 25 | aio_enable_external(bdrv_get_aio_context(bs)); |
20 | This means waste of disk space, but no harm to data. | 26 | } |
21 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 | 27 | |
22 | 28 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | |
23 | @@ -XXX,XX +XXX,XX @@ Failed to flush the L2 table cache: No space left on device | 29 | aio_enable_external(aio_context); |
24 | Failed to flush the refcount block cache: No space left on device | 30 | bdrv_parent_drained_end(bs); |
25 | write failed: No space left on device | 31 | bdrv_drain_invoke(bs, false); |
26 | 32 | - bdrv_drain_recurse(bs); | |
27 | -11 leaked clusters were found on the image. | 33 | aio_context_release(aio_context); |
28 | +10 leaked clusters were found on the image. | 34 | } |
29 | This means waste of disk space, but no harm to data. | ||
30 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ Failed to flush the L2 table cache: No space left on device | ||
33 | Failed to flush the refcount block cache: No space left on device | ||
34 | write failed: No space left on device | ||
35 | |||
36 | -11 leaked clusters were found on the image. | ||
37 | +10 leaked clusters were found on the image. | ||
38 | This means waste of disk space, but no harm to data. | ||
39 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 | ||
40 | 35 | ||
41 | -- | 36 | -- |
42 | 2.13.6 | 37 | 2.13.6 |
43 | 38 | ||
44 | 39 | diff view generated by jsdifflib |
1 | From: Markus Armbruster <armbru@redhat.com> | 1 | Drain requests are propagated to child nodes, parent nodes and directly |
---|---|---|---|
2 | to the AioContext. The order in which this happened was different | ||
3 | between all combinations of drain/drain_all and begin/end. | ||
2 | 4 | ||
3 | block_crypto_open_opts_init() and block_crypto_create_opts_init() | 5 | The correct order is to keep children only drained when their parents |
4 | contain a virtual visit of QCryptoBlockOptions and | 6 | are also drained. This means that at the start of a drained section, the |
5 | QCryptoBlockCreateOptions less member "format", respectively. | 7 | AioContext needs to be drained first, the parents second and only then |
8 | the children. The correct order for the end of a drained section is the | ||
9 | opposite. | ||
6 | 10 | ||
7 | Change their callers to put member "format" in the QDict, so they can | 11 | This patch changes the three other functions to follow the example of |
8 | use the generated visitors for these types instead. | 12 | bdrv_drained_begin(), which is the only one that got it right. |
9 | 13 | ||
10 | Signed-off-by: Markus Armbruster <armbru@redhat.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | --- | 16 | --- |
14 | block/crypto.h | 8 ++--- | 17 | block/io.c | 12 ++++++++---- |
15 | block/crypto.c | 99 +++++++++------------------------------------------------- | 18 | 1 file changed, 8 insertions(+), 4 deletions(-) |
16 | block/qcow.c | 5 ++- | ||
17 | block/qcow2.c | 10 +++--- | ||
18 | 4 files changed, 22 insertions(+), 100 deletions(-) | ||
19 | 19 | ||
20 | diff --git a/block/crypto.h b/block/crypto.h | 20 | diff --git a/block/io.c b/block/io.c |
21 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/crypto.h | 22 | --- a/block/io.c |
23 | +++ b/block/crypto.h | 23 | +++ b/block/io.c |
24 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
25 | return; | ||
25 | } | 26 | } |
26 | 27 | ||
27 | QCryptoBlockCreateOptions * | 28 | + /* Stop things in parent-to-child order */ |
28 | -block_crypto_create_opts_init(QCryptoBlockFormat format, | 29 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { |
29 | - QDict *opts, | 30 | aio_disable_external(bdrv_get_aio_context(bs)); |
30 | - Error **errp); | 31 | bdrv_parent_drained_begin(bs); |
31 | +block_crypto_create_opts_init(QDict *opts, Error **errp); | 32 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
32 | 33 | return; | |
33 | QCryptoBlockOpenOptions * | ||
34 | -block_crypto_open_opts_init(QCryptoBlockFormat format, | ||
35 | - QDict *opts, | ||
36 | - Error **errp); | ||
37 | +block_crypto_open_opts_init(QDict *opts, Error **errp); | ||
38 | |||
39 | #endif /* BLOCK_CRYPTO_H__ */ | ||
40 | diff --git a/block/crypto.c b/block/crypto.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/block/crypto.c | ||
43 | +++ b/block/crypto.c | ||
44 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList block_crypto_create_opts_luks = { | ||
45 | |||
46 | |||
47 | QCryptoBlockOpenOptions * | ||
48 | -block_crypto_open_opts_init(QCryptoBlockFormat format, | ||
49 | - QDict *opts, | ||
50 | - Error **errp) | ||
51 | +block_crypto_open_opts_init(QDict *opts, Error **errp) | ||
52 | { | ||
53 | Visitor *v; | ||
54 | - QCryptoBlockOpenOptions *ret = NULL; | ||
55 | - Error *local_err = NULL; | ||
56 | - | ||
57 | - ret = g_new0(QCryptoBlockOpenOptions, 1); | ||
58 | - ret->format = format; | ||
59 | + QCryptoBlockOpenOptions *ret; | ||
60 | |||
61 | - v = qobject_input_visitor_new_flat_confused(opts, &local_err); | ||
62 | + v = qobject_input_visitor_new_flat_confused(opts, errp); | ||
63 | if (!v) { | ||
64 | - goto out; | ||
65 | - } | ||
66 | - | ||
67 | - visit_start_struct(v, NULL, NULL, 0, &local_err); | ||
68 | - if (local_err) { | ||
69 | - goto out; | ||
70 | - } | ||
71 | - | ||
72 | - switch (format) { | ||
73 | - case Q_CRYPTO_BLOCK_FORMAT_LUKS: | ||
74 | - visit_type_QCryptoBlockOptionsLUKS_members( | ||
75 | - v, &ret->u.luks, &local_err); | ||
76 | - break; | ||
77 | - | ||
78 | - case Q_CRYPTO_BLOCK_FORMAT_QCOW: | ||
79 | - visit_type_QCryptoBlockOptionsQCow_members( | ||
80 | - v, &ret->u.qcow, &local_err); | ||
81 | - break; | ||
82 | - | ||
83 | - default: | ||
84 | - error_setg(&local_err, "Unsupported block format %d", format); | ||
85 | - break; | ||
86 | - } | ||
87 | - if (!local_err) { | ||
88 | - visit_check_struct(v, &local_err); | ||
89 | + return NULL; | ||
90 | } | 34 | } |
91 | 35 | ||
92 | - visit_end_struct(v, NULL); | 36 | - bdrv_parent_drained_end(bs); |
93 | + visit_type_QCryptoBlockOpenOptions(v, NULL, &ret, errp); | 37 | + /* Re-enable things in child-to-parent order */ |
94 | 38 | bdrv_drain_invoke(bs, false); | |
95 | - out: | 39 | + bdrv_parent_drained_end(bs); |
96 | - if (local_err) { | 40 | aio_enable_external(bdrv_get_aio_context(bs)); |
97 | - error_propagate(errp, local_err); | ||
98 | - qapi_free_QCryptoBlockOpenOptions(ret); | ||
99 | - ret = NULL; | ||
100 | - } | ||
101 | visit_free(v); | ||
102 | return ret; | ||
103 | } | 41 | } |
104 | 42 | ||
105 | 43 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | |
106 | QCryptoBlockCreateOptions * | 44 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
107 | -block_crypto_create_opts_init(QCryptoBlockFormat format, | 45 | AioContext *aio_context = bdrv_get_aio_context(bs); |
108 | - QDict *opts, | 46 | |
109 | - Error **errp) | 47 | + /* Stop things in parent-to-child order */ |
110 | +block_crypto_create_opts_init(QDict *opts, Error **errp) | 48 | aio_context_acquire(aio_context); |
111 | { | 49 | - bdrv_parent_drained_begin(bs); |
112 | Visitor *v; | 50 | aio_disable_external(aio_context); |
113 | - QCryptoBlockCreateOptions *ret = NULL; | 51 | + bdrv_parent_drained_begin(bs); |
114 | - Error *local_err = NULL; | 52 | bdrv_drain_invoke(bs, true); |
115 | - | 53 | aio_context_release(aio_context); |
116 | - ret = g_new0(QCryptoBlockCreateOptions, 1); | 54 | |
117 | - ret->format = format; | 55 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
118 | + QCryptoBlockCreateOptions *ret; | 56 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { |
119 | 57 | AioContext *aio_context = bdrv_get_aio_context(bs); | |
120 | - v = qobject_input_visitor_new_flat_confused(opts, &local_err); | 58 | |
121 | + v = qobject_input_visitor_new_flat_confused(opts, errp); | 59 | + /* Re-enable things in child-to-parent order */ |
122 | if (!v) { | 60 | aio_context_acquire(aio_context); |
123 | - goto out; | 61 | - aio_enable_external(aio_context); |
124 | - } | 62 | - bdrv_parent_drained_end(bs); |
125 | - | 63 | bdrv_drain_invoke(bs, false); |
126 | - visit_start_struct(v, NULL, NULL, 0, &local_err); | 64 | + bdrv_parent_drained_end(bs); |
127 | - if (local_err) { | 65 | + aio_enable_external(aio_context); |
128 | - goto out; | 66 | aio_context_release(aio_context); |
129 | - } | ||
130 | - | ||
131 | - switch (format) { | ||
132 | - case Q_CRYPTO_BLOCK_FORMAT_LUKS: | ||
133 | - visit_type_QCryptoBlockCreateOptionsLUKS_members( | ||
134 | - v, &ret->u.luks, &local_err); | ||
135 | - break; | ||
136 | - | ||
137 | - case Q_CRYPTO_BLOCK_FORMAT_QCOW: | ||
138 | - visit_type_QCryptoBlockOptionsQCow_members( | ||
139 | - v, &ret->u.qcow, &local_err); | ||
140 | - break; | ||
141 | - | ||
142 | - default: | ||
143 | - error_setg(&local_err, "Unsupported block format %d", format); | ||
144 | - break; | ||
145 | - } | ||
146 | - if (!local_err) { | ||
147 | - visit_check_struct(v, &local_err); | ||
148 | + return NULL; | ||
149 | } | 67 | } |
150 | 68 | ||
151 | - visit_end_struct(v, NULL); | ||
152 | + visit_type_QCryptoBlockCreateOptions(v, NULL, &ret, errp); | ||
153 | |||
154 | - out: | ||
155 | - if (local_err) { | ||
156 | - error_propagate(errp, local_err); | ||
157 | - qapi_free_QCryptoBlockCreateOptions(ret); | ||
158 | - ret = NULL; | ||
159 | - } | ||
160 | visit_free(v); | ||
161 | return ret; | ||
162 | } | ||
163 | @@ -XXX,XX +XXX,XX @@ static int block_crypto_open_generic(QCryptoBlockFormat format, | ||
164 | } | ||
165 | |||
166 | cryptoopts = qemu_opts_to_qdict(opts, NULL); | ||
167 | + qdict_put_str(cryptoopts, "format", QCryptoBlockFormat_str(format)); | ||
168 | |||
169 | - open_opts = block_crypto_open_opts_init(format, cryptoopts, errp); | ||
170 | + open_opts = block_crypto_open_opts_init(cryptoopts, errp); | ||
171 | if (!open_opts) { | ||
172 | goto cleanup; | ||
173 | } | ||
174 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, | ||
175 | &block_crypto_create_opts_luks, | ||
176 | true); | ||
177 | |||
178 | - create_opts = block_crypto_create_opts_init(Q_CRYPTO_BLOCK_FORMAT_LUKS, | ||
179 | - cryptoopts, errp); | ||
180 | + qdict_put_str(cryptoopts, "format", "luks"); | ||
181 | + create_opts = block_crypto_create_opts_init(cryptoopts, errp); | ||
182 | if (!create_opts) { | ||
183 | ret = -EINVAL; | ||
184 | goto fail; | ||
185 | diff --git a/block/qcow.c b/block/qcow.c | ||
186 | index XXXXXXX..XXXXXXX 100644 | ||
187 | --- a/block/qcow.c | ||
188 | +++ b/block/qcow.c | ||
189 | @@ -XXX,XX +XXX,XX @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, | ||
190 | ret = -EINVAL; | ||
191 | goto fail; | ||
192 | } | ||
193 | - qdict_del(encryptopts, "format"); | ||
194 | - crypto_opts = block_crypto_open_opts_init( | ||
195 | - Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); | ||
196 | + qdict_put_str(encryptopts, "format", "qcow"); | ||
197 | + crypto_opts = block_crypto_open_opts_init(encryptopts, errp); | ||
198 | if (!crypto_opts) { | ||
199 | ret = -EINVAL; | ||
200 | goto fail; | ||
201 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
202 | index XXXXXXX..XXXXXXX 100644 | ||
203 | --- a/block/qcow2.c | ||
204 | +++ b/block/qcow2.c | ||
205 | @@ -XXX,XX +XXX,XX @@ static int qcow2_update_options_prepare(BlockDriverState *bs, | ||
206 | ret = -EINVAL; | ||
207 | goto fail; | ||
208 | } | ||
209 | - qdict_del(encryptopts, "format"); | ||
210 | - r->crypto_opts = block_crypto_open_opts_init( | ||
211 | - Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); | ||
212 | + qdict_put_str(encryptopts, "format", "qcow"); | ||
213 | + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); | ||
214 | break; | ||
215 | |||
216 | case QCOW_CRYPT_LUKS: | ||
217 | @@ -XXX,XX +XXX,XX @@ static int qcow2_update_options_prepare(BlockDriverState *bs, | ||
218 | ret = -EINVAL; | ||
219 | goto fail; | ||
220 | } | ||
221 | - qdict_del(encryptopts, "format"); | ||
222 | - r->crypto_opts = block_crypto_open_opts_init( | ||
223 | - Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp); | ||
224 | + qdict_put_str(encryptopts, "format", "luks"); | ||
225 | + r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); | ||
226 | break; | ||
227 | |||
228 | default: | ||
229 | -- | 69 | -- |
230 | 2.13.6 | 70 | 2.13.6 |
231 | 71 | ||
232 | 72 | diff view generated by jsdifflib |
1 | All callers are coroutine_fns now, so we can just directly call | 1 | Commit 15afd94a047 added code to acquire and release the AioContext in |
---|---|---|---|
2 | preallocate_co(). | 2 | qemuio_command(). This means that the lock is taken twice now in the |
3 | call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for | ||
4 | any requests issued to nodes in a non-mainloop AioContext. | ||
5 | |||
6 | Dropping the first locking from hmp_qemu_io() fixes the problem. | ||
3 | 7 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
6 | --- | 10 | --- |
7 | block/qcow2.c | 51 ++++++++------------------------------------------- | 11 | hmp.c | 6 ------ |
8 | 1 file changed, 8 insertions(+), 43 deletions(-) | 12 | 1 file changed, 6 deletions(-) |
9 | 13 | ||
10 | diff --git a/block/qcow2.c b/block/qcow2.c | 14 | diff --git a/hmp.c b/hmp.c |
11 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/block/qcow2.c | 16 | --- a/hmp.c |
13 | +++ b/block/qcow2.c | 17 | +++ b/hmp.c |
14 | @@ -XXX,XX +XXX,XX @@ static int qcow2_set_up_encryption(BlockDriverState *bs, | 18 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
15 | return ret; | ||
16 | } | ||
17 | |||
18 | - | ||
19 | -typedef struct PreallocCo { | ||
20 | - BlockDriverState *bs; | ||
21 | - uint64_t offset; | ||
22 | - uint64_t new_length; | ||
23 | - | ||
24 | - int ret; | ||
25 | -} PreallocCo; | ||
26 | - | ||
27 | /** | ||
28 | * Preallocates metadata structures for data clusters between @offset (in the | ||
29 | * guest disk) and @new_length (which is thus generally the new guest disk | ||
30 | @@ -XXX,XX +XXX,XX @@ typedef struct PreallocCo { | ||
31 | * | ||
32 | * Returns: 0 on success, -errno on failure. | ||
33 | */ | ||
34 | -static void coroutine_fn preallocate_co(void *opaque) | ||
35 | +static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, | ||
36 | + uint64_t new_length) | ||
37 | { | 19 | { |
38 | - PreallocCo *params = opaque; | 20 | BlockBackend *blk; |
39 | - BlockDriverState *bs = params->bs; | 21 | BlockBackend *local_blk = NULL; |
40 | - uint64_t offset = params->offset; | 22 | - AioContext *aio_context; |
41 | - uint64_t new_length = params->new_length; | 23 | const char* device = qdict_get_str(qdict, "device"); |
42 | uint64_t bytes; | 24 | const char* command = qdict_get_str(qdict, "command"); |
43 | uint64_t host_offset = 0; | 25 | Error *err = NULL; |
44 | unsigned int cur_bytes; | 26 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) |
45 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn preallocate_co(void *opaque) | ||
46 | ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, | ||
47 | &host_offset, &meta); | ||
48 | if (ret < 0) { | ||
49 | - goto done; | ||
50 | + return ret; | ||
51 | } | ||
52 | |||
53 | while (meta) { | ||
54 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn preallocate_co(void *opaque) | ||
55 | if (ret < 0) { | ||
56 | qcow2_free_any_clusters(bs, meta->alloc_offset, | ||
57 | meta->nb_clusters, QCOW2_DISCARD_NEVER); | ||
58 | - goto done; | ||
59 | + return ret; | ||
60 | } | ||
61 | |||
62 | /* There are no dependent requests, but we need to remove our | ||
63 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn preallocate_co(void *opaque) | ||
64 | ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, | ||
65 | &data, 1); | ||
66 | if (ret < 0) { | ||
67 | - goto done; | ||
68 | + return ret; | ||
69 | } | 27 | } |
70 | } | 28 | } |
71 | 29 | ||
72 | - ret = 0; | 30 | - aio_context = blk_get_aio_context(blk); |
31 | - aio_context_acquire(aio_context); | ||
73 | - | 32 | - |
74 | -done: | 33 | /* |
75 | - params->ret = ret; | 34 | * Notably absent: Proper permission management. This is sad, but it seems |
76 | -} | 35 | * almost impossible to achieve without changing the semantics and thereby |
36 | @@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) | ||
37 | */ | ||
38 | qemuio_command(blk, command); | ||
39 | |||
40 | - aio_context_release(aio_context); | ||
77 | - | 41 | - |
78 | -static int preallocate(BlockDriverState *bs, | 42 | fail: |
79 | - uint64_t offset, uint64_t new_length) | 43 | blk_unref(local_blk); |
80 | -{ | 44 | hmp_handle_error(mon, &err); |
81 | - PreallocCo params = { | ||
82 | - .bs = bs, | ||
83 | - .offset = offset, | ||
84 | - .new_length = new_length, | ||
85 | - .ret = -EINPROGRESS, | ||
86 | - }; | ||
87 | - | ||
88 | - if (qemu_in_coroutine()) { | ||
89 | - preallocate_co(¶ms); | ||
90 | - } else { | ||
91 | - Coroutine *co = qemu_coroutine_create(preallocate_co, ¶ms); | ||
92 | - bdrv_coroutine_enter(bs, co); | ||
93 | - BDRV_POLL_WHILE(bs, params.ret == -EINPROGRESS); | ||
94 | - } | ||
95 | - return params.ret; | ||
96 | + return 0; | ||
97 | } | ||
98 | |||
99 | /* qcow2_refcount_metadata_size: | ||
100 | @@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) | ||
101 | if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { | ||
102 | BDRVQcow2State *s = blk_bs(blk)->opaque; | ||
103 | qemu_co_mutex_lock(&s->lock); | ||
104 | - ret = preallocate(blk_bs(blk), 0, qcow2_opts->size); | ||
105 | + ret = preallocate_co(blk_bs(blk), 0, qcow2_opts->size); | ||
106 | qemu_co_mutex_unlock(&s->lock); | ||
107 | |||
108 | if (ret < 0) { | ||
109 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
110 | break; | ||
111 | |||
112 | case PREALLOC_MODE_METADATA: | ||
113 | - ret = preallocate(bs, old_length, offset); | ||
114 | + ret = preallocate_co(bs, old_length, offset); | ||
115 | if (ret < 0) { | ||
116 | error_setg_errno(errp, -ret, "Preallocation failed"); | ||
117 | goto fail; | ||
118 | -- | 45 | -- |
119 | 2.13.6 | 46 | 2.13.6 |
120 | 47 | ||
121 | 48 | diff view generated by jsdifflib |
1 | If we managed to allocate the clusters, but then failed to write the | 1 | From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> |
---|---|---|---|
2 | data, there's a good chance that we'll still be able to free the | ||
3 | clusters again in order to avoid cluster leaks (the refcounts are | ||
4 | cached, so even if we can't write them out right now, we may be able to | ||
5 | do so when the VM is resumed after a werror=stop/enospc pause). | ||
6 | 2 | ||
3 | Since bdrv_co_preadv does all necessary checks including | ||
4 | reading after the end of the backing file, avoid duplication | ||
5 | of verification before bdrv_co_preadv call. | ||
6 | |||
7 | Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> | ||
8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Tested-by: Eric Blake <eblake@redhat.com> | ||
11 | --- | 11 | --- |
12 | block/qcow2.h | 1 + | 12 | block/qcow2.h | 3 --- |
13 | block/qcow2-cluster.c | 11 +++++++++++ | 13 | block/qcow2.c | 51 ++++++++------------------------------------------- |
14 | block/qcow2.c | 2 ++ | 14 | 2 files changed, 8 insertions(+), 46 deletions(-) |
15 | 3 files changed, 14 insertions(+) | ||
16 | 15 | ||
17 | diff --git a/block/qcow2.h b/block/qcow2.h | 16 | diff --git a/block/qcow2.h b/block/qcow2.h |
18 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/qcow2.h | 18 | --- a/block/qcow2.h |
20 | +++ b/block/qcow2.h | 19 | +++ b/block/qcow2.h |
21 | @@ -XXX,XX +XXX,XX @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, | 20 | @@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) |
22 | int compressed_size); | 21 | } |
23 | 22 | ||
24 | int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); | 23 | /* qcow2.c functions */ |
25 | +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); | 24 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
26 | int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, | 25 | - int64_t sector_num, int nb_sectors); |
27 | uint64_t bytes, enum qcow2_discard_type type, | 26 | - |
28 | bool full_discard); | 27 | int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, |
29 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 28 | int refcount_order, bool generous_increase, |
30 | index XXXXXXX..XXXXXXX 100644 | 29 | uint64_t *refblock_count); |
31 | --- a/block/qcow2-cluster.c | ||
32 | +++ b/block/qcow2-cluster.c | ||
33 | @@ -XXX,XX +XXX,XX @@ err: | ||
34 | return ret; | ||
35 | } | ||
36 | |||
37 | +/** | ||
38 | + * Frees the allocated clusters because the request failed and they won't | ||
39 | + * actually be linked. | ||
40 | + */ | ||
41 | +void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) | ||
42 | +{ | ||
43 | + BDRVQcow2State *s = bs->opaque; | ||
44 | + qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, | ||
45 | + QCOW2_DISCARD_NEVER); | ||
46 | +} | ||
47 | + | ||
48 | /* | ||
49 | * Returns the number of contiguous clusters that can be used for an allocating | ||
50 | * write, but require COW to be performed (this includes yet unallocated space, | ||
51 | diff --git a/block/qcow2.c b/block/qcow2.c | 30 | diff --git a/block/qcow2.c b/block/qcow2.c |
52 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
53 | --- a/block/qcow2.c | 32 | --- a/block/qcow2.c |
54 | +++ b/block/qcow2.c | 33 | +++ b/block/qcow2.c |
55 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, | 34 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, |
56 | if (ret) { | 35 | return status; |
57 | goto out; | 36 | } |
58 | } | 37 | |
59 | + } else { | 38 | -/* handle reading after the end of the backing file */ |
60 | + qcow2_alloc_cluster_abort(bs, l2meta); | 39 | -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, |
61 | } | 40 | - int64_t offset, int bytes) |
62 | 41 | -{ | |
63 | /* Take the request off the list of running requests */ | 42 | - uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; |
43 | - int n1; | ||
44 | - | ||
45 | - if ((offset + bytes) <= bs_size) { | ||
46 | - return bytes; | ||
47 | - } | ||
48 | - | ||
49 | - if (offset >= bs_size) { | ||
50 | - n1 = 0; | ||
51 | - } else { | ||
52 | - n1 = bs_size - offset; | ||
53 | - } | ||
54 | - | ||
55 | - qemu_iovec_memset(qiov, n1, 0, bytes - n1); | ||
56 | - | ||
57 | - return n1; | ||
58 | -} | ||
59 | - | ||
60 | static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
61 | uint64_t bytes, QEMUIOVector *qiov, | ||
62 | int flags) | ||
63 | { | ||
64 | BDRVQcow2State *s = bs->opaque; | ||
65 | - int offset_in_cluster, n1; | ||
66 | + int offset_in_cluster; | ||
67 | int ret; | ||
68 | unsigned int cur_bytes; /* number of bytes in current iteration */ | ||
69 | uint64_t cluster_offset = 0; | ||
70 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
71 | case QCOW2_CLUSTER_UNALLOCATED: | ||
72 | |||
73 | if (bs->backing) { | ||
74 | - /* read from the base image */ | ||
75 | - n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, | ||
76 | - offset, cur_bytes); | ||
77 | - if (n1 > 0) { | ||
78 | - QEMUIOVector local_qiov; | ||
79 | - | ||
80 | - qemu_iovec_init(&local_qiov, hd_qiov.niov); | ||
81 | - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); | ||
82 | - | ||
83 | - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
84 | - qemu_co_mutex_unlock(&s->lock); | ||
85 | - ret = bdrv_co_preadv(bs->backing, offset, n1, | ||
86 | - &local_qiov, 0); | ||
87 | - qemu_co_mutex_lock(&s->lock); | ||
88 | - | ||
89 | - qemu_iovec_destroy(&local_qiov); | ||
90 | - | ||
91 | - if (ret < 0) { | ||
92 | - goto fail; | ||
93 | - } | ||
94 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
95 | + qemu_co_mutex_unlock(&s->lock); | ||
96 | + ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, | ||
97 | + &hd_qiov, 0); | ||
98 | + qemu_co_mutex_lock(&s->lock); | ||
99 | + if (ret < 0) { | ||
100 | + goto fail; | ||
101 | } | ||
102 | } else { | ||
103 | /* Note: in this case, no need to wait */ | ||
64 | -- | 104 | -- |
65 | 2.13.6 | 105 | 2.13.6 |
66 | 106 | ||
67 | 107 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Removing a quorum child node with x-blockdev-change results in a quorum | ||
2 | driver state that cannot be recreated with create options because it | ||
3 | would require a list with gaps. This causes trouble in at least | ||
4 | .bdrv_refresh_filename(). | ||
1 | 5 | ||
6 | Document this problem so that we won't accidentally mark the command | ||
7 | stable without having addressed it. | ||
8 | |||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
11 | --- | ||
12 | qapi/block-core.json | 4 ++++ | ||
13 | 1 file changed, 4 insertions(+) | ||
14 | |||
15 | diff --git a/qapi/block-core.json b/qapi/block-core.json | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/qapi/block-core.json | ||
18 | +++ b/qapi/block-core.json | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | # does not support all kinds of operations, all kinds of children, nor | ||
21 | # all block drivers. | ||
22 | # | ||
23 | +# FIXME Removing children from a quorum node means introducing gaps in the | ||
24 | +# child indices. This cannot be represented in the 'children' list of | ||
25 | +# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename(). | ||
26 | +# | ||
27 | # Warning: The data in a new quorum child MUST be consistent with that of | ||
28 | # the rest of the array. | ||
29 | # | ||
30 | -- | ||
31 | 2.13.6 | ||
32 | |||
33 | diff view generated by jsdifflib |
1 | From: Weiping Zhang <zhangweiping@didichuxing.com> | 1 | From: Doug Gale <doug16k@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Add an optional parameter num_queues for device, and set it | 3 | Add trace output for commands, errors, and undefined behavior. |
4 | to 64 by default. | 4 | Add guest error log output for undefined behavior. |
5 | Report invalid undefined accesses to MMIO. | ||
6 | Annotate unlikely error checks with unlikely. | ||
5 | 7 | ||
6 | Signed-off-by: Weiping Zhang <zhangweiping@didichuxing.com> | 8 | Signed-off-by: Doug Gale <doug16k@gmail.com> |
7 | Acked-by: Keith Busch <keith.busch@intel.com> | 9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> |
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 12 | --- |
10 | hw/block/nvme.c | 5 +++-- | 13 | hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++-------- |
11 | 1 file changed, 3 insertions(+), 2 deletions(-) | 14 | hw/block/trace-events | 93 ++++++++++++++ |
15 | 2 files changed, 390 insertions(+), 52 deletions(-) | ||
12 | 16 | ||
13 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c | 17 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c |
14 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/hw/block/nvme.c | 19 | --- a/hw/block/nvme.c |
16 | +++ b/hw/block/nvme.c | 20 | +++ b/hw/block/nvme.c |
17 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ |
18 | * Usage: add options: | 22 | #include "qapi/visitor.h" |
19 | * -drive file=<file>,if=none,id=<drive_id> | 23 | #include "sysemu/block-backend.h" |
20 | * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ | 24 | |
21 | - * cmb_size_mb=<cmb_size_mb[optional]> | 25 | +#include "qemu/log.h" |
22 | + * cmb_size_mb=<cmb_size_mb[optional]>, \ | 26 | +#include "trace.h" |
23 | + * num_queues=<N[optional]> | 27 | #include "nvme.h" |
24 | * | 28 | |
25 | * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at | 29 | +#define NVME_GUEST_ERR(trace, fmt, ...) \ |
26 | * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. | 30 | + do { \ |
27 | @@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) | 31 | + (trace_##trace)(__VA_ARGS__); \ |
28 | pcie_endpoint_cap_init(&n->parent_obj, 0x80); | 32 | + qemu_log_mask(LOG_GUEST_ERROR, #trace \ |
29 | 33 | + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ | |
30 | n->num_namespaces = 1; | 34 | + } while (0) |
31 | - n->num_queues = 64; | 35 | + |
32 | n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); | 36 | static void nvme_process_sq(void *opaque); |
33 | n->ns_size = bs_size / (uint64_t)n->num_namespaces; | 37 | |
34 | 38 | static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) | |
35 | @@ -XXX,XX +XXX,XX @@ static Property nvme_props[] = { | 39 | @@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) |
36 | DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), | 40 | { |
37 | DEFINE_PROP_STRING("serial", NvmeCtrl, serial), | 41 | if (cq->irq_enabled) { |
38 | DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), | 42 | if (msix_enabled(&(n->parent_obj))) { |
39 | + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), | 43 | + trace_nvme_irq_msix(cq->vector); |
40 | DEFINE_PROP_END_OF_LIST(), | 44 | msix_notify(&(n->parent_obj), cq->vector); |
41 | }; | 45 | } else { |
42 | 46 | + trace_nvme_irq_pin(); | |
47 | pci_irq_pulse(&n->parent_obj); | ||
48 | } | ||
49 | + } else { | ||
50 | + trace_nvme_irq_masked(); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
55 | trans_len = MIN(len, trans_len); | ||
56 | int num_prps = (len >> n->page_bits) + 1; | ||
57 | |||
58 | - if (!prp1) { | ||
59 | + if (unlikely(!prp1)) { | ||
60 | + trace_nvme_err_invalid_prp(); | ||
61 | return NVME_INVALID_FIELD | NVME_DNR; | ||
62 | } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && | ||
63 | prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { | ||
64 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
65 | } | ||
66 | len -= trans_len; | ||
67 | if (len) { | ||
68 | - if (!prp2) { | ||
69 | + if (unlikely(!prp2)) { | ||
70 | + trace_nvme_err_invalid_prp2_missing(); | ||
71 | goto unmap; | ||
72 | } | ||
73 | if (len > n->page_size) { | ||
74 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
75 | uint64_t prp_ent = le64_to_cpu(prp_list[i]); | ||
76 | |||
77 | if (i == n->max_prp_ents - 1 && len > n->page_size) { | ||
78 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
79 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
80 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
81 | goto unmap; | ||
82 | } | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
85 | prp_ent = le64_to_cpu(prp_list[i]); | ||
86 | } | ||
87 | |||
88 | - if (!prp_ent || prp_ent & (n->page_size - 1)) { | ||
89 | + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { | ||
90 | + trace_nvme_err_invalid_prplist_ent(prp_ent); | ||
91 | goto unmap; | ||
92 | } | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
95 | i++; | ||
96 | } | ||
97 | } else { | ||
98 | - if (prp2 & (n->page_size - 1)) { | ||
99 | + if (unlikely(prp2 & (n->page_size - 1))) { | ||
100 | + trace_nvme_err_invalid_prp2_align(prp2); | ||
101 | goto unmap; | ||
102 | } | ||
103 | if (qsg->nsg) { | ||
104 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, | ||
105 | QEMUIOVector iov; | ||
106 | uint16_t status = NVME_SUCCESS; | ||
107 | |||
108 | + trace_nvme_dma_read(prp1, prp2); | ||
109 | + | ||
110 | if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { | ||
111 | return NVME_INVALID_FIELD | NVME_DNR; | ||
112 | } | ||
113 | if (qsg.nsg > 0) { | ||
114 | - if (dma_buf_read(ptr, len, &qsg)) { | ||
115 | + if (unlikely(dma_buf_read(ptr, len, &qsg))) { | ||
116 | + trace_nvme_err_invalid_dma(); | ||
117 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
118 | } | ||
119 | qemu_sglist_destroy(&qsg); | ||
120 | } else { | ||
121 | - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { | ||
122 | + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { | ||
123 | + trace_nvme_err_invalid_dma(); | ||
124 | status = NVME_INVALID_FIELD | NVME_DNR; | ||
125 | } | ||
126 | qemu_iovec_destroy(&iov); | ||
127 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
128 | uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); | ||
129 | uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); | ||
130 | |||
131 | - if (slba + nlb > ns->id_ns.nsze) { | ||
132 | + if (unlikely(slba + nlb > ns->id_ns.nsze)) { | ||
133 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
134 | return NVME_LBA_RANGE | NVME_DNR; | ||
135 | } | ||
136 | |||
137 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
138 | int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; | ||
139 | enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; | ||
140 | |||
141 | - if ((slba + nlb) > ns->id_ns.nsze) { | ||
142 | + trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); | ||
143 | + | ||
144 | + if (unlikely((slba + nlb) > ns->id_ns.nsze)) { | ||
145 | block_acct_invalid(blk_get_stats(n->conf.blk), acct); | ||
146 | + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); | ||
147 | return NVME_LBA_RANGE | NVME_DNR; | ||
148 | } | ||
149 | |||
150 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
151 | NvmeNamespace *ns; | ||
152 | uint32_t nsid = le32_to_cpu(cmd->nsid); | ||
153 | |||
154 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
155 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
156 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
157 | return NVME_INVALID_NSID | NVME_DNR; | ||
158 | } | ||
159 | |||
160 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
161 | case NVME_CMD_READ: | ||
162 | return nvme_rw(n, ns, cmd, req); | ||
163 | default: | ||
164 | + trace_nvme_err_invalid_opc(cmd->opcode); | ||
165 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
166 | } | ||
167 | } | ||
168 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
169 | NvmeCQueue *cq; | ||
170 | uint16_t qid = le16_to_cpu(c->qid); | ||
171 | |||
172 | - if (!qid || nvme_check_sqid(n, qid)) { | ||
173 | + if (unlikely(!qid || nvme_check_sqid(n, qid))) { | ||
174 | + trace_nvme_err_invalid_del_sq(qid); | ||
175 | return NVME_INVALID_QID | NVME_DNR; | ||
176 | } | ||
177 | |||
178 | + trace_nvme_del_sq(qid); | ||
179 | + | ||
180 | sq = n->sq[qid]; | ||
181 | while (!QTAILQ_EMPTY(&sq->out_req_list)) { | ||
182 | req = QTAILQ_FIRST(&sq->out_req_list); | ||
183 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) | ||
184 | uint16_t qflags = le16_to_cpu(c->sq_flags); | ||
185 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
186 | |||
187 | - if (!cqid || nvme_check_cqid(n, cqid)) { | ||
188 | + trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); | ||
189 | + | ||
190 | + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { | ||
191 | + trace_nvme_err_invalid_create_sq_cqid(cqid); | ||
192 | return NVME_INVALID_CQID | NVME_DNR; | ||
193 | } | ||
194 | - if (!sqid || !nvme_check_sqid(n, sqid)) { | ||
195 | + if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { | ||
196 | + trace_nvme_err_invalid_create_sq_sqid(sqid); | ||
197 | return NVME_INVALID_QID | NVME_DNR; | ||
198 | } | ||
199 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
200 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
201 | + trace_nvme_err_invalid_create_sq_size(qsize); | ||
202 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
203 | } | ||
204 | - if (!prp1 || prp1 & (n->page_size - 1)) { | ||
205 | + if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { | ||
206 | + trace_nvme_err_invalid_create_sq_addr(prp1); | ||
207 | return NVME_INVALID_FIELD | NVME_DNR; | ||
208 | } | ||
209 | - if (!(NVME_SQ_FLAGS_PC(qflags))) { | ||
210 | + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { | ||
211 | + trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); | ||
212 | return NVME_INVALID_FIELD | NVME_DNR; | ||
213 | } | ||
214 | sq = g_malloc0(sizeof(*sq)); | ||
215 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
216 | NvmeCQueue *cq; | ||
217 | uint16_t qid = le16_to_cpu(c->qid); | ||
218 | |||
219 | - if (!qid || nvme_check_cqid(n, qid)) { | ||
220 | + if (unlikely(!qid || nvme_check_cqid(n, qid))) { | ||
221 | + trace_nvme_err_invalid_del_cq_cqid(qid); | ||
222 | return NVME_INVALID_CQID | NVME_DNR; | ||
223 | } | ||
224 | |||
225 | cq = n->cq[qid]; | ||
226 | - if (!QTAILQ_EMPTY(&cq->sq_list)) { | ||
227 | + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { | ||
228 | + trace_nvme_err_invalid_del_cq_notempty(qid); | ||
229 | return NVME_INVALID_QUEUE_DEL; | ||
230 | } | ||
231 | + trace_nvme_del_cq(qid); | ||
232 | nvme_free_cq(cq, n); | ||
233 | return NVME_SUCCESS; | ||
234 | } | ||
235 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) | ||
236 | uint16_t qflags = le16_to_cpu(c->cq_flags); | ||
237 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
238 | |||
239 | - if (!cqid || !nvme_check_cqid(n, cqid)) { | ||
240 | + trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, | ||
241 | + NVME_CQ_FLAGS_IEN(qflags) != 0); | ||
242 | + | ||
243 | + if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { | ||
244 | + trace_nvme_err_invalid_create_cq_cqid(cqid); | ||
245 | return NVME_INVALID_CQID | NVME_DNR; | ||
246 | } | ||
247 | - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { | ||
248 | + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { | ||
249 | + trace_nvme_err_invalid_create_cq_size(qsize); | ||
250 | return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; | ||
251 | } | ||
252 | - if (!prp1) { | ||
253 | + if (unlikely(!prp1)) { | ||
254 | + trace_nvme_err_invalid_create_cq_addr(prp1); | ||
255 | return NVME_INVALID_FIELD | NVME_DNR; | ||
256 | } | ||
257 | - if (vector > n->num_queues) { | ||
258 | + if (unlikely(vector > n->num_queues)) { | ||
259 | + trace_nvme_err_invalid_create_cq_vector(vector); | ||
260 | return NVME_INVALID_IRQ_VECTOR | NVME_DNR; | ||
261 | } | ||
262 | - if (!(NVME_CQ_FLAGS_PC(qflags))) { | ||
263 | + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { | ||
264 | + trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); | ||
265 | return NVME_INVALID_FIELD | NVME_DNR; | ||
266 | } | ||
267 | |||
268 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) | ||
269 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
270 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
271 | |||
272 | + trace_nvme_identify_ctrl(); | ||
273 | + | ||
274 | return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), | ||
275 | prp1, prp2); | ||
276 | } | ||
277 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) | ||
278 | uint64_t prp1 = le64_to_cpu(c->prp1); | ||
279 | uint64_t prp2 = le64_to_cpu(c->prp2); | ||
280 | |||
281 | - if (nsid == 0 || nsid > n->num_namespaces) { | ||
282 | + trace_nvme_identify_ns(nsid); | ||
283 | + | ||
284 | + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { | ||
285 | + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); | ||
286 | return NVME_INVALID_NSID | NVME_DNR; | ||
287 | } | ||
288 | |||
289 | ns = &n->namespaces[nsid - 1]; | ||
290 | + | ||
291 | return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), | ||
292 | prp1, prp2); | ||
293 | } | ||
294 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) | ||
295 | uint16_t ret; | ||
296 | int i, j = 0; | ||
297 | |||
298 | + trace_nvme_identify_nslist(min_nsid); | ||
299 | + | ||
300 | list = g_malloc0(data_len); | ||
301 | for (i = 0; i < n->num_namespaces; i++) { | ||
302 | if (i < min_nsid) { | ||
303 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) | ||
304 | case 0x02: | ||
305 | return nvme_identify_nslist(n, c); | ||
306 | default: | ||
307 | + trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); | ||
308 | return NVME_INVALID_FIELD | NVME_DNR; | ||
309 | } | ||
310 | } | ||
311 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
312 | switch (dw10) { | ||
313 | case NVME_VOLATILE_WRITE_CACHE: | ||
314 | result = blk_enable_write_cache(n->conf.blk); | ||
315 | + trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); | ||
316 | break; | ||
317 | case NVME_NUMBER_OF_QUEUES: | ||
318 | result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
319 | + trace_nvme_getfeat_numq(result); | ||
320 | break; | ||
321 | default: | ||
322 | + trace_nvme_err_invalid_getfeat(dw10); | ||
323 | return NVME_INVALID_FIELD | NVME_DNR; | ||
324 | } | ||
325 | |||
326 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
327 | blk_set_enable_write_cache(n->conf.blk, dw11 & 1); | ||
328 | break; | ||
329 | case NVME_NUMBER_OF_QUEUES: | ||
330 | + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, | ||
331 | + ((dw11 >> 16) & 0xFFFF) + 1, | ||
332 | + n->num_queues - 1, n->num_queues - 1); | ||
333 | req->cqe.result = | ||
334 | cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); | ||
335 | break; | ||
336 | default: | ||
337 | + trace_nvme_err_invalid_setfeat(dw10); | ||
338 | return NVME_INVALID_FIELD | NVME_DNR; | ||
339 | } | ||
340 | return NVME_SUCCESS; | ||
341 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) | ||
342 | case NVME_ADM_CMD_GET_FEATURES: | ||
343 | return nvme_get_feature(n, cmd, req); | ||
344 | default: | ||
345 | + trace_nvme_err_invalid_admin_opc(cmd->opcode); | ||
346 | return NVME_INVALID_OPCODE | NVME_DNR; | ||
347 | } | ||
348 | } | ||
349 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
350 | uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; | ||
351 | uint32_t page_size = 1 << page_bits; | ||
352 | |||
353 | - if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || | ||
354 | - n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || | ||
355 | - NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || | ||
356 | - NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || | ||
357 | - NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || | ||
358 | - NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || | ||
359 | - NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || | ||
360 | - NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || | ||
361 | - !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { | ||
362 | + if (unlikely(n->cq[0])) { | ||
363 | + trace_nvme_err_startfail_cq(); | ||
364 | + return -1; | ||
365 | + } | ||
366 | + if (unlikely(n->sq[0])) { | ||
367 | + trace_nvme_err_startfail_sq(); | ||
368 | + return -1; | ||
369 | + } | ||
370 | + if (unlikely(!n->bar.asq)) { | ||
371 | + trace_nvme_err_startfail_nbarasq(); | ||
372 | + return -1; | ||
373 | + } | ||
374 | + if (unlikely(!n->bar.acq)) { | ||
375 | + trace_nvme_err_startfail_nbaracq(); | ||
376 | + return -1; | ||
377 | + } | ||
378 | + if (unlikely(n->bar.asq & (page_size - 1))) { | ||
379 | + trace_nvme_err_startfail_asq_misaligned(n->bar.asq); | ||
380 | + return -1; | ||
381 | + } | ||
382 | + if (unlikely(n->bar.acq & (page_size - 1))) { | ||
383 | + trace_nvme_err_startfail_acq_misaligned(n->bar.acq); | ||
384 | + return -1; | ||
385 | + } | ||
386 | + if (unlikely(NVME_CC_MPS(n->bar.cc) < | ||
387 | + NVME_CAP_MPSMIN(n->bar.cap))) { | ||
388 | + trace_nvme_err_startfail_page_too_small( | ||
389 | + NVME_CC_MPS(n->bar.cc), | ||
390 | + NVME_CAP_MPSMIN(n->bar.cap)); | ||
391 | + return -1; | ||
392 | + } | ||
393 | + if (unlikely(NVME_CC_MPS(n->bar.cc) > | ||
394 | + NVME_CAP_MPSMAX(n->bar.cap))) { | ||
395 | + trace_nvme_err_startfail_page_too_large( | ||
396 | + NVME_CC_MPS(n->bar.cc), | ||
397 | + NVME_CAP_MPSMAX(n->bar.cap)); | ||
398 | + return -1; | ||
399 | + } | ||
400 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) < | ||
401 | + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { | ||
402 | + trace_nvme_err_startfail_cqent_too_small( | ||
403 | + NVME_CC_IOCQES(n->bar.cc), | ||
404 | + NVME_CTRL_CQES_MIN(n->bar.cap)); | ||
405 | + return -1; | ||
406 | + } | ||
407 | + if (unlikely(NVME_CC_IOCQES(n->bar.cc) > | ||
408 | + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { | ||
409 | + trace_nvme_err_startfail_cqent_too_large( | ||
410 | + NVME_CC_IOCQES(n->bar.cc), | ||
411 | + NVME_CTRL_CQES_MAX(n->bar.cap)); | ||
412 | + return -1; | ||
413 | + } | ||
414 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) < | ||
415 | + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { | ||
416 | + trace_nvme_err_startfail_sqent_too_small( | ||
417 | + NVME_CC_IOSQES(n->bar.cc), | ||
418 | + NVME_CTRL_SQES_MIN(n->bar.cap)); | ||
419 | + return -1; | ||
420 | + } | ||
421 | + if (unlikely(NVME_CC_IOSQES(n->bar.cc) > | ||
422 | + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { | ||
423 | + trace_nvme_err_startfail_sqent_too_large( | ||
424 | + NVME_CC_IOSQES(n->bar.cc), | ||
425 | + NVME_CTRL_SQES_MAX(n->bar.cap)); | ||
426 | + return -1; | ||
427 | + } | ||
428 | + if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { | ||
429 | + trace_nvme_err_startfail_asqent_sz_zero(); | ||
430 | + return -1; | ||
431 | + } | ||
432 | + if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { | ||
433 | + trace_nvme_err_startfail_acqent_sz_zero(); | ||
434 | return -1; | ||
435 | } | ||
436 | |||
437 | @@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n) | ||
438 | static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
439 | unsigned size) | ||
440 | { | ||
441 | + if (unlikely(offset & (sizeof(uint32_t) - 1))) { | ||
442 | + NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, | ||
443 | + "MMIO write not 32-bit aligned," | ||
444 | + " offset=0x%"PRIx64"", offset); | ||
445 | + /* should be ignored, fall through for now */ | ||
446 | + } | ||
447 | + | ||
448 | + if (unlikely(size < sizeof(uint32_t))) { | ||
449 | + NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, | ||
450 | + "MMIO write smaller than 32-bits," | ||
451 | + " offset=0x%"PRIx64", size=%u", | ||
452 | + offset, size); | ||
453 | + /* should be ignored, fall through for now */ | ||
454 | + } | ||
455 | + | ||
456 | switch (offset) { | ||
457 | - case 0xc: | ||
458 | + case 0xc: /* INTMS */ | ||
459 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
460 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
461 | + "undefined access to interrupt mask set" | ||
462 | + " when MSI-X is enabled"); | ||
463 | + /* should be ignored, fall through for now */ | ||
464 | + } | ||
465 | n->bar.intms |= data & 0xffffffff; | ||
466 | n->bar.intmc = n->bar.intms; | ||
467 | + trace_nvme_mmio_intm_set(data & 0xffffffff, | ||
468 | + n->bar.intmc); | ||
469 | break; | ||
470 | - case 0x10: | ||
471 | + case 0x10: /* INTMC */ | ||
472 | + if (unlikely(msix_enabled(&(n->parent_obj)))) { | ||
473 | + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, | ||
474 | + "undefined access to interrupt mask clr" | ||
475 | + " when MSI-X is enabled"); | ||
476 | + /* should be ignored, fall through for now */ | ||
477 | + } | ||
478 | n->bar.intms &= ~(data & 0xffffffff); | ||
479 | n->bar.intmc = n->bar.intms; | ||
480 | + trace_nvme_mmio_intm_clr(data & 0xffffffff, | ||
481 | + n->bar.intmc); | ||
482 | break; | ||
483 | - case 0x14: | ||
484 | + case 0x14: /* CC */ | ||
485 | + trace_nvme_mmio_cfg(data & 0xffffffff); | ||
486 | /* Windows first sends data, then sends enable bit */ | ||
487 | if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && | ||
488 | !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) | ||
489 | @@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, | ||
490 | |||
491 | if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { | ||
492 | n->bar.cc = data; | ||
493 | - if (nvme_start_ctrl(n)) { | ||
494 | + if (unlikely(nvme_start_ctrl(n))) { | ||
495 | + trace_nvme_err_startfail(); | ||
496 | n->bar.csts = NVME_CSTS_FAILED; | ||
497 | } else { | ||
498 | + trace_nvme_mmio_start_success(); | ||
499 | n->bar.csts = NVME_CSTS_READY; | ||
500 | } | ||
501 | } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { | ||
502 | + trace_nvme_mmio_stopped(); | ||
503 | nvme_clear_ctrl(n); | ||
504 | n->bar.csts &= ~NVME_CSTS_READY; | ||
505 | } | ||
506 | if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { | ||
507 | - nvme_clear_ctrl(n); | ||
508 | - n->bar.cc = data; | ||
509 | - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
510 | + trace_nvme_mmio_shutdown_set(); | ||
511 | + nvme_clear_ctrl(n); | ||
512 | + n->bar.cc = data; | ||
513 | + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; | ||
514 | } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { | ||
515 | - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
516 | - n->bar.cc = data; | ||
517 | + trace_nvme_mmio_shutdown_cleared(); | ||
518 | + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; | ||
519 | + n->bar.cc = data; | ||
520 | + } | ||
521 | + break; | ||
522 | + case 0x1C: /* CSTS */ | ||
523 | + if (data & (1 << 4)) { | ||
524 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, | ||
525 | + "attempted to W1C CSTS.NSSRO" | ||
526 | + " but CAP.NSSRS is zero (not supported)"); | ||
527 | + } else if (data != 0) { | ||
528 | + NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, | ||
529 | + "attempted to set a read only bit" | ||
530 | + " of controller status"); | ||
531 | + } | ||
532 | + break; | ||
533 | + case 0x20: /* NSSR */ | ||
534 | + if (data == 0x4E564D65) { | ||
535 | + trace_nvme_ub_mmiowr_ssreset_unsupported(); | ||
536 | + } else { | ||
537 | + /* The spec says that writes of other values have no effect */ | ||
538 | + return; | ||
539 | } | ||
540 | break; | ||
541 | - case 0x24: | ||
542 | + case 0x24: /* AQA */ | ||
543 | n->bar.aqa = data & 0xffffffff; | ||
544 | + trace_nvme_mmio_aqattr(data & 0xffffffff); | ||
545 | break; | ||
546 | - case 0x28: | ||
547 | + case 0x28: /* ASQ */ | ||
548 | n->bar.asq = data; | ||
549 | + trace_nvme_mmio_asqaddr(data); | ||
550 | break; | ||
551 | - case 0x2c: | ||
552 | + case 0x2c: /* ASQ hi */ | ||
553 | n->bar.asq |= data << 32; | ||
554 | + trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); | ||
555 | break; | ||
556 | - case 0x30: | ||
557 | + case 0x30: /* ACQ */ | ||
558 | + trace_nvme_mmio_acqaddr(data); | ||
559 | n->bar.acq = data; | ||
560 | break; | ||
561 | - case 0x34: | ||
562 | + case 0x34: /* ACQ hi */ | ||
563 | n->bar.acq |= data << 32; | ||
564 | + trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); | ||
565 | break; | ||
566 | + case 0x38: /* CMBLOC */ | ||
567 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, | ||
568 | + "invalid write to reserved CMBLOC" | ||
569 | + " when CMBSZ is zero, ignored"); | ||
570 | + return; | ||
571 | + case 0x3C: /* CMBSZ */ | ||
572 | + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, | ||
573 | + "invalid write to read only CMBSZ, ignored"); | ||
574 | + return; | ||
575 | default: | ||
576 | + NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, | ||
577 | + "invalid MMIO write," | ||
578 | + " offset=0x%"PRIx64", data=%"PRIx64"", | ||
579 | + offset, data); | ||
580 | break; | ||
581 | } | ||
582 | } | ||
583 | @@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) | ||
584 | uint8_t *ptr = (uint8_t *)&n->bar; | ||
585 | uint64_t val = 0; | ||
586 | |||
587 | + if (unlikely(addr & (sizeof(uint32_t) - 1))) { | ||
588 | + NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, | ||
589 | + "MMIO read not 32-bit aligned," | ||
590 | + " offset=0x%"PRIx64"", addr); | ||
591 | + /* should RAZ, fall through for now */ | ||
592 | + } else if (unlikely(size < sizeof(uint32_t))) { | ||
593 | + NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, | ||
594 | + "MMIO read smaller than 32-bits," | ||
595 | + " offset=0x%"PRIx64"", addr); | ||
596 | + /* should RAZ, fall through for now */ | ||
597 | + } | ||
598 | + | ||
599 | if (addr < sizeof(n->bar)) { | ||
600 | memcpy(&val, ptr + addr, size); | ||
601 | + } else { | ||
602 | + NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, | ||
603 | + "MMIO read beyond last register," | ||
604 | + " offset=0x%"PRIx64", returning 0", addr); | ||
605 | } | ||
606 | + | ||
607 | return val; | ||
608 | } | ||
609 | |||
610 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
611 | { | ||
612 | uint32_t qid; | ||
613 | |||
614 | - if (addr & ((1 << 2) - 1)) { | ||
615 | + if (unlikely(addr & ((1 << 2) - 1))) { | ||
616 | + NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, | ||
617 | + "doorbell write not 32-bit aligned," | ||
618 | + " offset=0x%"PRIx64", ignoring", addr); | ||
619 | return; | ||
620 | } | ||
621 | |||
622 | if (((addr - 0x1000) >> 2) & 1) { | ||
623 | + /* Completion queue doorbell write */ | ||
624 | + | ||
625 | uint16_t new_head = val & 0xffff; | ||
626 | int start_sqs; | ||
627 | NvmeCQueue *cq; | ||
628 | |||
629 | qid = (addr - (0x1000 + (1 << 2))) >> 3; | ||
630 | - if (nvme_check_cqid(n, qid)) { | ||
631 | + if (unlikely(nvme_check_cqid(n, qid))) { | ||
632 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, | ||
633 | + "completion queue doorbell write" | ||
634 | + " for nonexistent queue," | ||
635 | + " sqid=%"PRIu32", ignoring", qid); | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | cq = n->cq[qid]; | ||
640 | - if (new_head >= cq->size) { | ||
641 | + if (unlikely(new_head >= cq->size)) { | ||
642 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, | ||
643 | + "completion queue doorbell write value" | ||
644 | + " beyond queue size, sqid=%"PRIu32"," | ||
645 | + " new_head=%"PRIu16", ignoring", | ||
646 | + qid, new_head); | ||
647 | return; | ||
648 | } | ||
649 | |||
650 | @@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) | ||
651 | nvme_isr_notify(n, cq); | ||
652 | } | ||
653 | } else { | ||
654 | + /* Submission queue doorbell write */ | ||
655 | + | ||
656 | uint16_t new_tail = val & 0xffff; | ||
657 | NvmeSQueue *sq; | ||
658 | |||
659 | qid = (addr - 0x1000) >> 3; | ||
660 | - if (nvme_check_sqid(n, qid)) { | ||
661 | + if (unlikely(nvme_check_sqid(n, qid))) { | ||
662 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, | ||
663 | + "submission queue doorbell write" | ||
664 | + " for nonexistent queue," | ||
665 | + " sqid=%"PRIu32", ignoring", qid); | ||
666 | return; | ||
667 | } | ||
668 | |||
669 | sq = n->sq[qid]; | ||
670 | - if (new_tail >= sq->size) { | ||
671 | + if (unlikely(new_tail >= sq->size)) { | ||
672 | + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, | ||
673 | + "submission queue doorbell write value" | ||
674 | + " beyond queue size, sqid=%"PRIu32"," | ||
675 | + " new_tail=%"PRIu16", ignoring", | ||
676 | + qid, new_tail); | ||
677 | return; | ||
678 | } | ||
679 | |||
680 | diff --git a/hw/block/trace-events b/hw/block/trace-events | ||
681 | index XXXXXXX..XXXXXXX 100644 | ||
682 | --- a/hw/block/trace-events | ||
683 | +++ b/hw/block/trace-events | ||
684 | @@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 | ||
685 | hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" | ||
686 | hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" | ||
687 | |||
688 | +# hw/block/nvme.c | ||
689 | +# nvme traces for successful events | ||
690 | +nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" | ||
691 | +nvme_irq_pin(void) "pulsing IRQ pin" | ||
692 | +nvme_irq_masked(void) "IRQ is masked" | ||
693 | +nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" | ||
694 | +nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" | ||
695 | +nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" | ||
696 | +nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" | ||
697 | +nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" | ||
698 | +nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" | ||
699 | +nvme_identify_ctrl(void) "identify controller" | ||
700 | +nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" | ||
701 | +nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" | ||
702 | +nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s" | ||
703 | +nvme_getfeat_numq(int result) "get feature number of queues, result=%d" | ||
704 | +nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" | ||
705 | +nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
706 | +nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" | ||
707 | +nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" | ||
708 | +nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" | ||
709 | +nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" | ||
710 | +nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" | ||
711 | +nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
712 | +nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" | ||
713 | +nvme_mmio_start_success(void) "setting controller enable bit succeeded" | ||
714 | +nvme_mmio_stopped(void) "cleared controller enable bit" | ||
715 | +nvme_mmio_shutdown_set(void) "shutdown bit set" | ||
716 | +nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" | ||
717 | + | ||
718 | +# nvme traces for error conditions | ||
719 | +nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" | ||
720 | +nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" | ||
721 | +nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" | ||
722 | +nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" | ||
723 | +nvme_err_invalid_field(void) "invalid field" | ||
724 | +nvme_err_invalid_prp(void) "invalid PRP" | ||
725 | +nvme_err_invalid_sgl(void) "invalid SGL" | ||
726 | +nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" | ||
727 | +nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" | ||
728 | +nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" | ||
729 | +nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" | ||
730 | +nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" | ||
731 | +nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" | ||
732 | +nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" | ||
733 | +nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" | ||
734 | +nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" | ||
735 | +nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" | ||
736 | +nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" | ||
737 | +nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" | ||
738 | +nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" | ||
739 | +nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" | ||
740 | +nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" | ||
741 | +nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" | ||
742 | +nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" | ||
743 | +nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" | ||
744 | +nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" | ||
745 | +nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" | ||
746 | +nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" | ||
747 | +nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" | ||
748 | +nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" | ||
749 | +nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" | ||
750 | +nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" | ||
751 | +nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" | ||
752 | +nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" | ||
753 | +nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" | ||
754 | +nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" | ||
755 | +nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" | ||
756 | +nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" | ||
757 | +nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" | ||
758 | +nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" | ||
759 | +nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" | ||
760 | +nvme_err_startfail(void) "setting controller enable bit failed" | ||
761 | + | ||
762 | +# Traces for undefined behavior | ||
763 | +nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" | ||
764 | +nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" | ||
765 | +nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" | ||
766 | +nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" | ||
767 | +nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" | ||
768 | +nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" | ||
769 | +nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" | ||
770 | +nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" | ||
771 | +nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" | ||
772 | +nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" | ||
773 | +nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" | ||
774 | +nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" | ||
775 | +nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" | ||
776 | +nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" | ||
777 | +nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
778 | +nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" | ||
779 | +nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" | ||
780 | + | ||
781 | # hw/block/xen_disk.c | ||
782 | xen_disk_alloc(char *name) "%s" | ||
783 | xen_disk_init(char *name) "%s" | ||
43 | -- | 784 | -- |
44 | 2.13.6 | 785 | 2.13.6 |
45 | 786 | ||
46 | 787 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | EINTR should be checked against errno, not ret. While fixing the bug, | 3 | Management tools create overlays of running guests with qemu-img: |
4 | collect the branches with a switch block. | ||
5 | 4 | ||
6 | Also, change the return value from -ENOTSUP to -ENOSPC when the actual | 5 | $ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2 |
7 | issue is that the request range passes EOF, which should be distinguishable from | ||
8 | the case of error == ENOSYS by the caller, so that it could still retry | ||
9 | with other byte ranges, whereas it shouldn't retry anymore upon ENOSYS. | ||
10 | 6 | ||
7 | but this doesn't work anymore due to image locking: | ||
8 | |||
9 | qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock | ||
10 | Is another process using the image? | ||
11 | Could not open backing image to determine size. | ||
12 | Use the force share option to allow this use case again. | ||
13 | |||
14 | Cc: qemu-stable@nongnu.org | ||
11 | Signed-off-by: Fam Zheng <famz@redhat.com> | 15 | Signed-off-by: Fam Zheng <famz@redhat.com> |
16 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 18 | --- |
14 | block/file-posix.c | 17 +++++++++-------- | 19 | block.c | 3 ++- |
15 | 1 file changed, 9 insertions(+), 8 deletions(-) | 20 | 1 file changed, 2 insertions(+), 1 deletion(-) |
16 | 21 | ||
17 | diff --git a/block/file-posix.c b/block/file-posix.c | 22 | diff --git a/block.c b/block.c |
18 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/file-posix.c | 24 | --- a/block.c |
20 | +++ b/block/file-posix.c | 25 | +++ b/block.c |
21 | @@ -XXX,XX +XXX,XX @@ static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) | 26 | @@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt, |
22 | ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, | 27 | back_flags = flags; |
23 | aiocb->aio_fd2, &out_off, | 28 | back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); |
24 | bytes, 0); | 29 | |
25 | - if (ret == -EINTR) { | 30 | + backing_options = qdict_new(); |
26 | - continue; | 31 | if (backing_fmt) { |
27 | + if (ret == 0) { | 32 | - backing_options = qdict_new(); |
28 | + /* No progress (e.g. when beyond EOF), let the caller fall back to | 33 | qdict_put_str(backing_options, "driver", backing_fmt); |
29 | + * buffer I/O. */ | ||
30 | + return -ENOSPC; | ||
31 | } | 34 | } |
32 | if (ret < 0) { | 35 | + qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); |
33 | - if (errno == ENOSYS) { | 36 | |
34 | + switch (errno) { | 37 | bs = bdrv_open(full_backing, NULL, backing_options, back_flags, |
35 | + case ENOSYS: | 38 | &local_err); |
36 | return -ENOTSUP; | ||
37 | - } else { | ||
38 | + case EINTR: | ||
39 | + continue; | ||
40 | + default: | ||
41 | return -errno; | ||
42 | } | ||
43 | } | ||
44 | - if (!ret) { | ||
45 | - /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */ | ||
46 | - return -ENOTSUP; | ||
47 | - } | ||
48 | bytes -= ret; | ||
49 | } | ||
50 | return 0; | ||
51 | -- | 39 | -- |
52 | 2.13.6 | 40 | 2.13.6 |
53 | 41 | ||
54 | 42 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 3 | It's not working anymore since QEMU v1.3.0 - time to remove it now. |
4 | byte-based. Make the change for the internals of the qcow | ||
5 | driver read function, by iterating over offset/bytes instead of | ||
6 | sector_num/nb_sectors, and with a rename of index_in_cluster and | ||
7 | repurposing of n to track bytes instead of sectors. | ||
8 | 4 | ||
9 | A later patch will then switch the qcow driver as a whole over | 5 | Signed-off-by: Thomas Huth <thuth@redhat.com> |
10 | to byte-based operation. | 6 | Reviewed-by: John Snow <jsnow@redhat.com> |
11 | 7 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | |
12 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
13 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 9 | --- |
16 | block/qcow.c | 42 ++++++++++++++++++++---------------------- | 10 | blockdev.c | 11 ----------- |
17 | 1 file changed, 20 insertions(+), 22 deletions(-) | 11 | qemu-doc.texi | 6 ------ |
12 | 2 files changed, 17 deletions(-) | ||
18 | 13 | ||
19 | diff --git a/block/qcow.c b/block/qcow.c | 14 | diff --git a/blockdev.c b/blockdev.c |
20 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/qcow.c | 16 | --- a/blockdev.c |
22 | +++ b/block/qcow.c | 17 | +++ b/blockdev.c |
23 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | 18 | @@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = { |
24 | int nb_sectors, QEMUIOVector *qiov) | 19 | .type = QEMU_OPT_STRING, |
25 | { | 20 | .help = "chs translation (auto, lba, none)", |
26 | BDRVQcowState *s = bs->opaque; | 21 | },{ |
27 | - int index_in_cluster; | 22 | - .name = "boot", |
28 | + int offset_in_cluster; | 23 | - .type = QEMU_OPT_BOOL, |
29 | int ret = 0, n; | 24 | - .help = "(deprecated, ignored)", |
30 | uint64_t cluster_offset; | 25 | - },{ |
31 | struct iovec hd_iov; | 26 | .name = "addr", |
32 | QEMUIOVector hd_qiov; | 27 | .type = QEMU_OPT_STRING, |
33 | uint8_t *buf; | 28 | .help = "pci address (virtio only)", |
34 | void *orig_buf; | 29 | @@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) |
35 | + int64_t offset = sector_num * BDRV_SECTOR_SIZE; | 30 | goto fail; |
36 | + int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; | ||
37 | |||
38 | if (qiov->niov > 1) { | ||
39 | buf = orig_buf = qemu_try_blockalign(bs, qiov->size); | ||
40 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
41 | |||
42 | qemu_co_mutex_lock(&s->lock); | ||
43 | |||
44 | - while (nb_sectors != 0) { | ||
45 | + while (bytes != 0) { | ||
46 | /* prepare next request */ | ||
47 | - ret = get_cluster_offset(bs, sector_num << 9, | ||
48 | - 0, 0, 0, 0, &cluster_offset); | ||
49 | + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); | ||
50 | if (ret < 0) { | ||
51 | break; | ||
52 | } | ||
53 | - index_in_cluster = sector_num & (s->cluster_sectors - 1); | ||
54 | - n = s->cluster_sectors - index_in_cluster; | ||
55 | - if (n > nb_sectors) { | ||
56 | - n = nb_sectors; | ||
57 | + offset_in_cluster = offset & (s->cluster_size - 1); | ||
58 | + n = s->cluster_size - offset_in_cluster; | ||
59 | + if (n > bytes) { | ||
60 | + n = bytes; | ||
61 | } | ||
62 | |||
63 | if (!cluster_offset) { | ||
64 | if (bs->backing) { | ||
65 | /* read from the base image */ | ||
66 | hd_iov.iov_base = (void *)buf; | ||
67 | - hd_iov.iov_len = n * 512; | ||
68 | + hd_iov.iov_len = n; | ||
69 | qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); | ||
70 | qemu_co_mutex_unlock(&s->lock); | ||
71 | /* qcow2 emits this on bs->file instead of bs->backing */ | ||
72 | BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
73 | - ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); | ||
74 | + ret = bdrv_co_preadv(bs->backing, offset, n, &hd_qiov, 0); | ||
75 | qemu_co_mutex_lock(&s->lock); | ||
76 | if (ret < 0) { | ||
77 | break; | ||
78 | } | ||
79 | } else { | ||
80 | /* Note: in this case, no need to wait */ | ||
81 | - memset(buf, 0, 512 * n); | ||
82 | + memset(buf, 0, n); | ||
83 | } | ||
84 | } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { | ||
85 | /* add AIO support for compressed blocks ? */ | ||
86 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
87 | ret = -EIO; | ||
88 | break; | ||
89 | } | ||
90 | - memcpy(buf, | ||
91 | - s->cluster_cache + index_in_cluster * 512, 512 * n); | ||
92 | + memcpy(buf, s->cluster_cache + offset_in_cluster, n); | ||
93 | } else { | ||
94 | if ((cluster_offset & 511) != 0) { | ||
95 | ret = -EIO; | ||
96 | break; | ||
97 | } | ||
98 | hd_iov.iov_base = (void *)buf; | ||
99 | - hd_iov.iov_len = n * 512; | ||
100 | + hd_iov.iov_len = n; | ||
101 | qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); | ||
102 | qemu_co_mutex_unlock(&s->lock); | ||
103 | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | ||
104 | - ret = bdrv_co_readv(bs->file, | ||
105 | - (cluster_offset >> 9) + index_in_cluster, | ||
106 | - n, &hd_qiov); | ||
107 | + ret = bdrv_co_preadv(bs->file, cluster_offset + offset_in_cluster, | ||
108 | + n, &hd_qiov, 0); | ||
109 | qemu_co_mutex_lock(&s->lock); | ||
110 | if (ret < 0) { | ||
111 | break; | ||
112 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
113 | if (bs->encrypted) { | ||
114 | assert(s->crypto); | ||
115 | if (qcrypto_block_decrypt(s->crypto, | ||
116 | - sector_num * BDRV_SECTOR_SIZE, buf, | ||
117 | - n * BDRV_SECTOR_SIZE, NULL) < 0) { | ||
118 | + offset, buf, n, NULL) < 0) { | ||
119 | ret = -EIO; | ||
120 | break; | ||
121 | } | ||
122 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
123 | } | ||
124 | ret = 0; | ||
125 | |||
126 | - nb_sectors -= n; | ||
127 | - sector_num += n; | ||
128 | - buf += n * 512; | ||
129 | + bytes -= n; | ||
130 | + offset += n; | ||
131 | + buf += n; | ||
132 | } | 31 | } |
133 | 32 | ||
134 | qemu_co_mutex_unlock(&s->lock); | 33 | - /* Deprecated option boot=[on|off] */ |
34 | - if (qemu_opt_get(legacy_opts, "boot") != NULL) { | ||
35 | - fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be " | ||
36 | - "ignored. Future versions will reject this parameter. Please " | ||
37 | - "update your scripts.\n"); | ||
38 | - } | ||
39 | - | ||
40 | /* Other deprecated options */ | ||
41 | if (!qtest_enabled()) { | ||
42 | for (i = 0; i < ARRAY_SIZE(deprecated); i++) { | ||
43 | diff --git a/qemu-doc.texi b/qemu-doc.texi | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/qemu-doc.texi | ||
46 | +++ b/qemu-doc.texi | ||
47 | @@ -XXX,XX +XXX,XX @@ deprecated. | ||
48 | |||
49 | @section System emulator command line arguments | ||
50 | |||
51 | -@subsection -drive boot=on|off (since 1.3.0) | ||
52 | - | ||
53 | -The ``boot=on|off'' option to the ``-drive'' argument is | ||
54 | -ignored. Applications should use the ``bootindex=N'' parameter | ||
55 | -to set an absolute ordering between devices instead. | ||
56 | - | ||
57 | @subsection -tdf (since 1.3.0) | ||
58 | |||
59 | The ``-tdf'' argument is ignored. The behaviour implemented | ||
135 | -- | 60 | -- |
136 | 2.13.6 | 61 | 2.13.6 |
137 | 62 | ||
138 | 63 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Thomas Huth <thuth@redhat.com> | |
2 | |||
3 | It's been marked as deprecated since QEMU v2.10.0, and so far nobody | ||
4 | complained that we should keep it, so let's remove this legacy option | ||
5 | now to simplify the code quite a bit. | ||
6 | |||
7 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
8 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
9 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | --- | ||
12 | vl.c | 86 ++------------------------------------------------------- | ||
13 | qemu-doc.texi | 8 ------ | ||
14 | qemu-options.hx | 19 ++----------- | ||
15 | 3 files changed, 4 insertions(+), 109 deletions(-) | ||
16 | |||
17 | diff --git a/vl.c b/vl.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/vl.c | ||
20 | +++ b/vl.c | ||
21 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
22 | const char *boot_order = NULL; | ||
23 | const char *boot_once = NULL; | ||
24 | DisplayState *ds; | ||
25 | - int cyls, heads, secs, translation; | ||
26 | QemuOpts *opts, *machine_opts; | ||
27 | - QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL; | ||
28 | + QemuOpts *icount_opts = NULL, *accel_opts = NULL; | ||
29 | QemuOptsList *olist; | ||
30 | int optind; | ||
31 | const char *optarg; | ||
32 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
33 | |||
34 | cpu_model = NULL; | ||
35 | snapshot = 0; | ||
36 | - cyls = heads = secs = 0; | ||
37 | - translation = BIOS_ATA_TRANSLATION_AUTO; | ||
38 | |||
39 | nb_nics = 0; | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
42 | if (optind >= argc) | ||
43 | break; | ||
44 | if (argv[optind][0] != '-') { | ||
45 | - hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); | ||
46 | + drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); | ||
47 | } else { | ||
48 | const QEMUOption *popt; | ||
49 | |||
50 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
51 | cpu_model = optarg; | ||
52 | break; | ||
53 | case QEMU_OPTION_hda: | ||
54 | - { | ||
55 | - char buf[256]; | ||
56 | - if (cyls == 0) | ||
57 | - snprintf(buf, sizeof(buf), "%s", HD_OPTS); | ||
58 | - else | ||
59 | - snprintf(buf, sizeof(buf), | ||
60 | - "%s,cyls=%d,heads=%d,secs=%d%s", | ||
61 | - HD_OPTS , cyls, heads, secs, | ||
62 | - translation == BIOS_ATA_TRANSLATION_LBA ? | ||
63 | - ",trans=lba" : | ||
64 | - translation == BIOS_ATA_TRANSLATION_NONE ? | ||
65 | - ",trans=none" : ""); | ||
66 | - drive_add(IF_DEFAULT, 0, optarg, buf); | ||
67 | - break; | ||
68 | - } | ||
69 | case QEMU_OPTION_hdb: | ||
70 | case QEMU_OPTION_hdc: | ||
71 | case QEMU_OPTION_hdd: | ||
72 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp) | ||
73 | case QEMU_OPTION_snapshot: | ||
74 | snapshot = 1; | ||
75 | break; | ||
76 | - case QEMU_OPTION_hdachs: | ||
77 | - { | ||
78 | - const char *p; | ||
79 | - p = optarg; | ||
80 | - cyls = strtol(p, (char **)&p, 0); | ||
81 | - if (cyls < 1 || cyls > 16383) | ||
82 | - goto chs_fail; | ||
83 | - if (*p != ',') | ||
84 | - goto chs_fail; | ||
85 | - p++; | ||
86 | - heads = strtol(p, (char **)&p, 0); | ||
87 | - if (heads < 1 || heads > 16) | ||
88 | - goto chs_fail; | ||
89 | - if (*p != ',') | ||
90 | - goto chs_fail; | ||
91 | - p++; | ||
92 | - secs = strtol(p, (char **)&p, 0); | ||
93 | - if (secs < 1 || secs > 63) | ||
94 | - goto chs_fail; | ||
95 | - if (*p == ',') { | ||
96 | - p++; | ||
97 | - if (!strcmp(p, "large")) { | ||
98 | - translation = BIOS_ATA_TRANSLATION_LARGE; | ||
99 | - } else if (!strcmp(p, "rechs")) { | ||
100 | - translation = BIOS_ATA_TRANSLATION_RECHS; | ||
101 | - } else if (!strcmp(p, "none")) { | ||
102 | - translation = BIOS_ATA_TRANSLATION_NONE; | ||
103 | - } else if (!strcmp(p, "lba")) { | ||
104 | - translation = BIOS_ATA_TRANSLATION_LBA; | ||
105 | - } else if (!strcmp(p, "auto")) { | ||
106 | - translation = BIOS_ATA_TRANSLATION_AUTO; | ||
107 | - } else { | ||
108 | - goto chs_fail; | ||
109 | - } | ||
110 | - } else if (*p != '\0') { | ||
111 | - chs_fail: | ||
112 | - error_report("invalid physical CHS format"); | ||
113 | - exit(1); | ||
114 | - } | ||
115 | - if (hda_opts != NULL) { | ||
116 | - qemu_opt_set_number(hda_opts, "cyls", cyls, | ||
117 | - &error_abort); | ||
118 | - qemu_opt_set_number(hda_opts, "heads", heads, | ||
119 | - &error_abort); | ||
120 | - qemu_opt_set_number(hda_opts, "secs", secs, | ||
121 | - &error_abort); | ||
122 | - if (translation == BIOS_ATA_TRANSLATION_LARGE) { | ||
123 | - qemu_opt_set(hda_opts, "trans", "large", | ||
124 | - &error_abort); | ||
125 | - } else if (translation == BIOS_ATA_TRANSLATION_RECHS) { | ||
126 | - qemu_opt_set(hda_opts, "trans", "rechs", | ||
127 | - &error_abort); | ||
128 | - } else if (translation == BIOS_ATA_TRANSLATION_LBA) { | ||
129 | - qemu_opt_set(hda_opts, "trans", "lba", | ||
130 | - &error_abort); | ||
131 | - } else if (translation == BIOS_ATA_TRANSLATION_NONE) { | ||
132 | - qemu_opt_set(hda_opts, "trans", "none", | ||
133 | - &error_abort); | ||
134 | - } | ||
135 | - } | ||
136 | - } | ||
137 | - error_report("'-hdachs' is deprecated, please use '-device" | ||
138 | - " ide-hd,cyls=c,heads=h,secs=s,...' instead"); | ||
139 | - break; | ||
140 | case QEMU_OPTION_numa: | ||
141 | opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), | ||
142 | optarg, true); | ||
143 | diff --git a/qemu-doc.texi b/qemu-doc.texi | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/qemu-doc.texi | ||
146 | +++ b/qemu-doc.texi | ||
147 | @@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the | ||
148 | ``-object filter-dump'' argument which works in combination | ||
149 | with the modern ``-netdev`` backends instead. | ||
150 | |||
151 | -@subsection -hdachs (since 2.10.0) | ||
152 | - | ||
153 | -The ``-hdachs'' argument is now a synonym for setting | ||
154 | -the ``cyls'', ``heads'', ``secs'', and ``trans'' properties | ||
155 | -on the ``ide-hd'' device using the ``-device'' argument. | ||
156 | -The new syntax allows different settings to be provided | ||
157 | -per disk. | ||
158 | - | ||
159 | @subsection -usbdevice (since 2.10.0) | ||
160 | |||
161 | The ``-usbdevice DEV'' argument is now a synonym for setting | ||
162 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
163 | index XXXXXXX..XXXXXXX 100644 | ||
164 | --- a/qemu-options.hx | ||
165 | +++ b/qemu-options.hx | ||
166 | @@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type. | ||
167 | @item media=@var{media} | ||
168 | This option defines the type of the media: disk or cdrom. | ||
169 | @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}] | ||
170 | -These options have the same definition as they have in @option{-hdachs}. | ||
171 | -These parameters are deprecated, use the corresponding parameters | ||
172 | +Force disk physical geometry and the optional BIOS translation (trans=none or | ||
173 | +lba). These parameters are deprecated, use the corresponding parameters | ||
174 | of @code{-device} instead. | ||
175 | @item snapshot=@var{snapshot} | ||
176 | @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive | ||
177 | @@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force | ||
178 | the write back by pressing @key{C-a s} (@pxref{disk_images}). | ||
179 | ETEXI | ||
180 | |||
181 | -DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \ | ||
182 | - "-hdachs c,h,s[,t]\n" \ | ||
183 | - " force hard disk 0 physical geometry and the optional BIOS\n" \ | ||
184 | - " translation (t=none or lba) (usually QEMU can guess them)\n", | ||
185 | - QEMU_ARCH_ALL) | ||
186 | -STEXI | ||
187 | -@item -hdachs @var{c},@var{h},@var{s},[,@var{t}] | ||
188 | -@findex -hdachs | ||
189 | -Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <= | ||
190 | -@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS | ||
191 | -translation mode (@var{t}=none, lba or auto). Usually QEMU can guess | ||
192 | -all those parameters. This option is deprecated, please use | ||
193 | -@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead. | ||
194 | -ETEXI | ||
195 | - | ||
196 | DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, | ||
197 | "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" | ||
198 | " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n" | ||
199 | -- | ||
200 | 2.13.6 | ||
201 | |||
202 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
1 | 2 | ||
3 | Looks like we forgot to announce the deprecation of these options in | ||
4 | the corresponding chapter of the qemu-doc text, so let's do that now. | ||
5 | |||
6 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
7 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
8 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | ||
11 | qemu-doc.texi | 15 +++++++++++++++ | ||
12 | 1 file changed, 15 insertions(+) | ||
13 | |||
14 | diff --git a/qemu-doc.texi b/qemu-doc.texi | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/qemu-doc.texi | ||
17 | +++ b/qemu-doc.texi | ||
18 | @@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU. | ||
19 | The ``-drive if=scsi'' argument is replaced by the the | ||
20 | ``-device BUS-TYPE'' argument combined with ``-drive if=none''. | ||
21 | |||
22 | +@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0) | ||
23 | + | ||
24 | + The drive geometry arguments are replaced by the geometry arguments | ||
25 | +that can be specified with the ``-device'' parameter. | ||
26 | + | ||
27 | +@subsection -drive serial=... (since 2.10.0) | ||
28 | + | ||
29 | + The drive serial argument is replaced by the serial argument | ||
30 | +that can be specified with the ``-device'' parameter. | ||
31 | + | ||
32 | +@subsection -drive addr=... (since 2.10.0) | ||
33 | + | ||
34 | + The drive addr argument is replaced by the addr argument | ||
35 | +that can be specified with the ``-device'' parameter. | ||
36 | + | ||
37 | @subsection -net dump (since 2.10.0) | ||
38 | |||
39 | The ``--net dump'' argument is now replaced with the | ||
40 | -- | ||
41 | 2.13.6 | ||
42 | |||
43 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | From: Fam Zheng <famz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Per SCSI definition the designator_length we receive from INQUIRY is 8, | ||
4 | 12 or at most 16, but we should be careful because the remote iscsi | ||
5 | target may misbehave, otherwise we could have a buffer overflow. | ||
6 | |||
7 | Reported-by: Max Reitz <mreitz@redhat.com> | ||
8 | Signed-off-by: Fam Zheng <famz@redhat.com> | 3 | Signed-off-by: Fam Zheng <famz@redhat.com> |
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 5 | --- |
11 | block/iscsi.c | 2 +- | 6 | include/block/block_int.h | 1 - |
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | 7 | block/io.c | 18 ------------------ |
8 | 2 files changed, 19 deletions(-) | ||
13 | 9 | ||
14 | diff --git a/block/iscsi.c b/block/iscsi.c | 10 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
15 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/iscsi.c | 12 | --- a/include/block/block_int.h |
17 | +++ b/block/iscsi.c | 13 | +++ b/include/block/block_int.h |
18 | @@ -XXX,XX +XXX,XX @@ static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun) | 14 | @@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk); |
19 | desc[5] = (dd->designator_type & 0xF) | 15 | bool blk_dev_is_medium_locked(BlockBackend *blk); |
20 | | ((dd->association & 3) << 4); | 16 | |
21 | desc[7] = dd->designator_length; | 17 | void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); |
22 | - memcpy(desc + 8, dd->designator, dd->designator_length); | 18 | -bool bdrv_requests_pending(BlockDriverState *bs); |
23 | + memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20)); | 19 | |
24 | 20 | void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); | |
25 | desc[28] = 0; | 21 | void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); |
26 | desc[29] = (lun->block_size >> 16) & 0xFF; | 22 | diff --git a/block/io.c b/block/io.c |
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/block/io.c | ||
25 | +++ b/block/io.c | ||
26 | @@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) | ||
27 | assert(old >= 1); | ||
28 | } | ||
29 | |||
30 | -/* Check if any requests are in-flight (including throttled requests) */ | ||
31 | -bool bdrv_requests_pending(BlockDriverState *bs) | ||
32 | -{ | ||
33 | - BdrvChild *child; | ||
34 | - | ||
35 | - if (atomic_read(&bs->in_flight)) { | ||
36 | - return true; | ||
37 | - } | ||
38 | - | ||
39 | - QLIST_FOREACH(child, &bs->children, next) { | ||
40 | - if (bdrv_requests_pending(child->bs)) { | ||
41 | - return true; | ||
42 | - } | ||
43 | - } | ||
44 | - | ||
45 | - return false; | ||
46 | -} | ||
47 | - | ||
48 | typedef struct { | ||
49 | Coroutine *co; | ||
50 | BlockDriverState *bs; | ||
27 | -- | 51 | -- |
28 | 2.13.6 | 52 | 2.13.6 |
29 | 53 | ||
30 | 54 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
2 | Reviewed-by: Fam Zheng <famz@redhat.com> | ||
3 | --- | ||
4 | block/io.c | 6 ++++++ | ||
5 | 1 file changed, 6 insertions(+) | ||
1 | 6 | ||
7 | diff --git a/block/io.c b/block/io.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/block/io.c | ||
10 | +++ b/block/io.c | ||
11 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | ||
12 | BdrvNextIterator it; | ||
13 | GSList *aio_ctxs = NULL, *ctx; | ||
14 | |||
15 | + /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread | ||
16 | + * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on | ||
17 | + * nodes in several different AioContexts, so make sure we're in the main | ||
18 | + * context. */ | ||
19 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | ||
20 | + | ||
21 | block_job_pause_all(); | ||
22 | |||
23 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
24 | -- | ||
25 | 2.13.6 | ||
26 | |||
27 | diff view generated by jsdifflib |
1 | This simplifies file-posix by implementing the coroutine variants of | 1 | bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively |
---|---|---|---|
2 | the discard and flush BlockDriver callbacks. These were the last | 2 | and also doesn't notify other parent nodes of children, which both means |
3 | remaining users of paio_submit(), which can be removed now. | 3 | that the child nodes are not actually drained, and bdrv_drained_begin() |
4 | is providing useful functionality only on a single node. | ||
5 | |||
6 | To keep things consistent, we also shouldn't call the block driver | ||
7 | callbacks recursively. | ||
8 | |||
9 | A proper recursive drain version that provides an actually working | ||
10 | drained section for child nodes will be introduced later. | ||
4 | 11 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | 13 | Reviewed-by: Fam Zheng <famz@redhat.com> |
7 | --- | 14 | --- |
8 | block/file-posix.c | 72 ++++++++++++++++++------------------------------------ | 15 | block/io.c | 16 +++++++++------- |
9 | 1 file changed, 24 insertions(+), 48 deletions(-) | 16 | 1 file changed, 9 insertions(+), 7 deletions(-) |
10 | 17 | ||
11 | diff --git a/block/file-posix.c b/block/file-posix.c | 18 | diff --git a/block/io.c b/block/io.c |
12 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/block/file-posix.c | 20 | --- a/block/io.c |
14 | +++ b/block/file-posix.c | 21 | +++ b/block/io.c |
15 | @@ -XXX,XX +XXX,XX @@ static inline int paio_submit_co(BlockDriverState *bs, int fd, | 22 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) |
16 | return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type); | ||
17 | } | 23 | } |
18 | 24 | ||
19 | -static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, | 25 | /* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ |
20 | - int64_t offset, QEMUIOVector *qiov, int bytes, | 26 | -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) |
21 | - BlockCompletionFunc *cb, void *opaque, int type) | 27 | +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive) |
22 | -{ | ||
23 | - RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); | ||
24 | - ThreadPool *pool; | ||
25 | - | ||
26 | - acb->bs = bs; | ||
27 | - acb->aio_type = type; | ||
28 | - acb->aio_fildes = fd; | ||
29 | - | ||
30 | - acb->aio_nbytes = bytes; | ||
31 | - acb->aio_offset = offset; | ||
32 | - | ||
33 | - if (qiov) { | ||
34 | - acb->aio_iov = qiov->iov; | ||
35 | - acb->aio_niov = qiov->niov; | ||
36 | - assert(qiov->size == acb->aio_nbytes); | ||
37 | - } | ||
38 | - | ||
39 | - trace_paio_submit(acb, opaque, offset, bytes, type); | ||
40 | - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); | ||
41 | - return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); | ||
42 | -} | ||
43 | - | ||
44 | static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, | ||
45 | uint64_t bytes, QEMUIOVector *qiov, int type) | ||
46 | { | 28 | { |
47 | @@ -XXX,XX +XXX,XX @@ static void raw_aio_unplug(BlockDriverState *bs) | 29 | BdrvChild *child, *tmp; |
48 | #endif | 30 | BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; |
31 | @@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) | ||
32 | bdrv_coroutine_enter(bs, data.co); | ||
33 | BDRV_POLL_WHILE(bs, !data.done); | ||
34 | |||
35 | - QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | ||
36 | - bdrv_drain_invoke(child->bs, begin); | ||
37 | + if (recursive) { | ||
38 | + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { | ||
39 | + bdrv_drain_invoke(child->bs, begin, true); | ||
40 | + } | ||
41 | } | ||
49 | } | 42 | } |
50 | 43 | ||
51 | -static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, | 44 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
52 | - BlockCompletionFunc *cb, void *opaque) | 45 | bdrv_parent_drained_begin(bs); |
53 | +static int raw_co_flush_to_disk(BlockDriverState *bs) | 46 | } |
54 | { | 47 | |
55 | BDRVRawState *s = bs->opaque; | 48 | - bdrv_drain_invoke(bs, true); |
56 | + int ret; | 49 | + bdrv_drain_invoke(bs, true, false); |
57 | 50 | bdrv_drain_recurse(bs); | |
58 | - if (fd_open(bs) < 0) | ||
59 | - return NULL; | ||
60 | + ret = fd_open(bs); | ||
61 | + if (ret < 0) { | ||
62 | + return ret; | ||
63 | + } | ||
64 | |||
65 | - return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); | ||
66 | + return paio_submit_co(bs, s->fd, 0, NULL, 0, QEMU_AIO_FLUSH); | ||
67 | } | 51 | } |
68 | 52 | ||
69 | static void raw_aio_attach_aio_context(BlockDriverState *bs, | 53 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) |
70 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, | 54 | } |
71 | #endif /* !__linux__ */ | 55 | |
56 | /* Re-enable things in child-to-parent order */ | ||
57 | - bdrv_drain_invoke(bs, false); | ||
58 | + bdrv_drain_invoke(bs, false, false); | ||
59 | bdrv_parent_drained_end(bs); | ||
60 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
72 | } | 61 | } |
73 | 62 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) | |
74 | -static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, | 63 | aio_context_acquire(aio_context); |
75 | - int64_t offset, int bytes, | 64 | aio_disable_external(aio_context); |
76 | - BlockCompletionFunc *cb, void *opaque) | 65 | bdrv_parent_drained_begin(bs); |
77 | +static coroutine_fn int | 66 | - bdrv_drain_invoke(bs, true); |
78 | +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) | 67 | + bdrv_drain_invoke(bs, true, true); |
79 | { | 68 | aio_context_release(aio_context); |
80 | BDRVRawState *s = bs->opaque; | 69 | |
81 | 70 | if (!g_slist_find(aio_ctxs, aio_context)) { | |
82 | - return paio_submit(bs, s->fd, offset, NULL, bytes, | 71 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
83 | - cb, opaque, QEMU_AIO_DISCARD); | 72 | |
84 | + return paio_submit_co(bs, s->fd, offset, NULL, bytes, QEMU_AIO_DISCARD); | 73 | /* Re-enable things in child-to-parent order */ |
85 | } | 74 | aio_context_acquire(aio_context); |
86 | 75 | - bdrv_drain_invoke(bs, false); | |
87 | static int coroutine_fn raw_co_pwrite_zeroes( | 76 | + bdrv_drain_invoke(bs, false, true); |
88 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | 77 | bdrv_parent_drained_end(bs); |
89 | 78 | aio_enable_external(aio_context); | |
90 | .bdrv_co_preadv = raw_co_preadv, | 79 | aio_context_release(aio_context); |
91 | .bdrv_co_pwritev = raw_co_pwritev, | ||
92 | - .bdrv_aio_flush = raw_aio_flush, | ||
93 | - .bdrv_aio_pdiscard = raw_aio_pdiscard, | ||
94 | + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, | ||
95 | + .bdrv_co_pdiscard = raw_co_pdiscard, | ||
96 | .bdrv_co_copy_range_from = raw_co_copy_range_from, | ||
97 | .bdrv_co_copy_range_to = raw_co_copy_range_to, | ||
98 | .bdrv_refresh_limits = raw_refresh_limits, | ||
99 | @@ -XXX,XX +XXX,XX @@ static int fd_open(BlockDriverState *bs) | ||
100 | return -EIO; | ||
101 | } | ||
102 | |||
103 | -static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs, | ||
104 | - int64_t offset, int bytes, | ||
105 | - BlockCompletionFunc *cb, void *opaque) | ||
106 | +static coroutine_fn int | ||
107 | +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) | ||
108 | { | ||
109 | BDRVRawState *s = bs->opaque; | ||
110 | + int ret; | ||
111 | |||
112 | - if (fd_open(bs) < 0) { | ||
113 | - return NULL; | ||
114 | + ret = fd_open(bs); | ||
115 | + if (ret < 0) { | ||
116 | + return ret; | ||
117 | } | ||
118 | - return paio_submit(bs, s->fd, offset, NULL, bytes, | ||
119 | - cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); | ||
120 | + return paio_submit_co(bs, s->fd, offset, NULL, bytes, | ||
121 | + QEMU_AIO_DISCARD | QEMU_AIO_BLKDEV); | ||
122 | } | ||
123 | |||
124 | static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, | ||
125 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = { | ||
126 | |||
127 | .bdrv_co_preadv = raw_co_preadv, | ||
128 | .bdrv_co_pwritev = raw_co_pwritev, | ||
129 | - .bdrv_aio_flush = raw_aio_flush, | ||
130 | - .bdrv_aio_pdiscard = hdev_aio_pdiscard, | ||
131 | + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, | ||
132 | + .bdrv_co_pdiscard = hdev_co_pdiscard, | ||
133 | .bdrv_co_copy_range_from = raw_co_copy_range_from, | ||
134 | .bdrv_co_copy_range_to = raw_co_copy_range_to, | ||
135 | .bdrv_refresh_limits = raw_refresh_limits, | ||
136 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
137 | |||
138 | .bdrv_co_preadv = raw_co_preadv, | ||
139 | .bdrv_co_pwritev = raw_co_pwritev, | ||
140 | - .bdrv_aio_flush = raw_aio_flush, | ||
141 | + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, | ||
142 | .bdrv_refresh_limits = raw_refresh_limits, | ||
143 | .bdrv_io_plug = raw_aio_plug, | ||
144 | .bdrv_io_unplug = raw_aio_unplug, | ||
145 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
146 | |||
147 | .bdrv_co_preadv = raw_co_preadv, | ||
148 | .bdrv_co_pwritev = raw_co_pwritev, | ||
149 | - .bdrv_aio_flush = raw_aio_flush, | ||
150 | + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, | ||
151 | .bdrv_refresh_limits = raw_refresh_limits, | ||
152 | .bdrv_io_plug = raw_aio_plug, | ||
153 | .bdrv_io_unplug = raw_aio_unplug, | ||
154 | -- | 80 | -- |
155 | 2.13.6 | 81 | 2.13.6 |
156 | 82 | ||
157 | 83 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | The existing test is for bdrv_drain_all_begin/end() only. Generalise the |
---|---|---|---|
2 | test case so that it can be run for the other variants as well. At the | ||
3 | moment this is only bdrv_drain_begin/end(), but in a while, we'll add | ||
4 | another one. | ||
2 | 5 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 6 | Also, add a backing file to the test node to test whether the operations |
4 | byte-based. Make the change for the last few sector-based calls | 7 | work recursively. |
5 | into the block layer from the vhdx driver. | ||
6 | 8 | ||
7 | Ideally, the vhdx driver should switch to doing everything | ||
8 | byte-based, but that's a more invasive change that requires a | ||
9 | bit more auditing. | ||
10 | |||
11 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
12 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 10 | --- |
15 | block/vhdx.c | 12 ++++++------ | 11 | tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++----- |
16 | 1 file changed, 6 insertions(+), 6 deletions(-) | 12 | 1 file changed, 62 insertions(+), 7 deletions(-) |
17 | 13 | ||
18 | diff --git a/block/vhdx.c b/block/vhdx.c | 14 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
19 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/vhdx.c | 16 | --- a/tests/test-bdrv-drain.c |
21 | +++ b/block/vhdx.c | 17 | +++ b/tests/test-bdrv-drain.c |
22 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, | 18 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = { |
23 | break; | 19 | |
24 | case PAYLOAD_BLOCK_FULLY_PRESENT: | 20 | .bdrv_co_drain_begin = bdrv_test_co_drain_begin, |
25 | qemu_co_mutex_unlock(&s->lock); | 21 | .bdrv_co_drain_end = bdrv_test_co_drain_end, |
26 | - ret = bdrv_co_readv(bs->file, | 22 | + |
27 | - sinfo.file_offset >> BDRV_SECTOR_BITS, | 23 | + .bdrv_child_perm = bdrv_format_default_perms, |
28 | - sinfo.sectors_avail, &hd_qiov); | 24 | }; |
29 | + ret = bdrv_co_preadv(bs->file, sinfo.file_offset, | 25 | |
30 | + sinfo.sectors_avail * BDRV_SECTOR_SIZE, | 26 | static void aio_ret_cb(void *opaque, int ret) |
31 | + &hd_qiov, 0); | 27 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
32 | qemu_co_mutex_lock(&s->lock); | 28 | *aio_ret = ret; |
33 | if (ret < 0) { | 29 | } |
34 | goto exit; | 30 | |
35 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, | 31 | -static void test_drv_cb_drain_all(void) |
36 | } | 32 | +enum drain_type { |
37 | /* block exists, so we can just overwrite it */ | 33 | + BDRV_DRAIN_ALL, |
38 | qemu_co_mutex_unlock(&s->lock); | 34 | + BDRV_DRAIN, |
39 | - ret = bdrv_co_writev(bs->file, | 35 | +}; |
40 | - sinfo.file_offset >> BDRV_SECTOR_BITS, | 36 | + |
41 | - sectors_to_write, &hd_qiov); | 37 | +static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
42 | + ret = bdrv_co_pwritev(bs->file, sinfo.file_offset, | 38 | +{ |
43 | + sectors_to_write * BDRV_SECTOR_SIZE, | 39 | + switch (drain_type) { |
44 | + &hd_qiov, 0); | 40 | + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; |
45 | qemu_co_mutex_lock(&s->lock); | 41 | + case BDRV_DRAIN: bdrv_drained_begin(bs); break; |
46 | if (ret < 0) { | 42 | + default: g_assert_not_reached(); |
47 | goto error_bat_restore; | 43 | + } |
44 | +} | ||
45 | + | ||
46 | +static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) | ||
47 | +{ | ||
48 | + switch (drain_type) { | ||
49 | + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; | ||
50 | + case BDRV_DRAIN: bdrv_drained_end(bs); break; | ||
51 | + default: g_assert_not_reached(); | ||
52 | + } | ||
53 | +} | ||
54 | + | ||
55 | +static void test_drv_cb_common(enum drain_type drain_type, bool recursive) | ||
56 | { | ||
57 | BlockBackend *blk; | ||
58 | - BlockDriverState *bs; | ||
59 | - BDRVTestState *s; | ||
60 | + BlockDriverState *bs, *backing; | ||
61 | + BDRVTestState *s, *backing_s; | ||
62 | BlockAIOCB *acb; | ||
63 | int aio_ret; | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) | ||
66 | s = bs->opaque; | ||
67 | blk_insert_bs(blk, bs, &error_abort); | ||
68 | |||
69 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
70 | + backing_s = backing->opaque; | ||
71 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
72 | + | ||
73 | /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ | ||
74 | g_assert_cmpint(s->drain_count, ==, 0); | ||
75 | - bdrv_drain_all_begin(); | ||
76 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
77 | + | ||
78 | + do_drain_begin(drain_type, bs); | ||
79 | + | ||
80 | g_assert_cmpint(s->drain_count, ==, 1); | ||
81 | - bdrv_drain_all_end(); | ||
82 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); | ||
83 | + | ||
84 | + do_drain_end(drain_type, bs); | ||
85 | + | ||
86 | g_assert_cmpint(s->drain_count, ==, 0); | ||
87 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
88 | |||
89 | /* Now do the same while a request is pending */ | ||
90 | aio_ret = -EINPROGRESS; | ||
91 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void) | ||
92 | g_assert_cmpint(aio_ret, ==, -EINPROGRESS); | ||
93 | |||
94 | g_assert_cmpint(s->drain_count, ==, 0); | ||
95 | - bdrv_drain_all_begin(); | ||
96 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
97 | + | ||
98 | + do_drain_begin(drain_type, bs); | ||
99 | + | ||
100 | g_assert_cmpint(aio_ret, ==, 0); | ||
101 | g_assert_cmpint(s->drain_count, ==, 1); | ||
102 | - bdrv_drain_all_end(); | ||
103 | + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); | ||
104 | + | ||
105 | + do_drain_end(drain_type, bs); | ||
106 | + | ||
107 | g_assert_cmpint(s->drain_count, ==, 0); | ||
108 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
109 | |||
110 | + bdrv_unref(backing); | ||
111 | bdrv_unref(bs); | ||
112 | blk_unref(blk); | ||
113 | } | ||
114 | |||
115 | +static void test_drv_cb_drain_all(void) | ||
116 | +{ | ||
117 | + test_drv_cb_common(BDRV_DRAIN_ALL, true); | ||
118 | +} | ||
119 | + | ||
120 | +static void test_drv_cb_drain(void) | ||
121 | +{ | ||
122 | + test_drv_cb_common(BDRV_DRAIN, false); | ||
123 | +} | ||
124 | + | ||
125 | int main(int argc, char **argv) | ||
126 | { | ||
127 | bdrv_init(); | ||
128 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
129 | g_test_init(&argc, &argv, NULL); | ||
130 | |||
131 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); | ||
132 | + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); | ||
133 | |||
134 | return g_test_run(); | ||
135 | } | ||
48 | -- | 136 | -- |
49 | 2.13.6 | 137 | 2.13.6 |
50 | 138 | ||
51 | 139 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | This is currently only working correctly for bdrv_drain(), not for |
---|---|---|---|
2 | bdrv_drain_all(). Leave a comment for the drain_all case, we'll address | ||
3 | it later. | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Make the change for the internals of the qcow | ||
5 | driver write function, by iterating over offset/bytes instead of | ||
6 | sector_num/nb_sectors, and with a rename of index_in_cluster and | ||
7 | repurposing of n to track bytes instead of sectors. | ||
8 | |||
9 | A later patch will then switch the qcow driver as a whole over | ||
10 | to byte-based operation. | ||
11 | |||
12 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
13 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 6 | --- |
16 | block/qcow.c | 36 +++++++++++++++++------------------- | 7 | tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ |
17 | 1 file changed, 17 insertions(+), 19 deletions(-) | 8 | 1 file changed, 45 insertions(+) |
18 | 9 | ||
19 | diff --git a/block/qcow.c b/block/qcow.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
20 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/qcow.c | 12 | --- a/tests/test-bdrv-drain.c |
22 | +++ b/block/qcow.c | 13 | +++ b/tests/test-bdrv-drain.c |
23 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, | 14 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) |
24 | int flags) | 15 | test_drv_cb_common(BDRV_DRAIN, false); |
16 | } | ||
17 | |||
18 | +static void test_quiesce_common(enum drain_type drain_type, bool recursive) | ||
19 | +{ | ||
20 | + BlockBackend *blk; | ||
21 | + BlockDriverState *bs, *backing; | ||
22 | + | ||
23 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
24 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, | ||
25 | + &error_abort); | ||
26 | + blk_insert_bs(blk, bs, &error_abort); | ||
27 | + | ||
28 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
29 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
30 | + | ||
31 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
32 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
33 | + | ||
34 | + do_drain_begin(drain_type, bs); | ||
35 | + | ||
36 | + g_assert_cmpint(bs->quiesce_counter, ==, 1); | ||
37 | + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); | ||
38 | + | ||
39 | + do_drain_end(drain_type, bs); | ||
40 | + | ||
41 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
42 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
43 | + | ||
44 | + bdrv_unref(backing); | ||
45 | + bdrv_unref(bs); | ||
46 | + blk_unref(blk); | ||
47 | +} | ||
48 | + | ||
49 | +static void test_quiesce_drain_all(void) | ||
50 | +{ | ||
51 | + // XXX drain_all doesn't quiesce | ||
52 | + //test_quiesce_common(BDRV_DRAIN_ALL, true); | ||
53 | +} | ||
54 | + | ||
55 | +static void test_quiesce_drain(void) | ||
56 | +{ | ||
57 | + test_quiesce_common(BDRV_DRAIN, false); | ||
58 | +} | ||
59 | + | ||
60 | int main(int argc, char **argv) | ||
25 | { | 61 | { |
26 | BDRVQcowState *s = bs->opaque; | 62 | bdrv_init(); |
27 | - int index_in_cluster; | 63 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
28 | + int offset_in_cluster; | 64 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
29 | uint64_t cluster_offset; | 65 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); |
30 | int ret = 0, n; | 66 | |
31 | struct iovec hd_iov; | 67 | + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
32 | QEMUIOVector hd_qiov; | 68 | + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); |
33 | uint8_t *buf; | 69 | + |
34 | void *orig_buf; | 70 | return g_test_run(); |
35 | + int64_t offset = sector_num * BDRV_SECTOR_SIZE; | 71 | } |
36 | + int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; | ||
37 | |||
38 | assert(!flags); | ||
39 | s->cluster_cache_offset = -1; /* disable compressed cache */ | ||
40 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
41 | |||
42 | qemu_co_mutex_lock(&s->lock); | ||
43 | |||
44 | - while (nb_sectors != 0) { | ||
45 | - | ||
46 | - index_in_cluster = sector_num & (s->cluster_sectors - 1); | ||
47 | - n = s->cluster_sectors - index_in_cluster; | ||
48 | - if (n > nb_sectors) { | ||
49 | - n = nb_sectors; | ||
50 | + while (bytes != 0) { | ||
51 | + offset_in_cluster = offset & (s->cluster_size - 1); | ||
52 | + n = s->cluster_size - offset_in_cluster; | ||
53 | + if (n > bytes) { | ||
54 | + n = bytes; | ||
55 | } | ||
56 | - ret = get_cluster_offset(bs, sector_num << 9, 1, 0, | ||
57 | - index_in_cluster << 9, | ||
58 | - (index_in_cluster + n) << 9, &cluster_offset); | ||
59 | + ret = get_cluster_offset(bs, offset, 1, 0, offset_in_cluster, | ||
60 | + offset_in_cluster + n, &cluster_offset); | ||
61 | if (ret < 0) { | ||
62 | break; | ||
63 | } | ||
64 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
65 | } | ||
66 | if (bs->encrypted) { | ||
67 | assert(s->crypto); | ||
68 | - if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE, | ||
69 | - buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { | ||
70 | + if (qcrypto_block_encrypt(s->crypto, offset, buf, n, NULL) < 0) { | ||
71 | ret = -EIO; | ||
72 | break; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | hd_iov.iov_base = (void *)buf; | ||
77 | - hd_iov.iov_len = n * 512; | ||
78 | + hd_iov.iov_len = n; | ||
79 | qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); | ||
80 | qemu_co_mutex_unlock(&s->lock); | ||
81 | BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | ||
82 | - ret = bdrv_co_writev(bs->file, | ||
83 | - (cluster_offset >> 9) + index_in_cluster, | ||
84 | - n, &hd_qiov); | ||
85 | + ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, | ||
86 | + n, &hd_qiov, 0); | ||
87 | qemu_co_mutex_lock(&s->lock); | ||
88 | if (ret < 0) { | ||
89 | break; | ||
90 | } | ||
91 | ret = 0; | ||
92 | |||
93 | - nb_sectors -= n; | ||
94 | - sector_num += n; | ||
95 | - buf += n * 512; | ||
96 | + bytes -= n; | ||
97 | + offset += n; | ||
98 | + buf += n; | ||
99 | } | ||
100 | qemu_co_mutex_unlock(&s->lock); | ||
101 | |||
102 | -- | 72 | -- |
103 | 2.13.6 | 73 | 2.13.6 |
104 | 74 | ||
105 | 75 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | Block jobs already paused themselves when their main BlockBackend |
---|---|---|---|
2 | entered a drained section. This is not good enough: We also want to | ||
3 | pause a block job so that it cannot submit new requests if, for example, the | ||
4 | mirror target node should be drained. | ||
2 | 5 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 6 | This implements .drained_begin/end callbacks in child_job in order to |
4 | byte-based. Make the change for the internal helper function | 7 | consider all block nodes related to the job, and removes the |
5 | get_cluster_offset(), by changing n_start and n_end to be byte | 8 | BlockBackend callbacks which are unnecessary now because the root of the |
6 | offsets rather than sector indices within the cluster being | 9 | job main BlockBackend is always referenced with a child_job, too. |
7 | allocated. However, assert that these values are still | ||
8 | sector-aligned (at least qcrypto_block_encrypt() still wants that). | ||
9 | For now we get that alignment for free because we still use | ||
10 | sector-based driver callbacks. | ||
11 | 10 | ||
12 | A later patch will then switch the qcow driver as a whole over | ||
13 | to byte-based operation; but will still leave things at sector | ||
14 | alignments as it is not worth auditing the qcow image format | ||
15 | to worry about sub-sector requests. | ||
16 | |||
17 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
18 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 12 | --- |
21 | block/qcow.c | 29 +++++++++++++++-------------- | 13 | blockjob.c | 22 +++++++++------------- |
22 | 1 file changed, 15 insertions(+), 14 deletions(-) | 14 | 1 file changed, 9 insertions(+), 13 deletions(-) |
23 | 15 | ||
24 | diff --git a/block/qcow.c b/block/qcow.c | 16 | diff --git a/blockjob.c b/blockjob.c |
25 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/block/qcow.c | 18 | --- a/blockjob.c |
27 | +++ b/block/qcow.c | 19 | +++ b/blockjob.c |
28 | @@ -XXX,XX +XXX,XX @@ static int qcow_reopen_prepare(BDRVReopenState *state, | 20 | @@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c) |
29 | * | 21 | job->id); |
30 | * 0 to not allocate. | 22 | } |
31 | * | 23 | |
32 | - * 1 to allocate a normal cluster (for sector indexes 'n_start' to | 24 | -static const BdrvChildRole child_job = { |
33 | - * 'n_end') | 25 | - .get_parent_desc = child_job_get_parent_desc, |
34 | + * 1 to allocate a normal cluster (for sector-aligned byte offsets 'n_start' | 26 | - .stay_at_node = true, |
35 | + * to 'n_end' within the cluster) | 27 | -}; |
36 | * | 28 | - |
37 | * 2 to allocate a compressed cluster of size | 29 | -static void block_job_drained_begin(void *opaque) |
38 | * 'compressed_size'. 'compressed_size' must be > 0 and < | 30 | +static void child_job_drained_begin(BdrvChild *c) |
39 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs, | 31 | { |
40 | if (!allocate) | 32 | - BlockJob *job = opaque; |
41 | return 0; | 33 | + BlockJob *job = c->opaque; |
42 | BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); | 34 | block_job_pause(job); |
43 | + assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE)); | 35 | } |
44 | /* allocate a new cluster */ | 36 | |
45 | if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && | 37 | -static void block_job_drained_end(void *opaque) |
46 | - (n_end - n_start) < s->cluster_sectors) { | 38 | +static void child_job_drained_end(BdrvChild *c) |
47 | + (n_end - n_start) < s->cluster_size) { | 39 | { |
48 | /* if the cluster is already compressed, we must | 40 | - BlockJob *job = opaque; |
49 | decompress it in the case it is not completely | 41 | + BlockJob *job = c->opaque; |
50 | overwritten */ | 42 | block_job_resume(job); |
51 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs, | 43 | } |
52 | /* if encrypted, we must initialize the cluster | 44 | |
53 | content which won't be written */ | 45 | -static const BlockDevOps block_job_dev_ops = { |
54 | if (bs->encrypted && | 46 | - .drained_begin = block_job_drained_begin, |
55 | - (n_end - n_start) < s->cluster_sectors) { | 47 | - .drained_end = block_job_drained_end, |
56 | - uint64_t start_sect; | 48 | +static const BdrvChildRole child_job = { |
57 | + (n_end - n_start) < s->cluster_size) { | 49 | + .get_parent_desc = child_job_get_parent_desc, |
58 | + uint64_t start_offset; | 50 | + .drained_begin = child_job_drained_begin, |
59 | assert(s->crypto); | 51 | + .drained_end = child_job_drained_end, |
60 | - start_sect = (offset & ~(s->cluster_size - 1)) >> 9; | 52 | + .stay_at_node = true, |
61 | - for(i = 0; i < s->cluster_sectors; i++) { | 53 | }; |
62 | + start_offset = offset & ~(s->cluster_size - 1); | 54 | |
63 | + for (i = 0; i < s->cluster_size; i += BDRV_SECTOR_SIZE) { | 55 | void block_job_remove_all_bdrv(BlockJob *job) |
64 | if (i < n_start || i >= n_end) { | 56 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, |
65 | - memset(s->cluster_data, 0x00, 512); | 57 | block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); |
66 | + memset(s->cluster_data, 0x00, BDRV_SECTOR_SIZE); | 58 | bs->job = job; |
67 | if (qcrypto_block_encrypt(s->crypto, | 59 | |
68 | - (start_sect + i) * | 60 | - blk_set_dev_ops(blk, &block_job_dev_ops, job); |
69 | - BDRV_SECTOR_SIZE, | 61 | bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); |
70 | + start_offset + i, | 62 | |
71 | s->cluster_data, | 63 | QLIST_INSERT_HEAD(&block_jobs, job, job_list); |
72 | BDRV_SECTOR_SIZE, | ||
73 | NULL) < 0) { | ||
74 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs, | ||
75 | } | ||
76 | BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | ||
77 | ret = bdrv_pwrite(bs->file, | ||
78 | - cluster_offset + i * 512, | ||
79 | - s->cluster_data, 512); | ||
80 | + cluster_offset + i, | ||
81 | + s->cluster_data, | ||
82 | + BDRV_SECTOR_SIZE); | ||
83 | if (ret < 0) { | ||
84 | return ret; | ||
85 | } | ||
86 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
87 | n = nb_sectors; | ||
88 | } | ||
89 | ret = get_cluster_offset(bs, sector_num << 9, 1, 0, | ||
90 | - index_in_cluster, | ||
91 | - index_in_cluster + n, &cluster_offset); | ||
92 | + index_in_cluster << 9, | ||
93 | + (index_in_cluster + n) << 9, &cluster_offset); | ||
94 | if (ret < 0) { | ||
95 | break; | ||
96 | } | ||
97 | -- | 64 | -- |
98 | 2.13.6 | 65 | 2.13.6 |
99 | 66 | ||
100 | 67 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | Block jobs must be paused if any of the involved nodes are drained. |
---|---|---|---|
2 | 2 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. Make the change for the last few sector-based calls | ||
5 | into the block layer from the parallels driver. | ||
6 | |||
7 | Ideally, the parallels driver should switch to doing everything | ||
8 | byte-based, but that's a more invasive change that requires a | ||
9 | bit more auditing. | ||
10 | |||
11 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
12 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
14 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
16 | --- | 4 | --- |
17 | block/parallels.c | 16 ++++++++++------ | 5 | tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ |
18 | 1 file changed, 10 insertions(+), 6 deletions(-) | 6 | 1 file changed, 121 insertions(+) |
19 | 7 | ||
20 | diff --git a/block/parallels.c b/block/parallels.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
21 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/block/parallels.c | 10 | --- a/tests/test-bdrv-drain.c |
23 | +++ b/block/parallels.c | 11 | +++ b/tests/test-bdrv-drain.c |
24 | @@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, | 12 | @@ -XXX,XX +XXX,XX @@ |
25 | }; | 13 | |
26 | qemu_iovec_init_external(&qiov, &iov, 1); | 14 | #include "qemu/osdep.h" |
27 | 15 | #include "block/block.h" | |
28 | - ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors, | 16 | +#include "block/blockjob_int.h" |
29 | - &qiov); | 17 | #include "sysemu/block-backend.h" |
30 | + ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, | 18 | #include "qapi/error.h" |
31 | + nb_cow_bytes, &qiov, 0); | 19 | |
32 | if (ret < 0) { | 20 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) |
33 | qemu_vfree(iov.iov_base); | 21 | test_quiesce_common(BDRV_DRAIN, false); |
34 | return ret; | 22 | } |
35 | } | 23 | |
36 | 24 | + | |
37 | - ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov); | 25 | +typedef struct TestBlockJob { |
38 | + ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, | 26 | + BlockJob common; |
39 | + nb_cow_bytes, &qiov, 0); | 27 | + bool should_complete; |
40 | qemu_vfree(iov.iov_base); | 28 | +} TestBlockJob; |
41 | if (ret < 0) { | 29 | + |
42 | return ret; | 30 | +static void test_job_completed(BlockJob *job, void *opaque) |
43 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs, | 31 | +{ |
44 | qemu_iovec_reset(&hd_qiov); | 32 | + block_job_completed(job, 0); |
45 | qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); | 33 | +} |
46 | 34 | + | |
47 | - ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); | 35 | +static void coroutine_fn test_job_start(void *opaque) |
48 | + ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes, | 36 | +{ |
49 | + &hd_qiov, 0); | 37 | + TestBlockJob *s = opaque; |
50 | if (ret < 0) { | 38 | + |
51 | break; | 39 | + while (!s->should_complete) { |
52 | } | 40 | + block_job_sleep_ns(&s->common, 100000); |
53 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, | 41 | + } |
54 | 42 | + | |
55 | if (position < 0) { | 43 | + block_job_defer_to_main_loop(&s->common, test_job_completed, NULL); |
56 | if (bs->backing) { | 44 | +} |
57 | - ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); | 45 | + |
58 | + ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE, | 46 | +static void test_job_complete(BlockJob *job, Error **errp) |
59 | + nbytes, &hd_qiov, 0); | 47 | +{ |
60 | if (ret < 0) { | 48 | + TestBlockJob *s = container_of(job, TestBlockJob, common); |
61 | break; | 49 | + s->should_complete = true; |
62 | } | 50 | +} |
63 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, | 51 | + |
64 | qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); | 52 | +BlockJobDriver test_job_driver = { |
65 | } | 53 | + .instance_size = sizeof(TestBlockJob), |
66 | } else { | 54 | + .start = test_job_start, |
67 | - ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); | 55 | + .complete = test_job_complete, |
68 | + ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes, | 56 | +}; |
69 | + &hd_qiov, 0); | 57 | + |
70 | if (ret < 0) { | 58 | +static void test_blockjob_common(enum drain_type drain_type) |
71 | break; | 59 | +{ |
72 | } | 60 | + BlockBackend *blk_src, *blk_target; |
61 | + BlockDriverState *src, *target; | ||
62 | + BlockJob *job; | ||
63 | + int ret; | ||
64 | + | ||
65 | + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, | ||
66 | + &error_abort); | ||
67 | + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
68 | + blk_insert_bs(blk_src, src, &error_abort); | ||
69 | + | ||
70 | + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, | ||
71 | + &error_abort); | ||
72 | + blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
73 | + blk_insert_bs(blk_target, target, &error_abort); | ||
74 | + | ||
75 | + job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0, | ||
76 | + 0, NULL, NULL, &error_abort); | ||
77 | + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); | ||
78 | + block_job_start(job); | ||
79 | + | ||
80 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
81 | + g_assert_false(job->paused); | ||
82 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
83 | + | ||
84 | + do_drain_begin(drain_type, src); | ||
85 | + | ||
86 | + if (drain_type == BDRV_DRAIN_ALL) { | ||
87 | + /* bdrv_drain_all() drains both src and target, and involves an | ||
88 | + * additional block_job_pause_all() */ | ||
89 | + g_assert_cmpint(job->pause_count, ==, 3); | ||
90 | + } else { | ||
91 | + g_assert_cmpint(job->pause_count, ==, 1); | ||
92 | + } | ||
93 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
94 | + /* g_assert_true(job->paused); */ | ||
95 | + g_assert_false(job->busy); /* The job is paused */ | ||
96 | + | ||
97 | + do_drain_end(drain_type, src); | ||
98 | + | ||
99 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
100 | + g_assert_false(job->paused); | ||
101 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
102 | + | ||
103 | + do_drain_begin(drain_type, target); | ||
104 | + | ||
105 | + if (drain_type == BDRV_DRAIN_ALL) { | ||
106 | + /* bdrv_drain_all() drains both src and target, and involves an | ||
107 | + * additional block_job_pause_all() */ | ||
108 | + g_assert_cmpint(job->pause_count, ==, 3); | ||
109 | + } else { | ||
110 | + g_assert_cmpint(job->pause_count, ==, 1); | ||
111 | + } | ||
112 | + /* XXX We don't wait until the job is actually paused. Is this okay? */ | ||
113 | + /* g_assert_true(job->paused); */ | ||
114 | + g_assert_false(job->busy); /* The job is paused */ | ||
115 | + | ||
116 | + do_drain_end(drain_type, target); | ||
117 | + | ||
118 | + g_assert_cmpint(job->pause_count, ==, 0); | ||
119 | + g_assert_false(job->paused); | ||
120 | + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ | ||
121 | + | ||
122 | + ret = block_job_complete_sync(job, &error_abort); | ||
123 | + g_assert_cmpint(ret, ==, 0); | ||
124 | + | ||
125 | + blk_unref(blk_src); | ||
126 | + blk_unref(blk_target); | ||
127 | + bdrv_unref(src); | ||
128 | + bdrv_unref(target); | ||
129 | +} | ||
130 | + | ||
131 | +static void test_blockjob_drain_all(void) | ||
132 | +{ | ||
133 | + test_blockjob_common(BDRV_DRAIN_ALL); | ||
134 | +} | ||
135 | + | ||
136 | +static void test_blockjob_drain(void) | ||
137 | +{ | ||
138 | + test_blockjob_common(BDRV_DRAIN); | ||
139 | +} | ||
140 | + | ||
141 | int main(int argc, char **argv) | ||
142 | { | ||
143 | bdrv_init(); | ||
144 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
145 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
146 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
147 | |||
148 | + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
149 | + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
150 | + | ||
151 | return g_test_run(); | ||
152 | } | ||
73 | -- | 153 | -- |
74 | 2.13.6 | 154 | 2.13.6 |
75 | 155 | ||
76 | 156 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | Block jobs are already paused using the BdrvChildRole drain callbacks, |
---|---|---|---|
2 | so we don't need an additional block_job_pause_all() call. | ||
2 | 3 | ||
3 | Not updating src_offset will result in wrong data being written to dst | ||
4 | image. | ||
5 | |||
6 | Reported-by: Max Reitz <mreitz@redhat.com> | ||
7 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 5 | --- |
10 | block/qcow2.c | 1 + | 6 | block/io.c | 4 ---- |
11 | tests/qemu-iotests/063 | 9 +++++++++ | 7 | tests/test-bdrv-drain.c | 10 ++++------ |
12 | tests/qemu-iotests/063.out | 12 ++++++++++++ | 8 | 2 files changed, 4 insertions(+), 10 deletions(-) |
13 | 3 files changed, 22 insertions(+) | ||
14 | 9 | ||
15 | diff --git a/block/qcow2.c b/block/qcow2.c | 10 | diff --git a/block/io.c b/block/io.c |
16 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/qcow2.c | 12 | --- a/block/io.c |
18 | +++ b/block/qcow2.c | 13 | +++ b/block/io.c |
19 | @@ -XXX,XX +XXX,XX @@ qcow2_co_copy_range_to(BlockDriverState *bs, | 14 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
20 | } | 15 | * context. */ |
21 | 16 | assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | |
22 | bytes -= cur_bytes; | 17 | |
23 | + src_offset += cur_bytes; | 18 | - block_job_pause_all(); |
24 | dst_offset += cur_bytes; | 19 | - |
20 | for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { | ||
21 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
22 | |||
23 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) | ||
24 | aio_enable_external(aio_context); | ||
25 | aio_context_release(aio_context); | ||
25 | } | 26 | } |
26 | ret = 0; | 27 | - |
27 | diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063 | 28 | - block_job_resume_all(); |
28 | index XXXXXXX..XXXXXXX 100755 | 29 | } |
29 | --- a/tests/qemu-iotests/063 | 30 | |
30 | +++ b/tests/qemu-iotests/063 | 31 | void bdrv_drain_all(void) |
31 | @@ -XXX,XX +XXX,XX @@ if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n "$TEST_IMG.orig" "$TEST_IMG" >/dev | 32 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
32 | exit 1 | ||
33 | fi | ||
34 | |||
35 | +echo "== Regression testing for copy offloading bug ==" | ||
36 | + | ||
37 | +_make_test_img 1M | ||
38 | +TEST_IMG="$TEST_IMG.target" _make_test_img 1M | ||
39 | +$QEMU_IO -c 'write -P 1 0 512k' -c 'write -P 2 512k 512k' "$TEST_IMG" | _filter_qemu_io | ||
40 | +$QEMU_IO -c 'write -P 4 512k 512k' -c 'write -P 3 0 512k' "$TEST_IMG.target" | _filter_qemu_io | ||
41 | +$QEMU_IMG convert -n -O $IMGFMT "$TEST_IMG" "$TEST_IMG.target" | ||
42 | +$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.target" | ||
43 | + | ||
44 | echo "*** done" | ||
45 | rm -f $seq.full | ||
46 | status=0 | ||
47 | diff --git a/tests/qemu-iotests/063.out b/tests/qemu-iotests/063.out | ||
48 | index XXXXXXX..XXXXXXX 100644 | 33 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/tests/qemu-iotests/063.out | 34 | --- a/tests/test-bdrv-drain.c |
50 | +++ b/tests/qemu-iotests/063.out | 35 | +++ b/tests/test-bdrv-drain.c |
51 | @@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 | 36 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) |
52 | No errors were found on the image. | 37 | do_drain_begin(drain_type, src); |
53 | == Testing conversion to a smaller file fails == | 38 | |
54 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152 | 39 | if (drain_type == BDRV_DRAIN_ALL) { |
55 | +== Regression testing for copy offloading bug == | 40 | - /* bdrv_drain_all() drains both src and target, and involves an |
56 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 | 41 | - * additional block_job_pause_all() */ |
57 | +Formatting 'TEST_DIR/t.IMGFMT.target', fmt=IMGFMT size=1048576 | 42 | - g_assert_cmpint(job->pause_count, ==, 3); |
58 | +wrote 524288/524288 bytes at offset 0 | 43 | + /* bdrv_drain_all() drains both src and target */ |
59 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 44 | + g_assert_cmpint(job->pause_count, ==, 2); |
60 | +wrote 524288/524288 bytes at offset 524288 | 45 | } else { |
61 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 46 | g_assert_cmpint(job->pause_count, ==, 1); |
62 | +wrote 524288/524288 bytes at offset 524288 | 47 | } |
63 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 48 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type) |
64 | +wrote 524288/524288 bytes at offset 0 | 49 | do_drain_begin(drain_type, target); |
65 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 50 | |
66 | +Images are identical. | 51 | if (drain_type == BDRV_DRAIN_ALL) { |
67 | *** done | 52 | - /* bdrv_drain_all() drains both src and target, and involves an |
53 | - * additional block_job_pause_all() */ | ||
54 | - g_assert_cmpint(job->pause_count, ==, 3); | ||
55 | + /* bdrv_drain_all() drains both src and target */ | ||
56 | + g_assert_cmpint(job->pause_count, ==, 2); | ||
57 | } else { | ||
58 | g_assert_cmpint(job->pause_count, ==, 1); | ||
59 | } | ||
68 | -- | 60 | -- |
69 | 2.13.6 | 61 | 2.13.6 |
70 | 62 | ||
71 | 63 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | bdrv_do_drained_begin() restricts the call of parent callbacks and |
---|---|---|---|
2 | aio_disable_external() to the outermost drain section, but the block | ||
3 | driver callbacks are always called. bdrv_do_drained_end() must match | ||
4 | this behaviour, otherwise nodes stay drained even if begin/end calls | ||
5 | were balanced. | ||
2 | 6 | ||
3 | in_flight and tracked requests need to be tracked in every layer during | ||
4 | recursion. For now the only user is qemu-img convert where overlapping | ||
5 | requests and IOThreads don't exist, therefore this change doesn't make | ||
6 | much difference from the user's point of view, but it is incorrect as part of | ||
7 | the API. Fix it. | ||
8 | |||
9 | Reported-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 8 | --- |
14 | block/io.c | 59 ++++++++++++++++++++++++++++------------------------------- | 9 | block/io.c | 12 +++++++----- |
15 | 1 file changed, 28 insertions(+), 31 deletions(-) | 10 | 1 file changed, 7 insertions(+), 5 deletions(-) |
16 | 11 | ||
17 | diff --git a/block/io.c b/block/io.c | 12 | diff --git a/block/io.c b/block/io.c |
18 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/io.c | 14 | --- a/block/io.c |
20 | +++ b/block/io.c | 15 | +++ b/block/io.c |
21 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, | 16 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs) |
22 | BdrvRequestFlags flags, | 17 | |
23 | bool recurse_src) | 18 | void bdrv_drained_end(BlockDriverState *bs) |
24 | { | 19 | { |
25 | + BdrvTrackedRequest src_req, dst_req; | 20 | + int old_quiesce_counter; |
26 | + BlockDriverState *src_bs = src->bs; | 21 | + |
27 | + BlockDriverState *dst_bs = dst->bs; | 22 | if (qemu_in_coroutine()) { |
28 | int ret; | 23 | bdrv_co_yield_to_drain(bs, false); |
29 | 24 | return; | |
30 | if (!src || !dst || !src->bs || !dst->bs) { | ||
31 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, | ||
32 | || src->bs->encrypted || dst->bs->encrypted) { | ||
33 | return -ENOTSUP; | ||
34 | } | 25 | } |
35 | + bdrv_inc_in_flight(src_bs); | 26 | assert(bs->quiesce_counter > 0); |
36 | + bdrv_inc_in_flight(dst_bs); | 27 | - if (atomic_fetch_dec(&bs->quiesce_counter) > 1) { |
37 | + tracked_request_begin(&src_req, src_bs, src_offset, | 28 | - return; |
38 | + bytes, BDRV_TRACKED_READ); | 29 | - } |
39 | + tracked_request_begin(&dst_req, dst_bs, dst_offset, | 30 | + old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); |
40 | + bytes, BDRV_TRACKED_WRITE); | 31 | |
41 | + | 32 | /* Re-enable things in child-to-parent order */ |
42 | + wait_serialising_requests(&src_req); | 33 | bdrv_drain_invoke(bs, false, false); |
43 | + wait_serialising_requests(&dst_req); | 34 | - bdrv_parent_drained_end(bs); |
44 | if (recurse_src) { | 35 | - aio_enable_external(bdrv_get_aio_context(bs)); |
45 | - return src->bs->drv->bdrv_co_copy_range_from(src->bs, | 36 | + if (old_quiesce_counter == 1) { |
46 | - src, src_offset, | 37 | + bdrv_parent_drained_end(bs); |
47 | - dst, dst_offset, | 38 | + aio_enable_external(bdrv_get_aio_context(bs)); |
48 | - bytes, flags); | 39 | + } |
49 | + ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, | ||
50 | + src, src_offset, | ||
51 | + dst, dst_offset, | ||
52 | + bytes, flags); | ||
53 | } else { | ||
54 | - return dst->bs->drv->bdrv_co_copy_range_to(dst->bs, | ||
55 | - src, src_offset, | ||
56 | - dst, dst_offset, | ||
57 | - bytes, flags); | ||
58 | + ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs, | ||
59 | + src, src_offset, | ||
60 | + dst, dst_offset, | ||
61 | + bytes, flags); | ||
62 | } | ||
63 | + tracked_request_end(&src_req); | ||
64 | + tracked_request_end(&dst_req); | ||
65 | + bdrv_dec_in_flight(src_bs); | ||
66 | + bdrv_dec_in_flight(dst_bs); | ||
67 | + return ret; | ||
68 | } | 40 | } |
69 | 41 | ||
70 | /* Copy range from @src to @dst. | 42 | /* |
71 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, | ||
72 | BdrvChild *dst, uint64_t dst_offset, | ||
73 | uint64_t bytes, BdrvRequestFlags flags) | ||
74 | { | ||
75 | - BdrvTrackedRequest src_req, dst_req; | ||
76 | - BlockDriverState *src_bs = src->bs; | ||
77 | - BlockDriverState *dst_bs = dst->bs; | ||
78 | - int ret; | ||
79 | - | ||
80 | - bdrv_inc_in_flight(src_bs); | ||
81 | - bdrv_inc_in_flight(dst_bs); | ||
82 | - tracked_request_begin(&src_req, src_bs, src_offset, | ||
83 | - bytes, BDRV_TRACKED_READ); | ||
84 | - tracked_request_begin(&dst_req, dst_bs, dst_offset, | ||
85 | - bytes, BDRV_TRACKED_WRITE); | ||
86 | - | ||
87 | - wait_serialising_requests(&src_req); | ||
88 | - wait_serialising_requests(&dst_req); | ||
89 | - ret = bdrv_co_copy_range_from(src, src_offset, | ||
90 | - dst, dst_offset, | ||
91 | - bytes, flags); | ||
92 | - | ||
93 | - tracked_request_end(&src_req); | ||
94 | - tracked_request_end(&dst_req); | ||
95 | - bdrv_dec_in_flight(src_bs); | ||
96 | - bdrv_dec_in_flight(dst_bs); | ||
97 | - return ret; | ||
98 | + return bdrv_co_copy_range_from(src, src_offset, | ||
99 | + dst, dst_offset, | ||
100 | + bytes, flags); | ||
101 | } | ||
102 | |||
103 | static void bdrv_parent_cb_resize(BlockDriverState *bs) | ||
104 | -- | 43 | -- |
105 | 2.13.6 | 44 | 2.13.6 |
106 | 45 | ||
107 | 46 | diff view generated by jsdifflib |
1 | From: Markus Armbruster <armbru@redhat.com> | ||
---|---|---|---|
2 | |||
3 | Coverity can't see that qobject_input_visitor_new_flat_confused() | ||
4 | returns non-null when it doesn't set @local_err. Check the return | ||
5 | value instead, like all the other callers do. | ||
6 | |||
7 | Fixes: CID 1393615 | ||
8 | Fixes: CID 1393616 | ||
9 | Signed-off-by: Markus Armbruster <armbru@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 2 | --- |
12 | block/crypto.c | 4 ++-- | 3 | tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ |
13 | 1 file changed, 2 insertions(+), 2 deletions(-) | 4 | 1 file changed, 57 insertions(+) |
14 | 5 | ||
15 | diff --git a/block/crypto.c b/block/crypto.c | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
16 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/crypto.c | 8 | --- a/tests/test-bdrv-drain.c |
18 | +++ b/block/crypto.c | 9 | +++ b/tests/test-bdrv-drain.c |
19 | @@ -XXX,XX +XXX,XX @@ block_crypto_open_opts_init(QCryptoBlockFormat format, | 10 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
20 | ret->format = format; | 11 | enum drain_type { |
21 | 12 | BDRV_DRAIN_ALL, | |
22 | v = qobject_input_visitor_new_flat_confused(opts, &local_err); | 13 | BDRV_DRAIN, |
23 | - if (local_err) { | 14 | + DRAIN_TYPE_MAX, |
24 | + if (!v) { | 15 | }; |
25 | goto out; | 16 | |
26 | } | 17 | static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) |
27 | 18 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) | |
28 | @@ -XXX,XX +XXX,XX @@ block_crypto_create_opts_init(QCryptoBlockFormat format, | 19 | test_quiesce_common(BDRV_DRAIN, false); |
29 | ret->format = format; | 20 | } |
30 | 21 | ||
31 | v = qobject_input_visitor_new_flat_confused(opts, &local_err); | 22 | +static void test_nested(void) |
32 | - if (local_err) { | 23 | +{ |
33 | + if (!v) { | 24 | + BlockBackend *blk; |
34 | goto out; | 25 | + BlockDriverState *bs, *backing; |
35 | } | 26 | + BDRVTestState *s, *backing_s; |
27 | + enum drain_type outer, inner; | ||
28 | + | ||
29 | + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
30 | + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, | ||
31 | + &error_abort); | ||
32 | + s = bs->opaque; | ||
33 | + blk_insert_bs(blk, bs, &error_abort); | ||
34 | + | ||
35 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
36 | + backing_s = backing->opaque; | ||
37 | + bdrv_set_backing_hd(bs, backing, &error_abort); | ||
38 | + | ||
39 | + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { | ||
40 | + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { | ||
41 | + /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ | ||
42 | + int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
43 | + (inner != BDRV_DRAIN_ALL); | ||
44 | + int backing_quiesce = 0; | ||
45 | + int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
46 | + (inner != BDRV_DRAIN); | ||
47 | + | ||
48 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
49 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
50 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
51 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
52 | + | ||
53 | + do_drain_begin(outer, bs); | ||
54 | + do_drain_begin(inner, bs); | ||
55 | + | ||
56 | + g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); | ||
57 | + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); | ||
58 | + g_assert_cmpint(s->drain_count, ==, 2); | ||
59 | + g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); | ||
60 | + | ||
61 | + do_drain_end(inner, bs); | ||
62 | + do_drain_end(outer, bs); | ||
63 | + | ||
64 | + g_assert_cmpint(bs->quiesce_counter, ==, 0); | ||
65 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
66 | + g_assert_cmpint(s->drain_count, ==, 0); | ||
67 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
68 | + } | ||
69 | + } | ||
70 | + | ||
71 | + bdrv_unref(backing); | ||
72 | + bdrv_unref(bs); | ||
73 | + blk_unref(blk); | ||
74 | +} | ||
75 | + | ||
76 | |||
77 | typedef struct TestBlockJob { | ||
78 | BlockJob common; | ||
79 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
80 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); | ||
81 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | ||
82 | |||
83 | + g_test_add_func("/bdrv-drain/nested", test_nested); | ||
84 | + | ||
85 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
86 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
36 | 87 | ||
37 | -- | 88 | -- |
38 | 2.13.6 | 89 | 2.13.6 |
39 | 90 | ||
40 | 91 | diff view generated by jsdifflib |
1 | bdrv_truncate() is an operation that can block (even for quite a long | 1 | This is in preparation for subtree drains, i.e. drained sections that |
---|---|---|---|
2 | time, depending on the PreallocMode) in I/O paths that shouldn't block. | 2 | affect not only a single node, but recursively all child nodes, too. |
3 | Convert it to a coroutine_fn so that we have the infrastructure for | 3 | |
4 | drivers to make their .bdrv_co_truncate implementation asynchronous. | 4 | Calling the parent callbacks for drain is pointless when we just came |
5 | 5 | from that parent node recursively and leads to multiple increases of | |
6 | This change could potentially introduce new race conditions because | 6 | bs->quiesce_counter in a single drain call. Don't do it. |
7 | bdrv_truncate() isn't necessarily executed atomically any more. Whether | 7 | |
8 | this is a problem needs to be evaluated for each block driver that | 8 | In order for this to work correctly, the parent callback must be called |
9 | supports truncate: | 9 | for every bdrv_drain_begin/end() call, not only for the outermost one: |
10 | 10 | ||
11 | * file-posix/win32, gluster, iscsi, nfs, rbd, ssh, sheepdog: The | 11 | If we have a node N with two parents A and B, recursive draining of A |
12 | protocol drivers are trivially safe because they don't actually yield | 12 | should cause the quiesce_counter of B to increase because its child N is |
13 | yet, so there is no change in behaviour. | 13 | drained independently of B. If now B is recursively drained, too, A must |
14 | 14 | increase its quiesce_counter because N is drained independently of A | |
15 | * copy-on-read, crypto, raw-format: Essentially just filter drivers that | 15 | only now, even if N is going from quiesce_counter 1 to 2. |
16 | pass the request to a child node, no problem. | ||
17 | |||
18 | * qcow2: The implementation modifies metadata, so it needs to hold | ||
19 | s->lock to be safe with concurrent I/O requests. In order to avoid | ||
20 | double locking, this requires pulling the locking out into | ||
21 | preallocate_co() and using qcow2_write_caches() instead of | ||
22 | bdrv_flush(). | ||
23 | |||
24 | * qed: Does a single header update, this is fine without locking. | ||
25 | 16 | ||
26 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
27 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
28 | --- | 18 | --- |
29 | include/block/block.h | 4 +++ | 19 | include/block/block.h | 4 ++-- |
30 | include/block/block_int.h | 4 +-- | 20 | block.c | 13 +++++++++---- |
31 | block.c | 63 +++++++++++++++++++++++++++++++++++----- | 21 | block/io.c | 47 ++++++++++++++++++++++++++++++++++------------- |
32 | block/copy-on-read.c | 8 ++--- | 22 | 3 files changed, 45 insertions(+), 19 deletions(-) |
33 | block/crypto.c | 9 +++--- | ||
34 | block/file-posix.c | 12 ++++---- | ||
35 | block/file-win32.c | 6 ++-- | ||
36 | block/gluster.c | 14 +++++---- | ||
37 | block/iscsi.c | 8 ++--- | ||
38 | block/nfs.c | 7 +++-- | ||
39 | block/qcow2.c | 74 ++++++++++++++++++++++++++++------------------- | ||
40 | block/qed.c | 8 +++-- | ||
41 | block/raw-format.c | 8 ++--- | ||
42 | block/rbd.c | 8 +++-- | ||
43 | block/sheepdog.c | 12 ++++---- | ||
44 | block/ssh.c | 6 ++-- | ||
45 | 16 files changed, 162 insertions(+), 89 deletions(-) | ||
46 | 23 | ||
47 | diff --git a/include/block/block.h b/include/block/block.h | 24 | diff --git a/include/block/block.h b/include/block/block.h |
48 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/include/block/block.h | 26 | --- a/include/block/block.h |
50 | +++ b/include/block/block.h | 27 | +++ b/include/block/block.h |
51 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, | 28 | @@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs); |
52 | BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, | 29 | * Begin a quiesced section of all users of @bs. This is part of |
53 | const char *backing_file); | 30 | * bdrv_drained_begin. |
54 | void bdrv_refresh_filename(BlockDriverState *bs); | 31 | */ |
55 | + | 32 | -void bdrv_parent_drained_begin(BlockDriverState *bs); |
56 | +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | 33 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); |
57 | + PreallocMode prealloc, Error **errp); | 34 | |
58 | int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | 35 | /** |
59 | Error **errp); | 36 | * bdrv_parent_drained_end: |
60 | + | 37 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs); |
61 | int64_t bdrv_nb_sectors(BlockDriverState *bs); | 38 | * End a quiesced section of all users of @bs. This is part of |
62 | int64_t bdrv_getlength(BlockDriverState *bs); | 39 | * bdrv_drained_end. |
63 | int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); | 40 | */ |
64 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 41 | -void bdrv_parent_drained_end(BlockDriverState *bs); |
65 | index XXXXXXX..XXXXXXX 100644 | 42 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
66 | --- a/include/block/block_int.h | 43 | |
67 | +++ b/include/block/block_int.h | 44 | /** |
68 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | 45 | * bdrv_drained_begin: |
69 | * bdrv_parse_filename. | ||
70 | */ | ||
71 | const char *protocol_name; | ||
72 | - int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset, | ||
73 | - PreallocMode prealloc, Error **errp); | ||
74 | + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, | ||
75 | + PreallocMode prealloc, Error **errp); | ||
76 | |||
77 | int64_t (*bdrv_getlength)(BlockDriverState *bs); | ||
78 | bool has_variable_length; | ||
79 | diff --git a/block.c b/block.c | 46 | diff --git a/block.c b/block.c |
80 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
81 | --- a/block.c | 48 | --- a/block.c |
82 | +++ b/block.c | 49 | +++ b/block.c |
83 | @@ -XXX,XX +XXX,XX @@ exit: | 50 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
84 | /** | 51 | BlockDriverState *new_bs) |
85 | * Truncate file to 'offset' bytes (needed only for file protocols) | 52 | { |
86 | */ | 53 | BlockDriverState *old_bs = child->bs; |
87 | -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | 54 | + int i; |
88 | - Error **errp) | 55 | |
89 | +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | 56 | if (old_bs && new_bs) { |
90 | + PreallocMode prealloc, Error **errp) | 57 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); |
91 | { | 58 | } |
92 | BlockDriverState *bs = child->bs; | 59 | if (old_bs) { |
93 | BlockDriver *drv = bs->drv; | 60 | if (old_bs->quiesce_counter && child->role->drained_end) { |
94 | @@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | 61 | - child->role->drained_end(child); |
95 | return -EINVAL; | 62 | + for (i = 0; i < old_bs->quiesce_counter; i++) { |
96 | } | 63 | + child->role->drained_end(child); |
97 | 64 | + } | |
98 | - if (!drv->bdrv_truncate) { | 65 | } |
99 | + bdrv_inc_in_flight(bs); | 66 | if (child->role->detach) { |
67 | child->role->detach(child); | ||
68 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
69 | if (new_bs) { | ||
70 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); | ||
71 | if (new_bs->quiesce_counter && child->role->drained_begin) { | ||
72 | - child->role->drained_begin(child); | ||
73 | + for (i = 0; i < new_bs->quiesce_counter; i++) { | ||
74 | + child->role->drained_begin(child); | ||
75 | + } | ||
76 | } | ||
77 | |||
78 | if (child->role->attach) { | ||
79 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | ||
80 | AioContext *ctx = bdrv_get_aio_context(bs); | ||
81 | |||
82 | aio_disable_external(ctx); | ||
83 | - bdrv_parent_drained_begin(bs); | ||
84 | + bdrv_parent_drained_begin(bs, NULL); | ||
85 | bdrv_drain(bs); /* ensure there are no in-flight requests */ | ||
86 | |||
87 | while (aio_poll(ctx, false)) { | ||
88 | @@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) | ||
89 | */ | ||
90 | aio_context_acquire(new_context); | ||
91 | bdrv_attach_aio_context(bs, new_context); | ||
92 | - bdrv_parent_drained_end(bs); | ||
93 | + bdrv_parent_drained_end(bs, NULL); | ||
94 | aio_enable_external(ctx); | ||
95 | aio_context_release(new_context); | ||
96 | } | ||
97 | diff --git a/block/io.c b/block/io.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/block/io.c | ||
100 | +++ b/block/io.c | ||
101 | @@ -XXX,XX +XXX,XX @@ | ||
102 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | ||
103 | int64_t offset, int bytes, BdrvRequestFlags flags); | ||
104 | |||
105 | -void bdrv_parent_drained_begin(BlockDriverState *bs) | ||
106 | +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) | ||
107 | { | ||
108 | BdrvChild *c, *next; | ||
109 | |||
110 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | ||
111 | + if (c == ignore) { | ||
112 | + continue; | ||
113 | + } | ||
114 | if (c->role->drained_begin) { | ||
115 | c->role->drained_begin(c); | ||
116 | } | ||
117 | } | ||
118 | } | ||
119 | |||
120 | -void bdrv_parent_drained_end(BlockDriverState *bs) | ||
121 | +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) | ||
122 | { | ||
123 | BdrvChild *c, *next; | ||
124 | |||
125 | QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { | ||
126 | + if (c == ignore) { | ||
127 | + continue; | ||
128 | + } | ||
129 | if (c->role->drained_end) { | ||
130 | c->role->drained_end(c); | ||
131 | } | ||
132 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
133 | BlockDriverState *bs; | ||
134 | bool done; | ||
135 | bool begin; | ||
136 | + BdrvChild *parent; | ||
137 | } BdrvCoDrainData; | ||
138 | |||
139 | static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) | ||
141 | return waited; | ||
142 | } | ||
143 | |||
144 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); | ||
145 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); | ||
100 | + | 146 | + |
101 | + if (!drv->bdrv_co_truncate) { | 147 | static void bdrv_co_drain_bh_cb(void *opaque) |
102 | if (bs->file && drv->is_filter) { | 148 | { |
103 | - return bdrv_truncate(bs->file, offset, prealloc, errp); | 149 | BdrvCoDrainData *data = opaque; |
104 | + ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); | 150 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
105 | + goto out; | 151 | |
106 | } | 152 | bdrv_dec_in_flight(bs); |
107 | error_setg(errp, "Image format driver does not support resize"); | 153 | if (data->begin) { |
108 | - return -ENOTSUP; | 154 | - bdrv_drained_begin(bs); |
109 | + ret = -ENOTSUP; | 155 | + bdrv_do_drained_begin(bs, data->parent); |
110 | + goto out; | 156 | } else { |
111 | } | 157 | - bdrv_drained_end(bs); |
112 | if (bs->read_only) { | 158 | + bdrv_do_drained_end(bs, data->parent); |
113 | error_setg(errp, "Image is read-only"); | 159 | } |
114 | - return -EACCES; | 160 | |
115 | + ret = -EACCES; | 161 | data->done = true; |
116 | + goto out; | 162 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
117 | } | 163 | } |
118 | 164 | ||
119 | assert(!(bs->open_flags & BDRV_O_INACTIVE)); | 165 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
120 | 166 | - bool begin) | |
121 | - ret = drv->bdrv_truncate(bs, offset, prealloc, errp); | 167 | + bool begin, BdrvChild *parent) |
122 | + ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); | 168 | { |
123 | if (ret < 0) { | 169 | BdrvCoDrainData data; |
124 | - return ret; | 170 | |
125 | + goto out; | 171 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
126 | } | 172 | .bs = bs, |
127 | ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); | 173 | .done = false, |
128 | if (ret < 0) { | 174 | .begin = begin, |
129 | @@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | 175 | + .parent = parent, |
130 | bdrv_dirty_bitmap_truncate(bs, offset); | 176 | }; |
131 | bdrv_parent_cb_resize(bs); | 177 | bdrv_inc_in_flight(bs); |
132 | atomic_inc(&bs->write_gen); | 178 | aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), |
133 | + | 179 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
134 | +out: | 180 | assert(data.done); |
135 | + bdrv_dec_in_flight(bs); | 181 | } |
136 | return ret; | 182 | |
137 | } | 183 | -void bdrv_drained_begin(BlockDriverState *bs) |
138 | 184 | +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | |
139 | +typedef struct TruncateCo { | 185 | { |
140 | + BdrvChild *child; | 186 | if (qemu_in_coroutine()) { |
141 | + int64_t offset; | 187 | - bdrv_co_yield_to_drain(bs, true); |
142 | + PreallocMode prealloc; | 188 | + bdrv_co_yield_to_drain(bs, true, parent); |
143 | + Error **errp; | 189 | return; |
144 | + int ret; | 190 | } |
145 | +} TruncateCo; | 191 | |
146 | + | 192 | /* Stop things in parent-to-child order */ |
147 | +static void coroutine_fn bdrv_truncate_co_entry(void *opaque) | 193 | if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { |
194 | aio_disable_external(bdrv_get_aio_context(bs)); | ||
195 | - bdrv_parent_drained_begin(bs); | ||
196 | } | ||
197 | |||
198 | + bdrv_parent_drained_begin(bs, parent); | ||
199 | bdrv_drain_invoke(bs, true, false); | ||
200 | bdrv_drain_recurse(bs); | ||
201 | } | ||
202 | |||
203 | -void bdrv_drained_end(BlockDriverState *bs) | ||
204 | +void bdrv_drained_begin(BlockDriverState *bs) | ||
148 | +{ | 205 | +{ |
149 | + TruncateCo *tco = opaque; | 206 | + bdrv_do_drained_begin(bs, NULL); |
150 | + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, | ||
151 | + tco->errp); | ||
152 | +} | 207 | +} |
153 | + | 208 | + |
154 | +int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | 209 | +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) |
155 | + Error **errp) | 210 | { |
211 | int old_quiesce_counter; | ||
212 | |||
213 | if (qemu_in_coroutine()) { | ||
214 | - bdrv_co_yield_to_drain(bs, false); | ||
215 | + bdrv_co_yield_to_drain(bs, false, parent); | ||
216 | return; | ||
217 | } | ||
218 | assert(bs->quiesce_counter > 0); | ||
219 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs) | ||
220 | |||
221 | /* Re-enable things in child-to-parent order */ | ||
222 | bdrv_drain_invoke(bs, false, false); | ||
223 | + bdrv_parent_drained_end(bs, parent); | ||
224 | if (old_quiesce_counter == 1) { | ||
225 | - bdrv_parent_drained_end(bs); | ||
226 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
227 | } | ||
228 | } | ||
229 | |||
230 | +void bdrv_drained_end(BlockDriverState *bs) | ||
156 | +{ | 231 | +{ |
157 | + Coroutine *co; | 232 | + bdrv_do_drained_end(bs, NULL); |
158 | + TruncateCo tco = { | ||
159 | + .child = child, | ||
160 | + .offset = offset, | ||
161 | + .prealloc = prealloc, | ||
162 | + .errp = errp, | ||
163 | + .ret = NOT_DONE, | ||
164 | + }; | ||
165 | + | ||
166 | + if (qemu_in_coroutine()) { | ||
167 | + /* Fast-path if already in coroutine context */ | ||
168 | + bdrv_truncate_co_entry(&tco); | ||
169 | + } else { | ||
170 | + co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); | ||
171 | + qemu_coroutine_enter(co); | ||
172 | + BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); | ||
173 | + } | ||
174 | + | ||
175 | + return tco.ret; | ||
176 | +} | 233 | +} |
177 | + | 234 | + |
178 | /** | 235 | /* |
179 | * Length of an allocated file in bytes. Sparse files are counted by actual | 236 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
180 | * allocated space. Return < 0 if error or unknown. | 237 | * and suspend block driver's internal I/O until next request arrives. |
181 | diff --git a/block/copy-on-read.c b/block/copy-on-read.c | 238 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
182 | index XXXXXXX..XXXXXXX 100644 | 239 | /* Stop things in parent-to-child order */ |
183 | --- a/block/copy-on-read.c | 240 | aio_context_acquire(aio_context); |
184 | +++ b/block/copy-on-read.c | 241 | aio_disable_external(aio_context); |
185 | @@ -XXX,XX +XXX,XX @@ static int64_t cor_getlength(BlockDriverState *bs) | 242 | - bdrv_parent_drained_begin(bs); |
186 | } | 243 | + bdrv_parent_drained_begin(bs, NULL); |
187 | 244 | bdrv_drain_invoke(bs, true, true); | |
188 | 245 | aio_context_release(aio_context); | |
189 | -static int cor_truncate(BlockDriverState *bs, int64_t offset, | 246 | |
190 | - PreallocMode prealloc, Error **errp) | 247 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void) |
191 | +static int coroutine_fn cor_co_truncate(BlockDriverState *bs, int64_t offset, | 248 | /* Re-enable things in child-to-parent order */ |
192 | + PreallocMode prealloc, Error **errp) | 249 | aio_context_acquire(aio_context); |
193 | { | 250 | bdrv_drain_invoke(bs, false, true); |
194 | - return bdrv_truncate(bs->file, offset, prealloc, errp); | 251 | - bdrv_parent_drained_end(bs); |
195 | + return bdrv_co_truncate(bs->file, offset, prealloc, errp); | 252 | + bdrv_parent_drained_end(bs, NULL); |
196 | } | 253 | aio_enable_external(aio_context); |
197 | 254 | aio_context_release(aio_context); | |
198 | 255 | } | |
199 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_copy_on_read = { | ||
200 | .bdrv_child_perm = cor_child_perm, | ||
201 | |||
202 | .bdrv_getlength = cor_getlength, | ||
203 | - .bdrv_truncate = cor_truncate, | ||
204 | + .bdrv_co_truncate = cor_co_truncate, | ||
205 | |||
206 | .bdrv_co_preadv = cor_co_preadv, | ||
207 | .bdrv_co_pwritev = cor_co_pwritev, | ||
208 | diff --git a/block/crypto.c b/block/crypto.c | ||
209 | index XXXXXXX..XXXXXXX 100644 | ||
210 | --- a/block/crypto.c | ||
211 | +++ b/block/crypto.c | ||
212 | @@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs, | ||
213 | return ret; | ||
214 | } | ||
215 | |||
216 | -static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, | ||
217 | - PreallocMode prealloc, Error **errp) | ||
218 | +static int coroutine_fn | ||
219 | +block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, | ||
220 | + PreallocMode prealloc, Error **errp) | ||
221 | { | ||
222 | BlockCrypto *crypto = bs->opaque; | ||
223 | uint64_t payload_offset = | ||
224 | @@ -XXX,XX +XXX,XX @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, | ||
225 | |||
226 | offset += payload_offset; | ||
227 | |||
228 | - return bdrv_truncate(bs->file, offset, prealloc, errp); | ||
229 | + return bdrv_co_truncate(bs->file, offset, prealloc, errp); | ||
230 | } | ||
231 | |||
232 | static void block_crypto_close(BlockDriverState *bs) | ||
233 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_crypto_luks = { | ||
234 | .bdrv_child_perm = bdrv_format_default_perms, | ||
235 | .bdrv_co_create = block_crypto_co_create_luks, | ||
236 | .bdrv_co_create_opts = block_crypto_co_create_opts_luks, | ||
237 | - .bdrv_truncate = block_crypto_truncate, | ||
238 | + .bdrv_co_truncate = block_crypto_co_truncate, | ||
239 | .create_opts = &block_crypto_create_opts_luks, | ||
240 | |||
241 | .bdrv_reopen_prepare = block_crypto_reopen_prepare, | ||
242 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/block/file-posix.c | ||
245 | +++ b/block/file-posix.c | ||
246 | @@ -XXX,XX +XXX,XX @@ out: | ||
247 | return result; | ||
248 | } | ||
249 | |||
250 | -static int raw_truncate(BlockDriverState *bs, int64_t offset, | ||
251 | - PreallocMode prealloc, Error **errp) | ||
252 | +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
253 | + PreallocMode prealloc, Error **errp) | ||
254 | { | ||
255 | BDRVRawState *s = bs->opaque; | ||
256 | struct stat st; | ||
257 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
258 | .bdrv_io_unplug = raw_aio_unplug, | ||
259 | .bdrv_attach_aio_context = raw_aio_attach_aio_context, | ||
260 | |||
261 | - .bdrv_truncate = raw_truncate, | ||
262 | + .bdrv_co_truncate = raw_co_truncate, | ||
263 | .bdrv_getlength = raw_getlength, | ||
264 | .bdrv_get_info = raw_get_info, | ||
265 | .bdrv_get_allocated_file_size | ||
266 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = { | ||
267 | .bdrv_io_plug = raw_aio_plug, | ||
268 | .bdrv_io_unplug = raw_aio_unplug, | ||
269 | |||
270 | - .bdrv_truncate = raw_truncate, | ||
271 | + .bdrv_co_truncate = raw_co_truncate, | ||
272 | .bdrv_getlength = raw_getlength, | ||
273 | .bdrv_get_info = raw_get_info, | ||
274 | .bdrv_get_allocated_file_size | ||
275 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
276 | .bdrv_io_plug = raw_aio_plug, | ||
277 | .bdrv_io_unplug = raw_aio_unplug, | ||
278 | |||
279 | - .bdrv_truncate = raw_truncate, | ||
280 | + .bdrv_co_truncate = raw_co_truncate, | ||
281 | .bdrv_getlength = raw_getlength, | ||
282 | .has_variable_length = true, | ||
283 | .bdrv_get_allocated_file_size | ||
284 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = { | ||
285 | .bdrv_io_plug = raw_aio_plug, | ||
286 | .bdrv_io_unplug = raw_aio_unplug, | ||
287 | |||
288 | - .bdrv_truncate = raw_truncate, | ||
289 | + .bdrv_co_truncate = raw_co_truncate, | ||
290 | .bdrv_getlength = raw_getlength, | ||
291 | .has_variable_length = true, | ||
292 | .bdrv_get_allocated_file_size | ||
293 | diff --git a/block/file-win32.c b/block/file-win32.c | ||
294 | index XXXXXXX..XXXXXXX 100644 | ||
295 | --- a/block/file-win32.c | ||
296 | +++ b/block/file-win32.c | ||
297 | @@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs) | ||
298 | } | ||
299 | } | ||
300 | |||
301 | -static int raw_truncate(BlockDriverState *bs, int64_t offset, | ||
302 | - PreallocMode prealloc, Error **errp) | ||
303 | +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
304 | + PreallocMode prealloc, Error **errp) | ||
305 | { | ||
306 | BDRVRawState *s = bs->opaque; | ||
307 | LONG low, high; | ||
308 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
309 | .bdrv_aio_pwritev = raw_aio_pwritev, | ||
310 | .bdrv_aio_flush = raw_aio_flush, | ||
311 | |||
312 | - .bdrv_truncate = raw_truncate, | ||
313 | + .bdrv_co_truncate = raw_co_truncate, | ||
314 | .bdrv_getlength = raw_getlength, | ||
315 | .bdrv_get_allocated_file_size | ||
316 | = raw_get_allocated_file_size, | ||
317 | diff --git a/block/gluster.c b/block/gluster.c | ||
318 | index XXXXXXX..XXXXXXX 100644 | ||
319 | --- a/block/gluster.c | ||
320 | +++ b/block/gluster.c | ||
321 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, | ||
322 | return acb.ret; | ||
323 | } | ||
324 | |||
325 | -static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset, | ||
326 | - PreallocMode prealloc, Error **errp) | ||
327 | +static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, | ||
328 | + int64_t offset, | ||
329 | + PreallocMode prealloc, | ||
330 | + Error **errp) | ||
331 | { | ||
332 | BDRVGlusterState *s = bs->opaque; | ||
333 | return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp); | ||
334 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster = { | ||
335 | .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
336 | .bdrv_getlength = qemu_gluster_getlength, | ||
337 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
338 | - .bdrv_truncate = qemu_gluster_truncate, | ||
339 | + .bdrv_co_truncate = qemu_gluster_co_truncate, | ||
340 | .bdrv_co_readv = qemu_gluster_co_readv, | ||
341 | .bdrv_co_writev = qemu_gluster_co_writev, | ||
342 | .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, | ||
343 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_tcp = { | ||
344 | .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
345 | .bdrv_getlength = qemu_gluster_getlength, | ||
346 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
347 | - .bdrv_truncate = qemu_gluster_truncate, | ||
348 | + .bdrv_co_truncate = qemu_gluster_co_truncate, | ||
349 | .bdrv_co_readv = qemu_gluster_co_readv, | ||
350 | .bdrv_co_writev = qemu_gluster_co_writev, | ||
351 | .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, | ||
352 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_unix = { | ||
353 | .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
354 | .bdrv_getlength = qemu_gluster_getlength, | ||
355 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
356 | - .bdrv_truncate = qemu_gluster_truncate, | ||
357 | + .bdrv_co_truncate = qemu_gluster_co_truncate, | ||
358 | .bdrv_co_readv = qemu_gluster_co_readv, | ||
359 | .bdrv_co_writev = qemu_gluster_co_writev, | ||
360 | .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, | ||
361 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_rdma = { | ||
362 | .bdrv_co_create_opts = qemu_gluster_co_create_opts, | ||
363 | .bdrv_getlength = qemu_gluster_getlength, | ||
364 | .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, | ||
365 | - .bdrv_truncate = qemu_gluster_truncate, | ||
366 | + .bdrv_co_truncate = qemu_gluster_co_truncate, | ||
367 | .bdrv_co_readv = qemu_gluster_co_readv, | ||
368 | .bdrv_co_writev = qemu_gluster_co_writev, | ||
369 | .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, | ||
370 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
371 | index XXXXXXX..XXXXXXX 100644 | ||
372 | --- a/block/iscsi.c | ||
373 | +++ b/block/iscsi.c | ||
374 | @@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) | ||
375 | } | ||
376 | } | ||
377 | |||
378 | -static int iscsi_truncate(BlockDriverState *bs, int64_t offset, | ||
379 | - PreallocMode prealloc, Error **errp) | ||
380 | +static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, | ||
381 | + PreallocMode prealloc, Error **errp) | ||
382 | { | ||
383 | IscsiLun *iscsilun = bs->opaque; | ||
384 | Error *local_err = NULL; | ||
385 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = { | ||
386 | |||
387 | .bdrv_getlength = iscsi_getlength, | ||
388 | .bdrv_get_info = iscsi_get_info, | ||
389 | - .bdrv_truncate = iscsi_truncate, | ||
390 | + .bdrv_co_truncate = iscsi_co_truncate, | ||
391 | .bdrv_refresh_limits = iscsi_refresh_limits, | ||
392 | |||
393 | .bdrv_co_block_status = iscsi_co_block_status, | ||
394 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = { | ||
395 | |||
396 | .bdrv_getlength = iscsi_getlength, | ||
397 | .bdrv_get_info = iscsi_get_info, | ||
398 | - .bdrv_truncate = iscsi_truncate, | ||
399 | + .bdrv_co_truncate = iscsi_co_truncate, | ||
400 | .bdrv_refresh_limits = iscsi_refresh_limits, | ||
401 | |||
402 | .bdrv_co_block_status = iscsi_co_block_status, | ||
403 | diff --git a/block/nfs.c b/block/nfs.c | ||
404 | index XXXXXXX..XXXXXXX 100644 | ||
405 | --- a/block/nfs.c | ||
406 | +++ b/block/nfs.c | ||
407 | @@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) | ||
408 | return (task.ret < 0 ? task.ret : st.st_blocks * 512); | ||
409 | } | ||
410 | |||
411 | -static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, | ||
412 | - PreallocMode prealloc, Error **errp) | ||
413 | +static int coroutine_fn | ||
414 | +nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, | ||
415 | + PreallocMode prealloc, Error **errp) | ||
416 | { | ||
417 | NFSClient *client = bs->opaque; | ||
418 | int ret; | ||
419 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nfs = { | ||
420 | |||
421 | .bdrv_has_zero_init = nfs_has_zero_init, | ||
422 | .bdrv_get_allocated_file_size = nfs_get_allocated_file_size, | ||
423 | - .bdrv_truncate = nfs_file_truncate, | ||
424 | + .bdrv_co_truncate = nfs_file_co_truncate, | ||
425 | |||
426 | .bdrv_file_open = nfs_file_open, | ||
427 | .bdrv_close = nfs_file_close, | ||
428 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
429 | index XXXXXXX..XXXXXXX 100644 | ||
430 | --- a/block/qcow2.c | ||
431 | +++ b/block/qcow2.c | ||
432 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn preallocate_co(void *opaque) | ||
433 | BlockDriverState *bs = params->bs; | ||
434 | uint64_t offset = params->offset; | ||
435 | uint64_t new_length = params->new_length; | ||
436 | - BDRVQcow2State *s = bs->opaque; | ||
437 | uint64_t bytes; | ||
438 | uint64_t host_offset = 0; | ||
439 | unsigned int cur_bytes; | ||
440 | int ret; | ||
441 | QCowL2Meta *meta; | ||
442 | |||
443 | - qemu_co_mutex_lock(&s->lock); | ||
444 | - | ||
445 | assert(offset <= new_length); | ||
446 | bytes = new_length - offset; | ||
447 | |||
448 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn preallocate_co(void *opaque) | ||
449 | ret = 0; | ||
450 | |||
451 | done: | ||
452 | - qemu_co_mutex_unlock(&s->lock); | ||
453 | params->ret = ret; | ||
454 | } | ||
455 | |||
456 | @@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) | ||
457 | |||
458 | /* And if we're supposed to preallocate metadata, do that now */ | ||
459 | if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { | ||
460 | + BDRVQcow2State *s = blk_bs(blk)->opaque; | ||
461 | + qemu_co_mutex_lock(&s->lock); | ||
462 | ret = preallocate(blk_bs(blk), 0, qcow2_opts->size); | ||
463 | + qemu_co_mutex_unlock(&s->lock); | ||
464 | + | ||
465 | if (ret < 0) { | ||
466 | error_setg_errno(errp, -ret, "Could not preallocate metadata"); | ||
467 | goto out; | ||
468 | @@ -XXX,XX +XXX,XX @@ fail: | ||
469 | return ret; | ||
470 | } | ||
471 | |||
472 | -static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
473 | - PreallocMode prealloc, Error **errp) | ||
474 | +static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, | ||
475 | + PreallocMode prealloc, Error **errp) | ||
476 | { | ||
477 | BDRVQcow2State *s = bs->opaque; | ||
478 | uint64_t old_length; | ||
479 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
480 | return -EINVAL; | ||
481 | } | ||
482 | |||
483 | + qemu_co_mutex_lock(&s->lock); | ||
484 | + | ||
485 | /* cannot proceed if image has snapshots */ | ||
486 | if (s->nb_snapshots) { | ||
487 | error_setg(errp, "Can't resize an image which has snapshots"); | ||
488 | - return -ENOTSUP; | ||
489 | + ret = -ENOTSUP; | ||
490 | + goto fail; | ||
491 | } | ||
492 | |||
493 | /* cannot proceed if image has bitmaps */ | ||
494 | if (s->nb_bitmaps) { | ||
495 | /* TODO: resize bitmaps in the image */ | ||
496 | error_setg(errp, "Can't resize an image which has bitmaps"); | ||
497 | - return -ENOTSUP; | ||
498 | + ret = -ENOTSUP; | ||
499 | + goto fail; | ||
500 | } | ||
501 | |||
502 | old_length = bs->total_sectors * 512; | ||
503 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
504 | if (prealloc != PREALLOC_MODE_OFF) { | ||
505 | error_setg(errp, | ||
506 | "Preallocation can't be used for shrinking an image"); | ||
507 | - return -EINVAL; | ||
508 | + ret = -EINVAL; | ||
509 | + goto fail; | ||
510 | } | ||
511 | |||
512 | ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), | ||
513 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
514 | QCOW2_DISCARD_ALWAYS, true); | ||
515 | if (ret < 0) { | ||
516 | error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); | ||
517 | - return ret; | ||
518 | + goto fail; | ||
519 | } | ||
520 | |||
521 | ret = qcow2_shrink_l1_table(bs, new_l1_size); | ||
522 | if (ret < 0) { | ||
523 | error_setg_errno(errp, -ret, | ||
524 | "Failed to reduce the number of L2 tables"); | ||
525 | - return ret; | ||
526 | + goto fail; | ||
527 | } | ||
528 | |||
529 | ret = qcow2_shrink_reftable(bs); | ||
530 | if (ret < 0) { | ||
531 | error_setg_errno(errp, -ret, | ||
532 | "Failed to discard unused refblocks"); | ||
533 | - return ret; | ||
534 | + goto fail; | ||
535 | } | ||
536 | |||
537 | old_file_size = bdrv_getlength(bs->file->bs); | ||
538 | if (old_file_size < 0) { | ||
539 | error_setg_errno(errp, -old_file_size, | ||
540 | "Failed to inquire current file length"); | ||
541 | - return old_file_size; | ||
542 | + ret = old_file_size; | ||
543 | + goto fail; | ||
544 | } | ||
545 | last_cluster = qcow2_get_last_cluster(bs, old_file_size); | ||
546 | if (last_cluster < 0) { | ||
547 | error_setg_errno(errp, -last_cluster, | ||
548 | "Failed to find the last cluster"); | ||
549 | - return last_cluster; | ||
550 | + ret = last_cluster; | ||
551 | + goto fail; | ||
552 | } | ||
553 | if ((last_cluster + 1) * s->cluster_size < old_file_size) { | ||
554 | Error *local_err = NULL; | ||
555 | |||
556 | - bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, | ||
557 | - PREALLOC_MODE_OFF, &local_err); | ||
558 | + bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, | ||
559 | + PREALLOC_MODE_OFF, &local_err); | ||
560 | if (local_err) { | ||
561 | warn_reportf_err(local_err, | ||
562 | "Failed to truncate the tail of the image: "); | ||
563 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
564 | ret = qcow2_grow_l1_table(bs, new_l1_size, true); | ||
565 | if (ret < 0) { | ||
566 | error_setg_errno(errp, -ret, "Failed to grow the L1 table"); | ||
567 | - return ret; | ||
568 | + goto fail; | ||
569 | } | ||
570 | } | ||
571 | |||
572 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
573 | ret = preallocate(bs, old_length, offset); | ||
574 | if (ret < 0) { | ||
575 | error_setg_errno(errp, -ret, "Preallocation failed"); | ||
576 | - return ret; | ||
577 | + goto fail; | ||
578 | } | ||
579 | break; | ||
580 | |||
581 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
582 | if (old_file_size < 0) { | ||
583 | error_setg_errno(errp, -old_file_size, | ||
584 | "Failed to inquire current file length"); | ||
585 | - return old_file_size; | ||
586 | + ret = old_file_size; | ||
587 | + goto fail; | ||
588 | } | ||
589 | old_file_size = ROUND_UP(old_file_size, s->cluster_size); | ||
590 | |||
591 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
592 | if (allocation_start < 0) { | ||
593 | error_setg_errno(errp, -allocation_start, | ||
594 | "Failed to resize refcount structures"); | ||
595 | - return allocation_start; | ||
596 | + ret = allocation_start; | ||
597 | + goto fail; | ||
598 | } | ||
599 | |||
600 | clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, | ||
601 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
602 | if (clusters_allocated < 0) { | ||
603 | error_setg_errno(errp, -clusters_allocated, | ||
604 | "Failed to allocate data clusters"); | ||
605 | - return clusters_allocated; | ||
606 | + ret = clusters_allocated; | ||
607 | + goto fail; | ||
608 | } | ||
609 | |||
610 | assert(clusters_allocated == nb_new_data_clusters); | ||
611 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
612 | /* Allocate the data area */ | ||
613 | new_file_size = allocation_start + | ||
614 | nb_new_data_clusters * s->cluster_size; | ||
615 | - ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp); | ||
616 | + ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); | ||
617 | if (ret < 0) { | ||
618 | error_prepend(errp, "Failed to resize underlying file: "); | ||
619 | qcow2_free_clusters(bs, allocation_start, | ||
620 | nb_new_data_clusters * s->cluster_size, | ||
621 | QCOW2_DISCARD_OTHER); | ||
622 | - return ret; | ||
623 | + goto fail; | ||
624 | } | ||
625 | |||
626 | /* Create the necessary L2 entries */ | ||
627 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
628 | qcow2_free_clusters(bs, host_offset, | ||
629 | nb_new_data_clusters * s->cluster_size, | ||
630 | QCOW2_DISCARD_OTHER); | ||
631 | - return ret; | ||
632 | + goto fail; | ||
633 | } | ||
634 | |||
635 | guest_offset += nb_clusters * s->cluster_size; | ||
636 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
637 | |||
638 | if (prealloc != PREALLOC_MODE_OFF) { | ||
639 | /* Flush metadata before actually changing the image size */ | ||
640 | - ret = bdrv_flush(bs); | ||
641 | + ret = qcow2_write_caches(bs); | ||
642 | if (ret < 0) { | ||
643 | error_setg_errno(errp, -ret, | ||
644 | "Failed to flush the preallocated area to disk"); | ||
645 | - return ret; | ||
646 | + goto fail; | ||
647 | } | ||
648 | } | ||
649 | |||
650 | @@ -XXX,XX +XXX,XX @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
651 | &offset, sizeof(uint64_t)); | ||
652 | if (ret < 0) { | ||
653 | error_setg_errno(errp, -ret, "Failed to update the image size"); | ||
654 | - return ret; | ||
655 | + goto fail; | ||
656 | } | ||
657 | |||
658 | s->l1_vm_state_index = new_l1_size; | ||
659 | - return 0; | ||
660 | + ret = 0; | ||
661 | +fail: | ||
662 | + qemu_co_mutex_unlock(&s->lock); | ||
663 | + return ret; | ||
664 | } | ||
665 | |||
666 | /* XXX: put compressed sectors first, then all the cluster aligned | ||
667 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
668 | if (cluster_offset < 0) { | ||
669 | return cluster_offset; | ||
670 | } | ||
671 | - return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL); | ||
672 | + return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, | ||
673 | + NULL); | ||
674 | } | ||
675 | |||
676 | if (offset_into_cluster(s, offset)) { | ||
677 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | ||
678 | .bdrv_co_pdiscard = qcow2_co_pdiscard, | ||
679 | .bdrv_co_copy_range_from = qcow2_co_copy_range_from, | ||
680 | .bdrv_co_copy_range_to = qcow2_co_copy_range_to, | ||
681 | - .bdrv_truncate = qcow2_truncate, | ||
682 | + .bdrv_co_truncate = qcow2_co_truncate, | ||
683 | .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, | ||
684 | .bdrv_make_empty = qcow2_make_empty, | ||
685 | |||
686 | diff --git a/block/qed.c b/block/qed.c | ||
687 | index XXXXXXX..XXXXXXX 100644 | ||
688 | --- a/block/qed.c | ||
689 | +++ b/block/qed.c | ||
690 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, | ||
691 | QED_AIOCB_WRITE | QED_AIOCB_ZERO); | ||
692 | } | ||
693 | |||
694 | -static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, | ||
695 | - PreallocMode prealloc, Error **errp) | ||
696 | +static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, | ||
697 | + int64_t offset, | ||
698 | + PreallocMode prealloc, | ||
699 | + Error **errp) | ||
700 | { | ||
701 | BDRVQEDState *s = bs->opaque; | ||
702 | uint64_t old_image_size; | ||
703 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = { | ||
704 | .bdrv_co_readv = bdrv_qed_co_readv, | ||
705 | .bdrv_co_writev = bdrv_qed_co_writev, | ||
706 | .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, | ||
707 | - .bdrv_truncate = bdrv_qed_truncate, | ||
708 | + .bdrv_co_truncate = bdrv_qed_co_truncate, | ||
709 | .bdrv_getlength = bdrv_qed_getlength, | ||
710 | .bdrv_get_info = bdrv_qed_get_info, | ||
711 | .bdrv_refresh_limits = bdrv_qed_refresh_limits, | ||
712 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
713 | index XXXXXXX..XXXXXXX 100644 | ||
714 | --- a/block/raw-format.c | ||
715 | +++ b/block/raw-format.c | ||
716 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) | ||
717 | } | ||
718 | } | ||
719 | |||
720 | -static int raw_truncate(BlockDriverState *bs, int64_t offset, | ||
721 | - PreallocMode prealloc, Error **errp) | ||
722 | +static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, | ||
723 | + PreallocMode prealloc, Error **errp) | ||
724 | { | ||
725 | BDRVRawState *s = bs->opaque; | ||
726 | |||
727 | @@ -XXX,XX +XXX,XX @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, | ||
728 | |||
729 | s->size = offset; | ||
730 | offset += s->offset; | ||
731 | - return bdrv_truncate(bs->file, offset, prealloc, errp); | ||
732 | + return bdrv_co_truncate(bs->file, offset, prealloc, errp); | ||
733 | } | ||
734 | |||
735 | static void raw_eject(BlockDriverState *bs, bool eject_flag) | ||
736 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = { | ||
737 | .bdrv_co_block_status = &raw_co_block_status, | ||
738 | .bdrv_co_copy_range_from = &raw_co_copy_range_from, | ||
739 | .bdrv_co_copy_range_to = &raw_co_copy_range_to, | ||
740 | - .bdrv_truncate = &raw_truncate, | ||
741 | + .bdrv_co_truncate = &raw_co_truncate, | ||
742 | .bdrv_getlength = &raw_getlength, | ||
743 | .has_variable_length = true, | ||
744 | .bdrv_measure = &raw_measure, | ||
745 | diff --git a/block/rbd.c b/block/rbd.c | ||
746 | index XXXXXXX..XXXXXXX 100644 | ||
747 | --- a/block/rbd.c | ||
748 | +++ b/block/rbd.c | ||
749 | @@ -XXX,XX +XXX,XX @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs) | ||
750 | return info.size; | ||
751 | } | ||
752 | |||
753 | -static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, | ||
754 | - PreallocMode prealloc, Error **errp) | ||
755 | +static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, | ||
756 | + int64_t offset, | ||
757 | + PreallocMode prealloc, | ||
758 | + Error **errp) | ||
759 | { | ||
760 | BDRVRBDState *s = bs->opaque; | ||
761 | int r; | ||
762 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = { | ||
763 | .bdrv_get_info = qemu_rbd_getinfo, | ||
764 | .create_opts = &qemu_rbd_create_opts, | ||
765 | .bdrv_getlength = qemu_rbd_getlength, | ||
766 | - .bdrv_truncate = qemu_rbd_truncate, | ||
767 | + .bdrv_co_truncate = qemu_rbd_co_truncate, | ||
768 | .protocol_name = "rbd", | ||
769 | |||
770 | .bdrv_aio_preadv = qemu_rbd_aio_preadv, | ||
771 | diff --git a/block/sheepdog.c b/block/sheepdog.c | ||
772 | index XXXXXXX..XXXXXXX 100644 | ||
773 | --- a/block/sheepdog.c | ||
774 | +++ b/block/sheepdog.c | ||
775 | @@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs) | ||
776 | return s->inode.vdi_size; | ||
777 | } | ||
778 | |||
779 | -static int sd_truncate(BlockDriverState *bs, int64_t offset, | ||
780 | - PreallocMode prealloc, Error **errp) | ||
781 | +static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, | ||
782 | + PreallocMode prealloc, Error **errp) | ||
783 | { | ||
784 | BDRVSheepdogState *s = bs->opaque; | ||
785 | int ret, fd; | ||
786 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
787 | |||
788 | assert(!flags); | ||
789 | if (offset > s->inode.vdi_size) { | ||
790 | - ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); | ||
791 | + ret = sd_co_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); | ||
792 | if (ret < 0) { | ||
793 | return ret; | ||
794 | } | ||
795 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog = { | ||
796 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
797 | .bdrv_getlength = sd_getlength, | ||
798 | .bdrv_get_allocated_file_size = sd_get_allocated_file_size, | ||
799 | - .bdrv_truncate = sd_truncate, | ||
800 | + .bdrv_co_truncate = sd_co_truncate, | ||
801 | |||
802 | .bdrv_co_readv = sd_co_readv, | ||
803 | .bdrv_co_writev = sd_co_writev, | ||
804 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_tcp = { | ||
805 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
806 | .bdrv_getlength = sd_getlength, | ||
807 | .bdrv_get_allocated_file_size = sd_get_allocated_file_size, | ||
808 | - .bdrv_truncate = sd_truncate, | ||
809 | + .bdrv_co_truncate = sd_co_truncate, | ||
810 | |||
811 | .bdrv_co_readv = sd_co_readv, | ||
812 | .bdrv_co_writev = sd_co_writev, | ||
813 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_unix = { | ||
814 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | ||
815 | .bdrv_getlength = sd_getlength, | ||
816 | .bdrv_get_allocated_file_size = sd_get_allocated_file_size, | ||
817 | - .bdrv_truncate = sd_truncate, | ||
818 | + .bdrv_co_truncate = sd_co_truncate, | ||
819 | |||
820 | .bdrv_co_readv = sd_co_readv, | ||
821 | .bdrv_co_writev = sd_co_writev, | ||
822 | diff --git a/block/ssh.c b/block/ssh.c | ||
823 | index XXXXXXX..XXXXXXX 100644 | ||
824 | --- a/block/ssh.c | ||
825 | +++ b/block/ssh.c | ||
826 | @@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs) | ||
827 | return length; | ||
828 | } | ||
829 | |||
830 | -static int ssh_truncate(BlockDriverState *bs, int64_t offset, | ||
831 | - PreallocMode prealloc, Error **errp) | ||
832 | +static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, | ||
833 | + PreallocMode prealloc, Error **errp) | ||
834 | { | ||
835 | BDRVSSHState *s = bs->opaque; | ||
836 | |||
837 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_ssh = { | ||
838 | .bdrv_co_readv = ssh_co_readv, | ||
839 | .bdrv_co_writev = ssh_co_writev, | ||
840 | .bdrv_getlength = ssh_getlength, | ||
841 | - .bdrv_truncate = ssh_truncate, | ||
842 | + .bdrv_co_truncate = ssh_co_truncate, | ||
843 | .bdrv_co_flush_to_disk = ssh_co_flush, | ||
844 | .create_opts = &ssh_create_opts, | ||
845 | }; | ||
846 | -- | 256 | -- |
847 | 2.13.6 | 257 | 2.13.6 |
848 | 258 | ||
849 | 259 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | bdrv_drained_begin() waits for the completion of requests in the whole |
---|---|---|---|
2 | subtree, but it only actually keeps its immediate bs parameter quiesced | ||
3 | until bdrv_drained_end(). | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 5 | Add a version that keeps the whole subtree drained. As of this commit, |
4 | byte-based. Now that all callers of vectored I/O have been converted | 6 | graph changes cannot be allowed during a subtree drained section, but |
5 | to use our preferred byte-based bdrv_co_p{read,write}v(), we can | 7 | this will be fixed soon. |
6 | delete the unused bdrv_co_{read,write}v(). | ||
7 | 8 | ||
8 | Furthermore, this gets rid of the signature difference between the | ||
9 | public bdrv_co_writev() and the callback .bdrv_co_writev (the | ||
10 | latter still exists, because some drivers still need more work | ||
11 | before they are fully byte-based). | ||
12 | |||
13 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
14 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
17 | --- | 10 | --- |
18 | include/block/block.h | 4 ---- | 11 | include/block/block.h | 13 +++++++++++++ |
19 | block/io.c | 36 ------------------------------------ | 12 | block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++----------- |
20 | 2 files changed, 40 deletions(-) | 13 | 2 files changed, 56 insertions(+), 11 deletions(-) |
21 | 14 | ||
22 | diff --git a/include/block/block.h b/include/block/block.h | 15 | diff --git a/include/block/block.h b/include/block/block.h |
23 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/include/block/block.h | 17 | --- a/include/block/block.h |
25 | +++ b/include/block/block.h | 18 | +++ b/include/block/block.h |
26 | @@ -XXX,XX +XXX,XX @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes); | 19 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); |
27 | int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov); | 20 | void bdrv_drained_begin(BlockDriverState *bs); |
28 | int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, | 21 | |
29 | const void *buf, int count); | 22 | /** |
30 | -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, | 23 | + * Like bdrv_drained_begin, but recursively begins a quiesced section for |
31 | - int nb_sectors, QEMUIOVector *qiov); | 24 | + * exclusive access to all child nodes as well. |
32 | -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, | 25 | + * |
33 | - int nb_sectors, QEMUIOVector *qiov); | 26 | + * Graph changes are not allowed during a subtree drain section. |
34 | /* | 27 | + */ |
35 | * Efficiently zero a region of the disk image. Note that this is a regular | 28 | +void bdrv_subtree_drained_begin(BlockDriverState *bs); |
36 | * I/O request like read or write and should have a reasonable size. This | 29 | + |
30 | +/** | ||
31 | * bdrv_drained_end: | ||
32 | * | ||
33 | * End a quiescent section started by bdrv_drained_begin(). | ||
34 | */ | ||
35 | void bdrv_drained_end(BlockDriverState *bs); | ||
36 | |||
37 | +/** | ||
38 | + * End a quiescent section started by bdrv_subtree_drained_begin(). | ||
39 | + */ | ||
40 | +void bdrv_subtree_drained_end(BlockDriverState *bs); | ||
41 | + | ||
42 | void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, | ||
43 | Error **errp); | ||
44 | void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); | ||
37 | diff --git a/block/io.c b/block/io.c | 45 | diff --git a/block/io.c b/block/io.c |
38 | index XXXXXXX..XXXXXXX 100644 | 46 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/block/io.c | 47 | --- a/block/io.c |
40 | +++ b/block/io.c | 48 | +++ b/block/io.c |
41 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, | 49 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
42 | return ret; | 50 | BlockDriverState *bs; |
51 | bool done; | ||
52 | bool begin; | ||
53 | + bool recursive; | ||
54 | BdrvChild *parent; | ||
55 | } BdrvCoDrainData; | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs) | ||
58 | return waited; | ||
43 | } | 59 | } |
44 | 60 | ||
45 | -static int coroutine_fn bdrv_co_do_readv(BdrvChild *child, | 61 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent); |
46 | - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, | 62 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); |
47 | - BdrvRequestFlags flags) | 63 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
48 | -{ | 64 | + BdrvChild *parent); |
49 | - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { | 65 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, |
50 | - return -EINVAL; | 66 | + BdrvChild *parent); |
51 | - } | 67 | |
52 | - | 68 | static void bdrv_co_drain_bh_cb(void *opaque) |
53 | - return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS, | ||
54 | - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); | ||
55 | -} | ||
56 | - | ||
57 | -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, | ||
58 | - int nb_sectors, QEMUIOVector *qiov) | ||
59 | -{ | ||
60 | - return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0); | ||
61 | -} | ||
62 | - | ||
63 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | ||
64 | int64_t offset, int bytes, BdrvRequestFlags flags) | ||
65 | { | 69 | { |
66 | @@ -XXX,XX +XXX,XX @@ out: | 70 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) |
67 | return ret; | 71 | |
72 | bdrv_dec_in_flight(bs); | ||
73 | if (data->begin) { | ||
74 | - bdrv_do_drained_begin(bs, data->parent); | ||
75 | + bdrv_do_drained_begin(bs, data->recursive, data->parent); | ||
76 | } else { | ||
77 | - bdrv_do_drained_end(bs, data->parent); | ||
78 | + bdrv_do_drained_end(bs, data->recursive, data->parent); | ||
79 | } | ||
80 | |||
81 | data->done = true; | ||
82 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque) | ||
68 | } | 83 | } |
69 | 84 | ||
70 | -static int coroutine_fn bdrv_co_do_writev(BdrvChild *child, | 85 | static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
71 | - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, | 86 | - bool begin, BdrvChild *parent) |
72 | - BdrvRequestFlags flags) | 87 | + bool begin, bool recursive, |
73 | -{ | 88 | + BdrvChild *parent) |
74 | - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { | ||
75 | - return -EINVAL; | ||
76 | - } | ||
77 | - | ||
78 | - return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS, | ||
79 | - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); | ||
80 | -} | ||
81 | - | ||
82 | -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, | ||
83 | - int nb_sectors, QEMUIOVector *qiov) | ||
84 | -{ | ||
85 | - return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0); | ||
86 | -} | ||
87 | - | ||
88 | int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, | ||
89 | int bytes, BdrvRequestFlags flags) | ||
90 | { | 89 | { |
90 | BdrvCoDrainData data; | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
93 | .bs = bs, | ||
94 | .done = false, | ||
95 | .begin = begin, | ||
96 | + .recursive = recursive, | ||
97 | .parent = parent, | ||
98 | }; | ||
99 | bdrv_inc_in_flight(bs); | ||
100 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, | ||
101 | assert(data.done); | ||
102 | } | ||
103 | |||
104 | -static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | ||
105 | +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, | ||
106 | + BdrvChild *parent) | ||
107 | { | ||
108 | + BdrvChild *child, *next; | ||
109 | + | ||
110 | if (qemu_in_coroutine()) { | ||
111 | - bdrv_co_yield_to_drain(bs, true, parent); | ||
112 | + bdrv_co_yield_to_drain(bs, true, recursive, parent); | ||
113 | return; | ||
114 | } | ||
115 | |||
116 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent) | ||
117 | bdrv_parent_drained_begin(bs, parent); | ||
118 | bdrv_drain_invoke(bs, true, false); | ||
119 | bdrv_drain_recurse(bs); | ||
120 | + | ||
121 | + if (recursive) { | ||
122 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
123 | + bdrv_do_drained_begin(child->bs, true, child); | ||
124 | + } | ||
125 | + } | ||
126 | } | ||
127 | |||
128 | void bdrv_drained_begin(BlockDriverState *bs) | ||
129 | { | ||
130 | - bdrv_do_drained_begin(bs, NULL); | ||
131 | + bdrv_do_drained_begin(bs, false, NULL); | ||
132 | +} | ||
133 | + | ||
134 | +void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
135 | +{ | ||
136 | + bdrv_do_drained_begin(bs, true, NULL); | ||
137 | } | ||
138 | |||
139 | -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
140 | +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
141 | + BdrvChild *parent) | ||
142 | { | ||
143 | + BdrvChild *child, *next; | ||
144 | int old_quiesce_counter; | ||
145 | |||
146 | if (qemu_in_coroutine()) { | ||
147 | - bdrv_co_yield_to_drain(bs, false, parent); | ||
148 | + bdrv_co_yield_to_drain(bs, false, recursive, parent); | ||
149 | return; | ||
150 | } | ||
151 | assert(bs->quiesce_counter > 0); | ||
152 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) | ||
153 | if (old_quiesce_counter == 1) { | ||
154 | aio_enable_external(bdrv_get_aio_context(bs)); | ||
155 | } | ||
156 | + | ||
157 | + if (recursive) { | ||
158 | + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
159 | + bdrv_do_drained_end(child->bs, true, child); | ||
160 | + } | ||
161 | + } | ||
162 | } | ||
163 | |||
164 | void bdrv_drained_end(BlockDriverState *bs) | ||
165 | { | ||
166 | - bdrv_do_drained_end(bs, NULL); | ||
167 | + bdrv_do_drained_end(bs, false, NULL); | ||
168 | +} | ||
169 | + | ||
170 | +void bdrv_subtree_drained_end(BlockDriverState *bs) | ||
171 | +{ | ||
172 | + bdrv_do_drained_end(bs, true, NULL); | ||
173 | } | ||
174 | |||
175 | /* | ||
91 | -- | 176 | -- |
92 | 2.13.6 | 177 | 2.13.6 |
93 | 178 | ||
94 | 179 | diff view generated by jsdifflib |
1 | This adds a test for a temporary write failure, which simulates the | 1 | Add a subtree drain version to the existing test cases. |
---|---|---|---|
2 | situation after werror=stop/enospc has stopped the VM. We shouldn't | ||
3 | leave leaked clusters behind in such cases. | ||
4 | 2 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
7 | --- | 4 | --- |
8 | tests/qemu-iotests/026 | 17 +++++++++++++++++ | 5 | tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++- |
9 | tests/qemu-iotests/026.out | 8 ++++++++ | 6 | 1 file changed, 26 insertions(+), 1 deletion(-) |
10 | tests/qemu-iotests/026.out.nocache | 8 ++++++++ | ||
11 | 3 files changed, 33 insertions(+) | ||
12 | 7 | ||
13 | diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
14 | index XXXXXXX..XXXXXXX 100755 | 9 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tests/qemu-iotests/026 | 10 | --- a/tests/test-bdrv-drain.c |
16 | +++ b/tests/qemu-iotests/026 | 11 | +++ b/tests/test-bdrv-drain.c |
17 | @@ -XXX,XX +XXX,XX @@ done | 12 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
18 | done | 13 | enum drain_type { |
19 | done | 14 | BDRV_DRAIN_ALL, |
20 | 15 | BDRV_DRAIN, | |
21 | +echo | 16 | + BDRV_SUBTREE_DRAIN, |
22 | +echo === Avoid cluster leaks after temporary failure === | 17 | DRAIN_TYPE_MAX, |
23 | +echo | 18 | }; |
19 | |||
20 | @@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) | ||
21 | switch (drain_type) { | ||
22 | case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; | ||
23 | case BDRV_DRAIN: bdrv_drained_begin(bs); break; | ||
24 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; | ||
25 | default: g_assert_not_reached(); | ||
26 | } | ||
27 | } | ||
28 | @@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) | ||
29 | switch (drain_type) { | ||
30 | case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; | ||
31 | case BDRV_DRAIN: bdrv_drained_end(bs); break; | ||
32 | + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; | ||
33 | default: g_assert_not_reached(); | ||
34 | } | ||
35 | } | ||
36 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void) | ||
37 | test_drv_cb_common(BDRV_DRAIN, false); | ||
38 | } | ||
39 | |||
40 | +static void test_drv_cb_drain_subtree(void) | ||
41 | +{ | ||
42 | + test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); | ||
43 | +} | ||
24 | + | 44 | + |
25 | +cat > "$TEST_DIR/blkdebug.conf" <<EOF | 45 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) |
26 | +[inject-error] | 46 | { |
27 | +event = "write_aio" | 47 | BlockBackend *blk; |
28 | +errno = "5" | 48 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void) |
29 | +once = "on" | 49 | test_quiesce_common(BDRV_DRAIN, false); |
30 | +EOF | 50 | } |
51 | |||
52 | +static void test_quiesce_drain_subtree(void) | ||
53 | +{ | ||
54 | + test_quiesce_common(BDRV_SUBTREE_DRAIN, true); | ||
55 | +} | ||
31 | + | 56 | + |
32 | +# After the failed first write, do a second write so that the updated refcount | 57 | static void test_nested(void) |
33 | +# block is actually written back | 58 | { |
34 | +_make_test_img 64M | 59 | BlockBackend *blk; |
35 | +$QEMU_IO -c "write 0 1M" -c "write 0 1M" "$BLKDBG_TEST_IMG" | _filter_qemu_io | 60 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
36 | +_check_test_img | 61 | /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ |
62 | int bs_quiesce = (outer != BDRV_DRAIN_ALL) + | ||
63 | (inner != BDRV_DRAIN_ALL); | ||
64 | - int backing_quiesce = 0; | ||
65 | + int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + | ||
66 | + (inner == BDRV_SUBTREE_DRAIN); | ||
67 | int backing_cb_cnt = (outer != BDRV_DRAIN) + | ||
68 | (inner != BDRV_DRAIN); | ||
69 | |||
70 | @@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void) | ||
71 | test_blockjob_common(BDRV_DRAIN); | ||
72 | } | ||
73 | |||
74 | +static void test_blockjob_drain_subtree(void) | ||
75 | +{ | ||
76 | + test_blockjob_common(BDRV_SUBTREE_DRAIN); | ||
77 | +} | ||
37 | + | 78 | + |
38 | # success, all done | 79 | int main(int argc, char **argv) |
39 | echo "*** done" | 80 | { |
40 | rm -f $seq.full | 81 | bdrv_init(); |
41 | diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out | 82 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
42 | index XXXXXXX..XXXXXXX 100644 | 83 | |
43 | --- a/tests/qemu-iotests/026.out | 84 | g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); |
44 | +++ b/tests/qemu-iotests/026.out | 85 | g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); |
45 | @@ -XXX,XX +XXX,XX @@ write failed: No space left on device | 86 | + g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", |
46 | 87 | + test_drv_cb_drain_subtree); | |
47 | 96 leaked clusters were found on the image. | 88 | |
48 | This means waste of disk space, but no harm to data. | 89 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
49 | + | 90 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); |
50 | +=== Avoid cluster leaks after temporary failure === | 91 | + g_test_add_func("/bdrv-drain/quiesce/drain_subtree", |
51 | + | 92 | + test_quiesce_drain_subtree); |
52 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 | 93 | |
53 | +write failed: Input/output error | 94 | g_test_add_func("/bdrv-drain/nested", test_nested); |
54 | +wrote 1048576/1048576 bytes at offset 0 | 95 | |
55 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | 96 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
56 | +No errors were found on the image. | 97 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); |
57 | *** done | 98 | + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", |
58 | diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache | 99 | + test_blockjob_drain_subtree); |
59 | index XXXXXXX..XXXXXXX 100644 | 100 | |
60 | --- a/tests/qemu-iotests/026.out.nocache | 101 | return g_test_run(); |
61 | +++ b/tests/qemu-iotests/026.out.nocache | 102 | } |
62 | @@ -XXX,XX +XXX,XX @@ write failed: No space left on device | ||
63 | |||
64 | 96 leaked clusters were found on the image. | ||
65 | This means waste of disk space, but no harm to data. | ||
66 | + | ||
67 | +=== Avoid cluster leaks after temporary failure === | ||
68 | + | ||
69 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 | ||
70 | +write failed: Input/output error | ||
71 | +wrote 1048576/1048576 bytes at offset 0 | ||
72 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
73 | +No errors were found on the image. | ||
74 | *** done | ||
75 | -- | 103 | -- |
76 | 2.13.6 | 104 | 2.13.6 |
77 | 105 | ||
78 | 106 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | If bdrv_do_drained_begin/end() are called in coroutine context, they |
---|---|---|---|
2 | first use a bottom half (BH) to get out of the coroutine context. Call some existing | ||
3 | tests again from a coroutine to cover this code path. | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | ||
4 | byte-based. The qcow driver is now ready to fully utilize the | ||
5 | byte-based callback interface, as long as we override the default | ||
6 | alignment to still be 512 (needed at least for asserts present | ||
7 | because of encryption, but easier to do everywhere than to audit | ||
8 | which sub-sector requests are handled correctly, especially since | ||
9 | we no longer recommend qcow for new disk images). | ||
10 | |||
11 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
12 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 6 | --- |
15 | block/qcow.c | 35 ++++++++++++++++++++--------------- | 7 | tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ |
16 | 1 file changed, 20 insertions(+), 15 deletions(-) | 8 | 1 file changed, 59 insertions(+) |
17 | 9 | ||
18 | diff --git a/block/qcow.c b/block/qcow.c | 10 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
19 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/qcow.c | 12 | --- a/tests/test-bdrv-drain.c |
21 | +++ b/block/qcow.c | 13 | +++ b/tests/test-bdrv-drain.c |
22 | @@ -XXX,XX +XXX,XX @@ typedef struct QCowHeader { | 14 | @@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret) |
23 | typedef struct BDRVQcowState { | 15 | *aio_ret = ret; |
24 | int cluster_bits; | ||
25 | int cluster_size; | ||
26 | - int cluster_sectors; | ||
27 | int l2_bits; | ||
28 | int l2_size; | ||
29 | unsigned int l1_size; | ||
30 | @@ -XXX,XX +XXX,XX @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, | ||
31 | } | ||
32 | s->cluster_bits = header.cluster_bits; | ||
33 | s->cluster_size = 1 << s->cluster_bits; | ||
34 | - s->cluster_sectors = 1 << (s->cluster_bits - 9); | ||
35 | s->l2_bits = header.l2_bits; | ||
36 | s->l2_size = 1 << s->l2_bits; | ||
37 | bs->total_sectors = header.size / 512; | ||
38 | @@ -XXX,XX +XXX,XX @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) | ||
39 | return 0; | ||
40 | } | 16 | } |
41 | 17 | ||
42 | -static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | 18 | +typedef struct CallInCoroutineData { |
43 | - int nb_sectors, QEMUIOVector *qiov) | 19 | + void (*entry)(void); |
44 | +static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) | 20 | + bool done; |
21 | +} CallInCoroutineData; | ||
22 | + | ||
23 | +static coroutine_fn void call_in_coroutine_entry(void *opaque) | ||
45 | +{ | 24 | +{ |
46 | + /* At least encrypted images require 512-byte alignment. Apply the | 25 | + CallInCoroutineData *data = opaque; |
47 | + * limit universally, rather than just on encrypted images, as | 26 | + |
48 | + * it's easier to let the block layer handle rounding than to | 27 | + data->entry(); |
49 | + * audit this code further. */ | 28 | + data->done = true; |
50 | + bs->bl.request_alignment = BDRV_SECTOR_SIZE; | ||
51 | +} | 29 | +} |
52 | + | 30 | + |
53 | +static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, | 31 | +static void call_in_coroutine(void (*entry)(void)) |
54 | + uint64_t bytes, QEMUIOVector *qiov, | 32 | +{ |
55 | + int flags) | 33 | + Coroutine *co; |
34 | + CallInCoroutineData data = { | ||
35 | + .entry = entry, | ||
36 | + .done = false, | ||
37 | + }; | ||
38 | + | ||
39 | + co = qemu_coroutine_create(call_in_coroutine_entry, &data); | ||
40 | + qemu_coroutine_enter(co); | ||
41 | + while (!data.done) { | ||
42 | + aio_poll(qemu_get_aio_context(), true); | ||
43 | + } | ||
44 | +} | ||
45 | + | ||
46 | enum drain_type { | ||
47 | BDRV_DRAIN_ALL, | ||
48 | BDRV_DRAIN, | ||
49 | @@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void) | ||
50 | test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); | ||
51 | } | ||
52 | |||
53 | +static void test_drv_cb_co_drain(void) | ||
54 | +{ | ||
55 | + call_in_coroutine(test_drv_cb_drain); | ||
56 | +} | ||
57 | + | ||
58 | +static void test_drv_cb_co_drain_subtree(void) | ||
59 | +{ | ||
60 | + call_in_coroutine(test_drv_cb_drain_subtree); | ||
61 | +} | ||
62 | + | ||
63 | static void test_quiesce_common(enum drain_type drain_type, bool recursive) | ||
56 | { | 64 | { |
57 | BDRVQcowState *s = bs->opaque; | 65 | BlockBackend *blk; |
58 | int offset_in_cluster; | 66 | @@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void) |
59 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | 67 | test_quiesce_common(BDRV_SUBTREE_DRAIN, true); |
60 | QEMUIOVector hd_qiov; | ||
61 | uint8_t *buf; | ||
62 | void *orig_buf; | ||
63 | - int64_t offset = sector_num * BDRV_SECTOR_SIZE; | ||
64 | - int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; | ||
65 | |||
66 | + assert(!flags); | ||
67 | if (qiov->niov > 1) { | ||
68 | buf = orig_buf = qemu_try_blockalign(bs, qiov->size); | ||
69 | if (buf == NULL) { | ||
70 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
71 | return ret; | ||
72 | } | 68 | } |
73 | 69 | ||
74 | -static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, | 70 | +static void test_quiesce_co_drain(void) |
75 | - int nb_sectors, QEMUIOVector *qiov, | 71 | +{ |
76 | - int flags) | 72 | + call_in_coroutine(test_quiesce_drain); |
77 | +static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, | 73 | +} |
78 | + uint64_t bytes, QEMUIOVector *qiov, | 74 | + |
79 | + int flags) | 75 | +static void test_quiesce_co_drain_subtree(void) |
76 | +{ | ||
77 | + call_in_coroutine(test_quiesce_drain_subtree); | ||
78 | +} | ||
79 | + | ||
80 | static void test_nested(void) | ||
80 | { | 81 | { |
81 | BDRVQcowState *s = bs->opaque; | 82 | BlockBackend *blk; |
82 | int offset_in_cluster; | 83 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) |
83 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, | 84 | g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", |
84 | QEMUIOVector hd_qiov; | 85 | test_drv_cb_drain_subtree); |
85 | uint8_t *buf; | 86 | |
86 | void *orig_buf; | 87 | + // XXX bdrv_drain_all() doesn't work in coroutine context |
87 | - int64_t offset = sector_num * BDRV_SECTOR_SIZE; | 88 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); |
88 | - int64_t bytes = nb_sectors * BDRV_SECTOR_SIZE; | 89 | + g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", |
89 | 90 | + test_drv_cb_co_drain_subtree); | |
90 | assert(!flags); | 91 | + |
91 | s->cluster_cache_offset = -1; /* disable compressed cache */ | 92 | + |
92 | @@ -XXX,XX +XXX,XX @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | 93 | g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); |
93 | 94 | g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); | |
94 | if (ret != Z_STREAM_END || out_len >= s->cluster_size) { | 95 | g_test_add_func("/bdrv-drain/quiesce/drain_subtree", |
95 | /* could not compress: write normal cluster */ | 96 | test_quiesce_drain_subtree); |
96 | - ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS, | 97 | |
97 | - bytes >> BDRV_SECTOR_BITS, qiov, 0); | 98 | + // XXX bdrv_drain_all() doesn't work in coroutine context |
98 | + ret = qcow_co_pwritev(bs, offset, bytes, qiov, 0); | 99 | + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); |
99 | if (ret < 0) { | 100 | + g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", |
100 | goto fail; | 101 | + test_quiesce_co_drain_subtree); |
101 | } | 102 | + |
102 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qcow = { | 103 | g_test_add_func("/bdrv-drain/nested", test_nested); |
103 | .bdrv_co_create_opts = qcow_co_create_opts, | 104 | |
104 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | 105 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); |
105 | .supports_backing = true, | ||
106 | + .bdrv_refresh_limits = qcow_refresh_limits, | ||
107 | |||
108 | - .bdrv_co_readv = qcow_co_readv, | ||
109 | - .bdrv_co_writev = qcow_co_writev, | ||
110 | + .bdrv_co_preadv = qcow_co_preadv, | ||
111 | + .bdrv_co_pwritev = qcow_co_pwritev, | ||
112 | .bdrv_co_block_status = qcow_co_block_status, | ||
113 | |||
114 | .bdrv_make_empty = qcow_make_empty, | ||
115 | -- | 106 | -- |
116 | 2.13.6 | 107 | 2.13.6 |
117 | 108 | ||
118 | 109 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | Test that drain sections are correctly propagated through the graph. |
---|---|---|---|
2 | 2 | ||
3 | At the beginning of the function, we initialize the local variable ret to 0, | ||
4 | and in the body of the function, we check every value assigned to it and exit | ||
5 | the loop immediately if it is non-zero. So here ret can never be non-zero. | ||
6 | |||
7 | Reported-by: Kevin Wolf <kwolf@redhat.com> | ||
8 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 4 | --- |
13 | block/qcow2.c | 2 +- | 5 | tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ |
14 | 1 file changed, 1 insertion(+), 1 deletion(-) | 6 | 1 file changed, 74 insertions(+) |
15 | 7 | ||
16 | diff --git a/block/qcow2.c b/block/qcow2.c | 8 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
17 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/qcow2.c | 10 | --- a/tests/test-bdrv-drain.c |
19 | +++ b/block/qcow2.c | 11 | +++ b/tests/test-bdrv-drain.c |
20 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, | 12 | @@ -XXX,XX +XXX,XX @@ static void test_nested(void) |
21 | while (l2meta != NULL) { | 13 | blk_unref(blk); |
22 | QCowL2Meta *next; | 14 | } |
23 | 15 | ||
24 | - if (!ret && link_l2) { | 16 | +static void test_multiparent(void) |
25 | + if (link_l2) { | 17 | +{ |
26 | ret = qcow2_alloc_cluster_link_l2(bs, l2meta); | 18 | + BlockBackend *blk_a, *blk_b; |
27 | if (ret) { | 19 | + BlockDriverState *bs_a, *bs_b, *backing; |
28 | goto out; | 20 | + BDRVTestState *a_s, *b_s, *backing_s; |
21 | + | ||
22 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
23 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, | ||
24 | + &error_abort); | ||
25 | + a_s = bs_a->opaque; | ||
26 | + blk_insert_bs(blk_a, bs_a, &error_abort); | ||
27 | + | ||
28 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); | ||
29 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, | ||
30 | + &error_abort); | ||
31 | + b_s = bs_b->opaque; | ||
32 | + blk_insert_bs(blk_b, bs_b, &error_abort); | ||
33 | + | ||
34 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | ||
35 | + backing_s = backing->opaque; | ||
36 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); | ||
37 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
38 | + | ||
39 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
40 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
41 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
42 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
43 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
44 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
45 | + | ||
46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); | ||
47 | + | ||
48 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
49 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
50 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); | ||
51 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
52 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
53 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
54 | + | ||
55 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); | ||
56 | + | ||
57 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 2); | ||
58 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); | ||
59 | + g_assert_cmpint(backing->quiesce_counter, ==, 2); | ||
60 | + g_assert_cmpint(a_s->drain_count, ==, 2); | ||
61 | + g_assert_cmpint(b_s->drain_count, ==, 2); | ||
62 | + g_assert_cmpint(backing_s->drain_count, ==, 2); | ||
63 | + | ||
64 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
65 | + | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 1); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 1); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 1); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 1); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
74 | + | ||
75 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
76 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
77 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
78 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
79 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
80 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
81 | + | ||
82 | + bdrv_unref(backing); | ||
83 | + bdrv_unref(bs_a); | ||
84 | + bdrv_unref(bs_b); | ||
85 | + blk_unref(blk_a); | ||
86 | + blk_unref(blk_b); | ||
87 | +} | ||
88 | + | ||
89 | |||
90 | typedef struct TestBlockJob { | ||
91 | BlockJob common; | ||
92 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
93 | test_quiesce_co_drain_subtree); | ||
94 | |||
95 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
96 | + g_test_add_func("/bdrv-drain/multiparent", test_multiparent); | ||
97 | |||
98 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
99 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
29 | -- | 100 | -- |
30 | 2.13.6 | 101 | 2.13.6 |
31 | 102 | ||
32 | 103 | diff view generated by jsdifflib |
1 | This moves the bdrv_truncate() implementation from block.c to block/io.c | 1 | We need to remember how many of the drain sections that a node is in |
---|---|---|---|
2 | so it can have access to the tracked requests infrastructure. | 2 | were recursive (i.e. subtree drain rather than node drain), so that they |
3 | 3 | can be correctly applied when children are added or removed during the | |
4 | This involves making refresh_total_sectors() public (in block_int.h). | 4 | drained section. |
5 | |||
6 | With this change, it is safe to modify the graph even inside a | ||
7 | bdrv_subtree_drained_begin/end() section. | ||
5 | 8 | ||
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | --- | 10 | --- |
9 | include/block/block_int.h | 2 + | 11 | include/block/block.h | 2 -- |
10 | block.c | 111 +--------------------------------------------- | 12 | include/block/block_int.h | 5 +++++ |
11 | block/io.c | 109 +++++++++++++++++++++++++++++++++++++++++++++ | 13 | block.c | 32 +++++++++++++++++++++++++++++--- |
12 | 3 files changed, 112 insertions(+), 110 deletions(-) | 14 | block/io.c | 28 ++++++++++++++++++++++++---- |
13 | 15 | 4 files changed, 58 insertions(+), 9 deletions(-) | |
16 | |||
17 | diff --git a/include/block/block.h b/include/block/block.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/block/block.h | ||
20 | +++ b/include/block/block.h | ||
21 | @@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs); | ||
22 | /** | ||
23 | * Like bdrv_drained_begin, but recursively begins a quiesced section for | ||
24 | * exclusive access to all child nodes as well. | ||
25 | - * | ||
26 | - * Graph changes are not allowed during a subtree drain section. | ||
27 | */ | ||
28 | void bdrv_subtree_drained_begin(BlockDriverState *bs); | ||
29 | |||
14 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 30 | diff --git a/include/block/block_int.h b/include/block/block_int.h |
15 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/include/block/block_int.h | 32 | --- a/include/block/block_int.h |
17 | +++ b/include/block/block_int.h | 33 | +++ b/include/block/block_int.h |
18 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, | 34 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { |
19 | BdrvChild *dst, uint64_t dst_offset, | 35 | |
20 | uint64_t bytes, BdrvRequestFlags flags); | 36 | /* Accessed with atomic ops. */ |
21 | 37 | int quiesce_counter; | |
22 | +int refresh_total_sectors(BlockDriverState *bs, int64_t hint); | 38 | + int recursive_quiesce_counter; |
23 | + | 39 | + |
24 | #endif /* BLOCK_INT_H */ | 40 | unsigned int write_gen; /* Current data generation */ |
41 | |||
42 | /* Protected by reqs_lock. */ | ||
43 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | ||
44 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | ||
45 | BdrvRequestFlags flags); | ||
46 | |||
47 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); | ||
48 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); | ||
49 | + | ||
50 | int get_tmp_filename(char *filename, int size); | ||
51 | BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, | ||
52 | const char *filename); | ||
25 | diff --git a/block.c b/block.c | 53 | diff --git a/block.c b/block.c |
26 | index XXXXXXX..XXXXXXX 100644 | 54 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/block.c | 55 | --- a/block.c |
28 | +++ b/block.c | 56 | +++ b/block.c |
29 | @@ -XXX,XX +XXX,XX @@ static int find_image_format(BlockBackend *file, const char *filename, | 57 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child) |
30 | * Set the current 'total_sectors' value | 58 | bdrv_drained_end(bs); |
31 | * Return 0 on success, -errno on error. | 59 | } |
32 | */ | 60 | |
33 | -static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) | 61 | +static void bdrv_child_cb_attach(BdrvChild *child) |
34 | +int refresh_total_sectors(BlockDriverState *bs, int64_t hint) | 62 | +{ |
63 | + BlockDriverState *bs = child->opaque; | ||
64 | + bdrv_apply_subtree_drain(child, bs); | ||
65 | +} | ||
66 | + | ||
67 | +static void bdrv_child_cb_detach(BdrvChild *child) | ||
68 | +{ | ||
69 | + BlockDriverState *bs = child->opaque; | ||
70 | + bdrv_unapply_subtree_drain(child, bs); | ||
71 | +} | ||
72 | + | ||
73 | static int bdrv_child_cb_inactivate(BdrvChild *child) | ||
35 | { | 74 | { |
36 | BlockDriver *drv = bs->drv; | 75 | BlockDriverState *bs = child->opaque; |
37 | 76 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = { | |
38 | @@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) | 77 | .inherit_options = bdrv_inherited_options, |
78 | .drained_begin = bdrv_child_cb_drained_begin, | ||
79 | .drained_end = bdrv_child_cb_drained_end, | ||
80 | + .attach = bdrv_child_cb_attach, | ||
81 | + .detach = bdrv_child_cb_detach, | ||
82 | .inactivate = bdrv_child_cb_inactivate, | ||
83 | }; | ||
84 | |||
85 | @@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = { | ||
86 | .inherit_options = bdrv_inherited_fmt_options, | ||
87 | .drained_begin = bdrv_child_cb_drained_begin, | ||
88 | .drained_end = bdrv_child_cb_drained_end, | ||
89 | + .attach = bdrv_child_cb_attach, | ||
90 | + .detach = bdrv_child_cb_detach, | ||
91 | .inactivate = bdrv_child_cb_inactivate, | ||
92 | }; | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c) | ||
95 | parent->backing_blocker); | ||
96 | bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, | ||
97 | parent->backing_blocker); | ||
98 | + | ||
99 | + bdrv_child_cb_attach(c); | ||
100 | } | ||
101 | |||
102 | static void bdrv_backing_detach(BdrvChild *c) | ||
103 | @@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c) | ||
104 | bdrv_op_unblock_all(c->bs, parent->backing_blocker); | ||
105 | error_free(parent->backing_blocker); | ||
106 | parent->backing_blocker = NULL; | ||
107 | + | ||
108 | + bdrv_child_cb_detach(c); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, | ||
113 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); | ||
39 | } | 114 | } |
40 | } | 115 | if (old_bs) { |
41 | 116 | + /* Detach first so that the recursive drain sections coming from @child | |
42 | -static void bdrv_parent_cb_resize(BlockDriverState *bs) | 117 | + * are already gone and we only end the drain sections that came from |
43 | -{ | 118 | + * elsewhere. */ |
44 | - BdrvChild *c; | 119 | + if (child->role->detach) { |
45 | - QLIST_FOREACH(c, &bs->parents, next_parent) { | 120 | + child->role->detach(child); |
46 | - if (c->role->resize) { | 121 | + } |
47 | - c->role->resize(c); | 122 | if (old_bs->quiesce_counter && child->role->drained_end) { |
123 | for (i = 0; i < old_bs->quiesce_counter; i++) { | ||
124 | child->role->drained_end(child); | ||
125 | } | ||
126 | } | ||
127 | - if (child->role->detach) { | ||
128 | - child->role->detach(child); | ||
48 | - } | 129 | - } |
49 | - } | 130 | QLIST_REMOVE(child, next_parent); |
50 | -} | 131 | } |
51 | - | 132 | |
52 | /* | 133 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
53 | * Sets the backing file link of a BDS. A new reference is created; callers | 134 | } |
54 | * which don't need their own reference any more must call bdrv_unref(). | 135 | } |
55 | @@ -XXX,XX +XXX,XX @@ exit: | 136 | |
56 | } | 137 | + /* Attach only after starting new drained sections, so that recursive |
57 | 138 | + * drain sections coming from @child don't get an extra .drained_begin | |
58 | /** | 139 | + * callback. */ |
59 | - * Truncate file to 'offset' bytes (needed only for file protocols) | 140 | if (child->role->attach) { |
60 | - */ | 141 | child->role->attach(child); |
61 | -int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | 142 | } |
62 | - PreallocMode prealloc, Error **errp) | ||
63 | -{ | ||
64 | - BlockDriverState *bs = child->bs; | ||
65 | - BlockDriver *drv = bs->drv; | ||
66 | - int ret; | ||
67 | - | ||
68 | - assert(child->perm & BLK_PERM_RESIZE); | ||
69 | - | ||
70 | - /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ | ||
71 | - if (!drv) { | ||
72 | - error_setg(errp, "No medium inserted"); | ||
73 | - return -ENOMEDIUM; | ||
74 | - } | ||
75 | - if (offset < 0) { | ||
76 | - error_setg(errp, "Image size cannot be negative"); | ||
77 | - return -EINVAL; | ||
78 | - } | ||
79 | - | ||
80 | - bdrv_inc_in_flight(bs); | ||
81 | - | ||
82 | - if (!drv->bdrv_co_truncate) { | ||
83 | - if (bs->file && drv->is_filter) { | ||
84 | - ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); | ||
85 | - goto out; | ||
86 | - } | ||
87 | - error_setg(errp, "Image format driver does not support resize"); | ||
88 | - ret = -ENOTSUP; | ||
89 | - goto out; | ||
90 | - } | ||
91 | - if (bs->read_only) { | ||
92 | - error_setg(errp, "Image is read-only"); | ||
93 | - ret = -EACCES; | ||
94 | - goto out; | ||
95 | - } | ||
96 | - | ||
97 | - assert(!(bs->open_flags & BDRV_O_INACTIVE)); | ||
98 | - | ||
99 | - ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); | ||
100 | - if (ret < 0) { | ||
101 | - goto out; | ||
102 | - } | ||
103 | - ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); | ||
104 | - if (ret < 0) { | ||
105 | - error_setg_errno(errp, -ret, "Could not refresh total sector count"); | ||
106 | - } else { | ||
107 | - offset = bs->total_sectors * BDRV_SECTOR_SIZE; | ||
108 | - } | ||
109 | - bdrv_dirty_bitmap_truncate(bs, offset); | ||
110 | - bdrv_parent_cb_resize(bs); | ||
111 | - atomic_inc(&bs->write_gen); | ||
112 | - | ||
113 | -out: | ||
114 | - bdrv_dec_in_flight(bs); | ||
115 | - return ret; | ||
116 | -} | ||
117 | - | ||
118 | -typedef struct TruncateCo { | ||
119 | - BdrvChild *child; | ||
120 | - int64_t offset; | ||
121 | - PreallocMode prealloc; | ||
122 | - Error **errp; | ||
123 | - int ret; | ||
124 | -} TruncateCo; | ||
125 | - | ||
126 | -static void coroutine_fn bdrv_truncate_co_entry(void *opaque) | ||
127 | -{ | ||
128 | - TruncateCo *tco = opaque; | ||
129 | - tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, | ||
130 | - tco->errp); | ||
131 | -} | ||
132 | - | ||
133 | -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | ||
134 | - Error **errp) | ||
135 | -{ | ||
136 | - Coroutine *co; | ||
137 | - TruncateCo tco = { | ||
138 | - .child = child, | ||
139 | - .offset = offset, | ||
140 | - .prealloc = prealloc, | ||
141 | - .errp = errp, | ||
142 | - .ret = NOT_DONE, | ||
143 | - }; | ||
144 | - | ||
145 | - if (qemu_in_coroutine()) { | ||
146 | - /* Fast-path if already in coroutine context */ | ||
147 | - bdrv_truncate_co_entry(&tco); | ||
148 | - } else { | ||
149 | - co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); | ||
150 | - qemu_coroutine_enter(co); | ||
151 | - BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); | ||
152 | - } | ||
153 | - | ||
154 | - return tco.ret; | ||
155 | -} | ||
156 | - | ||
157 | -/** | ||
158 | * Length of a allocated file in bytes. Sparse files are counted by actual | ||
159 | * allocated space. Return < 0 if error or unknown. | ||
160 | */ | ||
161 | diff --git a/block/io.c b/block/io.c | 143 | diff --git a/block/io.c b/block/io.c |
162 | index XXXXXXX..XXXXXXX 100644 | 144 | index XXXXXXX..XXXXXXX 100644 |
163 | --- a/block/io.c | 145 | --- a/block/io.c |
164 | +++ b/block/io.c | 146 | +++ b/block/io.c |
165 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, | 147 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, |
166 | bdrv_dec_in_flight(dst_bs); | 148 | assert(data.done); |
167 | return ret; | 149 | } |
168 | } | 150 | |
169 | + | 151 | -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
170 | +static void bdrv_parent_cb_resize(BlockDriverState *bs) | 152 | - BdrvChild *parent) |
171 | +{ | 153 | +void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
172 | + BdrvChild *c; | 154 | + BdrvChild *parent) |
173 | + QLIST_FOREACH(c, &bs->parents, next_parent) { | 155 | { |
174 | + if (c->role->resize) { | 156 | BdrvChild *child, *next; |
175 | + c->role->resize(c); | 157 | |
176 | + } | 158 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, |
159 | bdrv_drain_recurse(bs); | ||
160 | |||
161 | if (recursive) { | ||
162 | + bs->recursive_quiesce_counter++; | ||
163 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
164 | bdrv_do_drained_begin(child->bs, true, child); | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) | ||
167 | bdrv_do_drained_begin(bs, true, NULL); | ||
168 | } | ||
169 | |||
170 | -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
171 | - BdrvChild *parent) | ||
172 | +void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
173 | + BdrvChild *parent) | ||
174 | { | ||
175 | BdrvChild *child, *next; | ||
176 | int old_quiesce_counter; | ||
177 | @@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, | ||
178 | } | ||
179 | |||
180 | if (recursive) { | ||
181 | + bs->recursive_quiesce_counter--; | ||
182 | QLIST_FOREACH_SAFE(child, &bs->children, next, next) { | ||
183 | bdrv_do_drained_end(child->bs, true, child); | ||
184 | } | ||
185 | @@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs) | ||
186 | bdrv_do_drained_end(bs, true, NULL); | ||
187 | } | ||
188 | |||
189 | +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) | ||
190 | +{ | ||
191 | + int i; | ||
192 | + | ||
193 | + for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { | ||
194 | + bdrv_do_drained_begin(child->bs, true, child); | ||
177 | + } | 195 | + } |
178 | +} | 196 | +} |
179 | + | 197 | + |
180 | +/** | 198 | +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) |
181 | + * Truncate file to 'offset' bytes (needed only for file protocols) | 199 | +{ |
182 | + */ | 200 | + int i; |
183 | +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, | 201 | + |
184 | + PreallocMode prealloc, Error **errp) | 202 | + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { |
185 | +{ | 203 | + bdrv_do_drained_end(child->bs, true, child); |
186 | + BlockDriverState *bs = child->bs; | ||
187 | + BlockDriver *drv = bs->drv; | ||
188 | + int ret; | ||
189 | + | ||
190 | + assert(child->perm & BLK_PERM_RESIZE); | ||
191 | + | ||
192 | + /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ | ||
193 | + if (!drv) { | ||
194 | + error_setg(errp, "No medium inserted"); | ||
195 | + return -ENOMEDIUM; | ||
196 | + } | 204 | + } |
197 | + if (offset < 0) { | 205 | +} |
198 | + error_setg(errp, "Image size cannot be negative"); | 206 | + |
199 | + return -EINVAL; | 207 | /* |
200 | + } | 208 | * Wait for pending requests to complete on a single BlockDriverState subtree, |
201 | + | 209 | * and suspend block driver's internal I/O until next request arrives. |
202 | + bdrv_inc_in_flight(bs); | ||
203 | + | ||
204 | + if (!drv->bdrv_co_truncate) { | ||
205 | + if (bs->file && drv->is_filter) { | ||
206 | + ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); | ||
207 | + goto out; | ||
208 | + } | ||
209 | + error_setg(errp, "Image format driver does not support resize"); | ||
210 | + ret = -ENOTSUP; | ||
211 | + goto out; | ||
212 | + } | ||
213 | + if (bs->read_only) { | ||
214 | + error_setg(errp, "Image is read-only"); | ||
215 | + ret = -EACCES; | ||
216 | + goto out; | ||
217 | + } | ||
218 | + | ||
219 | + assert(!(bs->open_flags & BDRV_O_INACTIVE)); | ||
220 | + | ||
221 | + ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); | ||
222 | + if (ret < 0) { | ||
223 | + goto out; | ||
224 | + } | ||
225 | + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); | ||
226 | + if (ret < 0) { | ||
227 | + error_setg_errno(errp, -ret, "Could not refresh total sector count"); | ||
228 | + } else { | ||
229 | + offset = bs->total_sectors * BDRV_SECTOR_SIZE; | ||
230 | + } | ||
231 | + bdrv_dirty_bitmap_truncate(bs, offset); | ||
232 | + bdrv_parent_cb_resize(bs); | ||
233 | + atomic_inc(&bs->write_gen); | ||
234 | + | ||
235 | +out: | ||
236 | + bdrv_dec_in_flight(bs); | ||
237 | + return ret; | ||
238 | +} | ||
239 | + | ||
240 | +typedef struct TruncateCo { | ||
241 | + BdrvChild *child; | ||
242 | + int64_t offset; | ||
243 | + PreallocMode prealloc; | ||
244 | + Error **errp; | ||
245 | + int ret; | ||
246 | +} TruncateCo; | ||
247 | + | ||
248 | +static void coroutine_fn bdrv_truncate_co_entry(void *opaque) | ||
249 | +{ | ||
250 | + TruncateCo *tco = opaque; | ||
251 | + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, | ||
252 | + tco->errp); | ||
253 | +} | ||
254 | + | ||
255 | +int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, | ||
256 | + Error **errp) | ||
257 | +{ | ||
258 | + Coroutine *co; | ||
259 | + TruncateCo tco = { | ||
260 | + .child = child, | ||
261 | + .offset = offset, | ||
262 | + .prealloc = prealloc, | ||
263 | + .errp = errp, | ||
264 | + .ret = NOT_DONE, | ||
265 | + }; | ||
266 | + | ||
267 | + if (qemu_in_coroutine()) { | ||
268 | + /* Fast-path if already in coroutine context */ | ||
269 | + bdrv_truncate_co_entry(&tco); | ||
270 | + } else { | ||
271 | + co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco); | ||
272 | + qemu_coroutine_enter(co); | ||
273 | + BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE); | ||
274 | + } | ||
275 | + | ||
276 | + return tco.ret; | ||
277 | +} | ||
278 | -- | 210 | -- |
279 | 2.13.6 | 211 | 2.13.6 |
280 | 212 | ||
281 | 213 | diff view generated by jsdifflib |
1 | From: Markus Armbruster <armbru@redhat.com> | ||
---|---|---|---|
2 | |||
3 | Commit f1b34a248e9 replaced a less-than-obvious test in | ||
4 | qdict_flatten_qdict() by the obvious one. Sadly, it made something | ||
5 | else non-obvious: the fact that @new_key passed to qdict_put_obj() | ||
6 | can't be null, because that depends on the function's precondition | ||
7 | (target == qdict) == !prefix. | ||
8 | |||
9 | Tweak the function some more to help Coverity and human readers alike. | ||
10 | |||
11 | Fixes: CID 1393620 | ||
12 | Signed-off-by: Markus Armbruster <armbru@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 2 | --- |
15 | qobject/block-qdict.c | 16 ++++++++-------- | 3 | tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ |
16 | 1 file changed, 8 insertions(+), 8 deletions(-) | 4 | 1 file changed, 80 insertions(+) |
17 | 5 | ||
18 | diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c | 6 | diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c |
19 | index XXXXXXX..XXXXXXX 100644 | 7 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/qobject/block-qdict.c | 8 | --- a/tests/test-bdrv-drain.c |
21 | +++ b/qobject/block-qdict.c | 9 | +++ b/tests/test-bdrv-drain.c |
22 | @@ -XXX,XX +XXX,XX @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) | 10 | @@ -XXX,XX +XXX,XX @@ static void test_multiparent(void) |
23 | const QDictEntry *entry, *next; | 11 | blk_unref(blk_b); |
24 | QDict *dict_val; | 12 | } |
25 | QList *list_val; | 13 | |
26 | - char *new_key; | 14 | +static void test_graph_change(void) |
27 | + char *key, *new_key; | 15 | +{ |
28 | 16 | + BlockBackend *blk_a, *blk_b; | |
29 | entry = qdict_first(qdict); | 17 | + BlockDriverState *bs_a, *bs_b, *backing; |
30 | 18 | + BDRVTestState *a_s, *b_s, *backing_s; | |
31 | @@ -XXX,XX +XXX,XX @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) | 19 | + |
32 | value = qdict_entry_value(entry); | 20 | + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
33 | dict_val = qobject_to(QDict, value); | 21 | + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, |
34 | list_val = qobject_to(QList, value); | 22 | + &error_abort); |
35 | - new_key = NULL; | 23 | + a_s = bs_a->opaque; |
36 | 24 | + blk_insert_bs(blk_a, bs_a, &error_abort); | |
37 | if (prefix) { | 25 | + |
38 | - new_key = g_strdup_printf("%s.%s", prefix, entry->key); | 26 | + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); |
39 | + key = new_key = g_strdup_printf("%s.%s", prefix, entry->key); | 27 | + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, |
40 | + } else { | 28 | + &error_abort); |
41 | + key = entry->key; | 29 | + b_s = bs_b->opaque; |
42 | + new_key = NULL; | 30 | + blk_insert_bs(blk_b, bs_b, &error_abort); |
43 | } | 31 | + |
44 | 32 | + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); | |
45 | /* | 33 | + backing_s = backing->opaque; |
46 | @@ -XXX,XX +XXX,XX @@ static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) | 34 | + bdrv_set_backing_hd(bs_a, backing, &error_abort); |
47 | * well advised not to modify them altogether.) | 35 | + |
48 | */ | 36 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); |
49 | if (dict_val && qdict_size(dict_val)) { | 37 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); |
50 | - qdict_flatten_qdict(dict_val, target, | 38 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); |
51 | - new_key ? new_key : entry->key); | 39 | + g_assert_cmpint(a_s->drain_count, ==, 0); |
52 | + qdict_flatten_qdict(dict_val, target, key); | 40 | + g_assert_cmpint(b_s->drain_count, ==, 0); |
53 | if (target == qdict) { | 41 | + g_assert_cmpint(backing_s->drain_count, ==, 0); |
54 | qdict_del(qdict, entry->key); | 42 | + |
55 | } | 43 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
56 | } else if (list_val && !qlist_empty(list_val)) { | 44 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
57 | - qdict_flatten_qlist(list_val, target, | 45 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); |
58 | - new_key ? new_key : entry->key); | 46 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); |
59 | + qdict_flatten_qlist(list_val, target, key); | 47 | + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); |
60 | if (target == qdict) { | 48 | + |
61 | qdict_del(qdict, entry->key); | 49 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); |
62 | } | 50 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); |
63 | } else if (target != qdict) { | 51 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); |
64 | - qdict_put_obj(target, new_key, qobject_ref(value)); | 52 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); |
65 | + qdict_put_obj(target, key, qobject_ref(value)); | 53 | + g_assert_cmpint(a_s->drain_count, ==, 5); |
66 | } | 54 | + g_assert_cmpint(b_s->drain_count, ==, 5); |
67 | 55 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | |
68 | g_free(new_key); | 56 | + |
57 | + bdrv_set_backing_hd(bs_b, NULL, &error_abort); | ||
58 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 3); | ||
59 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); | ||
60 | + g_assert_cmpint(backing->quiesce_counter, ==, 3); | ||
61 | + g_assert_cmpint(a_s->drain_count, ==, 3); | ||
62 | + g_assert_cmpint(b_s->drain_count, ==, 2); | ||
63 | + g_assert_cmpint(backing_s->drain_count, ==, 3); | ||
64 | + | ||
65 | + bdrv_set_backing_hd(bs_b, backing, &error_abort); | ||
66 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); | ||
67 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); | ||
68 | + g_assert_cmpint(backing->quiesce_counter, ==, 5); | ||
69 | + g_assert_cmpint(a_s->drain_count, ==, 5); | ||
70 | + g_assert_cmpint(b_s->drain_count, ==, 5); | ||
71 | + g_assert_cmpint(backing_s->drain_count, ==, 5); | ||
72 | + | ||
73 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
74 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); | ||
75 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
76 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
77 | + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); | ||
78 | + | ||
79 | + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); | ||
80 | + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); | ||
81 | + g_assert_cmpint(backing->quiesce_counter, ==, 0); | ||
82 | + g_assert_cmpint(a_s->drain_count, ==, 0); | ||
83 | + g_assert_cmpint(b_s->drain_count, ==, 0); | ||
84 | + g_assert_cmpint(backing_s->drain_count, ==, 0); | ||
85 | + | ||
86 | + bdrv_unref(backing); | ||
87 | + bdrv_unref(bs_a); | ||
88 | + bdrv_unref(bs_b); | ||
89 | + blk_unref(blk_a); | ||
90 | + blk_unref(blk_b); | ||
91 | +} | ||
92 | + | ||
93 | |||
94 | typedef struct TestBlockJob { | ||
95 | BlockJob common; | ||
96 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
97 | |||
98 | g_test_add_func("/bdrv-drain/nested", test_nested); | ||
99 | g_test_add_func("/bdrv-drain/multiparent", test_multiparent); | ||
100 | + g_test_add_func("/bdrv-drain/graph-change", test_graph_change); | ||
101 | |||
102 | g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); | ||
103 | g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); | ||
69 | -- | 104 | -- |
70 | 2.13.6 | 105 | 2.13.6 |
71 | 106 | ||
72 | 107 | diff view generated by jsdifflib |
1 | The error handling policy was traditionally set with -drive, but with | 1 | Since commit bde70715, base is the only node that is reopened in |
---|---|---|---|
2 | -blockdev it is no longer possible to set frontend options. scsi-disk | 2 | commit_start(). This means that the code, which still involves an |
3 | (and other block devices) have long supported qdev properties to | 3 | explicit BlockReopenQueue, can now be simplified by using bdrv_reopen(). |
4 | configure the error handling policy, so let's add these options to | ||
5 | usb-storage as well and just forward them to the internal scsi-disk | ||
6 | instance. | ||
7 | 4 | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | 6 | Reviewed-by: Fam Zheng <famz@redhat.com> |
10 | --- | 7 | --- |
11 | include/hw/scsi/scsi.h | 2 ++ | 8 | block/commit.c | 8 +------- |
12 | hw/scsi/scsi-bus.c | 11 ++++++++++- | 9 | 1 file changed, 1 insertion(+), 7 deletions(-) |
13 | hw/usb/dev-storage.c | 2 ++ | ||
14 | 3 files changed, 14 insertions(+), 1 deletion(-) | ||
15 | 10 | ||
16 | diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h | 11 | diff --git a/block/commit.c b/block/commit.c |
17 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/hw/scsi/scsi.h | 13 | --- a/block/commit.c |
19 | +++ b/include/hw/scsi/scsi.h | 14 | +++ b/block/commit.c |
20 | @@ -XXX,XX +XXX,XX @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) | 15 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
21 | SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, | 16 | const char *filter_node_name, Error **errp) |
22 | int unit, bool removable, int bootindex, | ||
23 | bool share_rw, | ||
24 | + BlockdevOnError rerror, | ||
25 | + BlockdevOnError werror, | ||
26 | const char *serial, Error **errp); | ||
27 | void scsi_bus_legacy_handle_cmdline(SCSIBus *bus); | ||
28 | void scsi_legacy_handle_cmdline(void); | ||
29 | diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/hw/scsi/scsi-bus.c | ||
32 | +++ b/hw/scsi/scsi-bus.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp) | ||
34 | SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, | ||
35 | int unit, bool removable, int bootindex, | ||
36 | bool share_rw, | ||
37 | + BlockdevOnError rerror, | ||
38 | + BlockdevOnError werror, | ||
39 | const char *serial, Error **errp) | ||
40 | { | 17 | { |
41 | const char *driver; | 18 | CommitBlockJob *s; |
42 | @@ -XXX,XX +XXX,XX @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk, | 19 | - BlockReopenQueue *reopen_queue = NULL; |
43 | object_unparent(OBJECT(dev)); | 20 | int orig_base_flags; |
44 | return NULL; | 21 | BlockDriverState *iter; |
45 | } | 22 | BlockDriverState *commit_top_bs = NULL; |
46 | + | 23 | @@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs, |
47 | + qdev_prop_set_enum(dev, "rerror", rerror); | 24 | /* convert base to r/w, if necessary */ |
48 | + qdev_prop_set_enum(dev, "werror", werror); | 25 | orig_base_flags = bdrv_get_flags(base); |
49 | + | 26 | if (!(orig_base_flags & BDRV_O_RDWR)) { |
50 | object_property_set_bool(OBJECT(dev), true, "realized", &err); | 27 | - reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, |
51 | if (err != NULL) { | 28 | - orig_base_flags | BDRV_O_RDWR); |
52 | error_propagate(errp, err); | 29 | - } |
53 | @@ -XXX,XX +XXX,XX @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus) | 30 | - |
54 | } | 31 | - if (reopen_queue) { |
55 | qemu_opts_loc_restore(dinfo->opts); | 32 | - bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); |
56 | scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo), | 33 | + bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err); |
57 | - unit, false, -1, false, NULL, &error_fatal); | 34 | if (local_err != NULL) { |
58 | + unit, false, -1, false, | 35 | error_propagate(errp, local_err); |
59 | + BLOCKDEV_ON_ERROR_AUTO, | 36 | goto fail; |
60 | + BLOCKDEV_ON_ERROR_AUTO, | ||
61 | + NULL, &error_fatal); | ||
62 | } | ||
63 | loc_pop(&loc); | ||
64 | } | ||
65 | diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/hw/usb/dev-storage.c | ||
68 | +++ b/hw/usb/dev-storage.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) | ||
70 | &usb_msd_scsi_info_storage, NULL); | ||
71 | scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, | ||
72 | s->conf.bootindex, s->conf.share_rw, | ||
73 | + s->conf.rerror, s->conf.werror, | ||
74 | dev->serial, | ||
75 | errp); | ||
76 | blk_unref(blk); | ||
77 | @@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_usb_msd = { | ||
78 | |||
79 | static Property msd_properties[] = { | ||
80 | DEFINE_BLOCK_PROPERTIES(MSDState, conf), | ||
81 | + DEFINE_BLOCK_ERROR_PROPERTIES(MSDState, conf), | ||
82 | DEFINE_PROP_BIT("removable", MSDState, removable, 0, false), | ||
83 | DEFINE_PROP_END_OF_LIST(), | ||
84 | }; | ||
85 | -- | 37 | -- |
86 | 2.13.6 | 38 | 2.13.6 |
87 | 39 | ||
88 | 40 | diff view generated by jsdifflib |
1 | From: Eric Blake <eblake@redhat.com> | 1 | The bdrv_reopen*() implementation doesn't like it if the graph is |
---|---|---|---|
2 | changed between queuing nodes for reopen and actually reopening them | ||
3 | (one of the reasons is that queuing can be recursive). | ||
2 | 4 | ||
3 | We are gradually moving away from sector-based interfaces, towards | 5 | So instead of draining the device only in bdrv_reopen_multiple(), |
4 | byte-based. Make the change for the last few sector-based calls | 6 | require that callers already drained all affected nodes, and assert this |
5 | into the block layer from the replication driver. | 7 | in bdrv_reopen_queue(). |
6 | 8 | ||
7 | Ideally, the replication driver should switch to doing everything | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | byte-based, but that's a more invasive change that requires a | 10 | Reviewed-by: Fam Zheng <famz@redhat.com> |
9 | bit more auditing. | 11 | --- |
12 | block.c | 23 ++++++++++++++++------- | ||
13 | block/replication.c | 6 ++++++ | ||
14 | qemu-io-cmds.c | 3 +++ | ||
15 | 3 files changed, 25 insertions(+), 7 deletions(-) | ||
10 | 16 | ||
11 | Signed-off-by: Eric Blake <eblake@redhat.com> | 17 | diff --git a/block.c b/block.c |
12 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 18 | index XXXXXXX..XXXXXXX 100644 |
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 19 | --- a/block.c |
14 | --- | 20 | +++ b/block.c |
15 | block/replication.c | 14 ++++++++------ | 21 | @@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, |
16 | 1 file changed, 8 insertions(+), 6 deletions(-) | 22 | * returns a pointer to bs_queue, which is either the newly allocated |
17 | 23 | * bs_queue, or the existing bs_queue being used. | |
24 | * | ||
25 | + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). | ||
26 | */ | ||
27 | static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, | ||
28 | BlockDriverState *bs, | ||
29 | @@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, | ||
30 | BdrvChild *child; | ||
31 | QDict *old_options, *explicit_options; | ||
32 | |||
33 | + /* Make sure that the caller remembered to use a drained section. This is | ||
34 | + * important to avoid graph changes between the recursive queuing here and | ||
35 | + * bdrv_reopen_multiple(). */ | ||
36 | + assert(bs->quiesce_counter > 0); | ||
37 | + | ||
38 | if (bs_queue == NULL) { | ||
39 | bs_queue = g_new0(BlockReopenQueue, 1); | ||
40 | QSIMPLEQ_INIT(bs_queue); | ||
41 | @@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, | ||
42 | * If all devices prepare successfully, then the changes are committed | ||
43 | * to all devices. | ||
44 | * | ||
45 | + * All affected nodes must be drained between bdrv_reopen_queue() and | ||
46 | + * bdrv_reopen_multiple(). | ||
47 | */ | ||
48 | int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) | ||
49 | { | ||
50 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er | ||
51 | |||
52 | assert(bs_queue != NULL); | ||
53 | |||
54 | - aio_context_release(ctx); | ||
55 | - bdrv_drain_all_begin(); | ||
56 | - aio_context_acquire(ctx); | ||
57 | - | ||
58 | QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { | ||
59 | + assert(bs_entry->state.bs->quiesce_counter > 0); | ||
60 | if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { | ||
61 | error_propagate(errp, local_err); | ||
62 | goto cleanup; | ||
63 | @@ -XXX,XX +XXX,XX @@ cleanup: | ||
64 | } | ||
65 | g_free(bs_queue); | ||
66 | |||
67 | - bdrv_drain_all_end(); | ||
68 | - | ||
69 | return ret; | ||
70 | } | ||
71 | |||
72 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) | ||
73 | { | ||
74 | int ret = -1; | ||
75 | Error *local_err = NULL; | ||
76 | - BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); | ||
77 | + BlockReopenQueue *queue; | ||
78 | |||
79 | + bdrv_subtree_drained_begin(bs); | ||
80 | + | ||
81 | + queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); | ||
82 | ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); | ||
83 | if (local_err != NULL) { | ||
84 | error_propagate(errp, local_err); | ||
85 | } | ||
86 | + | ||
87 | + bdrv_subtree_drained_end(bs); | ||
88 | + | ||
89 | return ret; | ||
90 | } | ||
91 | |||
18 | diff --git a/block/replication.c b/block/replication.c | 92 | diff --git a/block/replication.c b/block/replication.c |
19 | index XXXXXXX..XXXXXXX 100644 | 93 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/replication.c | 94 | --- a/block/replication.c |
21 | +++ b/block/replication.c | 95 | +++ b/block/replication.c |
22 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs, | 96 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, |
23 | backup_cow_request_begin(&req, child->bs->job, | 97 | new_secondary_flags = s->orig_secondary_flags; |
24 | sector_num * BDRV_SECTOR_SIZE, | ||
25 | remaining_bytes); | ||
26 | - ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, | ||
27 | - qiov); | ||
28 | + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, | ||
29 | + remaining_bytes, qiov, 0); | ||
30 | backup_cow_request_end(&req); | ||
31 | goto out; | ||
32 | } | 98 | } |
33 | 99 | ||
34 | - ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); | 100 | + bdrv_subtree_drained_begin(s->hidden_disk->bs); |
35 | + ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, | 101 | + bdrv_subtree_drained_begin(s->secondary_disk->bs); |
36 | + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); | 102 | + |
37 | out: | 103 | if (orig_hidden_flags != new_hidden_flags) { |
38 | return replication_return_value(s, ret); | 104 | reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, |
105 | new_hidden_flags); | ||
106 | @@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, | ||
107 | reopen_queue, &local_err); | ||
108 | error_propagate(errp, local_err); | ||
109 | } | ||
110 | + | ||
111 | + bdrv_subtree_drained_end(s->hidden_disk->bs); | ||
112 | + bdrv_subtree_drained_end(s->secondary_disk->bs); | ||
39 | } | 113 | } |
40 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, | 114 | |
41 | } | 115 | static void backup_job_cleanup(BlockDriverState *bs) |
42 | 116 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c | |
43 | if (ret == 0) { | 117 | index XXXXXXX..XXXXXXX 100644 |
44 | - ret = bdrv_co_writev(top, sector_num, | 118 | --- a/qemu-io-cmds.c |
45 | - remaining_sectors, qiov); | 119 | +++ b/qemu-io-cmds.c |
46 | + ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE, | 120 | @@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) |
47 | + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); | 121 | opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; |
48 | return replication_return_value(s, ret); | 122 | qemu_opts_reset(&reopen_opts); |
49 | } | 123 | |
50 | 124 | + bdrv_subtree_drained_begin(bs); | |
51 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, | 125 | brq = bdrv_reopen_queue(NULL, bs, opts, flags); |
52 | qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count); | 126 | bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); |
53 | 127 | + bdrv_subtree_drained_end(bs); | |
54 | target = ret ? top : base; | 128 | + |
55 | - ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); | 129 | if (local_err) { |
56 | + ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE, | 130 | error_report_err(local_err); |
57 | + n * BDRV_SECTOR_SIZE, &hd_qiov, 0); | 131 | } else { |
58 | if (ret < 0) { | ||
59 | goto out1; | ||
60 | } | ||
61 | -- | 132 | -- |
62 | 2.13.6 | 133 | 2.13.6 |
63 | 134 | ||
64 | 135 | diff view generated by jsdifflib |