1 | The following changes since commit dd2db39d78431ab5a0b78777afaab3d61e94533e: | 1 | The following changes since commit 8cb41fda78c7ebde0dd248c6afe1d336efb0de50: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/ehabkost-gl/tags/x86-next-pull-request' into staging (2021-06-01 21:23:26 +0100) | 3 | Merge remote-tracking branch 'remotes/philmd/tags/machine-20211101' into staging (2021-11-02 05:53:45 -0400) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to b317006a3f1f04191a7981cef83417cb2477213b: | 9 | for you to fetch changes up to a8951438946d72d74c9bdbdb38fce95aa2973a88: |
10 | 10 | ||
11 | docs/secure-coding-practices: Describe how to use 'null-co' block driver (2021-06-02 14:29:14 +0200) | 11 | block/nvme: Extract nvme_free_queue() from nvme_free_queue_pair() (2021-11-02 15:49:13 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches | 14 | Block layer patches |
15 | 15 | ||
16 | - NBD server: Fix crashes related to switching between AioContexts | 16 | - Fail gracefully when blockdev-snapshot creates loops |
17 | - file-posix: Workaround for discard/write_zeroes on buggy filesystems | 17 | - ide: Fix IDENTIFY DEVICE for disks > 128 GiB |
18 | - Follow-up fixes for the reopen vs. permission changes | 18 | - file-posix: Fix return value translation for AIO discards |
19 | - quorum: Fix error handling for flush | 19 | - file-posix: add 'aio-max-batch' option |
20 | - block-copy: Refactor copy_range handling | 20 | - rbd: implement bdrv_co_block_status |
21 | - docs: Describe how to use 'null-co' block driver | 21 | - Code cleanups and build fixes |
22 | 22 | ||
23 | ---------------------------------------------------------------- | 23 | ---------------------------------------------------------------- |
24 | Lukas Straub (1): | 24 | Ari Sundholm (1): |
25 | block/quorum: Provide .bdrv_co_flush instead of .bdrv_co_flush_to_disk | 25 | block/file-posix: Fix return value translation for AIO discards |
26 | 26 | ||
27 | Philippe Mathieu-Daudé (1): | 27 | Fabrice Fontaine (1): |
28 | docs/secure-coding-practices: Describe how to use 'null-co' block driver | 28 | block/export/fuse.c: fix musl build |
29 | 29 | ||
30 | Sergio Lopez (2): | 30 | Hanna Reitz (1): |
31 | block-backend: add drained_poll | 31 | block-backend: Silence clang -m32 compiler warning |
32 | nbd/server: Use drained block ops to quiesce the server | ||
33 | 32 | ||
34 | Thomas Huth (2): | 33 | Kevin Wolf (1): |
35 | block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS | 34 | block: Fail gracefully when blockdev-snapshot creates loops |
36 | block/file-posix: Try other fallbacks after invalid FALLOC_FL_ZERO_RANGE | ||
37 | 35 | ||
38 | Vladimir Sementsov-Ogievskiy (14): | 36 | Peter Lieven (1): |
39 | qemu-io-cmds: assert that we don't have .perm requested in no-blk case | 37 | block/rbd: implement bdrv_co_block_status |
40 | block/vvfat: child_vvfat_qcow: add .get_parent_aio_context, fix crash | ||
41 | block/vvfat: fix vvfat_child_perm crash | ||
42 | block: consistently use bdrv_is_read_only() | ||
43 | block: drop BlockDriverState::read_only | ||
44 | block: drop BlockBackendRootState::read_only | ||
45 | block: document child argument of bdrv_attach_child_common() | ||
46 | block-backend: improve blk_root_get_parent_desc() | ||
47 | block: improve bdrv_child_get_parent_desc() | ||
48 | block/vvfat: inherit child_vvfat_qcow from child_of_bds | ||
49 | block: simplify bdrv_child_user_desc() | ||
50 | block: improve permission conflict error message | ||
51 | block-copy: fix block_copy_task_entry() progress update | ||
52 | block-copy: refactor copy_range handling | ||
53 | 38 | ||
54 | docs/devel/secure-coding-practices.rst | 9 ++++ | 39 | Philippe Mathieu-Daudé (3): |
55 | include/block/block.h | 1 + | 40 | block/nvme: Automatically free qemu_memalign() with QEMU_AUTO_VFREE |
56 | include/block/block_int.h | 2 - | 41 | block/nvme: Display CQ/SQ pointer in nvme_free_queue_pair() |
57 | include/sysemu/block-backend.h | 4 ++ | 42 | block/nvme: Extract nvme_free_queue() from nvme_free_queue_pair() |
58 | block.c | 82 ++++++++++++++++++++-------------- | 43 | |
59 | block/block-backend.c | 26 +++++------ | 44 | Samuel Thibault (1): |
60 | block/block-copy.c | 80 ++++++++++++++++++++++----------- | 45 | ide: Cap LBA28 capacity announcement to 2^28-1 |
61 | block/commit.c | 2 +- | 46 | |
62 | block/file-posix.c | 29 ++++++++---- | 47 | Stefano Garzarella (3): |
63 | block/io.c | 4 +- | 48 | file-posix: add `aio-max-batch` option |
64 | block/qapi.c | 2 +- | 49 | linux-aio: add `dev_max_batch` parameter to laio_co_submit() |
65 | block/qcow2-snapshot.c | 2 +- | 50 | linux-aio: add `dev_max_batch` parameter to laio_io_unplug() |
66 | block/qcow2.c | 5 +-- | 51 | |
67 | block/quorum.c | 2 +- | 52 | qapi/block-core.json | 7 +++ |
68 | block/snapshot.c | 2 +- | 53 | include/block/raw-aio.h | 6 ++- |
69 | block/vhdx-log.c | 2 +- | 54 | block.c | 10 ++++ |
70 | block/vvfat.c | 14 +++--- | 55 | block/block-backend.c | 2 +- |
71 | blockdev.c | 3 +- | 56 | block/export/fuse.c | 4 ++ |
72 | nbd/server.c | 82 +++++++++++++++++++++++++--------- | 57 | block/file-posix.c | 18 ++++++-- |
73 | qemu-io-cmds.c | 14 +++++- | 58 | block/linux-aio.c | 38 ++++++++++----- |
74 | tests/unit/test-block-iothread.c | 6 --- | 59 | block/nvme.c | 22 +++++---- |
75 | tests/qemu-iotests/283.out | 2 +- | 60 | block/rbd.c | 112 +++++++++++++++++++++++++++++++++++++++++++++ |
76 | tests/qemu-iotests/307.out | 2 +- | 61 | hw/ide/core.c | 8 +++- |
77 | tests/qemu-iotests/tests/qsd-jobs.out | 2 +- | 62 | block/trace-events | 2 +- |
78 | 24 files changed, 241 insertions(+), 138 deletions(-) | 63 | tests/qemu-iotests/085 | 31 ++++++++++++- |
64 | tests/qemu-iotests/085.out | 33 +++++++++++-- | ||
65 | 13 files changed, 258 insertions(+), 35 deletions(-) | ||
79 | 66 | ||
80 | 67 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Lukas Straub <lukasstraub2@web.de> | ||
2 | 1 | ||
3 | The quorum block driver uses a custom flush callback to handle the | ||
4 | case when some children return io errors. In that case it still | ||
5 | returns success if enough children are healthy. | ||
6 | However, it provides it as the .bdrv_co_flush_to_disk callback, not | ||
7 | as .bdrv_co_flush. This causes the block layer to do it's own | ||
8 | generic flushing for the children instead, which doesn't handle | ||
9 | errors properly. | ||
10 | |||
11 | Fix this by providing .bdrv_co_flush instead of | ||
12 | .bdrv_co_flush_to_disk so the block layer uses the custom flush | ||
13 | callback. | ||
14 | |||
15 | Signed-off-by: Lukas Straub <lukasstraub2@web.de> | ||
16 | Reported-by: Minghao Yuan <meeho@qq.com> | ||
17 | Message-Id: <20210518134214.11ccf05f@gecko.fritz.box> | ||
18 | Tested-by: Zhang Chen <chen.zhang@intel.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
20 | --- | ||
21 | block/quorum.c | 2 +- | ||
22 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
23 | |||
24 | diff --git a/block/quorum.c b/block/quorum.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/block/quorum.c | ||
27 | +++ b/block/quorum.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_quorum = { | ||
29 | .bdrv_dirname = quorum_dirname, | ||
30 | .bdrv_co_block_status = quorum_co_block_status, | ||
31 | |||
32 | - .bdrv_co_flush_to_disk = quorum_co_flush, | ||
33 | + .bdrv_co_flush = quorum_co_flush, | ||
34 | |||
35 | .bdrv_getlength = quorum_getlength, | ||
36 | |||
37 | -- | ||
38 | 2.30.2 | ||
39 | |||
40 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Coverity thinks blk may be NULL. It's a false-positive, as described in | ||
4 | a new comment. | ||
5 | |||
6 | Fixes: Coverity CID 1453194 | ||
7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Message-Id: <20210519090532.3753-1-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | ||
11 | qemu-io-cmds.c | 14 ++++++++++++-- | ||
12 | 1 file changed, 12 insertions(+), 2 deletions(-) | ||
13 | |||
14 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/qemu-io-cmds.c | ||
17 | +++ b/qemu-io-cmds.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, | ||
19 | return -EINVAL; | ||
20 | } | ||
21 | |||
22 | - /* Request additional permissions if necessary for this command. The caller | ||
23 | + /* | ||
24 | + * Request additional permissions if necessary for this command. The caller | ||
25 | * is responsible for restoring the original permissions afterwards if this | ||
26 | - * is what it wants. */ | ||
27 | + * is what it wants. | ||
28 | + * | ||
29 | + * Coverity thinks that blk may be NULL in the following if condition. It's | ||
30 | + * not so: in init_check_command() we fail if blk is NULL for command with | ||
31 | + * both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in | ||
32 | + * qemuio_add_command() we assert that command with non-zero .perm field | ||
33 | + * doesn't set this flags. So, the following assertion is to silence | ||
34 | + * Coverity: | ||
35 | + */ | ||
36 | + assert(blk || !ct->perm); | ||
37 | if (ct->perm && blk_is_available(blk)) { | ||
38 | uint64_t orig_perm, orig_shared_perm; | ||
39 | blk_get_perm(blk, &orig_perm, &orig_shared_perm); | ||
40 | -- | ||
41 | 2.30.2 | ||
42 | |||
43 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Commit 3ca1f3225727419ba573673b744edac10904276f | ||
4 | "block: BdrvChildClass: add .get_parent_aio_context handler" introduced | ||
5 | new handler and commit 228ca37e12f97788e05bd0c92f89b3e5e4019607 | ||
6 | "block: drop ctx argument from bdrv_root_attach_child" made a generic | ||
7 | use of it. But 3ca1f3225727419ba573673b744edac10904276f didn't update | ||
8 | child_vvfat_qcow. Fix that. | ||
9 | |||
10 | Before that fix the command | ||
11 | |||
12 | ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ | ||
13 | -drive file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none | ||
14 | |||
15 | crashes: | ||
16 | |||
17 | 1 bdrv_child_get_parent_aio_context (c=0x559d62426d20) | ||
18 | at ../block.c:1440 | ||
19 | 2 bdrv_attach_child_common | ||
20 | (child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target", | ||
21 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
22 | perm=3, shared_perm=4, opaque=0x559d62445690, | ||
23 | child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) | ||
24 | at ../block.c:2795 | ||
25 | 3 bdrv_attach_child_noperm | ||
26 | (parent_bs=0x559d62445690, child_bs=0x559d62468190, | ||
27 | child_name=0x559d606f9e3d "write-target", | ||
28 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
29 | child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) at | ||
30 | ../block.c:2855 | ||
31 | 4 bdrv_attach_child | ||
32 | (parent_bs=0x559d62445690, child_bs=0x559d62468190, | ||
33 | child_name=0x559d606f9e3d "write-target", | ||
34 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
35 | errp=0x7ffc74c2ae60) at ../block.c:2953 | ||
36 | 5 bdrv_open_child | ||
37 | (filename=0x559d62464b80 "/var/tmp/vl.h3TIS4", | ||
38 | options=0x559d6246ec20, bdref_key=0x559d606f9e3d "write-target", | ||
39 | parent=0x559d62445690, child_class=0x559d60c58d20 | ||
40 | <child_vvfat_qcow>, child_role=3, allow_none=false, | ||
41 | errp=0x7ffc74c2ae60) at ../block.c:3351 | ||
42 | 6 enable_write_target (bs=0x559d62445690, errp=0x7ffc74c2ae60) at | ||
43 | ../block/vvfat.c:3176 | ||
44 | 7 vvfat_open (bs=0x559d62445690, options=0x559d6244adb0, flags=155650, | ||
45 | errp=0x7ffc74c2ae60) at ../block/vvfat.c:1236 | ||
46 | 8 bdrv_open_driver (bs=0x559d62445690, drv=0x559d60d4f7e0 | ||
47 | <bdrv_vvfat>, node_name=0x0, | ||
48 | options=0x559d6244adb0, open_flags=155650, | ||
49 | errp=0x7ffc74c2af70) at ../block.c:1557 | ||
50 | 9 bdrv_open_common (bs=0x559d62445690, file=0x0, | ||
51 | options=0x559d6244adb0, errp=0x7ffc74c2af70) at | ||
52 | ... | ||
53 | |||
54 | (gdb) fr 1 | ||
55 | #1 0x0000559d603ea3bf in bdrv_child_get_parent_aio_context | ||
56 | (c=0x559d62426d20) at ../block.c:1440 | ||
57 | 1440 return c->klass->get_parent_aio_context(c); | ||
58 | (gdb) p c->klass | ||
59 | $1 = (const BdrvChildClass *) 0x559d60c58d20 <child_vvfat_qcow> | ||
60 | (gdb) p c->klass->get_parent_aio_context | ||
61 | $2 = (AioContext *(*)(BdrvChild *)) 0x0 | ||
62 | |||
63 | Fixes: 3ca1f3225727419ba573673b744edac10904276f | ||
64 | Fixes: 228ca37e12f97788e05bd0c92f89b3e5e4019607 | ||
65 | Reported-by: John Arbuckle <programmingkidx@gmail.com> | ||
66 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
67 | Message-Id: <20210524101257.119377-2-vsementsov@virtuozzo.com> | ||
68 | Tested-by: John Arbuckle <programmingkidx@gmail.com> | ||
69 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
70 | --- | ||
71 | include/block/block.h | 1 + | ||
72 | block.c | 4 ++-- | ||
73 | block/vvfat.c | 1 + | ||
74 | 3 files changed, 4 insertions(+), 2 deletions(-) | ||
75 | |||
76 | diff --git a/include/block/block.h b/include/block/block.h | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/include/block/block.h | ||
79 | +++ b/include/block/block.h | ||
80 | @@ -XXX,XX +XXX,XX @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, | ||
81 | bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, | ||
82 | GSList **ignore, Error **errp); | ||
83 | AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); | ||
84 | +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); | ||
85 | |||
86 | int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); | ||
87 | int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); | ||
88 | diff --git a/block.c b/block.c | ||
89 | index XXXXXXX..XXXXXXX 100644 | ||
90 | --- a/block.c | ||
91 | +++ b/block.c | ||
92 | @@ -XXX,XX +XXX,XX @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | -static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c) | ||
97 | +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) | ||
98 | { | ||
99 | BlockDriverState *bs = c->opaque; | ||
100 | |||
101 | @@ -XXX,XX +XXX,XX @@ const BdrvChildClass child_of_bds = { | ||
102 | .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, | ||
103 | .set_aio_ctx = bdrv_child_cb_set_aio_ctx, | ||
104 | .update_filename = bdrv_child_cb_update_filename, | ||
105 | - .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context, | ||
106 | + .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
107 | }; | ||
108 | |||
109 | AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) | ||
110 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/block/vvfat.c | ||
113 | +++ b/block/vvfat.c | ||
114 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, | ||
115 | static const BdrvChildClass child_vvfat_qcow = { | ||
116 | .parent_is_bds = true, | ||
117 | .inherit_options = vvfat_qcow_options, | ||
118 | + .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
119 | }; | ||
120 | |||
121 | static int enable_write_target(BlockDriverState *bs, Error **errp) | ||
122 | -- | ||
123 | 2.30.2 | ||
124 | |||
125 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | It's wrong to rely on s->qcow in vvfat_child_perm, as on permission | ||
4 | update during bdrv_open_child() call this field is not set yet. | ||
5 | |||
6 | Still prior to aa5a04c7db27eea6b36de32f241b155f0d9ce34d, it didn't | ||
7 | crash, as bdrv_open_child passed NULL as child to bdrv_child_perm(), | ||
8 | and NULL was equal to NULL in assertion (still, it was bad guarantee | ||
9 | for child being s->qcow, not backing :). | ||
10 | |||
11 | Since aa5a04c7db27eea6b36de32f241b155f0d9ce34d | ||
12 | "add bdrv_attach_child_noperm" bdrv_refresh_perms called on parent node | ||
13 | when attaching child, and new correct child pointer is passed to | ||
14 | .bdrv_child_perm. Still, s->qcow is NULL at the moment. Let's rely only | ||
15 | on role instead. | ||
16 | |||
17 | Without that fix, | ||
18 | ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ | ||
19 | -drive \ | ||
20 | file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none | ||
21 | |||
22 | crashes: | ||
23 | (gdb) bt | ||
24 | 0 raise () at /lib64/libc.so.6 | ||
25 | 1 abort () at /lib64/libc.so.6 | ||
26 | 2 _nl_load_domain.cold () at /lib64/libc.so.6 | ||
27 | 3 annobin_assert.c_end () at /lib64/libc.so.6 | ||
28 | 4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, | ||
29 | reopen_queue=0x0, perm=0, shared=31, | ||
30 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at | ||
31 | ../block/vvfat.c:3214 | ||
32 | 5 bdrv_child_perm (bs=0x559186f3d690, child_bs=0x559186f60190, | ||
33 | c=0x559186f1ed20, role=3, reopen_queue=0x0, | ||
34 | parent_perm=0, parent_shared=31, | ||
35 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) | ||
36 | at ../block.c:2094 | ||
37 | 6 bdrv_node_refresh_perm (bs=0x559186f3d690, q=0x0, | ||
38 | tran=0x559186f65850, errp=0x7ffe56f28530) at | ||
39 | ../block.c:2336 | ||
40 | 7 bdrv_list_refresh_perms (list=0x559186db5b90 = {...}, q=0x0, | ||
41 | tran=0x559186f65850, errp=0x7ffe56f28530) | ||
42 | at ../block.c:2358 | ||
43 | 8 bdrv_refresh_perms (bs=0x559186f3d690, errp=0x7ffe56f28530) at | ||
44 | ../block.c:2419 | ||
45 | 9 bdrv_attach_child | ||
46 | (parent_bs=0x559186f3d690, child_bs=0x559186f60190, | ||
47 | child_name=0x559184d83e3d "write-target", | ||
48 | child_class=0x5591852f3b00 <child_vvfat_qcow>, child_role=3, | ||
49 | errp=0x7ffe56f28530) at ../block.c:2959 | ||
50 | 10 bdrv_open_child | ||
51 | (filename=0x559186f5cb80 "/var/tmp/vl.7WYmFU", | ||
52 | options=0x559186f66c20, bdref_key=0x559184d83e3d "write-target", | ||
53 | parent=0x559186f3d690, child_class=0x5591852f3b00 | ||
54 | <child_vvfat_qcow>, child_role=3, allow_none=false, | ||
55 | errp=0x7ffe56f28530) at ../block.c:3351 | ||
56 | 11 enable_write_target (bs=0x559186f3d690, errp=0x7ffe56f28530) at | ||
57 | ../block/vvfat.c:3177 | ||
58 | 12 vvfat_open (bs=0x559186f3d690, options=0x559186f42db0, flags=155650, | ||
59 | errp=0x7ffe56f28530) at ../block/vvfat.c:1236 | ||
60 | 13 bdrv_open_driver (bs=0x559186f3d690, drv=0x5591853d97e0 | ||
61 | <bdrv_vvfat>, node_name=0x0, | ||
62 | options=0x559186f42db0, open_flags=155650, | ||
63 | errp=0x7ffe56f28640) at ../block.c:1557 | ||
64 | 14 bdrv_open_common (bs=0x559186f3d690, file=0x0, | ||
65 | options=0x559186f42db0, errp=0x7ffe56f28640) at | ||
66 | ../block.c:1833 | ||
67 | ... | ||
68 | |||
69 | (gdb) fr 4 | ||
70 | #4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, | ||
71 | reopen_queue=0x0, perm=0, shared=31, | ||
72 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at | ||
73 | ../block/vvfat.c:3214 | ||
74 | 3214 assert(c == s->qcow || (role & BDRV_CHILD_COW)); | ||
75 | (gdb) p role | ||
76 | $1 = 3 # BDRV_CHILD_DATA | BDRV_CHILD_METADATA | ||
77 | (gdb) p *c | ||
78 | $2 = {bs = 0x559186f60190, name = 0x559186f669d0 "write-target", klass | ||
79 | = 0x5591852f3b00 <child_vvfat_qcow>, role = 3, opaque = | ||
80 | 0x559186f3d690, perm = 3, shared_perm = 4, frozen = false, | ||
81 | parent_quiesce_counter = 0, next = {le_next = 0x0, le_prev = | ||
82 | 0x559186f41818}, next_parent = {le_next = 0x0, le_prev = | ||
83 | 0x559186f64320}} | ||
84 | (gdb) p s->qcow | ||
85 | $3 = (BdrvChild *) 0x0 | ||
86 | |||
87 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
88 | Message-Id: <20210524101257.119377-3-vsementsov@virtuozzo.com> | ||
89 | Tested-by: John Arbuckle <programmingkidx@gmail.com> | ||
90 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
91 | --- | ||
92 | block/vvfat.c | 7 ++----- | ||
93 | 1 file changed, 2 insertions(+), 5 deletions(-) | ||
94 | |||
95 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/block/vvfat.c | ||
98 | +++ b/block/vvfat.c | ||
99 | @@ -XXX,XX +XXX,XX @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, | ||
100 | uint64_t perm, uint64_t shared, | ||
101 | uint64_t *nperm, uint64_t *nshared) | ||
102 | { | ||
103 | - BDRVVVFATState *s = bs->opaque; | ||
104 | - | ||
105 | - assert(c == s->qcow || (role & BDRV_CHILD_COW)); | ||
106 | - | ||
107 | - if (c == s->qcow) { | ||
108 | + if (role & BDRV_CHILD_DATA) { | ||
109 | /* This is a private node, nobody should try to attach to it */ | ||
110 | *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; | ||
111 | *nshared = BLK_PERM_WRITE_UNCHANGED; | ||
112 | } else { | ||
113 | + assert(role & BDRV_CHILD_COW); | ||
114 | /* The backing file is there so 'commit' can use it. vvfat doesn't | ||
115 | * access it in any way. */ | ||
116 | *nperm = 0; | ||
117 | -- | ||
118 | 2.30.2 | ||
119 | |||
120 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | It's better to use accessor function instead of bs->read_only directly. | ||
4 | In some places use bdrv_is_writable() instead of | ||
5 | checking both BDRV_O_RDWR set and BDRV_O_INACTIVE not set. | ||
6 | |||
7 | In bdrv_open_common() it's a bit strange to add one more variable, but | ||
8 | we are going to drop bs->read_only in the next patch, so new ro local | ||
9 | variable substitutes it here. | ||
10 | |||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
12 | Message-Id: <20210527154056.70294-2-vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
14 | --- | ||
15 | block.c | 11 +++++++---- | ||
16 | block/block-backend.c | 2 +- | ||
17 | block/commit.c | 2 +- | ||
18 | block/io.c | 4 ++-- | ||
19 | block/qapi.c | 2 +- | ||
20 | block/qcow2-snapshot.c | 2 +- | ||
21 | block/qcow2.c | 5 ++--- | ||
22 | block/snapshot.c | 2 +- | ||
23 | block/vhdx-log.c | 2 +- | ||
24 | 9 files changed, 17 insertions(+), 15 deletions(-) | ||
25 | |||
26 | diff --git a/block.c b/block.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/block.c | ||
29 | +++ b/block.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
31 | QemuOpts *opts; | ||
32 | BlockDriver *drv; | ||
33 | Error *local_err = NULL; | ||
34 | + bool ro; | ||
35 | |||
36 | assert(bs->file == NULL); | ||
37 | assert(options != NULL && bs->options != options); | ||
38 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
39 | |||
40 | bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
41 | |||
42 | - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { | ||
43 | - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { | ||
44 | + ro = bdrv_is_read_only(bs); | ||
45 | + | ||
46 | + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { | ||
47 | + if (!ro && bdrv_is_whitelisted(drv, true)) { | ||
48 | ret = bdrv_apply_auto_read_only(bs, NULL, NULL); | ||
49 | } else { | ||
50 | ret = -ENOTSUP; | ||
51 | } | ||
52 | if (ret < 0) { | ||
53 | error_setg(errp, | ||
54 | - !bs->read_only && bdrv_is_whitelisted(drv, true) | ||
55 | + !ro && bdrv_is_whitelisted(drv, true) | ||
56 | ? "Driver '%s' can only be used for read-only devices" | ||
57 | : "Driver '%s' is not whitelisted", | ||
58 | drv->format_name); | ||
59 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
60 | assert(qatomic_read(&bs->copy_on_read) == 0); | ||
61 | |||
62 | if (bs->open_flags & BDRV_O_COPY_ON_READ) { | ||
63 | - if (!bs->read_only) { | ||
64 | + if (!ro) { | ||
65 | bdrv_enable_copy_on_read(bs); | ||
66 | } else { | ||
67 | error_setg(errp, "Can't use copy-on-read on read-only device"); | ||
68 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/block/block-backend.c | ||
71 | +++ b/block/block-backend.c | ||
72 | @@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk) | ||
73 | assert(blk->root); | ||
74 | |||
75 | blk->root_state.open_flags = blk->root->bs->open_flags; | ||
76 | - blk->root_state.read_only = blk->root->bs->read_only; | ||
77 | + blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); | ||
78 | blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; | ||
79 | } | ||
80 | |||
81 | diff --git a/block/commit.c b/block/commit.c | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/block/commit.c | ||
84 | +++ b/block/commit.c | ||
85 | @@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs) | ||
86 | return -EBUSY; | ||
87 | } | ||
88 | |||
89 | - ro = backing_file_bs->read_only; | ||
90 | + ro = bdrv_is_read_only(backing_file_bs); | ||
91 | |||
92 | if (ro) { | ||
93 | if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { | ||
94 | diff --git a/block/io.c b/block/io.c | ||
95 | index XXXXXXX..XXXXXXX 100644 | ||
96 | --- a/block/io.c | ||
97 | +++ b/block/io.c | ||
98 | @@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, | ||
99 | |||
100 | bdrv_check_request(offset, bytes, &error_abort); | ||
101 | |||
102 | - if (bs->read_only) { | ||
103 | + if (bdrv_is_read_only(bs)) { | ||
104 | return -EPERM; | ||
105 | } | ||
106 | |||
107 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
108 | if (new_bytes) { | ||
109 | bdrv_make_request_serialising(&req, 1); | ||
110 | } | ||
111 | - if (bs->read_only) { | ||
112 | + if (bdrv_is_read_only(bs)) { | ||
113 | error_setg(errp, "Image is read-only"); | ||
114 | ret = -EACCES; | ||
115 | goto out; | ||
116 | diff --git a/block/qapi.c b/block/qapi.c | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/block/qapi.c | ||
119 | +++ b/block/qapi.c | ||
120 | @@ -XXX,XX +XXX,XX @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, | ||
121 | |||
122 | info = g_malloc0(sizeof(*info)); | ||
123 | info->file = g_strdup(bs->filename); | ||
124 | - info->ro = bs->read_only; | ||
125 | + info->ro = bdrv_is_read_only(bs); | ||
126 | info->drv = g_strdup(bs->drv->format_name); | ||
127 | info->encrypted = bs->encrypted; | ||
128 | |||
129 | diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/block/qcow2-snapshot.c | ||
132 | +++ b/block/qcow2-snapshot.c | ||
133 | @@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, | ||
134 | int new_l1_bytes; | ||
135 | int ret; | ||
136 | |||
137 | - assert(bs->read_only); | ||
138 | + assert(bdrv_is_read_only(bs)); | ||
139 | |||
140 | /* Search the snapshot */ | ||
141 | snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); | ||
142 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
143 | index XXXXXXX..XXXXXXX 100644 | ||
144 | --- a/block/qcow2.c | ||
145 | +++ b/block/qcow2.c | ||
146 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
147 | |||
148 | /* Clear unknown autoclear feature bits */ | ||
149 | update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; | ||
150 | - update_header = | ||
151 | - update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); | ||
152 | + update_header = update_header && bdrv_is_writable(bs); | ||
153 | if (update_header) { | ||
154 | s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; | ||
155 | } | ||
156 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
157 | bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; | ||
158 | |||
159 | /* Repair image if dirty */ | ||
160 | - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && | ||
161 | + if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && | ||
162 | (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { | ||
163 | BdrvCheckResult result = {0}; | ||
164 | |||
165 | diff --git a/block/snapshot.c b/block/snapshot.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/block/snapshot.c | ||
168 | +++ b/block/snapshot.c | ||
169 | @@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, | ||
170 | error_setg(errp, "snapshot_id and name are both NULL"); | ||
171 | return -EINVAL; | ||
172 | } | ||
173 | - if (!bs->read_only) { | ||
174 | + if (!bdrv_is_read_only(bs)) { | ||
175 | error_setg(errp, "Device is not readonly"); | ||
176 | return -EINVAL; | ||
177 | } | ||
178 | diff --git a/block/vhdx-log.c b/block/vhdx-log.c | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/block/vhdx-log.c | ||
181 | +++ b/block/vhdx-log.c | ||
182 | @@ -XXX,XX +XXX,XX @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, | ||
183 | } | ||
184 | |||
185 | if (logs.valid) { | ||
186 | - if (bs->read_only) { | ||
187 | + if (bdrv_is_read_only(bs)) { | ||
188 | bdrv_refresh_filename(bs); | ||
189 | ret = -EPERM; | ||
190 | error_setg(errp, | ||
191 | -- | ||
192 | 2.30.2 | ||
193 | |||
194 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | This variable is just a cache for !(bs->open_flags & BDRV_O_RDWR), | ||
4 | which we have to synchronize everywhere. Let's just drop it and | ||
5 | consistently use bdrv_is_read_only(). | ||
6 | |||
7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Message-Id: <20210527154056.70294-3-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | ||
11 | include/block/block_int.h | 1 - | ||
12 | block.c | 7 +------ | ||
13 | tests/unit/test-block-iothread.c | 6 ------ | ||
14 | 3 files changed, 1 insertion(+), 13 deletions(-) | ||
15 | |||
16 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/include/block/block_int.h | ||
19 | +++ b/include/block/block_int.h | ||
20 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
21 | * locking needed during I/O... | ||
22 | */ | ||
23 | int open_flags; /* flags used to open the file, re-used for re-open */ | ||
24 | - bool read_only; /* if true, the media is read only */ | ||
25 | bool encrypted; /* if true, the media is encrypted */ | ||
26 | bool sg; /* if true, the device is a /dev/sg* */ | ||
27 | bool probed; /* if true, format was probed rather than specified */ | ||
28 | diff --git a/block.c b/block.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/block.c | ||
31 | +++ b/block.c | ||
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, | ||
33 | * image is inactivated. */ | ||
34 | bool bdrv_is_read_only(BlockDriverState *bs) | ||
35 | { | ||
36 | - return bs->read_only; | ||
37 | + return !(bs->open_flags & BDRV_O_RDWR); | ||
38 | } | ||
39 | |||
40 | int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, | ||
41 | @@ -XXX,XX +XXX,XX @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, | ||
42 | goto fail; | ||
43 | } | ||
44 | |||
45 | - bs->read_only = true; | ||
46 | bs->open_flags &= ~BDRV_O_RDWR; | ||
47 | |||
48 | return 0; | ||
49 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, | ||
50 | } | ||
51 | |||
52 | bs->drv = drv; | ||
53 | - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
54 | bs->opaque = g_malloc0(drv->instance_size); | ||
55 | |||
56 | if (drv->bdrv_file_open) { | ||
57 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
58 | trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, | ||
59 | drv->format_name); | ||
60 | |||
61 | - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
62 | - | ||
63 | ro = bdrv_is_read_only(bs); | ||
64 | |||
65 | if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { | ||
66 | @@ -XXX,XX +XXX,XX @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) | ||
67 | bs->explicit_options = reopen_state->explicit_options; | ||
68 | bs->options = reopen_state->options; | ||
69 | bs->open_flags = reopen_state->flags; | ||
70 | - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); | ||
71 | bs->detect_zeroes = reopen_state->detect_zeroes; | ||
72 | |||
73 | if (reopen_state->replace_backing_bs) { | ||
74 | diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tests/unit/test-block-iothread.c | ||
77 | +++ b/tests/unit/test-block-iothread.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c) | ||
79 | g_assert_cmpint(ret, ==, -EINVAL); | ||
80 | |||
81 | /* Error: Read-only image */ | ||
82 | - c->bs->read_only = true; | ||
83 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
84 | |||
85 | ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
86 | g_assert_cmpint(ret, ==, -EACCES); | ||
87 | |||
88 | - c->bs->read_only = false; | ||
89 | c->bs->open_flags |= BDRV_O_RDWR; | ||
90 | } | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_flush(BdrvChild *c) | ||
93 | g_assert_cmpint(ret, ==, 0); | ||
94 | |||
95 | /* Early success: Read-only image */ | ||
96 | - c->bs->read_only = true; | ||
97 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
98 | |||
99 | ret = bdrv_flush(c->bs); | ||
100 | g_assert_cmpint(ret, ==, 0); | ||
101 | |||
102 | - c->bs->read_only = false; | ||
103 | c->bs->open_flags |= BDRV_O_RDWR; | ||
104 | } | ||
105 | |||
106 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_blk_flush(BlockBackend *blk) | ||
107 | g_assert_cmpint(ret, ==, 0); | ||
108 | |||
109 | /* Early success: Read-only image */ | ||
110 | - bs->read_only = true; | ||
111 | bs->open_flags &= ~BDRV_O_RDWR; | ||
112 | |||
113 | ret = blk_flush(blk); | ||
114 | g_assert_cmpint(ret, ==, 0); | ||
115 | |||
116 | - bs->read_only = false; | ||
117 | bs->open_flags |= BDRV_O_RDWR; | ||
118 | } | ||
119 | |||
120 | -- | ||
121 | 2.30.2 | ||
122 | |||
123 | diff view generated by jsdifflib |
1 | From: Thomas Huth <thuth@redhat.com> | 1 | From: Ari Sundholm <ari@tuxera.com> |
---|---|---|---|
2 | 2 | ||
3 | A customer reported that running | 3 | AIO discards regressed as a result of the following commit: |
4 | 0dfc7af2 block/file-posix: Optimize for macOS | ||
4 | 5 | ||
5 | qemu-img convert -t none -O qcow2 -f qcow2 input.qcow2 output.qcow2 | 6 | When trying to run blkdiscard within a Linux guest, the request would |
7 | fail, with some errors in dmesg: | ||
6 | 8 | ||
7 | fails for them with the following error message when the images are | 9 | ---- [ snip ] ---- |
8 | stored on a GPFS file system : | 10 | [ 4.010070] sd 2:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_OK |
11 | driverbyte=DRIVER_SENSE | ||
12 | [ 4.011061] sd 2:0:0:0: [sda] tag#0 Sense Key : Aborted Command | ||
13 | [current] | ||
14 | [ 4.011061] sd 2:0:0:0: [sda] tag#0 Add. Sense: I/O process | ||
15 | terminated | ||
16 | [ 4.011061] sd 2:0:0:0: [sda] tag#0 CDB: Unmap/Read sub-channel 42 | ||
17 | 00 00 00 00 00 00 00 18 00 | ||
18 | [ 4.011061] blk_update_request: I/O error, dev sda, sector 0 | ||
19 | ---- [ snip ] ---- | ||
9 | 20 | ||
10 | qemu-img: error while writing sector 0: Invalid argument | 21 | This turns out to be a result of a flaw in changes to the error value |
22 | translation logic in handle_aiocb_discard(). The default return value | ||
23 | may be left untranslated in some configurations, and the wrong variable | ||
24 | is used in one translation. | ||
11 | 25 | ||
12 | After analyzing the strace output, it seems like the problem is in | 26 | Fix both issues. |
13 | handle_aiocb_write_zeroes(): The call to fallocate(FALLOC_FL_PUNCH_HOLE) | ||
14 | returns EINVAL, which can apparently happen if the file system has | ||
15 | a different idea of the granularity of the operation. It's arguably | ||
16 | a bug in GPFS, since the PUNCH_HOLE mode should not result in EINVAL | ||
17 | according to the man-page of fallocate(), but the file system is out | ||
18 | there in production and so we have to deal with it. In commit 294682cc3a | ||
19 | ("block: workaround for unaligned byte range in fallocate()") we also | ||
20 | already applied the a work-around for the same problem to the earlier | ||
21 | fallocate(FALLOC_FL_ZERO_RANGE) call, so do it now similar with the | ||
22 | PUNCH_HOLE call. But instead of silently catching and returning | ||
23 | -ENOTSUP (which causes the caller to fall back to writing zeroes), | ||
24 | let's rather inform the user once about the buggy file system and | ||
25 | try the other fallback instead. | ||
26 | 27 | ||
27 | Signed-off-by: Thomas Huth <thuth@redhat.com> | 28 | Fixes: 0dfc7af2b28 ("block/file-posix: Optimize for macOS") |
28 | Message-Id: <20210527172020.847617-2-thuth@redhat.com> | 29 | Cc: qemu-stable@nongnu.org |
30 | Signed-off-by: Ari Sundholm <ari@tuxera.com> | ||
31 | Signed-off-by: Emil Karlson <jkarlson@tuxera.com> | ||
32 | Reviewed-by: Akihiko Odaki <akihiko.odaki@gmail.com> | ||
33 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
34 | Message-Id: <20211019110954.4170931-1-ari@tuxera.com> | ||
29 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 35 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
30 | --- | 36 | --- |
31 | block/file-posix.c | 11 +++++++++++ | 37 | block/file-posix.c | 4 ++-- |
32 | 1 file changed, 11 insertions(+) | 38 | 1 file changed, 2 insertions(+), 2 deletions(-) |
33 | 39 | ||
34 | diff --git a/block/file-posix.c b/block/file-posix.c | 40 | diff --git a/block/file-posix.c b/block/file-posix.c |
35 | index XXXXXXX..XXXXXXX 100644 | 41 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/block/file-posix.c | 42 | --- a/block/file-posix.c |
37 | +++ b/block/file-posix.c | 43 | +++ b/block/file-posix.c |
38 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque) | 44 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_copy_range(void *opaque) |
39 | return ret; | 45 | static int handle_aiocb_discard(void *opaque) |
40 | } | 46 | { |
41 | s->has_fallocate = false; | 47 | RawPosixAIOData *aiocb = opaque; |
42 | + } else if (ret == -EINVAL) { | 48 | - int ret = -EOPNOTSUPP; |
43 | + /* | 49 | + int ret = -ENOTSUP; |
44 | + * Some file systems like older versions of GPFS do not like un- | 50 | BDRVRawState *s = aiocb->bs->opaque; |
45 | + * aligned byte ranges, and return EINVAL in such a case, though | 51 | |
46 | + * they should not do it according to the man-page of fallocate(). | 52 | if (!s->has_discard) { |
47 | + * Warn about the bad filesystem and try the final fallback instead. | 53 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque) |
48 | + */ | 54 | #ifdef CONFIG_FALLOCATE_PUNCH_HOLE |
49 | + warn_report_once("Your file system is misbehaving: " | 55 | ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, |
50 | + "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. " | 56 | aiocb->aio_offset, aiocb->aio_nbytes); |
51 | + "Please report this bug to your file sytem " | 57 | - ret = translate_err(-errno); |
52 | + "vendor."); | 58 | + ret = translate_err(ret); |
53 | } else if (ret != -ENOTSUP) { | 59 | #elif defined(__APPLE__) && (__MACH__) |
54 | return ret; | 60 | fpunchhole_t fpunchhole; |
55 | } else { | 61 | fpunchhole.fp_flags = 0; |
56 | -- | 62 | -- |
57 | 2.30.2 | 63 | 2.31.1 |
58 | 64 | ||
59 | 65 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | Using blockdev-snapshot to append a node as an overlay to itself, or to |
---|---|---|---|
2 | any of its parents, causes crashes. Catch the condition and return an | ||
3 | error for these cases instead. | ||
2 | 4 | ||
3 | The logic around **child is not obvious: this reference is used not | 5 | Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1824363 |
4 | only to return resulting child, but also to rollback NULL value on | 6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
5 | transaction abort. | 7 | Message-Id: <20211018134714.48438-1-kwolf@redhat.com> |
6 | 8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | |
7 | So, let's add documentation and some assertions. | ||
8 | |||
9 | While being here, drop extra declaration of bdrv_attach_child_noperm(). | ||
10 | |||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
12 | Message-Id: <20210601075218.79249-2-vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 10 | --- |
15 | block.c | 24 +++++++++++++++--------- | 11 | block.c | 10 ++++++++++ |
16 | 1 file changed, 15 insertions(+), 9 deletions(-) | 12 | tests/qemu-iotests/085 | 31 ++++++++++++++++++++++++++++++- |
13 | tests/qemu-iotests/085.out | 33 ++++++++++++++++++++++++++++++--- | ||
14 | 3 files changed, 70 insertions(+), 4 deletions(-) | ||
17 | 15 | ||
18 | diff --git a/block.c b/block.c | 16 | diff --git a/block.c b/block.c |
19 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block.c | 18 | --- a/block.c |
21 | +++ b/block.c | 19 | +++ b/block.c |
22 | @@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename, | 20 | @@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename, |
23 | 21 | BdrvChildRole child_role, | |
22 | Error **errp); | ||
23 | |||
24 | +static bool bdrv_recurse_has_child(BlockDriverState *bs, | ||
25 | + BlockDriverState *child); | ||
26 | + | ||
24 | static void bdrv_replace_child_noperm(BdrvChild *child, | 27 | static void bdrv_replace_child_noperm(BdrvChild *child, |
25 | BlockDriverState *new_bs); | 28 | BlockDriverState *new_bs); |
26 | -static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | 29 | static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, |
27 | - BlockDriverState *child_bs, | 30 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
28 | - const char *child_name, | 31 | int drain_saldo; |
29 | - const BdrvChildClass *child_class, | 32 | |
30 | - BdrvChildRole child_role, | 33 | assert(!child->frozen); |
31 | - BdrvChild **child, | 34 | + assert(old_bs != new_bs); |
32 | - Transaction *tran, | 35 | |
33 | - Error **errp); | 36 | if (old_bs && new_bs) { |
34 | static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, | 37 | assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); |
35 | Transaction *tran); | 38 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, |
36 | 39 | ||
37 | @@ -XXX,XX +XXX,XX @@ static TransactionActionDrv bdrv_attach_child_common_drv = { | 40 | assert(parent_bs->drv); |
38 | 41 | ||
39 | /* | 42 | + if (bdrv_recurse_has_child(child_bs, parent_bs)) { |
40 | * Common part of attaching bdrv child to bs or to blk or to job | 43 | + error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", |
41 | + * | 44 | + child_bs->node_name, child_name, parent_bs->node_name); |
42 | + * Resulting new child is returned through @child. | 45 | + return -EINVAL; |
43 | + * At start *@child must be NULL. | 46 | + } |
44 | + * @child is saved to a new entry of @tran, so that *@child could be reverted to | 47 | + |
45 | + * NULL on abort(). So referenced variable must live at least until transaction | 48 | bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); |
46 | + * end. | 49 | bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, |
47 | */ | 50 | perm, shared_perm, &perm, &shared_perm); |
48 | static int bdrv_attach_child_common(BlockDriverState *child_bs, | 51 | diff --git a/tests/qemu-iotests/085 b/tests/qemu-iotests/085 |
49 | const char *child_name, | 52 | index XXXXXXX..XXXXXXX 100755 |
50 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | 53 | --- a/tests/qemu-iotests/085 |
51 | return 0; | 54 | +++ b/tests/qemu-iotests/085 |
55 | @@ -XXX,XX +XXX,XX @@ do_blockdev_add() | ||
52 | } | 56 | } |
53 | 57 | ||
54 | +/* | 58 | # ${1}: unique identifier for the snapshot filename |
55 | + * Variable referenced by @child must live at least until transaction end. | 59 | -add_snapshot_image() |
56 | + * (see bdrv_attach_child_common() doc for details) | 60 | +create_snapshot_image() |
57 | + */ | 61 | { |
58 | static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | 62 | base_image="${TEST_DIR}/$((${1}-1))-${snapshot_virt0}" |
59 | BlockDriverState *child_bs, | 63 | snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}" |
60 | const char *child_name, | 64 | TEST_IMG=$snapshot_file _make_test_img -u -b "${base_image}" -F $IMGFMT "$size" |
61 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, | 65 | +} |
62 | child_role, perm, shared_perm, opaque, | ||
63 | &child, tran, errp); | ||
64 | if (ret < 0) { | ||
65 | - assert(child == NULL); | ||
66 | goto out; | ||
67 | } | ||
68 | |||
69 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, | ||
70 | |||
71 | out: | ||
72 | tran_finalize(tran, ret); | ||
73 | + /* child is unset on failure by bdrv_attach_child_common_abort() */ | ||
74 | + assert((ret < 0) == !child); | ||
75 | + | 66 | + |
76 | bdrv_unref(child_bs); | 67 | +# ${1}: unique identifier for the snapshot filename |
77 | return child; | 68 | +add_snapshot_image() |
69 | +{ | ||
70 | + snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}" | ||
71 | + create_snapshot_image "$1" | ||
72 | do_blockdev_add "$1" "'backing': null, " "${snapshot_file}" | ||
78 | } | 73 | } |
79 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, | 74 | |
80 | 75 | @@ -XXX,XX +XXX,XX @@ _make_test_img -b "${TEST_IMG}.base" -F $IMGFMT "$size" | |
81 | out: | 76 | do_blockdev_add ${SNAPSHOTS} "" "${TEST_IMG}" |
82 | tran_finalize(tran, ret); | 77 | blockdev_snapshot ${SNAPSHOTS} error |
83 | + /* child is unset on failure by bdrv_attach_child_common_abort() */ | 78 | |
84 | + assert((ret < 0) == !child); | 79 | +echo |
85 | 80 | +echo === Invalid command - creating loops === | |
86 | bdrv_unref(child_bs); | 81 | +echo |
87 | 82 | + | |
83 | +SNAPSHOTS=$((${SNAPSHOTS}+1)) | ||
84 | +add_snapshot_image ${SNAPSHOTS} | ||
85 | + | ||
86 | +_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot', | ||
87 | + 'arguments': { 'node':'snap_${SNAPSHOTS}', | ||
88 | + 'overlay':'snap_${SNAPSHOTS}' } | ||
89 | + }" "error" | ||
90 | + | ||
91 | +SNAPSHOTS=$((${SNAPSHOTS}+1)) | ||
92 | +create_snapshot_image ${SNAPSHOTS} | ||
93 | +do_blockdev_add ${SNAPSHOTS} "'backing': 'snap_$((${SNAPSHOTS}-1))', " \ | ||
94 | + "${TEST_DIR}/${SNAPSHOTS}-${snapshot_virt0}" | ||
95 | + | ||
96 | +_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot', | ||
97 | + 'arguments': { 'node':'snap_${SNAPSHOTS}', | ||
98 | + 'overlay':'snap_$((${SNAPSHOTS}-1))' } | ||
99 | + }" "error" | ||
100 | + | ||
101 | echo | ||
102 | echo === Invalid command - The node does not exist === | ||
103 | echo | ||
104 | diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/tests/qemu-iotests/085.out | ||
107 | +++ b/tests/qemu-iotests/085.out | ||
108 | @@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ | ||
109 | 'overlay':'snap_13' } } | ||
110 | {"error": {"class": "GenericError", "desc": "The overlay already has a backing image"}} | ||
111 | |||
112 | +=== Invalid command - creating loops === | ||
113 | + | ||
114 | +Formatting 'TEST_DIR/14-snapshot-v0.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/13-snapshot-v0.IMGFMT backing_fmt=IMGFMT | ||
115 | +{ 'execute': 'blockdev-add', 'arguments': | ||
116 | + { 'driver': 'IMGFMT', 'node-name': 'snap_14', 'backing': null, | ||
117 | + 'file': | ||
118 | + { 'driver': 'file', 'filename': 'TEST_DIR/14-snapshot-v0.IMGFMT', | ||
119 | + 'node-name': 'file_14' } } } | ||
120 | +{"return": {}} | ||
121 | +{ 'execute': 'blockdev-snapshot', | ||
122 | + 'arguments': { 'node':'snap_14', | ||
123 | + 'overlay':'snap_14' } | ||
124 | + } | ||
125 | +{"error": {"class": "GenericError", "desc": "Making 'snap_14' a backing child of 'snap_14' would create a cycle"}} | ||
126 | +Formatting 'TEST_DIR/15-snapshot-v0.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/14-snapshot-v0.IMGFMT backing_fmt=IMGFMT | ||
127 | +{ 'execute': 'blockdev-add', 'arguments': | ||
128 | + { 'driver': 'IMGFMT', 'node-name': 'snap_15', 'backing': 'snap_14', | ||
129 | + 'file': | ||
130 | + { 'driver': 'file', 'filename': 'TEST_DIR/15-snapshot-v0.IMGFMT', | ||
131 | + 'node-name': 'file_15' } } } | ||
132 | +{"return": {}} | ||
133 | +{ 'execute': 'blockdev-snapshot', | ||
134 | + 'arguments': { 'node':'snap_15', | ||
135 | + 'overlay':'snap_14' } | ||
136 | + } | ||
137 | +{"error": {"class": "GenericError", "desc": "Making 'snap_15' a backing child of 'snap_14' would create a cycle"}} | ||
138 | + | ||
139 | === Invalid command - The node does not exist === | ||
140 | |||
141 | { 'execute': 'blockdev-snapshot', | ||
142 | 'arguments': { 'node': 'virtio0', | ||
143 | - 'overlay':'snap_14' } } | ||
144 | -{"error": {"class": "GenericError", "desc": "Cannot find device='snap_14' nor node-name='snap_14'"}} | ||
145 | + 'overlay':'snap_16' } } | ||
146 | +{"error": {"class": "GenericError", "desc": "Cannot find device='snap_16' nor node-name='snap_16'"}} | ||
147 | { 'execute': 'blockdev-snapshot', | ||
148 | 'arguments': { 'node':'nodevice', | ||
149 | - 'overlay':'snap_13' } | ||
150 | + 'overlay':'snap_15' } | ||
151 | } | ||
152 | {"error": {"class": "GenericError", "desc": "Cannot find device='nodevice' nor node-name='nodevice'"}} | ||
153 | *** done | ||
88 | -- | 154 | -- |
89 | 2.30.2 | 155 | 2.31.1 |
90 | 156 | ||
91 | 157 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Peter Lieven <pl@kamp.de> |
---|---|---|---|
2 | 2 | ||
3 | Now permissions are updated as follows: | 3 | the qemu rbd driver currently lacks support for bdrv_co_block_status. |
4 | 1. do graph modifications ignoring permissions | 4 | This results mainly in incorrect progress during block operations (e.g. |
5 | 2. do permission update | 5 | qemu-img convert with an rbd image as source). |
6 | 6 | ||
7 | (of course, we rollback [1] if [2] fails) | 7 | This patch utilizes the rbd_diff_iterate2 call from librbd to detect |
8 | allocated and unallocated (all zero areas). | ||
8 | 9 | ||
9 | So, on stage [2] we can't say which users are "old" and which are | 10 | To avoid querying the ceph OSDs for the answer this is only done if |
10 | "new" and exist only since [1]. And current error message is a bit | 11 | the image has the fast-diff feature which depends on the object-map and |
11 | outdated. Let's improve it, to make everything clean. | 12 | exclusive-lock features. In this case it is guaranteed that the information |
13 | is present in memory in the librbd client and thus very fast. | ||
12 | 14 | ||
13 | While being here, add also a comment and some good assertions. | 15 | If fast-diff is not available all areas are reported to be allocated |
16 | which is the current behaviour if bdrv_co_block_status is not implemented. | ||
14 | 17 | ||
15 | iotests 283, 307, qsd-jobs outputs are updated. | 18 | Signed-off-by: Peter Lieven <pl@kamp.de> |
16 | 19 | Message-Id: <20211012152231.24868-1-pl@kamp.de> | |
17 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 20 | Reviewed-by: Ilya Dryomov <idryomov@gmail.com> |
18 | Message-Id: <20210601075218.79249-7-vsementsov@virtuozzo.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 22 | --- |
21 | block.c | 29 ++++++++++++++++++++------- | 23 | block/rbd.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++ |
22 | tests/qemu-iotests/283.out | 2 +- | 24 | 1 file changed, 112 insertions(+) |
23 | tests/qemu-iotests/307.out | 2 +- | ||
24 | tests/qemu-iotests/tests/qsd-jobs.out | 2 +- | ||
25 | 4 files changed, 25 insertions(+), 10 deletions(-) | ||
26 | 25 | ||
27 | diff --git a/block.c b/block.c | 26 | diff --git a/block/rbd.c b/block/rbd.c |
28 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/block.c | 28 | --- a/block/rbd.c |
30 | +++ b/block.c | 29 | +++ b/block/rbd.c |
31 | @@ -XXX,XX +XXX,XX @@ static char *bdrv_child_user_desc(BdrvChild *c) | 30 | @@ -XXX,XX +XXX,XX @@ typedef struct RBDTask { |
32 | return c->klass->get_parent_desc(c); | 31 | int64_t ret; |
32 | } RBDTask; | ||
33 | |||
34 | +typedef struct RBDDiffIterateReq { | ||
35 | + uint64_t offs; | ||
36 | + uint64_t bytes; | ||
37 | + bool exists; | ||
38 | +} RBDDiffIterateReq; | ||
39 | + | ||
40 | static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, | ||
41 | BlockdevOptionsRbd *opts, bool cache, | ||
42 | const char *keypairs, const char *secretid, | ||
43 | @@ -XXX,XX +XXX,XX @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, | ||
44 | return spec_info; | ||
33 | } | 45 | } |
34 | 46 | ||
35 | +/* | 47 | +/* |
36 | + * Check that @a allows everything that @b needs. @a and @b must reference same | 48 | + * rbd_diff_iterate2 allows to interrupt the exection by returning a negative |
37 | + * child node. | 49 | + * value in the callback routine. Choose a value that does not conflict with |
50 | + * an existing exitcode and return it if we want to prematurely stop the | ||
51 | + * execution because we detected a change in the allocation status. | ||
38 | + */ | 52 | + */ |
39 | static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) | 53 | +#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000 |
54 | + | ||
55 | +static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, | ||
56 | + int exists, void *opaque) | ||
57 | +{ | ||
58 | + RBDDiffIterateReq *req = opaque; | ||
59 | + | ||
60 | + assert(req->offs + req->bytes <= offs); | ||
61 | + /* | ||
62 | + * we do not diff against a snapshot so we should never receive a callback | ||
63 | + * for a hole. | ||
64 | + */ | ||
65 | + assert(exists); | ||
66 | + | ||
67 | + if (!req->exists && offs > req->offs) { | ||
68 | + /* | ||
69 | + * we started in an unallocated area and hit the first allocated | ||
70 | + * block. req->bytes must be set to the length of the unallocated area | ||
71 | + * before the allocated area. stop further processing. | ||
72 | + */ | ||
73 | + req->bytes = offs - req->offs; | ||
74 | + return QEMU_RBD_EXIT_DIFF_ITERATE2; | ||
75 | + } | ||
76 | + | ||
77 | + if (req->exists && offs > req->offs + req->bytes) { | ||
78 | + /* | ||
79 | + * we started in an allocated area and jumped over an unallocated area, | ||
80 | + * req->bytes contains the length of the allocated area before the | ||
81 | + * unallocated area. stop further processing. | ||
82 | + */ | ||
83 | + return QEMU_RBD_EXIT_DIFF_ITERATE2; | ||
84 | + } | ||
85 | + | ||
86 | + req->bytes += len; | ||
87 | + req->exists = true; | ||
88 | + | ||
89 | + return 0; | ||
90 | +} | ||
91 | + | ||
92 | +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, | ||
93 | + bool want_zero, int64_t offset, | ||
94 | + int64_t bytes, int64_t *pnum, | ||
95 | + int64_t *map, | ||
96 | + BlockDriverState **file) | ||
97 | +{ | ||
98 | + BDRVRBDState *s = bs->opaque; | ||
99 | + int status, r; | ||
100 | + RBDDiffIterateReq req = { .offs = offset }; | ||
101 | + uint64_t features, flags; | ||
102 | + | ||
103 | + assert(offset + bytes <= s->image_size); | ||
104 | + | ||
105 | + /* default to all sectors allocated */ | ||
106 | + status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; | ||
107 | + *map = offset; | ||
108 | + *file = bs; | ||
109 | + *pnum = bytes; | ||
110 | + | ||
111 | + /* check if RBD image supports fast-diff */ | ||
112 | + r = rbd_get_features(s->image, &features); | ||
113 | + if (r < 0) { | ||
114 | + return status; | ||
115 | + } | ||
116 | + if (!(features & RBD_FEATURE_FAST_DIFF)) { | ||
117 | + return status; | ||
118 | + } | ||
119 | + | ||
120 | + /* check if RBD fast-diff result is valid */ | ||
121 | + r = rbd_get_flags(s->image, &flags); | ||
122 | + if (r < 0) { | ||
123 | + return status; | ||
124 | + } | ||
125 | + if (flags & RBD_FLAG_FAST_DIFF_INVALID) { | ||
126 | + return status; | ||
127 | + } | ||
128 | + | ||
129 | + r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, | ||
130 | + qemu_rbd_diff_iterate_cb, &req); | ||
131 | + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { | ||
132 | + return status; | ||
133 | + } | ||
134 | + assert(req.bytes <= bytes); | ||
135 | + if (!req.exists) { | ||
136 | + if (r == 0) { | ||
137 | + /* | ||
138 | + * rbd_diff_iterate2 does not invoke callbacks for unallocated | ||
139 | + * areas. This here catches the case where no callback was | ||
140 | + * invoked at all (req.bytes == 0). | ||
141 | + */ | ||
142 | + assert(req.bytes == 0); | ||
143 | + req.bytes = bytes; | ||
144 | + } | ||
145 | + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; | ||
146 | + } | ||
147 | + | ||
148 | + *pnum = req.bytes; | ||
149 | + return status; | ||
150 | +} | ||
151 | + | ||
152 | static int64_t qemu_rbd_getlength(BlockDriverState *bs) | ||
40 | { | 153 | { |
41 | - g_autofree char *user = NULL; | 154 | BDRVRBDState *s = bs->opaque; |
42 | - g_autofree char *perm_names = NULL; | 155 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = { |
43 | + const char *child_bs_name; | 156 | #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES |
44 | + g_autofree char *a_user = NULL; | 157 | .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, |
45 | + g_autofree char *b_user = NULL; | 158 | #endif |
46 | + g_autofree char *perms = NULL; | 159 | + .bdrv_co_block_status = qemu_rbd_co_block_status, |
47 | + | 160 | |
48 | + assert(a->bs); | 161 | .bdrv_snapshot_create = qemu_rbd_snap_create, |
49 | + assert(a->bs == b->bs); | 162 | .bdrv_snapshot_delete = qemu_rbd_snap_remove, |
50 | |||
51 | if ((b->perm & a->shared_perm) == b->perm) { | ||
52 | return true; | ||
53 | } | ||
54 | |||
55 | - perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); | ||
56 | - user = bdrv_child_user_desc(a); | ||
57 | - error_setg(errp, "Conflicts with use by %s as '%s', which does not " | ||
58 | - "allow '%s' on %s", | ||
59 | - user, a->name, perm_names, bdrv_get_node_name(b->bs)); | ||
60 | + child_bs_name = bdrv_get_node_name(b->bs); | ||
61 | + a_user = bdrv_child_user_desc(a); | ||
62 | + b_user = bdrv_child_user_desc(b); | ||
63 | + perms = bdrv_perm_names(b->perm & ~a->shared_perm); | ||
64 | + | ||
65 | + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " | ||
66 | + "both required by %s (uses node '%s' as '%s' child) and " | ||
67 | + "unshared by %s (uses node '%s' as '%s' child).", | ||
68 | + child_bs_name, perms, | ||
69 | + b_user, child_bs_name, b->name, | ||
70 | + a_user, child_bs_name, a->name); | ||
71 | |||
72 | return false; | ||
73 | } | ||
74 | diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tests/qemu-iotests/283.out | ||
77 | +++ b/tests/qemu-iotests/283.out | ||
78 | @@ -XXX,XX +XXX,XX @@ | ||
79 | {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} | ||
80 | {"return": {}} | ||
81 | {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} | ||
82 | -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} | ||
83 | +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Permission conflict on node 'base': permissions 'write' are both required by node 'other' (uses node 'base' as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' child)."}} | ||
84 | |||
85 | === backup-top should be gone after job-finalize === | ||
86 | |||
87 | diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/tests/qemu-iotests/307.out | ||
90 | +++ b/tests/qemu-iotests/307.out | ||
91 | @@ -XXX,XX +XXX,XX @@ exports available: 1 | ||
92 | |||
93 | === Add a writable export === | ||
94 | {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} | ||
95 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} | ||
96 | +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}} | ||
97 | {"execute": "device_del", "arguments": {"id": "sda"}} | ||
98 | {"return": {}} | ||
99 | {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
100 | diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/tests/qemu-iotests/tests/qsd-jobs.out | ||
103 | +++ b/tests/qemu-iotests/tests/qsd-jobs.out | ||
104 | @@ -XXX,XX +XXX,XX @@ QMP_VERSION | ||
105 | {"return": {}} | ||
106 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} | ||
107 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} | ||
108 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}} | ||
109 | +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} | ||
110 | {"return": {}} | ||
111 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} | ||
112 | *** done | ||
113 | -- | 163 | -- |
114 | 2.30.2 | 164 | 2.31.1 |
115 | 165 | ||
116 | 166 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Samuel Thibault <samuel.thibault@ens-lyon.org> |
---|---|---|---|
2 | 2 | ||
3 | Don't report successful progress on failure, when call_state->ret is | 3 | The LBA28 capacity (at offsets 60/61 of identification) is supposed to |
4 | set. | 4 | express the maximum size supported by LBA28 commands. If the device is |
5 | larger than this, we have to cap it to 2^28-1. | ||
5 | 6 | ||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 7 | At least NetBSD happens to be using this value to determine whether to use |
7 | Message-Id: <20210528141628.44287-2-vsementsov@virtuozzo.com> | 8 | LBA28 or LBA48 for its commands, using LBA28 for sectors that don't need |
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | LBA48. This commit thus fixes NetBSD access to disks larger than 128GiB. |
10 | |||
11 | Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org> | ||
12 | Message-Id: <20210824104344.3878849-1-samuel.thibault@ens-lyon.org> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 14 | --- |
11 | block/block-copy.c | 8 +++++--- | 15 | hw/ide/core.c | 8 ++++++-- |
12 | 1 file changed, 5 insertions(+), 3 deletions(-) | 16 | 1 file changed, 6 insertions(+), 2 deletions(-) |
13 | 17 | ||
14 | diff --git a/block/block-copy.c b/block/block-copy.c | 18 | diff --git a/hw/ide/core.c b/hw/ide/core.c |
15 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/block-copy.c | 20 | --- a/hw/ide/core.c |
17 | +++ b/block/block-copy.c | 21 | +++ b/hw/ide/core.c |
18 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_entry(AioTask *task) | 22 | @@ -XXX,XX +XXX,XX @@ static void put_le16(uint16_t *p, unsigned int v) |
19 | 23 | static void ide_identify_size(IDEState *s) | |
20 | ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, | 24 | { |
21 | &error_is_read); | 25 | uint16_t *p = (uint16_t *)s->identify_data; |
22 | - if (ret < 0 && !t->call_state->ret) { | 26 | - put_le16(p + 60, s->nb_sectors); |
23 | - t->call_state->ret = ret; | 27 | - put_le16(p + 61, s->nb_sectors >> 16); |
24 | - t->call_state->error_is_read = error_is_read; | 28 | + int64_t nb_sectors_lba28 = s->nb_sectors; |
25 | + if (ret < 0) { | 29 | + if (nb_sectors_lba28 >= 1 << 28) { |
26 | + if (!t->call_state->ret) { | 30 | + nb_sectors_lba28 = (1 << 28) - 1; |
27 | + t->call_state->ret = ret; | 31 | + } |
28 | + t->call_state->error_is_read = error_is_read; | 32 | + put_le16(p + 60, nb_sectors_lba28); |
29 | + } | 33 | + put_le16(p + 61, nb_sectors_lba28 >> 16); |
30 | } else { | 34 | put_le16(p + 100, s->nb_sectors); |
31 | progress_work_done(t->s->progress, t->bytes); | 35 | put_le16(p + 101, s->nb_sectors >> 16); |
32 | } | 36 | put_le16(p + 102, s->nb_sectors >> 32); |
33 | -- | 37 | -- |
34 | 2.30.2 | 38 | 2.31.1 |
35 | 39 | ||
36 | 40 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Fabrice Fontaine <fontaine.fabrice@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | All child classes have this callback. So, drop unreachable code. | 3 | Include linux/falloc.h if CONFIG_FALLOCATE_ZERO_RANGE is defined to fix |
4 | https://gitlab.com/qemu-project/qemu/-/commit/50482fda98bd62e072c30b7ea73c985c4e9d9bbb | ||
5 | and avoid the following build failure on musl: | ||
4 | 6 | ||
5 | Still add an assertion to bdrv_attach_child_common(), to early detect | 7 | ../block/export/fuse.c: In function 'fuse_fallocate': |
6 | bad classes. | 8 | ../block/export/fuse.c:643:21: error: 'FALLOC_FL_ZERO_RANGE' undeclared (first use in this function) |
9 | 643 | else if (mode & FALLOC_FL_ZERO_RANGE) { | ||
10 | | ^~~~~~~~~~~~~~~~~~~~ | ||
7 | 11 | ||
8 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 12 | Fixes: |
9 | Message-Id: <20210601075218.79249-6-vsementsov@virtuozzo.com> | 13 | - http://autobuild.buildroot.org/results/be24433a429fda681fb66698160132c1c99bc53b |
14 | |||
15 | Fixes: 50482fda98b ("block/export/fuse.c: fix musl build") | ||
16 | Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com> | ||
17 | Message-Id: <20211022095209.1319671-1-fontaine.fabrice@gmail.com> | ||
18 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 20 | --- |
12 | block.c | 7 ++----- | 21 | block/export/fuse.c | 4 ++++ |
13 | 1 file changed, 2 insertions(+), 5 deletions(-) | 22 | 1 file changed, 4 insertions(+) |
14 | 23 | ||
15 | diff --git a/block.c b/block.c | 24 | diff --git a/block/export/fuse.c b/block/export/fuse.c |
16 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block.c | 26 | --- a/block/export/fuse.c |
18 | +++ b/block.c | 27 | +++ b/block/export/fuse.c |
19 | @@ -XXX,XX +XXX,XX @@ bool bdrv_is_writable(BlockDriverState *bs) | 28 | @@ -XXX,XX +XXX,XX @@ |
20 | 29 | #include <fuse.h> | |
21 | static char *bdrv_child_user_desc(BdrvChild *c) | 30 | #include <fuse_lowlevel.h> |
22 | { | 31 | |
23 | - if (c->klass->get_parent_desc) { | 32 | +#if defined(CONFIG_FALLOCATE_ZERO_RANGE) |
24 | - return c->klass->get_parent_desc(c); | 33 | +#include <linux/falloc.h> |
25 | - } | 34 | +#endif |
26 | - | 35 | + |
27 | - return g_strdup("another user"); | 36 | #ifdef __linux__ |
28 | + return c->klass->get_parent_desc(c); | 37 | #include <linux/fs.h> |
29 | } | 38 | #endif |
30 | |||
31 | static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) | ||
32 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | ||
33 | |||
34 | assert(child); | ||
35 | assert(*child == NULL); | ||
36 | + assert(child_class->get_parent_desc); | ||
37 | |||
38 | new_child = g_new(BdrvChild, 1); | ||
39 | *new_child = (BdrvChild) { | ||
40 | -- | 39 | -- |
41 | 2.30.2 | 40 | 2.31.1 |
42 | 41 | ||
43 | 42 | diff view generated by jsdifflib |
1 | From: Sergio Lopez <slp@redhat.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Allow block backends to poll their devices/users to check if they have | 3 | Commit d7ddd0a161 ("linux-aio: limit the batch size using |
4 | been quiesced when entering a drained section. | 4 | `aio-max-batch` parameter") added a way to limit the batch size |
5 | of Linux AIO backend for the entire AIO context. | ||
5 | 6 | ||
6 | This will be used in the next patch to wait for the NBD server to be | 7 | The same AIO context can be shared by multiple devices, so |
7 | completely quiesced. | 8 | latency-sensitive devices may want to limit the batch size even |
9 | more to avoid increasing latency. | ||
10 | |||
11 | For this reason we add the `aio-max-batch` option to the file | ||
12 | backend, which will be used by the next commits to limit the size of | ||
13 | batches including requests generated by this device. | ||
8 | 14 | ||
9 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | 15 | Suggested-by: Kevin Wolf <kwolf@redhat.com> |
10 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 16 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | 17 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> |
12 | Signed-off-by: Sergio Lopez <slp@redhat.com> | 18 | Message-Id: <20211026162346.253081-2-sgarzare@redhat.com> |
13 | Message-Id: <20210602060552.17433-2-slp@redhat.com> | 19 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
14 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 20 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
16 | --- | 21 | --- |
17 | include/sysemu/block-backend.h | 4 ++++ | 22 | qapi/block-core.json | 7 +++++++ |
18 | block/block-backend.c | 7 ++++++- | 23 | block/file-posix.c | 9 +++++++++ |
19 | 2 files changed, 10 insertions(+), 1 deletion(-) | 24 | 2 files changed, 16 insertions(+) |
20 | 25 | ||
21 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | 26 | diff --git a/qapi/block-core.json b/qapi/block-core.json |
22 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/include/sysemu/block-backend.h | 28 | --- a/qapi/block-core.json |
24 | +++ b/include/sysemu/block-backend.h | 29 | +++ b/qapi/block-core.json |
25 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps { | 30 | @@ -XXX,XX +XXX,XX @@ |
26 | * Runs when the backend's last drain request ends. | 31 | # for this device (default: none, forward the commands via SG_IO; |
27 | */ | 32 | # since 2.11) |
28 | void (*drained_end)(void *opaque); | 33 | # @aio: AIO backend (default: threads) (since: 2.8) |
29 | + /* | 34 | +# @aio-max-batch: maximum number of requests to batch together into a single |
30 | + * Is the device still busy? | 35 | +# submission in the AIO backend. The smallest value between |
31 | + */ | 36 | +# this and the aio-max-batch value of the IOThread object is |
32 | + bool (*drained_poll)(void *opaque); | 37 | +# chosen. |
33 | } BlockDevOps; | 38 | +# 0 means that the AIO backend will handle it automatically. |
34 | 39 | +# (default: 0, since 6.2) | |
35 | /* This struct is embedded in (the private) BlockBackend struct and contains | 40 | # @locking: whether to enable file locking. If set to 'auto', only enable |
36 | diff --git a/block/block-backend.c b/block/block-backend.c | 41 | # when Open File Descriptor (OFD) locking API is available |
42 | # (default: auto, since 2.10) | ||
43 | @@ -XXX,XX +XXX,XX @@ | ||
44 | '*pr-manager': 'str', | ||
45 | '*locking': 'OnOffAuto', | ||
46 | '*aio': 'BlockdevAioOptions', | ||
47 | + '*aio-max-batch': 'int', | ||
48 | '*drop-cache': {'type': 'bool', | ||
49 | 'if': 'CONFIG_LINUX'}, | ||
50 | '*x-check-cache-dropped': { 'type': 'bool', | ||
51 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | 52 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/block/block-backend.c | 53 | --- a/block/file-posix.c |
39 | +++ b/block/block-backend.c | 54 | +++ b/block/file-posix.c |
40 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child) | 55 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState { |
41 | static bool blk_root_drained_poll(BdrvChild *child) | 56 | uint64_t locked_perm; |
42 | { | 57 | uint64_t locked_shared_perm; |
43 | BlockBackend *blk = child->opaque; | 58 | |
44 | + bool busy = false; | 59 | + uint64_t aio_max_batch; |
45 | assert(blk->quiesce_counter); | ||
46 | - return !!blk->in_flight; | ||
47 | + | 60 | + |
48 | + if (blk->dev_ops && blk->dev_ops->drained_poll) { | 61 | int perm_change_fd; |
49 | + busy = blk->dev_ops->drained_poll(blk->dev_opaque); | 62 | int perm_change_flags; |
50 | + } | 63 | BDRVReopenState *reopen_state; |
51 | + return busy || !!blk->in_flight; | 64 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = { |
52 | } | 65 | .type = QEMU_OPT_STRING, |
53 | 66 | .help = "host AIO implementation (threads, native, io_uring)", | |
54 | static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) | 67 | }, |
68 | + { | ||
69 | + .name = "aio-max-batch", | ||
70 | + .type = QEMU_OPT_NUMBER, | ||
71 | + .help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)", | ||
72 | + }, | ||
73 | { | ||
74 | .name = "locking", | ||
75 | .type = QEMU_OPT_STRING, | ||
76 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
77 | s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); | ||
78 | #endif | ||
79 | |||
80 | + s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0); | ||
81 | + | ||
82 | locking = qapi_enum_parse(&OnOffAuto_lookup, | ||
83 | qemu_opt_get(opts, "locking"), | ||
84 | ON_OFF_AUTO_AUTO, &local_err); | ||
55 | -- | 85 | -- |
56 | 2.30.2 | 86 | 2.31.1 |
57 | 87 | ||
58 | 88 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Instead of keeping additional boolean field, let's store the | 3 | This new parameter can be used by block devices to limit the |
4 | information in BDRV_O_RDWR bit of BlockBackendRootState::open_flags. | 4 | Linux AIO batch size more than the limit set by the AIO context. |
5 | 5 | ||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 6 | file-posix backend supports this, passing its `aio-max-batch` option |
7 | Message-Id: <20210527154056.70294-4-vsementsov@virtuozzo.com> | 7 | previously added. |
8 | |||
9 | Add an helper function to calculate the maximum batch size. | ||
10 | |||
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> | ||
14 | Message-Id: <20211026162346.253081-3-sgarzare@redhat.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 16 | --- |
10 | include/block/block_int.h | 1 - | 17 | include/block/raw-aio.h | 3 ++- |
11 | block/block-backend.c | 10 ++-------- | 18 | block/file-posix.c | 3 ++- |
12 | blockdev.c | 3 +-- | 19 | block/linux-aio.c | 30 ++++++++++++++++++++++-------- |
13 | 3 files changed, 3 insertions(+), 11 deletions(-) | 20 | 3 files changed, 26 insertions(+), 10 deletions(-) |
14 | 21 | ||
15 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 22 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h |
16 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/block/block_int.h | 24 | --- a/include/block/raw-aio.h |
18 | +++ b/include/block/block_int.h | 25 | +++ b/include/block/raw-aio.h |
19 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | 26 | @@ -XXX,XX +XXX,XX @@ typedef struct LinuxAioState LinuxAioState; |
20 | 27 | LinuxAioState *laio_init(Error **errp); | |
21 | struct BlockBackendRootState { | 28 | void laio_cleanup(LinuxAioState *s); |
22 | int open_flags; | 29 | int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, |
23 | - bool read_only; | 30 | - uint64_t offset, QEMUIOVector *qiov, int type); |
24 | BlockdevDetectZeroesOptions detect_zeroes; | 31 | + uint64_t offset, QEMUIOVector *qiov, int type, |
25 | }; | 32 | + uint64_t dev_max_batch); |
26 | 33 | void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); | |
27 | diff --git a/block/block-backend.c b/block/block-backend.c | 34 | void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); |
35 | void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); | ||
36 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/block/block-backend.c | 38 | --- a/block/file-posix.c |
30 | +++ b/block/block-backend.c | 39 | +++ b/block/file-posix.c |
31 | @@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk) | 40 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, |
32 | if (bs) { | 41 | } else if (s->use_linux_aio) { |
33 | return !bdrv_is_read_only(bs); | 42 | LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); |
34 | } else { | 43 | assert(qiov->size == bytes); |
35 | - return !blk->root_state.read_only; | 44 | - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); |
36 | + return blk->root_state.open_flags & BDRV_O_RDWR; | 45 | + return laio_co_submit(bs, aio, s->fd, offset, qiov, type, |
46 | + s->aio_max_batch); | ||
47 | #endif | ||
48 | } | ||
49 | |||
50 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/block/linux-aio.c | ||
53 | +++ b/block/linux-aio.c | ||
54 | @@ -XXX,XX +XXX,XX @@ static void ioq_submit(LinuxAioState *s) | ||
37 | } | 55 | } |
38 | } | 56 | } |
39 | 57 | ||
40 | @@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk) | 58 | +static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch) |
41 | assert(blk->root); | 59 | +{ |
42 | 60 | + uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH; | |
43 | blk->root_state.open_flags = blk->root->bs->open_flags; | 61 | + |
44 | - blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); | 62 | + /* |
45 | blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; | 63 | + * AIO context can be shared between multiple block devices, so |
64 | + * `dev_max_batch` allows reducing the batch size for latency-sensitive | ||
65 | + * devices. | ||
66 | + */ | ||
67 | + max_batch = MIN_NON_ZERO(dev_max_batch, max_batch); | ||
68 | + | ||
69 | + /* limit the batch with the number of available events */ | ||
70 | + max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch); | ||
71 | + | ||
72 | + return max_batch; | ||
73 | +} | ||
74 | + | ||
75 | void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) | ||
76 | { | ||
77 | s->io_q.plugged++; | ||
78 | @@ -XXX,XX +XXX,XX @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) | ||
46 | } | 79 | } |
47 | 80 | ||
48 | @@ -XXX,XX +XXX,XX @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) | 81 | static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, |
49 | */ | 82 | - int type) |
50 | int blk_get_open_flags_from_root_state(BlockBackend *blk) | 83 | + int type, uint64_t dev_max_batch) |
51 | { | 84 | { |
52 | - int bs_flags; | 85 | LinuxAioState *s = laiocb->ctx; |
86 | struct iocb *iocbs = &laiocb->iocb; | ||
87 | QEMUIOVector *qiov = laiocb->qiov; | ||
88 | - int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH; | ||
53 | - | 89 | - |
54 | - bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; | 90 | - /* limit the batch with the number of available events */ |
55 | - bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; | 91 | - max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch); |
56 | - | 92 | |
57 | - return bs_flags; | 93 | switch (type) { |
58 | + return blk->root_state.open_flags; | 94 | case QEMU_AIO_WRITE: |
95 | @@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, | ||
96 | s->io_q.in_queue++; | ||
97 | if (!s->io_q.blocked && | ||
98 | (!s->io_q.plugged || | ||
99 | - s->io_q.in_queue >= max_batch)) { | ||
100 | + s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) { | ||
101 | ioq_submit(s); | ||
102 | } | ||
103 | |||
104 | @@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, | ||
59 | } | 105 | } |
60 | 106 | ||
61 | BlockBackendRootState *blk_get_root_state(BlockBackend *blk) | 107 | int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, |
62 | diff --git a/blockdev.c b/blockdev.c | 108 | - uint64_t offset, QEMUIOVector *qiov, int type) |
63 | index XXXXXXX..XXXXXXX 100644 | 109 | + uint64_t offset, QEMUIOVector *qiov, int type, |
64 | --- a/blockdev.c | 110 | + uint64_t dev_max_batch) |
65 | +++ b/blockdev.c | 111 | { |
66 | @@ -XXX,XX +XXX,XX @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, | 112 | int ret; |
67 | 113 | struct qemu_laiocb laiocb = { | |
68 | blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); | 114 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, |
69 | blk_rs = blk_get_root_state(blk); | 115 | .qiov = qiov, |
70 | - blk_rs->open_flags = bdrv_flags; | 116 | }; |
71 | - blk_rs->read_only = read_only; | 117 | |
72 | + blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR); | 118 | - ret = laio_do_submit(fd, &laiocb, offset, type); |
73 | blk_rs->detect_zeroes = detect_zeroes; | 119 | + ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch); |
74 | 120 | if (ret < 0) { | |
75 | qobject_unref(bs_opts); | 121 | return ret; |
122 | } | ||
76 | -- | 123 | -- |
77 | 2.30.2 | 124 | 2.31.1 |
78 | 125 | ||
79 | 126 | diff view generated by jsdifflib |
1 | From: Thomas Huth <thuth@redhat.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | If fallocate(... FALLOC_FL_ZERO_RANGE ...) returns EINVAL, it's likely | 3 | Between the submission of a request and the unplug, other devices |
4 | an indication that the file system is buggy and does not implement | 4 | with larger limits may have been queued new requests without flushing |
5 | unaligned accesses right. We still might be lucky with the other | 5 | the batch. |
6 | fallback fallocate() calls later in this function, though, so we should | ||
7 | not return immediately and try the others first. | ||
8 | Since FALLOC_FL_ZERO_RANGE could also return EINVAL if the file descriptor | ||
9 | is not a regular file, we ignore this filesystem bug silently, without | ||
10 | printing an error message for the user. | ||
11 | 6 | ||
12 | Signed-off-by: Thomas Huth <thuth@redhat.com> | 7 | Using the new `dev_max_batch` parameter, laio_io_unplug() can check |
13 | Message-Id: <20210527172020.847617-3-thuth@redhat.com> | 8 | if the batch exceeds the device limit to flush the current batch. |
9 | |||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> | ||
13 | Message-Id: <20211026162346.253081-4-sgarzare@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 15 | --- |
16 | block/file-posix.c | 18 +++++++++--------- | 16 | include/block/raw-aio.h | 3 ++- |
17 | 1 file changed, 9 insertions(+), 9 deletions(-) | 17 | block/file-posix.c | 2 +- |
18 | block/linux-aio.c | 8 +++++--- | ||
19 | 3 files changed, 8 insertions(+), 5 deletions(-) | ||
18 | 20 | ||
21 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/include/block/raw-aio.h | ||
24 | +++ b/include/block/raw-aio.h | ||
25 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, | ||
26 | void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); | ||
27 | void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); | ||
28 | void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); | ||
29 | -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s); | ||
30 | +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, | ||
31 | + uint64_t dev_max_batch); | ||
32 | #endif | ||
33 | /* io_uring.c - Linux io_uring implementation */ | ||
34 | #ifdef CONFIG_LINUX_IO_URING | ||
19 | diff --git a/block/file-posix.c b/block/file-posix.c | 35 | diff --git a/block/file-posix.c b/block/file-posix.c |
20 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/file-posix.c | 37 | --- a/block/file-posix.c |
22 | +++ b/block/file-posix.c | 38 | +++ b/block/file-posix.c |
23 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque) | 39 | @@ -XXX,XX +XXX,XX @@ static void raw_aio_unplug(BlockDriverState *bs) |
24 | if (s->has_write_zeroes) { | 40 | #ifdef CONFIG_LINUX_AIO |
25 | int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, | 41 | if (s->use_linux_aio) { |
26 | aiocb->aio_offset, aiocb->aio_nbytes); | 42 | LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); |
27 | - if (ret == -EINVAL) { | 43 | - laio_io_unplug(bs, aio); |
28 | - /* | 44 | + laio_io_unplug(bs, aio, s->aio_max_batch); |
29 | - * Allow falling back to pwrite for file systems that | ||
30 | - * do not support fallocate() for an unaligned byte range. | ||
31 | - */ | ||
32 | - return -ENOTSUP; | ||
33 | - } | ||
34 | - if (ret == 0 || ret != -ENOTSUP) { | ||
35 | + if (ret == -ENOTSUP) { | ||
36 | + s->has_write_zeroes = false; | ||
37 | + } else if (ret == 0 || ret != -EINVAL) { | ||
38 | return ret; | ||
39 | } | ||
40 | - s->has_write_zeroes = false; | ||
41 | + /* | ||
42 | + * Note: Some file systems do not like unaligned byte ranges, and | ||
43 | + * return EINVAL in such a case, though they should not do it according | ||
44 | + * to the man-page of fallocate(). Thus we simply ignore this return | ||
45 | + * value and try the other fallbacks instead. | ||
46 | + */ | ||
47 | } | 45 | } |
48 | #endif | 46 | #endif |
49 | 47 | #ifdef CONFIG_LINUX_IO_URING | |
48 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/block/linux-aio.c | ||
51 | +++ b/block/linux-aio.c | ||
52 | @@ -XXX,XX +XXX,XX @@ void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) | ||
53 | s->io_q.plugged++; | ||
54 | } | ||
55 | |||
56 | -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) | ||
57 | +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, | ||
58 | + uint64_t dev_max_batch) | ||
59 | { | ||
60 | assert(s->io_q.plugged); | ||
61 | - if (--s->io_q.plugged == 0 && | ||
62 | - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { | ||
63 | + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || | ||
64 | + (--s->io_q.plugged == 0 && | ||
65 | + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { | ||
66 | ioq_submit(s); | ||
67 | } | ||
68 | } | ||
50 | -- | 69 | -- |
51 | 2.30.2 | 70 | 2.31.1 |
52 | 71 | ||
53 | 72 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We have different types of parents: block nodes, block backends and | 3 | Similarly to e7e588d432d31ecebc26358e47201dd108db964c, there is a |
4 | jobs. So, it makes sense to specify type together with name. | 4 | warning in block/block-backend.c that qiov->size <= INT64_MAX is always |
5 | true on machines where size_t is narrower than a uint64_t. In said | ||
6 | commit, we silenced this warning by casting to uint64_t. | ||
5 | 7 | ||
6 | While being here also use g_autofree. | 8 | The commit introducing this warning here |
9 | (a93d81c84afa717b0a1a6947524d8d1fbfd6bbf5) anticipated it and so tried | ||
10 | to address it the same way. However, it only did so in one of two | ||
11 | places where this comparison occurs, and so we still need to fix up the | ||
12 | other one. | ||
7 | 13 | ||
8 | iotest 307 output is updated. | 14 | Fixes: a93d81c84afa717b0a1a6947524d8d1fbfd6bbf5 |
9 | 15 | ("block-backend: convert blk_aio_ functions to int64_t bytes | |
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 16 | paramter") |
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | 17 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
12 | Message-Id: <20210601075218.79249-3-vsementsov@virtuozzo.com> | 18 | Message-Id: <20211026090745.30800-1-hreitz@redhat.com> |
19 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 20 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 21 | --- |
15 | block/block-backend.c | 9 ++++----- | 22 | block/block-backend.c | 2 +- |
16 | tests/qemu-iotests/307.out | 2 +- | 23 | 1 file changed, 1 insertion(+), 1 deletion(-) |
17 | 2 files changed, 5 insertions(+), 6 deletions(-) | ||
18 | 24 | ||
19 | diff --git a/block/block-backend.c b/block/block-backend.c | 25 | diff --git a/block/block-backend.c b/block/block-backend.c |
20 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/block-backend.c | 27 | --- a/block/block-backend.c |
22 | +++ b/block/block-backend.c | 28 | +++ b/block/block-backend.c |
23 | @@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, | 29 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, |
24 | static char *blk_root_get_parent_desc(BdrvChild *child) | 30 | QEMUIOVector *qiov, BdrvRequestFlags flags, |
31 | BlockCompletionFunc *cb, void *opaque) | ||
25 | { | 32 | { |
26 | BlockBackend *blk = child->opaque; | 33 | - assert(qiov->size <= INT64_MAX); |
27 | - char *dev_id; | 34 | + assert((uint64_t)qiov->size <= INT64_MAX); |
28 | + g_autofree char *dev_id = NULL; | 35 | return blk_aio_prwv(blk, offset, qiov->size, qiov, |
29 | 36 | blk_aio_write_entry, flags, cb, opaque); | |
30 | if (blk->name) { | ||
31 | - return g_strdup(blk->name); | ||
32 | + return g_strdup_printf("block device '%s'", blk->name); | ||
33 | } | ||
34 | |||
35 | dev_id = blk_get_attached_dev_id(blk); | ||
36 | if (*dev_id) { | ||
37 | - return dev_id; | ||
38 | + return g_strdup_printf("block device '%s'", dev_id); | ||
39 | } else { | ||
40 | /* TODO Callback into the BB owner for something more detailed */ | ||
41 | - g_free(dev_id); | ||
42 | - return g_strdup("a block device"); | ||
43 | + return g_strdup("an unnamed block device"); | ||
44 | } | ||
45 | } | 37 | } |
46 | |||
47 | diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/tests/qemu-iotests/307.out | ||
50 | +++ b/tests/qemu-iotests/307.out | ||
51 | @@ -XXX,XX +XXX,XX @@ exports available: 1 | ||
52 | |||
53 | === Add a writable export === | ||
54 | {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} | ||
55 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by sda as 'root', which does not allow 'write' on fmt"}} | ||
56 | +{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} | ||
57 | {"execute": "device_del", "arguments": {"id": "sda"}} | ||
58 | {"return": {}} | ||
59 | {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
60 | -- | 38 | -- |
61 | 2.30.2 | 39 | 2.31.1 |
62 | 40 | ||
63 | 41 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | We have different types of parents: block nodes, block backends and | ||
4 | jobs. So, it makes sense to specify type together with name. | ||
5 | |||
6 | Next, this handler us used to compose an error message about permission | ||
7 | conflict. And permission conflict occurs in a specific place of block | ||
8 | graph. We shouldn't report name of parent device (as it refers another | ||
9 | place in block graph), but exactly and only the name of the node. So, | ||
10 | use bdrv_get_node_name() directly. | ||
11 | |||
12 | iotest 283 output is updated. | ||
13 | |||
14 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
15 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
16 | Message-Id: <20210601075218.79249-4-vsementsov@virtuozzo.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
18 | --- | ||
19 | block.c | 2 +- | ||
20 | tests/qemu-iotests/283.out | 2 +- | ||
21 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
22 | |||
23 | diff --git a/block.c b/block.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/block.c | ||
26 | +++ b/block.c | ||
27 | @@ -XXX,XX +XXX,XX @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) | ||
28 | static char *bdrv_child_get_parent_desc(BdrvChild *c) | ||
29 | { | ||
30 | BlockDriverState *parent = c->opaque; | ||
31 | - return g_strdup(bdrv_get_device_or_node_name(parent)); | ||
32 | + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); | ||
33 | } | ||
34 | |||
35 | static void bdrv_child_cb_drained_begin(BdrvChild *child) | ||
36 | diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/tests/qemu-iotests/283.out | ||
39 | +++ b/tests/qemu-iotests/283.out | ||
40 | @@ -XXX,XX +XXX,XX @@ | ||
41 | {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} | ||
42 | {"return": {}} | ||
43 | {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} | ||
44 | -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}} | ||
45 | +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} | ||
46 | |||
47 | === backup-top should be gone after job-finalize === | ||
48 | |||
49 | -- | ||
50 | 2.30.2 | ||
51 | |||
52 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Recently we've fixed a crash by adding .get_parent_aio_context handler | ||
4 | to child_vvfat_qcow. Now we want it to support .get_parent_desc as | ||
5 | well. child_vvfat_qcow wants to implement own .inherit_options, it's | ||
6 | not bad. But omitting all other handlers is a bad idea. Let's inherit | ||
7 | the class from child_of_bds instead, similar to chain_child_class and | ||
8 | detach_by_driver_cb_class in test-bdrv-drain.c. | ||
9 | |||
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Message-Id: <20210601075218.79249-5-vsementsov@virtuozzo.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | --- | ||
14 | block/vvfat.c | 8 +++----- | ||
15 | 1 file changed, 3 insertions(+), 5 deletions(-) | ||
16 | |||
17 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/vvfat.c | ||
20 | +++ b/block/vvfat.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, | ||
22 | qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); | ||
23 | } | ||
24 | |||
25 | -static const BdrvChildClass child_vvfat_qcow = { | ||
26 | - .parent_is_bds = true, | ||
27 | - .inherit_options = vvfat_qcow_options, | ||
28 | - .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
29 | -}; | ||
30 | +static BdrvChildClass child_vvfat_qcow; | ||
31 | |||
32 | static int enable_write_target(BlockDriverState *bs, Error **errp) | ||
33 | { | ||
34 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vvfat = { | ||
35 | |||
36 | static void bdrv_vvfat_init(void) | ||
37 | { | ||
38 | + child_vvfat_qcow = child_of_bds; | ||
39 | + child_vvfat_qcow.inherit_options = vvfat_qcow_options; | ||
40 | bdrv_register(&bdrv_vvfat); | ||
41 | } | ||
42 | |||
43 | -- | ||
44 | 2.30.2 | ||
45 | |||
46 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Currently we update s->use_copy_range and s->copy_size in | 3 | Since commit 4d324c0bf65 ("introduce QEMU_AUTO_VFREE") buffers |
4 | block_copy_do_copy(). | 4 | allocated by qemu_memalign() can automatically freed when using |
5 | the QEMU_AUTO_VFREE macro. Use it to simplify a bit. | ||
5 | 6 | ||
6 | It's not very good: | 7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
7 | 8 | Message-Id: <20211006164931.172349-2-philmd@redhat.com> | |
8 | 1. block_copy_do_copy() is intended to be a simple function, that wraps | ||
9 | bdrv_co_<io> functions for need of block copy. That's why we don't pass | ||
10 | BlockCopyTask into it. So, block_copy_do_copy() is bad place for | ||
11 | manipulation with generic state of block-copy process | ||
12 | |||
13 | 2. We are going to make block-copy thread-safe. So, it's good to move | ||
14 | manipulation with state of block-copy to the places where we'll need | ||
15 | critical sections anyway, to not introduce extra synchronization | ||
16 | primitives in block_copy_do_copy(). | ||
17 | |||
18 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
19 | Message-Id: <20210528141628.44287-3-vsementsov@virtuozzo.com> | ||
20 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
22 | --- | 11 | --- |
23 | block/block-copy.c | 72 +++++++++++++++++++++++++++++++--------------- | 12 | block/nvme.c | 11 ++++------- |
24 | 1 file changed, 49 insertions(+), 23 deletions(-) | 13 | 1 file changed, 4 insertions(+), 7 deletions(-) |
25 | 14 | ||
26 | diff --git a/block/block-copy.c b/block/block-copy.c | 15 | diff --git a/block/nvme.c b/block/nvme.c |
27 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/block/block-copy.c | 17 | --- a/block/nvme.c |
29 | +++ b/block/block-copy.c | 18 | +++ b/block/nvme.c |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyTask { | 19 | @@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) |
31 | int64_t offset; | ||
32 | int64_t bytes; | ||
33 | bool zeroes; | ||
34 | + bool copy_range; | ||
35 | QLIST_ENTRY(BlockCopyTask) list; | ||
36 | CoQueue wait_queue; /* coroutines blocked on this task */ | ||
37 | } BlockCopyTask; | ||
38 | @@ -XXX,XX +XXX,XX @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, | ||
39 | .call_state = call_state, | ||
40 | .offset = offset, | ||
41 | .bytes = bytes, | ||
42 | + .copy_range = s->use_copy_range, | ||
43 | }; | ||
44 | qemu_co_queue_init(&task->wait_queue); | ||
45 | QLIST_INSERT_HEAD(&s->tasks, task, list); | ||
46 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool, | ||
47 | * | ||
48 | * No sync here: nor bitmap neighter intersecting requests handling, only copy. | ||
49 | * | ||
50 | + * @copy_range is an in-out argument: if *copy_range is false, copy_range is not | ||
51 | + * done. If *copy_range is true, copy_range is attempted. If the copy_range | ||
52 | + * attempt fails, the function falls back to the usual read+write and | ||
53 | + * *copy_range is set to false. *copy_range and zeroes must not be true | ||
54 | + * simultaneously. | ||
55 | + * | ||
56 | * Returns 0 on success. | ||
57 | */ | ||
58 | static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | ||
59 | int64_t offset, int64_t bytes, | ||
60 | - bool zeroes, bool *error_is_read) | ||
61 | + bool zeroes, bool *copy_range, | ||
62 | + bool *error_is_read) | ||
63 | { | 20 | { |
64 | int ret; | 21 | BDRVNVMeState *s = bs->opaque; |
65 | int64_t nbytes = MIN(offset + bytes, s->len) - offset; | 22 | bool ret = false; |
66 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | 23 | - union { |
67 | assert(offset + bytes <= s->len || | 24 | + QEMU_AUTO_VFREE union { |
68 | offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); | 25 | NvmeIdCtrl ctrl; |
69 | assert(nbytes < INT_MAX); | 26 | NvmeIdNs ns; |
70 | + assert(!(*copy_range && zeroes)); | 27 | - } *id; |
71 | 28 | + } *id = NULL; | |
72 | if (zeroes) { | 29 | NvmeLBAF *lbaf; |
73 | ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & | 30 | uint16_t oncs; |
74 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | 31 | int r; |
75 | return ret; | 32 | @@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) |
76 | } | 33 | s->blkshift = lbaf->ds; |
77 | 34 | out: | |
78 | - if (s->use_copy_range) { | 35 | qemu_vfio_dma_unmap(s->vfio, id); |
79 | + if (*copy_range) { | 36 | - qemu_vfree(id); |
80 | ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, | 37 | |
81 | 0, s->write_flags); | ||
82 | if (ret < 0) { | ||
83 | trace_block_copy_copy_range_fail(s, offset, ret); | ||
84 | - s->use_copy_range = false; | ||
85 | - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); | ||
86 | + *copy_range = false; | ||
87 | /* Fallback to read+write with allocated buffer */ | ||
88 | } else { | ||
89 | - if (s->use_copy_range) { | ||
90 | - /* | ||
91 | - * Successful copy-range. Now increase copy_size. copy_range | ||
92 | - * does not respect max_transfer (it's a TODO), so we factor | ||
93 | - * that in here. | ||
94 | - * | ||
95 | - * Note: we double-check s->use_copy_range for the case when | ||
96 | - * parallel block-copy request unsets it during previous | ||
97 | - * bdrv_co_copy_range call. | ||
98 | - */ | ||
99 | - s->copy_size = | ||
100 | - MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), | ||
101 | - QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, | ||
102 | - s->target), | ||
103 | - s->cluster_size)); | ||
104 | - } | ||
105 | - goto out; | ||
106 | + return 0; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | @@ -XXX,XX +XXX,XX @@ out: | ||
111 | return ret; | 38 | return ret; |
112 | } | 39 | } |
113 | 40 | @@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | |
114 | +static void block_copy_handle_copy_range_result(BlockCopyState *s, | ||
115 | + bool is_success) | ||
116 | +{ | ||
117 | + if (!s->use_copy_range) { | ||
118 | + /* already disabled */ | ||
119 | + return; | ||
120 | + } | ||
121 | + | ||
122 | + if (is_success) { | ||
123 | + /* | ||
124 | + * Successful copy-range. Now increase copy_size. copy_range | ||
125 | + * does not respect max_transfer (it's a TODO), so we factor | ||
126 | + * that in here. | ||
127 | + */ | ||
128 | + s->copy_size = | ||
129 | + MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), | ||
130 | + QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, | ||
131 | + s->target), | ||
132 | + s->cluster_size)); | ||
133 | + } else { | ||
134 | + /* Copy-range failed, disable it. */ | ||
135 | + s->use_copy_range = false; | ||
136 | + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); | ||
137 | + } | ||
138 | +} | ||
139 | + | ||
140 | static coroutine_fn int block_copy_task_entry(AioTask *task) | ||
141 | { | 41 | { |
142 | BlockCopyTask *t = container_of(task, BlockCopyTask, task); | 42 | BDRVNVMeState *s = bs->opaque; |
143 | bool error_is_read = false; | 43 | int r; |
144 | + bool copy_range = t->copy_range; | 44 | - uint8_t *buf = NULL; |
45 | + QEMU_AUTO_VFREE uint8_t *buf = NULL; | ||
46 | QEMUIOVector local_qiov; | ||
47 | size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size); | ||
48 | assert(QEMU_IS_ALIGNED(offset, s->page_size)); | ||
49 | @@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
50 | if (!r && !is_write) { | ||
51 | qemu_iovec_from_buf(qiov, 0, buf, bytes); | ||
52 | } | ||
53 | - qemu_vfree(buf); | ||
54 | return r; | ||
55 | } | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, | ||
58 | BDRVNVMeState *s = bs->opaque; | ||
59 | NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; | ||
60 | NVMeRequest *req; | ||
61 | - NvmeDsmRange *buf; | ||
62 | + QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL; | ||
63 | QEMUIOVector local_qiov; | ||
145 | int ret; | 64 | int ret; |
146 | 65 | ||
147 | ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, | 66 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, |
148 | - &error_is_read); | 67 | trace_nvme_dsm_done(s, offset, bytes, ret); |
149 | + ©_range, &error_is_read); | 68 | out: |
150 | + if (t->copy_range) { | 69 | qemu_iovec_destroy(&local_qiov); |
151 | + block_copy_handle_copy_range_result(t->s, copy_range); | 70 | - qemu_vfree(buf); |
152 | + } | 71 | return ret; |
153 | if (ret < 0) { | 72 | |
154 | if (!t->call_state->ret) { | 73 | } |
155 | t->call_state->ret = ret; | ||
156 | @@ -XXX,XX +XXX,XX @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) | ||
157 | g_free(task); | ||
158 | continue; | ||
159 | } | ||
160 | - task->zeroes = ret & BDRV_BLOCK_ZERO; | ||
161 | + if (ret & BDRV_BLOCK_ZERO) { | ||
162 | + task->zeroes = true; | ||
163 | + task->copy_range = false; | ||
164 | + } | ||
165 | |||
166 | if (s->speed) { | ||
167 | if (!call_state->ignore_ratelimit) { | ||
168 | -- | 74 | -- |
169 | 2.30.2 | 75 | 2.31.1 |
170 | 76 | ||
171 | 77 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Document that security reports must use 'null-co,read-zeroes=on' | 3 | For debugging purpose it is helpful to know the CQ/SQ pointers. |
4 | because otherwise the memory is left uninitialized (which is an | 4 | We already have a trace event in nvme_free_queue_pair(), extend |
5 | on-purpose performance feature). | 5 | it to report these pointer addresses. |
6 | 6 | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
9 | Message-Id: <20210601162548.2076631-1-philmd@redhat.com> | 8 | Message-Id: <20211006164931.172349-3-philmd@redhat.com> |
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 11 | --- |
12 | docs/devel/secure-coding-practices.rst | 9 +++++++++ | 12 | block/nvme.c | 2 +- |
13 | 1 file changed, 9 insertions(+) | 13 | block/trace-events | 2 +- |
14 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
14 | 15 | ||
15 | diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst | 16 | diff --git a/block/nvme.c b/block/nvme.c |
16 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/docs/devel/secure-coding-practices.rst | 18 | --- a/block/nvme.c |
18 | +++ b/docs/devel/secure-coding-practices.rst | 19 | +++ b/block/nvme.c |
19 | @@ -XXX,XX +XXX,XX @@ structures and only process the local copy. This prevents | 20 | @@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, |
20 | time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to | 21 | |
21 | crash when a vCPU thread modifies guest RAM while device emulation is | 22 | static void nvme_free_queue_pair(NVMeQueuePair *q) |
22 | processing it. | 23 | { |
23 | + | 24 | - trace_nvme_free_queue_pair(q->index, q); |
24 | +Use of null-co block drivers | 25 | + trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq); |
25 | +---------------------------- | 26 | if (q->completion_bh) { |
26 | + | 27 | qemu_bh_delete(q->completion_bh); |
27 | +The ``null-co`` block driver is designed for performance: its read accesses are | 28 | } |
28 | +not initialized by default. In case this driver has to be used for security | 29 | diff --git a/block/trace-events b/block/trace-events |
29 | +research, it must be used with the ``read-zeroes=on`` option which fills read | 30 | index XXXXXXX..XXXXXXX 100644 |
30 | +buffers with zeroes. Security issues reported with the default | 31 | --- a/block/trace-events |
31 | +(``read-zeroes=off``) will be discarded. | 32 | +++ b/block/trace-events |
33 | @@ -XXX,XX +XXX,XX @@ nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%" | ||
34 | nvme_dma_map_flush(void *s) "s %p" | ||
35 | nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" | ||
36 | nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d" | ||
37 | -nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p" | ||
38 | +nvme_free_queue_pair(unsigned q_index, void *q, void *cq, void *sq) "index %u q %p cq %p sq %p" | ||
39 | nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" | ||
40 | nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 | ||
41 | nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" | ||
32 | -- | 42 | -- |
33 | 2.30.2 | 43 | 2.31.1 |
34 | 44 | ||
35 | 45 | diff view generated by jsdifflib |
1 | From: Sergio Lopez <slp@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Before switching between AioContexts we need to make sure that we're | 3 | Instead of duplicating code, extract the common helper to free |
4 | fully quiesced ("nb_requests == 0" for every client) when entering the | 4 | a single queue. |
5 | drained section. | ||
6 | 5 | ||
7 | To do this, we set "quiescing = true" for every client on | 6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
8 | ".drained_begin" to prevent new coroutines from being created, and | 7 | Message-Id: <20211006164931.172349-4-philmd@redhat.com> |
9 | check if "nb_requests == 0" on ".drained_poll". Finally, once we're | 8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | exiting the drained section, on ".drained_end" we set "quiescing = | ||
11 | false" and call "nbd_client_receive_next_request()" to resume the | ||
12 | processing of new requests. | ||
13 | |||
14 | With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be | ||
15 | reverted to be as simple as they were before f148ae7d36. | ||
16 | |||
17 | RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137 | ||
18 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | ||
19 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
20 | Message-Id: <20210602060552.17433-3-slp@redhat.com> | ||
21 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
22 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
23 | --- | 10 | --- |
24 | nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++-------------- | 11 | block/nvme.c | 9 +++++++-- |
25 | 1 file changed, 61 insertions(+), 21 deletions(-) | 12 | 1 file changed, 7 insertions(+), 2 deletions(-) |
26 | 13 | ||
27 | diff --git a/nbd/server.c b/nbd/server.c | 14 | diff --git a/block/nvme.c b/block/nvme.c |
28 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/nbd/server.c | 16 | --- a/block/nvme.c |
30 | +++ b/nbd/server.c | 17 | +++ b/block/nvme.c |
31 | @@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req) | 18 | @@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, |
32 | g_free(req); | 19 | return r == 0; |
33 | |||
34 | client->nb_requests--; | ||
35 | + | ||
36 | + if (client->quiescing && client->nb_requests == 0) { | ||
37 | + aio_wait_kick(); | ||
38 | + } | ||
39 | + | ||
40 | nbd_client_receive_next_request(client); | ||
41 | |||
42 | nbd_client_put(client); | ||
43 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque) | ||
44 | QTAILQ_FOREACH(client, &exp->clients, next) { | ||
45 | qio_channel_attach_aio_context(client->ioc, ctx); | ||
46 | |||
47 | + assert(client->nb_requests == 0); | ||
48 | assert(client->recv_coroutine == NULL); | ||
49 | assert(client->send_coroutine == NULL); | ||
50 | - | ||
51 | - if (client->quiescing) { | ||
52 | - client->quiescing = false; | ||
53 | - nbd_client_receive_next_request(client); | ||
54 | - } | ||
55 | } | ||
56 | } | 20 | } |
57 | 21 | ||
58 | -static void nbd_aio_detach_bh(void *opaque) | 22 | +static void nvme_free_queue(NVMeQueue *q) |
59 | +static void blk_aio_detach(void *opaque) | 23 | +{ |
60 | { | 24 | + qemu_vfree(q->queue); |
61 | NBDExport *exp = opaque; | ||
62 | NBDClient *client; | ||
63 | |||
64 | + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); | ||
65 | + | ||
66 | QTAILQ_FOREACH(client, &exp->clients, next) { | ||
67 | qio_channel_detach_aio_context(client->ioc); | ||
68 | + } | ||
69 | + | ||
70 | + exp->common.ctx = NULL; | ||
71 | +} | 25 | +} |
72 | + | 26 | + |
73 | +static void nbd_drained_begin(void *opaque) | 27 | static void nvme_free_queue_pair(NVMeQueuePair *q) |
74 | +{ | 28 | { |
75 | + NBDExport *exp = opaque; | 29 | trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq); |
76 | + NBDClient *client; | 30 | if (q->completion_bh) { |
77 | + | 31 | qemu_bh_delete(q->completion_bh); |
78 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
79 | client->quiescing = true; | ||
80 | + } | ||
81 | +} | ||
82 | |||
83 | - if (client->recv_coroutine) { | ||
84 | - if (client->read_yielding) { | ||
85 | - qemu_aio_coroutine_enter(exp->common.ctx, | ||
86 | - client->recv_coroutine); | ||
87 | - } else { | ||
88 | - AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); | ||
89 | - } | ||
90 | - } | ||
91 | +static void nbd_drained_end(void *opaque) | ||
92 | +{ | ||
93 | + NBDExport *exp = opaque; | ||
94 | + NBDClient *client; | ||
95 | |||
96 | - if (client->send_coroutine) { | ||
97 | - AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); | ||
98 | - } | ||
99 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
100 | + client->quiescing = false; | ||
101 | + nbd_client_receive_next_request(client); | ||
102 | } | 32 | } |
33 | + nvme_free_queue(&q->sq); | ||
34 | + nvme_free_queue(&q->cq); | ||
35 | qemu_vfree(q->prp_list_pages); | ||
36 | - qemu_vfree(q->sq.queue); | ||
37 | - qemu_vfree(q->cq.queue); | ||
38 | qemu_mutex_destroy(&q->lock); | ||
39 | g_free(q); | ||
103 | } | 40 | } |
104 | |||
105 | -static void blk_aio_detach(void *opaque) | ||
106 | +static bool nbd_drained_poll(void *opaque) | ||
107 | { | ||
108 | NBDExport *exp = opaque; | ||
109 | + NBDClient *client; | ||
110 | |||
111 | - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); | ||
112 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
113 | + if (client->nb_requests != 0) { | ||
114 | + /* | ||
115 | + * If there's a coroutine waiting for a request on nbd_read_eof() | ||
116 | + * enter it here so we don't depend on the client to wake it up. | ||
117 | + */ | ||
118 | + if (client->recv_coroutine != NULL && client->read_yielding) { | ||
119 | + qemu_aio_coroutine_enter(exp->common.ctx, | ||
120 | + client->recv_coroutine); | ||
121 | + } | ||
122 | |||
123 | - aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); | ||
124 | + return true; | ||
125 | + } | ||
126 | + } | ||
127 | |||
128 | - exp->common.ctx = NULL; | ||
129 | + return false; | ||
130 | } | ||
131 | |||
132 | static void nbd_eject_notifier(Notifier *n, void *data) | ||
133 | @@ -XXX,XX +XXX,XX @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) | ||
134 | blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); | ||
135 | } | ||
136 | |||
137 | +static const BlockDevOps nbd_block_ops = { | ||
138 | + .drained_begin = nbd_drained_begin, | ||
139 | + .drained_end = nbd_drained_end, | ||
140 | + .drained_poll = nbd_drained_poll, | ||
141 | +}; | ||
142 | + | ||
143 | static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, | ||
144 | Error **errp) | ||
145 | { | ||
146 | @@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, | ||
147 | |||
148 | exp->allocation_depth = arg->allocation_depth; | ||
149 | |||
150 | + /* | ||
151 | + * We need to inhibit request queuing in the block layer to ensure we can | ||
152 | + * be properly quiesced when entering a drained section, as our coroutines | ||
153 | + * servicing pending requests might enter blk_pread(). | ||
154 | + */ | ||
155 | + blk_set_disable_request_queuing(blk, true); | ||
156 | + | ||
157 | blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); | ||
158 | |||
159 | + blk_set_dev_ops(blk, &nbd_block_ops, exp); | ||
160 | + | ||
161 | QTAILQ_INSERT_TAIL(&exports, exp, next); | ||
162 | |||
163 | return 0; | ||
164 | @@ -XXX,XX +XXX,XX @@ static void nbd_export_delete(BlockExport *blk_exp) | ||
165 | } | ||
166 | blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, | ||
167 | blk_aio_detach, exp); | ||
168 | + blk_set_disable_request_queuing(exp->common.blk, false); | ||
169 | } | ||
170 | |||
171 | for (i = 0; i < exp->nr_export_bitmaps; i++) { | ||
172 | -- | 41 | -- |
173 | 2.30.2 | 42 | 2.31.1 |
174 | 43 | ||
175 | 44 | diff view generated by jsdifflib |