1 | The following changes since commit dd2db39d78431ab5a0b78777afaab3d61e94533e: | 1 | The following changes since commit 42f6c9179be4401974dd3a75ee72defd16b5092d: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/ehabkost-gl/tags/x86-next-pull-request' into staging (2021-06-01 21:23:26 +0100) | 3 | Merge tag 'pull-ppc-20211112' of https://github.com/legoater/qemu into staging (2021-11-12 12:28:25 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to b317006a3f1f04191a7981cef83417cb2477213b: | 9 | for you to fetch changes up to 7461272c5f6032436ef9032c091c0118539483e4: |
10 | 10 | ||
11 | docs/secure-coding-practices: Describe how to use 'null-co' block driver (2021-06-02 14:29:14 +0200) | 11 | softmmu/qdev-monitor: fix use-after-free in qdev_set_id() (2021-11-15 15:49:46 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches | 14 | Block layer patches |
15 | 15 | ||
16 | - NBD server: Fix crashes related to switching between AioContexts | 16 | - Fixes to image streaming job and block layer reconfiguration to make |
17 | - file-posix: Workaround for discard/write_zeroes on buggy filesystems | 17 | iotest 030 pass again |
18 | - Follow-up fixes for the reopen vs. permission changes | 18 | - docs: Deprecate incorrectly typed device_add arguments |
19 | - quorum: Fix error handling for flush | 19 | - file-posix: Fix alignment after reopen changing O_DIRECT |
20 | - block-copy: Refactor copy_range handling | ||
21 | - docs: Describe how to use 'null-co' block driver | ||
22 | 20 | ||
23 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
24 | Lukas Straub (1): | 22 | Hanna Reitz (10): |
25 | block/quorum: Provide .bdrv_co_flush instead of .bdrv_co_flush_to_disk | 23 | stream: Traverse graph after modification |
24 | block: Manipulate children list in .attach/.detach | ||
25 | block: Unite remove_empty_child and child_free | ||
26 | block: Drop detached child from ignore list | ||
27 | block: Pass BdrvChild ** to replace_child_noperm | ||
28 | block: Restructure remove_file_or_backing_child() | ||
29 | transactions: Invoke clean() after everything else | ||
30 | block: Let replace_child_tran keep indirect pointer | ||
31 | block: Let replace_child_noperm free children | ||
32 | iotests/030: Unthrottle parallel jobs in reverse | ||
26 | 33 | ||
27 | Philippe Mathieu-Daudé (1): | 34 | Kevin Wolf (2): |
28 | docs/secure-coding-practices: Describe how to use 'null-co' block driver | 35 | docs: Deprecate incorrectly typed device_add arguments |
36 | file-posix: Fix alignment after reopen changing O_DIRECT | ||
29 | 37 | ||
30 | Sergio Lopez (2): | 38 | Stefan Hajnoczi (1): |
31 | block-backend: add drained_poll | 39 | softmmu/qdev-monitor: fix use-after-free in qdev_set_id() |
32 | nbd/server: Use drained block ops to quiesce the server | ||
33 | 40 | ||
34 | Thomas Huth (2): | 41 | docs/about/deprecated.rst | 14 +++ |
35 | block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS | 42 | include/qemu/transactions.h | 3 + |
36 | block/file-posix: Try other fallbacks after invalid FALLOC_FL_ZERO_RANGE | 43 | block.c | 233 +++++++++++++++++++++++++++++++++----------- |
37 | 44 | block/file-posix.c | 20 +++- | |
38 | Vladimir Sementsov-Ogievskiy (14): | 45 | block/stream.c | 7 +- |
39 | qemu-io-cmds: assert that we don't have .perm requested in no-blk case | 46 | softmmu/qdev-monitor.c | 2 +- |
40 | block/vvfat: child_vvfat_qcow: add .get_parent_aio_context, fix crash | 47 | util/transactions.c | 8 +- |
41 | block/vvfat: fix vvfat_child_perm crash | 48 | tests/qemu-iotests/030 | 11 ++- |
42 | block: consistently use bdrv_is_read_only() | 49 | tests/qemu-iotests/142 | 22 +++++ |
43 | block: drop BlockDriverState::read_only | 50 | tests/qemu-iotests/142.out | 15 +++ |
44 | block: drop BlockBackendRootState::read_only | 51 | 10 files changed, 269 insertions(+), 66 deletions(-) |
45 | block: document child argument of bdrv_attach_child_common() | ||
46 | block-backend: improve blk_root_get_parent_desc() | ||
47 | block: improve bdrv_child_get_parent_desc() | ||
48 | block/vvfat: inherit child_vvfat_qcow from child_of_bds | ||
49 | block: simplify bdrv_child_user_desc() | ||
50 | block: improve permission conflict error message | ||
51 | block-copy: fix block_copy_task_entry() progress update | ||
52 | block-copy: refactor copy_range handling | ||
53 | |||
54 | docs/devel/secure-coding-practices.rst | 9 ++++ | ||
55 | include/block/block.h | 1 + | ||
56 | include/block/block_int.h | 2 - | ||
57 | include/sysemu/block-backend.h | 4 ++ | ||
58 | block.c | 82 ++++++++++++++++++++-------------- | ||
59 | block/block-backend.c | 26 +++++------ | ||
60 | block/block-copy.c | 80 ++++++++++++++++++++++----------- | ||
61 | block/commit.c | 2 +- | ||
62 | block/file-posix.c | 29 ++++++++---- | ||
63 | block/io.c | 4 +- | ||
64 | block/qapi.c | 2 +- | ||
65 | block/qcow2-snapshot.c | 2 +- | ||
66 | block/qcow2.c | 5 +-- | ||
67 | block/quorum.c | 2 +- | ||
68 | block/snapshot.c | 2 +- | ||
69 | block/vhdx-log.c | 2 +- | ||
70 | block/vvfat.c | 14 +++--- | ||
71 | blockdev.c | 3 +- | ||
72 | nbd/server.c | 82 +++++++++++++++++++++++++--------- | ||
73 | qemu-io-cmds.c | 14 +++++- | ||
74 | tests/unit/test-block-iothread.c | 6 --- | ||
75 | tests/qemu-iotests/283.out | 2 +- | ||
76 | tests/qemu-iotests/307.out | 2 +- | ||
77 | tests/qemu-iotests/tests/qsd-jobs.out | 2 +- | ||
78 | 24 files changed, 241 insertions(+), 138 deletions(-) | ||
79 | 52 | ||
80 | 53 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Document that security reports must use 'null-co,read-zeroes=on' | 3 | bdrv_cor_filter_drop() modifies the block graph. That means that other |
4 | because otherwise the memory is left uninitialized (which is an | 4 | parties can also modify the block graph before it returns. Therefore, |
5 | on-purpose performance feature). | 5 | we cannot assume that the result of a graph traversal we did before |
6 | remains valid afterwards. | ||
6 | 7 | ||
8 | We should thus fetch `base` and `unfiltered_base` afterwards instead of | ||
9 | before. | ||
10 | |||
11 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
7 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 13 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 14 | Message-Id: <20211111120829.81329-2-hreitz@redhat.com> |
9 | Message-Id: <20210601162548.2076631-1-philmd@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 16 | --- |
12 | docs/devel/secure-coding-practices.rst | 9 +++++++++ | 17 | block/stream.c | 7 +++++-- |
13 | 1 file changed, 9 insertions(+) | 18 | 1 file changed, 5 insertions(+), 2 deletions(-) |
14 | 19 | ||
15 | diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst | 20 | diff --git a/block/stream.c b/block/stream.c |
16 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/docs/devel/secure-coding-practices.rst | 22 | --- a/block/stream.c |
18 | +++ b/docs/devel/secure-coding-practices.rst | 23 | +++ b/block/stream.c |
19 | @@ -XXX,XX +XXX,XX @@ structures and only process the local copy. This prevents | 24 | @@ -XXX,XX +XXX,XX @@ static int stream_prepare(Job *job) |
20 | time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to | 25 | { |
21 | crash when a vCPU thread modifies guest RAM while device emulation is | 26 | StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); |
22 | processing it. | 27 | BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); |
28 | - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); | ||
29 | - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); | ||
30 | + BlockDriverState *base; | ||
31 | + BlockDriverState *unfiltered_base; | ||
32 | Error *local_err = NULL; | ||
33 | int ret = 0; | ||
34 | |||
35 | @@ -XXX,XX +XXX,XX @@ static int stream_prepare(Job *job) | ||
36 | bdrv_cor_filter_drop(s->cor_filter_bs); | ||
37 | s->cor_filter_bs = NULL; | ||
38 | |||
39 | + base = bdrv_filter_or_cow_bs(s->above_base); | ||
40 | + unfiltered_base = bdrv_skip_filters(base); | ||
23 | + | 41 | + |
24 | +Use of null-co block drivers | 42 | if (bdrv_cow_child(unfiltered_bs)) { |
25 | +---------------------------- | 43 | const char *base_id = NULL, *base_fmt = NULL; |
26 | + | 44 | if (unfiltered_base) { |
27 | +The ``null-co`` block driver is designed for performance: its read accesses are | ||
28 | +not initialized by default. In case this driver has to be used for security | ||
29 | +research, it must be used with the ``read-zeroes=on`` option which fills read | ||
30 | +buffers with zeroes. Security issues reported with the default | ||
31 | +(``read-zeroes=off``) will be discarded. | ||
32 | -- | 45 | -- |
33 | 2.30.2 | 46 | 2.31.1 |
34 | 47 | ||
35 | 48 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Now permissions are updated as follows: | 3 | The children list is specific to BDS parents. We should not modify it |
4 | 1. do graph modifications ignoring permissions | 4 | in the general children modification code, but let BDS parents deal with |
5 | 2. do permission update | 5 | it in their .attach() and .detach() methods. |
6 | 6 | ||
7 | (of course, we rollback [1] if [2] fails) | 7 | This also has the advantage that a BdrvChild is removed from the |
8 | children list before its .bs pointer can become NULL. BDS parents | ||
9 | generally assume that their children's .bs pointer is never NULL, so | ||
10 | this is actually a bug fix. | ||
8 | 11 | ||
9 | So, on stage [2] we can't say which users are "old" and which are | 12 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
10 | "new" and exist only since [1]. And current error message is a bit | 13 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
11 | outdated. Let's improve it, to make everything clean. | 14 | Message-Id: <20211111120829.81329-3-hreitz@redhat.com> |
12 | |||
13 | While being here, add also a comment and some good assertions. | ||
14 | |||
15 | iotests 283, 307, qsd-jobs outputs are updated. | ||
16 | |||
17 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
18 | Message-Id: <20210601075218.79249-7-vsementsov@virtuozzo.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 16 | --- |
21 | block.c | 29 ++++++++++++++++++++------- | 17 | block.c | 14 +++++--------- |
22 | tests/qemu-iotests/283.out | 2 +- | 18 | 1 file changed, 5 insertions(+), 9 deletions(-) |
23 | tests/qemu-iotests/307.out | 2 +- | ||
24 | tests/qemu-iotests/tests/qsd-jobs.out | 2 +- | ||
25 | 4 files changed, 25 insertions(+), 10 deletions(-) | ||
26 | 19 | ||
27 | diff --git a/block.c b/block.c | 20 | diff --git a/block.c b/block.c |
28 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/block.c | 22 | --- a/block.c |
30 | +++ b/block.c | 23 | +++ b/block.c |
31 | @@ -XXX,XX +XXX,XX @@ static char *bdrv_child_user_desc(BdrvChild *c) | 24 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_attach(BdrvChild *child) |
32 | return c->klass->get_parent_desc(c); | 25 | { |
26 | BlockDriverState *bs = child->opaque; | ||
27 | |||
28 | + QLIST_INSERT_HEAD(&bs->children, child, next); | ||
29 | + | ||
30 | if (child->role & BDRV_CHILD_COW) { | ||
31 | bdrv_backing_attach(child); | ||
32 | } | ||
33 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_detach(BdrvChild *child) | ||
34 | } | ||
35 | |||
36 | bdrv_unapply_subtree_drain(child, bs); | ||
37 | + | ||
38 | + QLIST_REMOVE(child, next); | ||
33 | } | 39 | } |
34 | 40 | ||
35 | +/* | 41 | static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, |
36 | + * Check that @a allows everything that @b needs. @a and @b must reference same | 42 | @@ -XXX,XX +XXX,XX @@ static void bdrv_child_free(void *opaque) |
37 | + * child node. | 43 | static void bdrv_remove_empty_child(BdrvChild *child) |
38 | + */ | ||
39 | static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) | ||
40 | { | 44 | { |
41 | - g_autofree char *user = NULL; | 45 | assert(!child->bs); |
42 | - g_autofree char *perm_names = NULL; | 46 | - QLIST_SAFE_REMOVE(child, next); |
43 | + const char *child_bs_name; | 47 | + assert(!child->next.le_prev); /* not in children list */ |
44 | + g_autofree char *a_user = NULL; | 48 | bdrv_child_free(child); |
45 | + g_autofree char *b_user = NULL; | 49 | } |
46 | + g_autofree char *perms = NULL; | 50 | |
47 | + | 51 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, |
48 | + assert(a->bs); | 52 | return ret; |
49 | + assert(a->bs == b->bs); | ||
50 | |||
51 | if ((b->perm & a->shared_perm) == b->perm) { | ||
52 | return true; | ||
53 | } | 53 | } |
54 | 54 | ||
55 | - perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); | 55 | - QLIST_INSERT_HEAD(&parent_bs->children, *child, next); |
56 | - user = bdrv_child_user_desc(a); | 56 | - /* |
57 | - error_setg(errp, "Conflicts with use by %s as '%s', which does not " | 57 | - * child is removed in bdrv_attach_child_common_abort(), so don't care to |
58 | - "allow '%s' on %s", | 58 | - * abort this change separately. |
59 | - user, a->name, perm_names, bdrv_get_node_name(b->bs)); | 59 | - */ |
60 | + child_bs_name = bdrv_get_node_name(b->bs); | 60 | - |
61 | + a_user = bdrv_child_user_desc(a); | 61 | return 0; |
62 | + b_user = bdrv_child_user_desc(b); | ||
63 | + perms = bdrv_perm_names(b->perm & ~a->shared_perm); | ||
64 | + | ||
65 | + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " | ||
66 | + "both required by %s (uses node '%s' as '%s' child) and " | ||
67 | + "unshared by %s (uses node '%s' as '%s' child).", | ||
68 | + child_bs_name, perms, | ||
69 | + b_user, child_bs_name, b->name, | ||
70 | + a_user, child_bs_name, a->name); | ||
71 | |||
72 | return false; | ||
73 | } | 62 | } |
74 | diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out | 63 | |
75 | index XXXXXXX..XXXXXXX 100644 | 64 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) |
76 | --- a/tests/qemu-iotests/283.out | 65 | BdrvRemoveFilterOrCowChild *s = opaque; |
77 | +++ b/tests/qemu-iotests/283.out | 66 | BlockDriverState *parent_bs = s->child->opaque; |
78 | @@ -XXX,XX +XXX,XX @@ | 67 | |
79 | {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} | 68 | - QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); |
80 | {"return": {}} | 69 | if (s->is_backing) { |
81 | {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} | 70 | parent_bs->backing = s->child; |
82 | -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} | 71 | } else { |
83 | +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Permission conflict on node 'base': permissions 'write' are both required by node 'other' (uses node 'base' as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' child)."}} | 72 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, |
84 | 73 | }; | |
85 | === backup-top should be gone after job-finalize === | 74 | tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); |
86 | 75 | ||
87 | diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out | 76 | - QLIST_SAFE_REMOVE(child, next); |
88 | index XXXXXXX..XXXXXXX 100644 | 77 | if (s->is_backing) { |
89 | --- a/tests/qemu-iotests/307.out | 78 | bs->backing = NULL; |
90 | +++ b/tests/qemu-iotests/307.out | 79 | } else { |
91 | @@ -XXX,XX +XXX,XX @@ exports available: 1 | ||
92 | |||
93 | === Add a writable export === | ||
94 | {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} | ||
95 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} | ||
96 | +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}} | ||
97 | {"execute": "device_del", "arguments": {"id": "sda"}} | ||
98 | {"return": {}} | ||
99 | {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
100 | diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/tests/qemu-iotests/tests/qsd-jobs.out | ||
103 | +++ b/tests/qemu-iotests/tests/qsd-jobs.out | ||
104 | @@ -XXX,XX +XXX,XX @@ QMP_VERSION | ||
105 | {"return": {}} | ||
106 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} | ||
107 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} | ||
108 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}} | ||
109 | +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} | ||
110 | {"return": {}} | ||
111 | {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} | ||
112 | *** done | ||
113 | -- | 80 | -- |
114 | 2.30.2 | 81 | 2.31.1 |
115 | 82 | ||
116 | 83 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | All child classes have this callback. So, drop unreachable code. | 3 | Now that bdrv_remove_empty_child() no longer removes the child from the |
4 | parent's children list but only checks that it is not in such a list, it | ||
5 | is only a wrapper around bdrv_child_free() that checks that the child is | ||
6 | empty and unused. That should apply to all children that we free, so | ||
7 | put those checks into bdrv_child_free() and drop | ||
8 | bdrv_remove_empty_child(). | ||
4 | 9 | ||
5 | Still add an assertion to bdrv_attach_child_common(), to early detect | 10 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
6 | bad classes. | 11 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
7 | 12 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | |
8 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 13 | Message-Id: <20211111120829.81329-4-hreitz@redhat.com> |
9 | Message-Id: <20210601075218.79249-6-vsementsov@virtuozzo.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | --- | 15 | --- |
12 | block.c | 7 ++----- | 16 | block.c | 26 +++++++++++++------------- |
13 | 1 file changed, 2 insertions(+), 5 deletions(-) | 17 | 1 file changed, 13 insertions(+), 13 deletions(-) |
14 | 18 | ||
15 | diff --git a/block.c b/block.c | 19 | diff --git a/block.c b/block.c |
16 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block.c | 21 | --- a/block.c |
18 | +++ b/block.c | 22 | +++ b/block.c |
19 | @@ -XXX,XX +XXX,XX @@ bool bdrv_is_writable(BlockDriverState *bs) | 23 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child, |
20 | 24 | } | |
21 | static char *bdrv_child_user_desc(BdrvChild *c) | 25 | } |
26 | |||
27 | -static void bdrv_child_free(void *opaque) | ||
28 | -{ | ||
29 | - BdrvChild *c = opaque; | ||
30 | - | ||
31 | - g_free(c->name); | ||
32 | - g_free(c); | ||
33 | -} | ||
34 | - | ||
35 | -static void bdrv_remove_empty_child(BdrvChild *child) | ||
36 | +/** | ||
37 | + * Free the given @child. | ||
38 | + * | ||
39 | + * The child must be empty (i.e. `child->bs == NULL`) and it must be | ||
40 | + * unused (i.e. not in a children list). | ||
41 | + */ | ||
42 | +static void bdrv_child_free(BdrvChild *child) | ||
22 | { | 43 | { |
23 | - if (c->klass->get_parent_desc) { | 44 | assert(!child->bs); |
24 | - return c->klass->get_parent_desc(c); | 45 | assert(!child->next.le_prev); /* not in children list */ |
25 | - } | 46 | - bdrv_child_free(child); |
26 | - | 47 | + |
27 | - return g_strdup("another user"); | 48 | + g_free(child->name); |
28 | + return c->klass->get_parent_desc(c); | 49 | + g_free(child); |
29 | } | 50 | } |
30 | 51 | ||
31 | static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) | 52 | typedef struct BdrvAttachChildCommonState { |
53 | @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_child_common_abort(void *opaque) | ||
54 | } | ||
55 | |||
56 | bdrv_unref(bs); | ||
57 | - bdrv_remove_empty_child(child); | ||
58 | + bdrv_child_free(child); | ||
59 | *s->child = NULL; | ||
60 | } | ||
61 | |||
32 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | 62 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, |
33 | 63 | ||
34 | assert(child); | 64 | if (ret < 0) { |
35 | assert(*child == NULL); | 65 | error_propagate(errp, local_err); |
36 | + assert(child_class->get_parent_desc); | 66 | - bdrv_remove_empty_child(new_child); |
37 | 67 | + bdrv_child_free(new_child); | |
38 | new_child = g_new(BdrvChild, 1); | 68 | return ret; |
39 | *new_child = (BdrvChild) { | 69 | } |
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ static void bdrv_detach_child(BdrvChild *child) | ||
72 | BlockDriverState *old_bs = child->bs; | ||
73 | |||
74 | bdrv_replace_child_noperm(child, NULL); | ||
75 | - bdrv_remove_empty_child(child); | ||
76 | + bdrv_child_free(child); | ||
77 | |||
78 | if (old_bs) { | ||
79 | /* | ||
40 | -- | 80 | -- |
41 | 2.30.2 | 81 | 2.31.1 |
42 | 82 | ||
43 | 83 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | It's better to use accessor function instead of bs->read_only directly. | 3 | bdrv_attach_child_common_abort() restores the parent's AioContext. To |
4 | In some places use bdrv_is_writable() instead of | 4 | do so, the child (which was supposed to be attached, but is now detached |
5 | checking both BDRV_O_RDWR set and BDRV_O_INACTIVE not set. | 5 | again by this abort handler) is added to the ignore list for the |
6 | AioContext changing functions. | ||
6 | 7 | ||
7 | In bdrv_open_common() it's a bit strange to add one more variable, but | 8 | However, since we modify a BDS's children list in the BdrvChildClass's |
8 | we are going to drop bs->read_only in the next patch, so new ro local | 9 | .attach and .detach handlers, the child is already effectively detached |
9 | variable substitutes it here. | 10 | from the parent by this point. We do not need to put it into the ignore |
11 | list. | ||
10 | 12 | ||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 13 | Use this opportunity to clean up the empty line structure: Keep setting |
12 | Message-Id: <20210527154056.70294-2-vsementsov@virtuozzo.com> | 14 | the ignore list, invoking the AioContext function, and freeing the |
15 | ignore list in blocks separated by empty lines. | ||
16 | |||
17 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
18 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
19 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
20 | Message-Id: <20211111120829.81329-5-hreitz@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 22 | --- |
15 | block.c | 11 +++++++---- | 23 | block.c | 8 +++++--- |
16 | block/block-backend.c | 2 +- | 24 | 1 file changed, 5 insertions(+), 3 deletions(-) |
17 | block/commit.c | 2 +- | ||
18 | block/io.c | 4 ++-- | ||
19 | block/qapi.c | 2 +- | ||
20 | block/qcow2-snapshot.c | 2 +- | ||
21 | block/qcow2.c | 5 ++--- | ||
22 | block/snapshot.c | 2 +- | ||
23 | block/vhdx-log.c | 2 +- | ||
24 | 9 files changed, 17 insertions(+), 15 deletions(-) | ||
25 | 25 | ||
26 | diff --git a/block.c b/block.c | 26 | diff --git a/block.c b/block.c |
27 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/block.c | 28 | --- a/block.c |
29 | +++ b/block.c | 29 | +++ b/block.c |
30 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | 30 | @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_child_common_abort(void *opaque) |
31 | QemuOpts *opts; | ||
32 | BlockDriver *drv; | ||
33 | Error *local_err = NULL; | ||
34 | + bool ro; | ||
35 | |||
36 | assert(bs->file == NULL); | ||
37 | assert(options != NULL && bs->options != options); | ||
38 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
39 | |||
40 | bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
41 | |||
42 | - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { | ||
43 | - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { | ||
44 | + ro = bdrv_is_read_only(bs); | ||
45 | + | ||
46 | + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { | ||
47 | + if (!ro && bdrv_is_whitelisted(drv, true)) { | ||
48 | ret = bdrv_apply_auto_read_only(bs, NULL, NULL); | ||
49 | } else { | ||
50 | ret = -ENOTSUP; | ||
51 | } | ||
52 | if (ret < 0) { | ||
53 | error_setg(errp, | ||
54 | - !bs->read_only && bdrv_is_whitelisted(drv, true) | ||
55 | + !ro && bdrv_is_whitelisted(drv, true) | ||
56 | ? "Driver '%s' can only be used for read-only devices" | ||
57 | : "Driver '%s' is not whitelisted", | ||
58 | drv->format_name); | ||
59 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | ||
60 | assert(qatomic_read(&bs->copy_on_read) == 0); | ||
61 | |||
62 | if (bs->open_flags & BDRV_O_COPY_ON_READ) { | ||
63 | - if (!bs->read_only) { | ||
64 | + if (!ro) { | ||
65 | bdrv_enable_copy_on_read(bs); | ||
66 | } else { | ||
67 | error_setg(errp, "Can't use copy-on-read on read-only device"); | ||
68 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/block/block-backend.c | ||
71 | +++ b/block/block-backend.c | ||
72 | @@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk) | ||
73 | assert(blk->root); | ||
74 | |||
75 | blk->root_state.open_flags = blk->root->bs->open_flags; | ||
76 | - blk->root_state.read_only = blk->root->bs->read_only; | ||
77 | + blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); | ||
78 | blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; | ||
79 | } | ||
80 | |||
81 | diff --git a/block/commit.c b/block/commit.c | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/block/commit.c | ||
84 | +++ b/block/commit.c | ||
85 | @@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs) | ||
86 | return -EBUSY; | ||
87 | } | 31 | } |
88 | 32 | ||
89 | - ro = backing_file_bs->read_only; | 33 | if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { |
90 | + ro = bdrv_is_read_only(backing_file_bs); | 34 | - GSList *ignore = g_slist_prepend(NULL, child); |
91 | 35 | + GSList *ignore; | |
92 | if (ro) { | 36 | |
93 | if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { | 37 | + /* No need to ignore `child`, because it has been detached already */ |
94 | diff --git a/block/io.c b/block/io.c | 38 | + ignore = NULL; |
95 | index XXXXXXX..XXXXXXX 100644 | 39 | child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, |
96 | --- a/block/io.c | 40 | &error_abort); |
97 | +++ b/block/io.c | 41 | g_slist_free(ignore); |
98 | @@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, | 42 | - ignore = g_slist_prepend(NULL, child); |
99 | 43 | - child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); | |
100 | bdrv_check_request(offset, bytes, &error_abort); | 44 | |
101 | 45 | + ignore = NULL; | |
102 | - if (bs->read_only) { | 46 | + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); |
103 | + if (bdrv_is_read_only(bs)) { | 47 | g_slist_free(ignore); |
104 | return -EPERM; | ||
105 | } | 48 | } |
106 | 49 | ||
107 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, | ||
108 | if (new_bytes) { | ||
109 | bdrv_make_request_serialising(&req, 1); | ||
110 | } | ||
111 | - if (bs->read_only) { | ||
112 | + if (bdrv_is_read_only(bs)) { | ||
113 | error_setg(errp, "Image is read-only"); | ||
114 | ret = -EACCES; | ||
115 | goto out; | ||
116 | diff --git a/block/qapi.c b/block/qapi.c | ||
117 | index XXXXXXX..XXXXXXX 100644 | ||
118 | --- a/block/qapi.c | ||
119 | +++ b/block/qapi.c | ||
120 | @@ -XXX,XX +XXX,XX @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, | ||
121 | |||
122 | info = g_malloc0(sizeof(*info)); | ||
123 | info->file = g_strdup(bs->filename); | ||
124 | - info->ro = bs->read_only; | ||
125 | + info->ro = bdrv_is_read_only(bs); | ||
126 | info->drv = g_strdup(bs->drv->format_name); | ||
127 | info->encrypted = bs->encrypted; | ||
128 | |||
129 | diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/block/qcow2-snapshot.c | ||
132 | +++ b/block/qcow2-snapshot.c | ||
133 | @@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, | ||
134 | int new_l1_bytes; | ||
135 | int ret; | ||
136 | |||
137 | - assert(bs->read_only); | ||
138 | + assert(bdrv_is_read_only(bs)); | ||
139 | |||
140 | /* Search the snapshot */ | ||
141 | snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); | ||
142 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
143 | index XXXXXXX..XXXXXXX 100644 | ||
144 | --- a/block/qcow2.c | ||
145 | +++ b/block/qcow2.c | ||
146 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
147 | |||
148 | /* Clear unknown autoclear feature bits */ | ||
149 | update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; | ||
150 | - update_header = | ||
151 | - update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); | ||
152 | + update_header = update_header && bdrv_is_writable(bs); | ||
153 | if (update_header) { | ||
154 | s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; | ||
155 | } | ||
156 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
157 | bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; | ||
158 | |||
159 | /* Repair image if dirty */ | ||
160 | - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && | ||
161 | + if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && | ||
162 | (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { | ||
163 | BdrvCheckResult result = {0}; | ||
164 | |||
165 | diff --git a/block/snapshot.c b/block/snapshot.c | ||
166 | index XXXXXXX..XXXXXXX 100644 | ||
167 | --- a/block/snapshot.c | ||
168 | +++ b/block/snapshot.c | ||
169 | @@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, | ||
170 | error_setg(errp, "snapshot_id and name are both NULL"); | ||
171 | return -EINVAL; | ||
172 | } | ||
173 | - if (!bs->read_only) { | ||
174 | + if (!bdrv_is_read_only(bs)) { | ||
175 | error_setg(errp, "Device is not readonly"); | ||
176 | return -EINVAL; | ||
177 | } | ||
178 | diff --git a/block/vhdx-log.c b/block/vhdx-log.c | ||
179 | index XXXXXXX..XXXXXXX 100644 | ||
180 | --- a/block/vhdx-log.c | ||
181 | +++ b/block/vhdx-log.c | ||
182 | @@ -XXX,XX +XXX,XX @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, | ||
183 | } | ||
184 | |||
185 | if (logs.valid) { | ||
186 | - if (bs->read_only) { | ||
187 | + if (bdrv_is_read_only(bs)) { | ||
188 | bdrv_refresh_filename(bs); | ||
189 | ret = -EPERM; | ||
190 | error_setg(errp, | ||
191 | -- | 50 | -- |
192 | 2.30.2 | 51 | 2.31.1 |
193 | 52 | ||
194 | 53 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | The logic around **child is not obvious: this reference is used not | 3 | bdrv_replace_child_noperm() modifies BdrvChild.bs, and can potentially |
4 | only to return resulting child, but also to rollback NULL value on | 4 | set it to NULL. That is dangerous, because BDS parents generally assume |
5 | transaction abort. | 5 | that their children's .bs pointer is never NULL. We therefore want to |
6 | let bdrv_replace_child_noperm() set the corresponding BdrvChild pointer | ||
7 | to NULL, too. | ||
6 | 8 | ||
7 | So, let's add documentation and some assertions. | 9 | This patch lays the foundation for it by passing a BdrvChild ** pointer |
10 | to bdrv_replace_child_noperm() so that it can later use it to NULL the | ||
11 | BdrvChild pointer immediately after setting BdrvChild.bs to NULL. | ||
8 | 12 | ||
9 | While being here, drop extra declaration of bdrv_attach_child_noperm(). | 13 | (We will still need to undertake some intermediate steps, though.) |
10 | 14 | ||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 15 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
12 | Message-Id: <20210601075218.79249-2-vsementsov@virtuozzo.com> | 16 | Message-Id: <20211111120829.81329-6-hreitz@redhat.com> |
17 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 18 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 19 | --- |
15 | block.c | 24 +++++++++++++++--------- | 20 | block.c | 23 ++++++++++++----------- |
16 | 1 file changed, 15 insertions(+), 9 deletions(-) | 21 | 1 file changed, 12 insertions(+), 11 deletions(-) |
17 | 22 | ||
18 | diff --git a/block.c b/block.c | 23 | diff --git a/block.c b/block.c |
19 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block.c | 25 | --- a/block.c |
21 | +++ b/block.c | 26 | +++ b/block.c |
22 | @@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename, | 27 | @@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename, |
23 | 28 | static bool bdrv_recurse_has_child(BlockDriverState *bs, | |
24 | static void bdrv_replace_child_noperm(BdrvChild *child, | 29 | BlockDriverState *child); |
30 | |||
31 | -static void bdrv_replace_child_noperm(BdrvChild *child, | ||
32 | +static void bdrv_replace_child_noperm(BdrvChild **child, | ||
25 | BlockDriverState *new_bs); | 33 | BlockDriverState *new_bs); |
26 | -static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | 34 | static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, |
27 | - BlockDriverState *child_bs, | 35 | BdrvChild *child, |
28 | - const char *child_name, | 36 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_abort(void *opaque) |
29 | - const BdrvChildClass *child_class, | 37 | BlockDriverState *new_bs = s->child->bs; |
30 | - BdrvChildRole child_role, | 38 | |
31 | - BdrvChild **child, | 39 | /* old_bs reference is transparently moved from @s to @s->child */ |
32 | - Transaction *tran, | 40 | - bdrv_replace_child_noperm(s->child, s->old_bs); |
33 | - Error **errp); | 41 | + bdrv_replace_child_noperm(&s->child, s->old_bs); |
34 | static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, | 42 | bdrv_unref(new_bs); |
35 | Transaction *tran); | 43 | } |
36 | 44 | ||
37 | @@ -XXX,XX +XXX,XX @@ static TransactionActionDrv bdrv_attach_child_common_drv = { | 45 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, |
38 | 46 | if (new_bs) { | |
39 | /* | 47 | bdrv_ref(new_bs); |
40 | * Common part of attaching bdrv child to bs or to blk or to job | 48 | } |
41 | + * | 49 | - bdrv_replace_child_noperm(child, new_bs); |
42 | + * Resulting new child is returned through @child. | 50 | + bdrv_replace_child_noperm(&child, new_bs); |
43 | + * At start *@child must be NULL. | 51 | /* old_bs reference is transparently moved from @child to @s */ |
44 | + * @child is saved to a new entry of @tran, so that *@child could be reverted to | 52 | } |
45 | + * NULL on abort(). So referenced variable must live at least until transaction | 53 | |
46 | + * end. | 54 | @@ -XXX,XX +XXX,XX @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) |
47 | */ | 55 | return permissions[qapi_perm]; |
48 | static int bdrv_attach_child_common(BlockDriverState *child_bs, | 56 | } |
49 | const char *child_name, | 57 | |
58 | -static void bdrv_replace_child_noperm(BdrvChild *child, | ||
59 | +static void bdrv_replace_child_noperm(BdrvChild **childp, | ||
60 | BlockDriverState *new_bs) | ||
61 | { | ||
62 | + BdrvChild *child = *childp; | ||
63 | BlockDriverState *old_bs = child->bs; | ||
64 | int new_bs_quiesce_counter; | ||
65 | int drain_saldo; | ||
66 | @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_child_common_abort(void *opaque) | ||
67 | BdrvChild *child = *s->child; | ||
68 | BlockDriverState *bs = child->bs; | ||
69 | |||
70 | - bdrv_replace_child_noperm(child, NULL); | ||
71 | + bdrv_replace_child_noperm(s->child, NULL); | ||
72 | |||
73 | if (bdrv_get_aio_context(bs) != s->old_child_ctx) { | ||
74 | bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); | ||
50 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | 75 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, |
76 | } | ||
77 | |||
78 | bdrv_ref(child_bs); | ||
79 | - bdrv_replace_child_noperm(new_child, child_bs); | ||
80 | + bdrv_replace_child_noperm(&new_child, child_bs); | ||
81 | |||
82 | *child = new_child; | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | ||
51 | return 0; | 85 | return 0; |
52 | } | 86 | } |
53 | 87 | ||
54 | +/* | 88 | -static void bdrv_detach_child(BdrvChild *child) |
55 | + * Variable referenced by @child must live at least until transaction end. | 89 | +static void bdrv_detach_child(BdrvChild **childp) |
56 | + * (see bdrv_attach_child_common() doc for details) | 90 | { |
57 | + */ | 91 | - BlockDriverState *old_bs = child->bs; |
58 | static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, | 92 | + BlockDriverState *old_bs = (*childp)->bs; |
59 | BlockDriverState *child_bs, | 93 | |
60 | const char *child_name, | 94 | - bdrv_replace_child_noperm(child, NULL); |
61 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, | 95 | - bdrv_child_free(child); |
62 | child_role, perm, shared_perm, opaque, | 96 | + bdrv_replace_child_noperm(childp, NULL); |
63 | &child, tran, errp); | 97 | + bdrv_child_free(*childp); |
64 | if (ret < 0) { | 98 | |
65 | - assert(child == NULL); | 99 | if (old_bs) { |
66 | goto out; | 100 | /* |
67 | } | 101 | @@ -XXX,XX +XXX,XX @@ void bdrv_root_unref_child(BdrvChild *child) |
68 | 102 | BlockDriverState *child_bs; | |
69 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, | 103 | |
70 | 104 | child_bs = child->bs; | |
71 | out: | 105 | - bdrv_detach_child(child); |
72 | tran_finalize(tran, ret); | 106 | + bdrv_detach_child(&child); |
73 | + /* child is unset on failure by bdrv_attach_child_common_abort() */ | ||
74 | + assert((ret < 0) == !child); | ||
75 | + | ||
76 | bdrv_unref(child_bs); | 107 | bdrv_unref(child_bs); |
77 | return child; | ||
78 | } | 108 | } |
79 | @@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, | ||
80 | |||
81 | out: | ||
82 | tran_finalize(tran, ret); | ||
83 | + /* child is unset on failure by bdrv_attach_child_common_abort() */ | ||
84 | + assert((ret < 0) == !child); | ||
85 | |||
86 | bdrv_unref(child_bs); | ||
87 | 109 | ||
88 | -- | 110 | -- |
89 | 2.30.2 | 111 | 2.31.1 |
90 | 112 | ||
91 | 113 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We have different types of parents: block nodes, block backends and | 3 | As of a future patch, bdrv_replace_child_tran() will take a BdrvChild ** |
4 | jobs. So, it makes sense to specify type together with name. | 4 | pointer. Prepare for that by getting such a pointer and using it where |
5 | applicable, and (dereferenced) as a parameter for | ||
6 | bdrv_replace_child_tran(). | ||
5 | 7 | ||
6 | Next, this handler us used to compose an error message about permission | 8 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
7 | conflict. And permission conflict occurs in a specific place of block | 9 | Message-Id: <20211111120829.81329-7-hreitz@redhat.com> |
8 | graph. We shouldn't report name of parent device (as it refers another | 10 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
9 | place in block graph), but exactly and only the name of the node. So, | ||
10 | use bdrv_get_node_name() directly. | ||
11 | |||
12 | iotest 283 output is updated. | ||
13 | |||
14 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
15 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
16 | Message-Id: <20210601075218.79249-4-vsementsov@virtuozzo.com> | ||
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
18 | --- | 12 | --- |
19 | block.c | 2 +- | 13 | block.c | 21 ++++++++++++--------- |
20 | tests/qemu-iotests/283.out | 2 +- | 14 | 1 file changed, 12 insertions(+), 9 deletions(-) |
21 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
22 | 15 | ||
23 | diff --git a/block.c b/block.c | 16 | diff --git a/block.c b/block.c |
24 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/block.c | 18 | --- a/block.c |
26 | +++ b/block.c | 19 | +++ b/block.c |
27 | @@ -XXX,XX +XXX,XX @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) | 20 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, |
28 | static char *bdrv_child_get_parent_desc(BdrvChild *c) | 21 | BdrvChild *child, |
22 | Transaction *tran) | ||
29 | { | 23 | { |
30 | BlockDriverState *parent = c->opaque; | 24 | + BdrvChild **childp; |
31 | - return g_strdup(bdrv_get_device_or_node_name(parent)); | 25 | BdrvRemoveFilterOrCowChild *s; |
32 | + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); | 26 | |
27 | - assert(child == bs->backing || child == bs->file); | ||
28 | - | ||
29 | if (!child) { | ||
30 | return; | ||
31 | } | ||
32 | |||
33 | + if (child == bs->backing) { | ||
34 | + childp = &bs->backing; | ||
35 | + } else if (child == bs->file) { | ||
36 | + childp = &bs->file; | ||
37 | + } else { | ||
38 | + g_assert_not_reached(); | ||
39 | + } | ||
40 | + | ||
41 | if (child->bs) { | ||
42 | - bdrv_replace_child_tran(child, NULL, tran); | ||
43 | + bdrv_replace_child_tran(*childp, NULL, tran); | ||
44 | } | ||
45 | |||
46 | s = g_new(BdrvRemoveFilterOrCowChild, 1); | ||
47 | *s = (BdrvRemoveFilterOrCowChild) { | ||
48 | .child = child, | ||
49 | - .is_backing = (child == bs->backing), | ||
50 | + .is_backing = (childp == &bs->backing), | ||
51 | }; | ||
52 | tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); | ||
53 | |||
54 | - if (s->is_backing) { | ||
55 | - bs->backing = NULL; | ||
56 | - } else { | ||
57 | - bs->file = NULL; | ||
58 | - } | ||
59 | + *childp = NULL; | ||
33 | } | 60 | } |
34 | 61 | ||
35 | static void bdrv_child_cb_drained_begin(BdrvChild *child) | 62 | /* |
36 | diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/tests/qemu-iotests/283.out | ||
39 | +++ b/tests/qemu-iotests/283.out | ||
40 | @@ -XXX,XX +XXX,XX @@ | ||
41 | {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} | ||
42 | {"return": {}} | ||
43 | {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} | ||
44 | -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}} | ||
45 | +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} | ||
46 | |||
47 | === backup-top should be gone after job-finalize === | ||
48 | |||
49 | -- | 63 | -- |
50 | 2.30.2 | 64 | 2.31.1 |
51 | 65 | ||
52 | 66 | diff view generated by jsdifflib |
1 | From: Sergio Lopez <slp@redhat.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Before switching between AioContexts we need to make sure that we're | 3 | Invoke the transaction drivers' .clean() methods only after all |
4 | fully quiesced ("nb_requests == 0" for every client) when entering the | 4 | .commit() or .abort() handlers are done. |
5 | drained section. | ||
6 | 5 | ||
7 | To do this, we set "quiescing = true" for every client on | 6 | This makes it easier to have nested transactions where the top-level |
8 | ".drained_begin" to prevent new coroutines from being created, and | 7 | transactions pass objects to lower transactions that the latter can |
9 | check if "nb_requests == 0" on ".drained_poll". Finally, once we're | 8 | still use throughout their commit/abort phases, while the top-level |
10 | exiting the drained section, on ".drained_end" we set "quiescing = | 9 | transaction keeps a reference that is released in its .clean() method. |
11 | false" and call "nbd_client_receive_next_request()" to resume the | ||
12 | processing of new requests. | ||
13 | 10 | ||
14 | With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be | 11 | (Before this commit, that is also possible, but the top-level |
15 | reverted to be as simple as they were before f148ae7d36. | 12 | transaction would need to take care to invoke tran_add() before the |
13 | lower-level transaction does. This commit makes the ordering | ||
14 | irrelevant, which is just a bit nicer.) | ||
16 | 15 | ||
17 | RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137 | 16 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
18 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | 17 | Message-Id: <20211111120829.81329-8-hreitz@redhat.com> |
19 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
20 | Message-Id: <20210602060552.17433-3-slp@redhat.com> | ||
21 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 18 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
22 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
23 | --- | 20 | --- |
24 | nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++-------------- | 21 | include/qemu/transactions.h | 3 +++ |
25 | 1 file changed, 61 insertions(+), 21 deletions(-) | 22 | util/transactions.c | 8 ++++++-- |
23 | 2 files changed, 9 insertions(+), 2 deletions(-) | ||
26 | 24 | ||
27 | diff --git a/nbd/server.c b/nbd/server.c | 25 | diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h |
28 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/nbd/server.c | 27 | --- a/include/qemu/transactions.h |
30 | +++ b/nbd/server.c | 28 | +++ b/include/qemu/transactions.h |
31 | @@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req) | 29 | @@ -XXX,XX +XXX,XX @@ |
32 | g_free(req); | 30 | * tran_create(), call your "prepare" functions on it, and finally call |
33 | 31 | * tran_abort() or tran_commit() to finalize the transaction by corresponding | |
34 | client->nb_requests--; | 32 | * finalization actions in reverse order. |
35 | + | 33 | + * |
36 | + if (client->quiescing && client->nb_requests == 0) { | 34 | + * The clean() functions registered by the drivers in a transaction are called |
37 | + aio_wait_kick(); | 35 | + * last, after all abort() or commit() functions have been called. |
36 | */ | ||
37 | |||
38 | #ifndef QEMU_TRANSACTIONS_H | ||
39 | diff --git a/util/transactions.c b/util/transactions.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/util/transactions.c | ||
42 | +++ b/util/transactions.c | ||
43 | @@ -XXX,XX +XXX,XX @@ void tran_abort(Transaction *tran) | ||
44 | { | ||
45 | TransactionAction *act, *next; | ||
46 | |||
47 | - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { | ||
48 | + QSLIST_FOREACH(act, &tran->actions, entry) { | ||
49 | if (act->drv->abort) { | ||
50 | act->drv->abort(act->opaque); | ||
51 | } | ||
38 | + } | 52 | + } |
39 | + | 53 | |
40 | nbd_client_receive_next_request(client); | 54 | + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { |
41 | 55 | if (act->drv->clean) { | |
42 | nbd_client_put(client); | 56 | act->drv->clean(act->opaque); |
43 | @@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque) | 57 | } |
44 | QTAILQ_FOREACH(client, &exp->clients, next) { | 58 | @@ -XXX,XX +XXX,XX @@ void tran_commit(Transaction *tran) |
45 | qio_channel_attach_aio_context(client->ioc, ctx); | ||
46 | |||
47 | + assert(client->nb_requests == 0); | ||
48 | assert(client->recv_coroutine == NULL); | ||
49 | assert(client->send_coroutine == NULL); | ||
50 | - | ||
51 | - if (client->quiescing) { | ||
52 | - client->quiescing = false; | ||
53 | - nbd_client_receive_next_request(client); | ||
54 | - } | ||
55 | } | ||
56 | } | ||
57 | |||
58 | -static void nbd_aio_detach_bh(void *opaque) | ||
59 | +static void blk_aio_detach(void *opaque) | ||
60 | { | 59 | { |
61 | NBDExport *exp = opaque; | 60 | TransactionAction *act, *next; |
62 | NBDClient *client; | 61 | |
63 | 62 | - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { | |
64 | + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); | 63 | + QSLIST_FOREACH(act, &tran->actions, entry) { |
65 | + | 64 | if (act->drv->commit) { |
66 | QTAILQ_FOREACH(client, &exp->clients, next) { | 65 | act->drv->commit(act->opaque); |
67 | qio_channel_detach_aio_context(client->ioc); | 66 | } |
68 | + } | 67 | + } |
69 | + | 68 | |
70 | + exp->common.ctx = NULL; | 69 | + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { |
71 | +} | 70 | if (act->drv->clean) { |
72 | + | 71 | act->drv->clean(act->opaque); |
73 | +static void nbd_drained_begin(void *opaque) | ||
74 | +{ | ||
75 | + NBDExport *exp = opaque; | ||
76 | + NBDClient *client; | ||
77 | + | ||
78 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
79 | client->quiescing = true; | ||
80 | + } | ||
81 | +} | ||
82 | |||
83 | - if (client->recv_coroutine) { | ||
84 | - if (client->read_yielding) { | ||
85 | - qemu_aio_coroutine_enter(exp->common.ctx, | ||
86 | - client->recv_coroutine); | ||
87 | - } else { | ||
88 | - AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); | ||
89 | - } | ||
90 | - } | ||
91 | +static void nbd_drained_end(void *opaque) | ||
92 | +{ | ||
93 | + NBDExport *exp = opaque; | ||
94 | + NBDClient *client; | ||
95 | |||
96 | - if (client->send_coroutine) { | ||
97 | - AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); | ||
98 | - } | ||
99 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
100 | + client->quiescing = false; | ||
101 | + nbd_client_receive_next_request(client); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | -static void blk_aio_detach(void *opaque) | ||
106 | +static bool nbd_drained_poll(void *opaque) | ||
107 | { | ||
108 | NBDExport *exp = opaque; | ||
109 | + NBDClient *client; | ||
110 | |||
111 | - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); | ||
112 | + QTAILQ_FOREACH(client, &exp->clients, next) { | ||
113 | + if (client->nb_requests != 0) { | ||
114 | + /* | ||
115 | + * If there's a coroutine waiting for a request on nbd_read_eof() | ||
116 | + * enter it here so we don't depend on the client to wake it up. | ||
117 | + */ | ||
118 | + if (client->recv_coroutine != NULL && client->read_yielding) { | ||
119 | + qemu_aio_coroutine_enter(exp->common.ctx, | ||
120 | + client->recv_coroutine); | ||
121 | + } | ||
122 | |||
123 | - aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); | ||
124 | + return true; | ||
125 | + } | ||
126 | + } | ||
127 | |||
128 | - exp->common.ctx = NULL; | ||
129 | + return false; | ||
130 | } | ||
131 | |||
132 | static void nbd_eject_notifier(Notifier *n, void *data) | ||
133 | @@ -XXX,XX +XXX,XX @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) | ||
134 | blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); | ||
135 | } | ||
136 | |||
137 | +static const BlockDevOps nbd_block_ops = { | ||
138 | + .drained_begin = nbd_drained_begin, | ||
139 | + .drained_end = nbd_drained_end, | ||
140 | + .drained_poll = nbd_drained_poll, | ||
141 | +}; | ||
142 | + | ||
143 | static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, | ||
144 | Error **errp) | ||
145 | { | ||
146 | @@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, | ||
147 | |||
148 | exp->allocation_depth = arg->allocation_depth; | ||
149 | |||
150 | + /* | ||
151 | + * We need to inhibit request queuing in the block layer to ensure we can | ||
152 | + * be properly quiesced when entering a drained section, as our coroutines | ||
153 | + * servicing pending requests might enter blk_pread(). | ||
154 | + */ | ||
155 | + blk_set_disable_request_queuing(blk, true); | ||
156 | + | ||
157 | blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); | ||
158 | |||
159 | + blk_set_dev_ops(blk, &nbd_block_ops, exp); | ||
160 | + | ||
161 | QTAILQ_INSERT_TAIL(&exports, exp, next); | ||
162 | |||
163 | return 0; | ||
164 | @@ -XXX,XX +XXX,XX @@ static void nbd_export_delete(BlockExport *blk_exp) | ||
165 | } | 72 | } |
166 | blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, | ||
167 | blk_aio_detach, exp); | ||
168 | + blk_set_disable_request_queuing(exp->common.blk, false); | ||
169 | } | ||
170 | |||
171 | for (i = 0; i < exp->nr_export_bitmaps; i++) { | ||
172 | -- | 73 | -- |
173 | 2.30.2 | 74 | 2.31.1 |
174 | 75 | ||
175 | 76 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Currently we update s->use_copy_range and s->copy_size in | 3 | As of a future commit, bdrv_replace_child_noperm() will clear the |
4 | block_copy_do_copy(). | 4 | indirect BdrvChild pointer passed to it if the new child BDS is NULL. |
5 | 5 | bdrv_replace_child_tran() will want to let it do that, but revert this | |
6 | It's not very good: | 6 | change in its abort handler. For that, we need to have it receive a |
7 | 7 | BdrvChild ** pointer, too, and keep it stored in the | |
8 | 1. block_copy_do_copy() is intended to be a simple function, that wraps | 8 | BdrvReplaceChildState object that we attach to the transaction. |
9 | bdrv_co_<io> functions for need of block copy. That's why we don't pass | 9 | |
10 | BlockCopyTask into it. So, block_copy_do_copy() is bad place for | 10 | Note that we do not need to store it in the BdrvReplaceChildState when |
11 | manipulation with generic state of block-copy process | 11 | new_bs is not NULL, because then there is nothing to revert. This is |
12 | 12 | important so that bdrv_replace_node_noperm() can pass a pointer to a | |
13 | 2. We are going to make block-copy thread-safe. So, it's good to move | 13 | loop-local variable to bdrv_replace_child_tran() without worrying that |
14 | manipulation with state of block-copy to the places where we'll need | 14 | this pointer will outlive one loop iteration. |
15 | critical sections anyway, to not introduce extra synchronization | 15 | |
16 | primitives in block_copy_do_copy(). | 16 | (Of course, for that to work, bdrv_replace_node_noperm() and in turn |
17 | 17 | bdrv_replace_node() and its relatives may not be called with a NULL @to | |
18 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 18 | node. Luckily, they already are not, but now we should assert this.) |
19 | Message-Id: <20210528141628.44287-3-vsementsov@virtuozzo.com> | 19 | |
20 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 20 | bdrv_remove_file_or_backing_child() on the other hand needs to ensure |
21 | that the indirect pointer it passes will stay valid for the duration of | ||
22 | the transaction. Ensure this by keeping a strong reference to the BDS | ||
23 | whose &bs->backing or &bs->file it passes to bdrv_replace_child_tran(), | ||
24 | and giving up that reference only in the transaction .clean() handler. | ||
25 | |||
26 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
27 | Message-Id: <20211111120829.81329-9-hreitz@redhat.com> | ||
28 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
21 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 29 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
22 | --- | 30 | --- |
23 | block/block-copy.c | 72 +++++++++++++++++++++++++++++++--------------- | 31 | block.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++------- |
24 | 1 file changed, 49 insertions(+), 23 deletions(-) | 32 | 1 file changed, 73 insertions(+), 10 deletions(-) |
25 | 33 | ||
26 | diff --git a/block/block-copy.c b/block/block-copy.c | 34 | diff --git a/block.c b/block.c |
27 | index XXXXXXX..XXXXXXX 100644 | 35 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/block/block-copy.c | 36 | --- a/block.c |
29 | +++ b/block/block-copy.c | 37 | +++ b/block.c |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyTask { | 38 | @@ -XXX,XX +XXX,XX @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, |
31 | int64_t offset; | 39 | |
32 | int64_t bytes; | 40 | typedef struct BdrvReplaceChildState { |
33 | bool zeroes; | 41 | BdrvChild *child; |
34 | + bool copy_range; | 42 | + BdrvChild **childp; |
35 | QLIST_ENTRY(BlockCopyTask) list; | 43 | BlockDriverState *old_bs; |
36 | CoQueue wait_queue; /* coroutines blocked on this task */ | 44 | } BdrvReplaceChildState; |
37 | } BlockCopyTask; | 45 | |
38 | @@ -XXX,XX +XXX,XX @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, | 46 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_abort(void *opaque) |
39 | .call_state = call_state, | 47 | BdrvReplaceChildState *s = opaque; |
40 | .offset = offset, | 48 | BlockDriverState *new_bs = s->child->bs; |
41 | .bytes = bytes, | 49 | |
42 | + .copy_range = s->use_copy_range, | 50 | - /* old_bs reference is transparently moved from @s to @s->child */ |
51 | + /* | ||
52 | + * old_bs reference is transparently moved from @s to s->child. | ||
53 | + * | ||
54 | + * Pass &s->child here instead of s->childp, because: | ||
55 | + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not | ||
56 | + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it | ||
57 | + * will not modify s->child. From that perspective, it does not matter | ||
58 | + * whether we pass s->childp or &s->child. | ||
59 | + * (TODO: Right now, bdrv_replace_child_noperm() never modifies that | ||
60 | + * pointer anyway (though it will in the future), so at this point it | ||
61 | + * absolutely does not matter whether we pass s->childp or &s->child.) | ||
62 | + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use | ||
63 | + * it here. | ||
64 | + * (3) If new_bs is NULL, *s->childp will have been NULLed by | ||
65 | + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we | ||
66 | + * must not pass a NULL *s->childp here. | ||
67 | + * (TODO: In its current state, bdrv_replace_child_noperm() will not | ||
68 | + * have NULLed *s->childp, so this does not apply yet. It will in the | ||
69 | + * future.) | ||
70 | + * | ||
71 | + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in | ||
72 | + * any case, there is no reason to pass it anyway. | ||
73 | + */ | ||
74 | bdrv_replace_child_noperm(&s->child, s->old_bs); | ||
75 | bdrv_unref(new_bs); | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ static TransactionActionDrv bdrv_replace_child_drv = { | ||
78 | * Note: real unref of old_bs is done only on commit. | ||
79 | * | ||
80 | * The function doesn't update permissions, caller is responsible for this. | ||
81 | + * | ||
82 | + * Note that if new_bs == NULL, @childp is stored in a state object attached | ||
83 | + * to @tran, so that the old child can be reinstated in the abort handler. | ||
84 | + * Therefore, if @new_bs can be NULL, @childp must stay valid until the | ||
85 | + * transaction is committed or aborted. | ||
86 | + * | ||
87 | + * (TODO: The reinstating does not happen yet, but it will once | ||
88 | + * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) | ||
89 | */ | ||
90 | -static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, | ||
91 | +static void bdrv_replace_child_tran(BdrvChild **childp, | ||
92 | + BlockDriverState *new_bs, | ||
93 | Transaction *tran) | ||
94 | { | ||
95 | BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); | ||
96 | *s = (BdrvReplaceChildState) { | ||
97 | - .child = child, | ||
98 | - .old_bs = child->bs, | ||
99 | + .child = *childp, | ||
100 | + .childp = new_bs == NULL ? childp : NULL, | ||
101 | + .old_bs = (*childp)->bs, | ||
43 | }; | 102 | }; |
44 | qemu_co_queue_init(&task->wait_queue); | 103 | tran_add(tran, &bdrv_replace_child_drv, s); |
45 | QLIST_INSERT_HEAD(&s->tasks, task, list); | 104 | |
46 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool, | 105 | if (new_bs) { |
106 | bdrv_ref(new_bs); | ||
107 | } | ||
108 | - bdrv_replace_child_noperm(&child, new_bs); | ||
109 | - /* old_bs reference is transparently moved from @child to @s */ | ||
110 | + bdrv_replace_child_noperm(childp, new_bs); | ||
111 | + /* old_bs reference is transparently moved from *childp to @s */ | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | @@ -XXX,XX +XXX,XX @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) | ||
116 | |||
117 | typedef struct BdrvRemoveFilterOrCowChild { | ||
118 | BdrvChild *child; | ||
119 | + BlockDriverState *bs; | ||
120 | bool is_backing; | ||
121 | } BdrvRemoveFilterOrCowChild; | ||
122 | |||
123 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque) | ||
124 | bdrv_child_free(s->child); | ||
125 | } | ||
126 | |||
127 | +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) | ||
128 | +{ | ||
129 | + BdrvRemoveFilterOrCowChild *s = opaque; | ||
130 | + | ||
131 | + /* Drop the bs reference after the transaction is done */ | ||
132 | + bdrv_unref(s->bs); | ||
133 | + g_free(s); | ||
134 | +} | ||
135 | + | ||
136 | static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { | ||
137 | .abort = bdrv_remove_filter_or_cow_child_abort, | ||
138 | .commit = bdrv_remove_filter_or_cow_child_commit, | ||
139 | - .clean = g_free, | ||
140 | + .clean = bdrv_remove_filter_or_cow_child_clean, | ||
141 | }; | ||
142 | |||
143 | /* | ||
144 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, | ||
145 | return; | ||
146 | } | ||
147 | |||
148 | + /* | ||
149 | + * Keep a reference to @bs so @childp will stay valid throughout the | ||
150 | + * transaction (required by bdrv_replace_child_tran()) | ||
151 | + */ | ||
152 | + bdrv_ref(bs); | ||
153 | if (child == bs->backing) { | ||
154 | childp = &bs->backing; | ||
155 | } else if (child == bs->file) { | ||
156 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, | ||
157 | } | ||
158 | |||
159 | if (child->bs) { | ||
160 | - bdrv_replace_child_tran(*childp, NULL, tran); | ||
161 | + bdrv_replace_child_tran(childp, NULL, tran); | ||
162 | } | ||
163 | |||
164 | s = g_new(BdrvRemoveFilterOrCowChild, 1); | ||
165 | *s = (BdrvRemoveFilterOrCowChild) { | ||
166 | .child = child, | ||
167 | + .bs = bs, | ||
168 | .is_backing = (childp == &bs->backing), | ||
169 | }; | ||
170 | tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); | ||
171 | @@ -XXX,XX +XXX,XX @@ static int bdrv_replace_node_noperm(BlockDriverState *from, | ||
172 | { | ||
173 | BdrvChild *c, *next; | ||
174 | |||
175 | + assert(to != NULL); | ||
176 | + | ||
177 | QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { | ||
178 | assert(c->bs == from); | ||
179 | if (!should_update_child(c, to)) { | ||
180 | @@ -XXX,XX +XXX,XX @@ static int bdrv_replace_node_noperm(BlockDriverState *from, | ||
181 | c->name, from->node_name); | ||
182 | return -EPERM; | ||
183 | } | ||
184 | - bdrv_replace_child_tran(c, to, tran); | ||
185 | + | ||
186 | + /* | ||
187 | + * Passing a pointer to the local variable @c is fine here, because | ||
188 | + * @to is not NULL, and so &c will not be attached to the transaction. | ||
189 | + */ | ||
190 | + bdrv_replace_child_tran(&c, to, tran); | ||
191 | } | ||
192 | |||
193 | return 0; | ||
194 | @@ -XXX,XX +XXX,XX @@ static int bdrv_replace_node_noperm(BlockDriverState *from, | ||
47 | * | 195 | * |
48 | * No sync here: nor bitmap neighter intersecting requests handling, only copy. | 196 | * With @detach_subchain=true @to must be in a backing chain of @from. In this |
49 | * | 197 | * case backing link of the cow-parent of @to is removed. |
50 | + * @copy_range is an in-out argument: if *copy_range is false, copy_range is not | ||
51 | + * done. If *copy_range is true, copy_range is attempted. If the copy_range | ||
52 | + * attempt fails, the function falls back to the usual read+write and | ||
53 | + * *copy_range is set to false. *copy_range and zeroes must not be true | ||
54 | + * simultaneously. | ||
55 | + * | 198 | + * |
56 | * Returns 0 on success. | 199 | + * @to must not be NULL. |
57 | */ | 200 | */ |
58 | static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | 201 | static int bdrv_replace_node_common(BlockDriverState *from, |
59 | int64_t offset, int64_t bytes, | 202 | BlockDriverState *to, |
60 | - bool zeroes, bool *error_is_read) | 203 | @@ -XXX,XX +XXX,XX @@ static int bdrv_replace_node_common(BlockDriverState *from, |
61 | + bool zeroes, bool *copy_range, | 204 | BlockDriverState *to_cow_parent = NULL; |
62 | + bool *error_is_read) | ||
63 | { | ||
64 | int ret; | 205 | int ret; |
65 | int64_t nbytes = MIN(offset + bytes, s->len) - offset; | 206 | |
66 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | 207 | + assert(to != NULL); |
67 | assert(offset + bytes <= s->len || | 208 | + |
68 | offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); | 209 | if (detach_subchain) { |
69 | assert(nbytes < INT_MAX); | 210 | assert(bdrv_chain_contains(from, to)); |
70 | + assert(!(*copy_range && zeroes)); | 211 | assert(from != to); |
71 | |||
72 | if (zeroes) { | ||
73 | ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & | ||
74 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, | ||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | - if (s->use_copy_range) { | ||
79 | + if (*copy_range) { | ||
80 | ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, | ||
81 | 0, s->write_flags); | ||
82 | if (ret < 0) { | ||
83 | trace_block_copy_copy_range_fail(s, offset, ret); | ||
84 | - s->use_copy_range = false; | ||
85 | - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); | ||
86 | + *copy_range = false; | ||
87 | /* Fallback to read+write with allocated buffer */ | ||
88 | } else { | ||
89 | - if (s->use_copy_range) { | ||
90 | - /* | ||
91 | - * Successful copy-range. Now increase copy_size. copy_range | ||
92 | - * does not respect max_transfer (it's a TODO), so we factor | ||
93 | - * that in here. | ||
94 | - * | ||
95 | - * Note: we double-check s->use_copy_range for the case when | ||
96 | - * parallel block-copy request unsets it during previous | ||
97 | - * bdrv_co_copy_range call. | ||
98 | - */ | ||
99 | - s->copy_size = | ||
100 | - MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), | ||
101 | - QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, | ||
102 | - s->target), | ||
103 | - s->cluster_size)); | ||
104 | - } | ||
105 | - goto out; | ||
106 | + return 0; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | @@ -XXX,XX +XXX,XX @@ out: | 212 | @@ -XXX,XX +XXX,XX @@ out: |
111 | return ret; | 213 | return ret; |
112 | } | 214 | } |
113 | 215 | ||
114 | +static void block_copy_handle_copy_range_result(BlockCopyState *s, | 216 | +/** |
115 | + bool is_success) | 217 | + * Replace node @from by @to (where neither may be NULL). |
116 | +{ | 218 | + */ |
117 | + if (!s->use_copy_range) { | 219 | int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, |
118 | + /* already disabled */ | 220 | Error **errp) |
119 | + return; | ||
120 | + } | ||
121 | + | ||
122 | + if (is_success) { | ||
123 | + /* | ||
124 | + * Successful copy-range. Now increase copy_size. copy_range | ||
125 | + * does not respect max_transfer (it's a TODO), so we factor | ||
126 | + * that in here. | ||
127 | + */ | ||
128 | + s->copy_size = | ||
129 | + MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), | ||
130 | + QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, | ||
131 | + s->target), | ||
132 | + s->cluster_size)); | ||
133 | + } else { | ||
134 | + /* Copy-range failed, disable it. */ | ||
135 | + s->use_copy_range = false; | ||
136 | + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); | ||
137 | + } | ||
138 | +} | ||
139 | + | ||
140 | static coroutine_fn int block_copy_task_entry(AioTask *task) | ||
141 | { | 221 | { |
142 | BlockCopyTask *t = container_of(task, BlockCopyTask, task); | 222 | @@ -XXX,XX +XXX,XX @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, |
143 | bool error_is_read = false; | 223 | bdrv_drained_begin(old_bs); |
144 | + bool copy_range = t->copy_range; | 224 | bdrv_drained_begin(new_bs); |
145 | int ret; | 225 | |
146 | 226 | - bdrv_replace_child_tran(child, new_bs, tran); | |
147 | ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, | 227 | + bdrv_replace_child_tran(&child, new_bs, tran); |
148 | - &error_is_read); | 228 | |
149 | + ©_range, &error_is_read); | 229 | found = g_hash_table_new(NULL, NULL); |
150 | + if (t->copy_range) { | 230 | refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); |
151 | + block_copy_handle_copy_range_result(t->s, copy_range); | ||
152 | + } | ||
153 | if (ret < 0) { | ||
154 | if (!t->call_state->ret) { | ||
155 | t->call_state->ret = ret; | ||
156 | @@ -XXX,XX +XXX,XX @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) | ||
157 | g_free(task); | ||
158 | continue; | ||
159 | } | ||
160 | - task->zeroes = ret & BDRV_BLOCK_ZERO; | ||
161 | + if (ret & BDRV_BLOCK_ZERO) { | ||
162 | + task->zeroes = true; | ||
163 | + task->copy_range = false; | ||
164 | + } | ||
165 | |||
166 | if (s->speed) { | ||
167 | if (!call_state->ignore_ratelimit) { | ||
168 | -- | 231 | -- |
169 | 2.30.2 | 232 | 2.31.1 |
170 | 233 | ||
171 | 234 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This variable is just a cache for !(bs->open_flags & BDRV_O_RDWR), | 3 | In most of the block layer, especially when traversing down from other |
4 | which we have to synchronize everywhere. Let's just drop it and | 4 | BlockDriverStates, we assume that BdrvChild.bs can never be NULL. When |
5 | consistently use bdrv_is_read_only(). | 5 | it becomes NULL, it is expected that the corresponding BdrvChild pointer |
6 | 6 | also becomes NULL and the BdrvChild object is freed. | |
7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 7 | |
8 | Message-Id: <20210527154056.70294-3-vsementsov@virtuozzo.com> | 8 | Therefore, once bdrv_replace_child_noperm() sets the BdrvChild.bs |
9 | pointer to NULL, it should also immediately set the corresponding | ||
10 | BdrvChild pointer (like bs->file or bs->backing) to NULL. | ||
11 | |||
12 | In that context, it also makes sense for this function to free the | ||
13 | child. Sometimes we cannot do so, though, because it is called in a | ||
14 | transactional context where the caller might still want to reinstate the | ||
15 | child in the abort branch (and free it only on commit), so this behavior | ||
16 | has to remain optional. | ||
17 | |||
18 | In bdrv_replace_child_tran()'s abort handler, we now rely on the fact | ||
19 | that the BdrvChild passed to bdrv_replace_child_tran() must have had a | ||
20 | non-NULL .bs pointer initially. Make a note of that and assert it. | ||
21 | |||
22 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
23 | Message-Id: <20211111120829.81329-10-hreitz@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 24 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 25 | --- |
11 | include/block/block_int.h | 1 - | 26 | block.c | 102 +++++++++++++++++++++++++++++++++++++++++++------------- |
12 | block.c | 7 +------ | 27 | 1 file changed, 79 insertions(+), 23 deletions(-) |
13 | tests/unit/test-block-iothread.c | 6 ------ | 28 | |
14 | 3 files changed, 1 insertion(+), 13 deletions(-) | ||
15 | |||
16 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/include/block/block_int.h | ||
19 | +++ b/include/block/block_int.h | ||
20 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
21 | * locking needed during I/O... | ||
22 | */ | ||
23 | int open_flags; /* flags used to open the file, re-used for re-open */ | ||
24 | - bool read_only; /* if true, the media is read only */ | ||
25 | bool encrypted; /* if true, the media is encrypted */ | ||
26 | bool sg; /* if true, the device is a /dev/sg* */ | ||
27 | bool probed; /* if true, format was probed rather than specified */ | ||
28 | diff --git a/block.c b/block.c | 29 | diff --git a/block.c b/block.c |
29 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/block.c | 31 | --- a/block.c |
31 | +++ b/block.c | 32 | +++ b/block.c |
32 | @@ -XXX,XX +XXX,XX @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, | 33 | @@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename, |
33 | * image is inactivated. */ | 34 | static bool bdrv_recurse_has_child(BlockDriverState *bs, |
34 | bool bdrv_is_read_only(BlockDriverState *bs) | 35 | BlockDriverState *child); |
35 | { | 36 | |
36 | - return bs->read_only; | 37 | +static void bdrv_child_free(BdrvChild *child); |
37 | + return !(bs->open_flags & BDRV_O_RDWR); | 38 | static void bdrv_replace_child_noperm(BdrvChild **child, |
38 | } | 39 | - BlockDriverState *new_bs); |
39 | 40 | + BlockDriverState *new_bs, | |
40 | int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, | 41 | + bool free_empty_child); |
41 | @@ -XXX,XX +XXX,XX @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, | 42 | static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, |
42 | goto fail; | 43 | BdrvChild *child, |
43 | } | 44 | Transaction *tran); |
44 | 45 | @@ -XXX,XX +XXX,XX @@ typedef struct BdrvReplaceChildState { | |
45 | - bs->read_only = true; | 46 | BdrvChild *child; |
46 | bs->open_flags &= ~BDRV_O_RDWR; | 47 | BdrvChild **childp; |
48 | BlockDriverState *old_bs; | ||
49 | + bool free_empty_child; | ||
50 | } BdrvReplaceChildState; | ||
51 | |||
52 | static void bdrv_replace_child_commit(void *opaque) | ||
53 | { | ||
54 | BdrvReplaceChildState *s = opaque; | ||
55 | |||
56 | + if (s->free_empty_child && !s->child->bs) { | ||
57 | + bdrv_child_free(s->child); | ||
58 | + } | ||
59 | bdrv_unref(s->old_bs); | ||
60 | } | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_abort(void *opaque) | ||
63 | * modify the BdrvChild * pointer we indirectly pass to it, i.e. it | ||
64 | * will not modify s->child. From that perspective, it does not matter | ||
65 | * whether we pass s->childp or &s->child. | ||
66 | - * (TODO: Right now, bdrv_replace_child_noperm() never modifies that | ||
67 | - * pointer anyway (though it will in the future), so at this point it | ||
68 | - * absolutely does not matter whether we pass s->childp or &s->child.) | ||
69 | * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use | ||
70 | * it here. | ||
71 | * (3) If new_bs is NULL, *s->childp will have been NULLed by | ||
72 | * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we | ||
73 | * must not pass a NULL *s->childp here. | ||
74 | - * (TODO: In its current state, bdrv_replace_child_noperm() will not | ||
75 | - * have NULLed *s->childp, so this does not apply yet. It will in the | ||
76 | - * future.) | ||
77 | * | ||
78 | * So whether new_bs was NULL or not, we cannot pass s->childp here; and in | ||
79 | * any case, there is no reason to pass it anyway. | ||
80 | */ | ||
81 | - bdrv_replace_child_noperm(&s->child, s->old_bs); | ||
82 | + bdrv_replace_child_noperm(&s->child, s->old_bs, true); | ||
83 | + /* | ||
84 | + * The child was pre-existing, so s->old_bs must be non-NULL, and | ||
85 | + * s->child thus must not have been freed | ||
86 | + */ | ||
87 | + assert(s->child != NULL); | ||
88 | + if (!new_bs) { | ||
89 | + /* As described above, *s->childp was cleared, so restore it */ | ||
90 | + assert(s->childp != NULL); | ||
91 | + *s->childp = s->child; | ||
92 | + } | ||
93 | bdrv_unref(new_bs); | ||
94 | } | ||
95 | |||
96 | @@ -XXX,XX +XXX,XX @@ static TransactionActionDrv bdrv_replace_child_drv = { | ||
97 | * | ||
98 | * The function doesn't update permissions, caller is responsible for this. | ||
99 | * | ||
100 | + * (*childp)->bs must not be NULL. | ||
101 | + * | ||
102 | * Note that if new_bs == NULL, @childp is stored in a state object attached | ||
103 | * to @tran, so that the old child can be reinstated in the abort handler. | ||
104 | * Therefore, if @new_bs can be NULL, @childp must stay valid until the | ||
105 | * transaction is committed or aborted. | ||
106 | * | ||
107 | - * (TODO: The reinstating does not happen yet, but it will once | ||
108 | - * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) | ||
109 | + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is | ||
110 | + * freed (on commit). @free_empty_child should only be false if the | ||
111 | + * caller will free the BDrvChild themselves (which may be important | ||
112 | + * if this is in turn called in another transactional context). | ||
113 | */ | ||
114 | static void bdrv_replace_child_tran(BdrvChild **childp, | ||
115 | BlockDriverState *new_bs, | ||
116 | - Transaction *tran) | ||
117 | + Transaction *tran, | ||
118 | + bool free_empty_child) | ||
119 | { | ||
120 | BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); | ||
121 | *s = (BdrvReplaceChildState) { | ||
122 | .child = *childp, | ||
123 | .childp = new_bs == NULL ? childp : NULL, | ||
124 | .old_bs = (*childp)->bs, | ||
125 | + .free_empty_child = free_empty_child, | ||
126 | }; | ||
127 | tran_add(tran, &bdrv_replace_child_drv, s); | ||
128 | |||
129 | + /* The abort handler relies on this */ | ||
130 | + assert(s->old_bs != NULL); | ||
131 | + | ||
132 | if (new_bs) { | ||
133 | bdrv_ref(new_bs); | ||
134 | } | ||
135 | - bdrv_replace_child_noperm(childp, new_bs); | ||
136 | + /* | ||
137 | + * Pass free_empty_child=false, we will free the child (if | ||
138 | + * necessary) in bdrv_replace_child_commit() (if our | ||
139 | + * @free_empty_child parameter was true). | ||
140 | + */ | ||
141 | + bdrv_replace_child_noperm(childp, new_bs, false); | ||
142 | /* old_bs reference is transparently moved from *childp to @s */ | ||
143 | } | ||
144 | |||
145 | @@ -XXX,XX +XXX,XX @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) | ||
146 | return permissions[qapi_perm]; | ||
147 | } | ||
148 | |||
149 | +/** | ||
150 | + * Replace (*childp)->bs by @new_bs. | ||
151 | + * | ||
152 | + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents | ||
153 | + * generally cannot handle a BdrvChild with .bs == NULL, so clearing | ||
154 | + * BdrvChild.bs should generally immediately be followed by the | ||
155 | + * BdrvChild pointer being cleared as well. | ||
156 | + * | ||
157 | + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is | ||
158 | + * freed. @free_empty_child should only be false if the caller will | ||
159 | + * free the BdrvChild themselves (this may be important in a | ||
160 | + * transactional context, where it may only be freed on commit). | ||
161 | + */ | ||
162 | static void bdrv_replace_child_noperm(BdrvChild **childp, | ||
163 | - BlockDriverState *new_bs) | ||
164 | + BlockDriverState *new_bs, | ||
165 | + bool free_empty_child) | ||
166 | { | ||
167 | BdrvChild *child = *childp; | ||
168 | BlockDriverState *old_bs = child->bs; | ||
169 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild **childp, | ||
170 | } | ||
171 | |||
172 | child->bs = new_bs; | ||
173 | + if (!new_bs) { | ||
174 | + *childp = NULL; | ||
175 | + } | ||
176 | |||
177 | if (new_bs) { | ||
178 | QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); | ||
179 | @@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild **childp, | ||
180 | bdrv_parent_drained_end_single(child); | ||
181 | drain_saldo++; | ||
182 | } | ||
183 | + | ||
184 | + if (free_empty_child && !child->bs) { | ||
185 | + bdrv_child_free(child); | ||
186 | + } | ||
187 | } | ||
188 | |||
189 | /** | ||
190 | @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_child_common_abort(void *opaque) | ||
191 | BdrvChild *child = *s->child; | ||
192 | BlockDriverState *bs = child->bs; | ||
193 | |||
194 | - bdrv_replace_child_noperm(s->child, NULL); | ||
195 | + /* | ||
196 | + * Pass free_empty_child=false, because we still need the child | ||
197 | + * for the AioContext operations on the parent below; those | ||
198 | + * BdrvChildClass methods all work on a BdrvChild object, so we | ||
199 | + * need to keep it as an empty shell (after this function, it will | ||
200 | + * not be attached to any parent, and it will not have a .bs). | ||
201 | + */ | ||
202 | + bdrv_replace_child_noperm(s->child, NULL, false); | ||
203 | |||
204 | if (bdrv_get_aio_context(bs) != s->old_child_ctx) { | ||
205 | bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); | ||
206 | @@ -XXX,XX +XXX,XX @@ static void bdrv_attach_child_common_abort(void *opaque) | ||
207 | |||
208 | bdrv_unref(bs); | ||
209 | bdrv_child_free(child); | ||
210 | - *s->child = NULL; | ||
211 | } | ||
212 | |||
213 | static TransactionActionDrv bdrv_attach_child_common_drv = { | ||
214 | @@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, | ||
215 | } | ||
216 | |||
217 | bdrv_ref(child_bs); | ||
218 | - bdrv_replace_child_noperm(&new_child, child_bs); | ||
219 | + bdrv_replace_child_noperm(&new_child, child_bs, true); | ||
220 | + /* child_bs was non-NULL, so new_child must not have been freed */ | ||
221 | + assert(new_child != NULL); | ||
222 | |||
223 | *child = new_child; | ||
224 | |||
225 | @@ -XXX,XX +XXX,XX @@ static void bdrv_detach_child(BdrvChild **childp) | ||
226 | { | ||
227 | BlockDriverState *old_bs = (*childp)->bs; | ||
228 | |||
229 | - bdrv_replace_child_noperm(childp, NULL); | ||
230 | - bdrv_child_free(*childp); | ||
231 | + bdrv_replace_child_noperm(childp, NULL, true); | ||
232 | |||
233 | if (old_bs) { | ||
234 | /* | ||
235 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, | ||
236 | } | ||
237 | |||
238 | if (child->bs) { | ||
239 | - bdrv_replace_child_tran(childp, NULL, tran); | ||
240 | + /* | ||
241 | + * Pass free_empty_child=false, we will free the child in | ||
242 | + * bdrv_remove_filter_or_cow_child_commit() | ||
243 | + */ | ||
244 | + bdrv_replace_child_tran(childp, NULL, tran, false); | ||
245 | } | ||
246 | |||
247 | s = g_new(BdrvRemoveFilterOrCowChild, 1); | ||
248 | @@ -XXX,XX +XXX,XX @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, | ||
249 | .is_backing = (childp == &bs->backing), | ||
250 | }; | ||
251 | tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); | ||
252 | - | ||
253 | - *childp = NULL; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | @@ -XXX,XX +XXX,XX @@ static int bdrv_replace_node_noperm(BlockDriverState *from, | ||
258 | * Passing a pointer to the local variable @c is fine here, because | ||
259 | * @to is not NULL, and so &c will not be attached to the transaction. | ||
260 | */ | ||
261 | - bdrv_replace_child_tran(&c, to, tran); | ||
262 | + bdrv_replace_child_tran(&c, to, tran, true); | ||
263 | } | ||
47 | 264 | ||
48 | return 0; | 265 | return 0; |
49 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, | 266 | @@ -XXX,XX +XXX,XX @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, |
50 | } | 267 | bdrv_drained_begin(old_bs); |
51 | 268 | bdrv_drained_begin(new_bs); | |
52 | bs->drv = drv; | 269 | |
53 | - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | 270 | - bdrv_replace_child_tran(&child, new_bs, tran); |
54 | bs->opaque = g_malloc0(drv->instance_size); | 271 | + bdrv_replace_child_tran(&child, new_bs, tran, true); |
55 | 272 | + /* @new_bs must have been non-NULL, so @child must not have been freed */ | |
56 | if (drv->bdrv_file_open) { | 273 | + assert(child != NULL); |
57 | @@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, | 274 | |
58 | trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, | 275 | found = g_hash_table_new(NULL, NULL); |
59 | drv->format_name); | 276 | refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); |
60 | |||
61 | - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); | ||
62 | - | ||
63 | ro = bdrv_is_read_only(bs); | ||
64 | |||
65 | if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { | ||
66 | @@ -XXX,XX +XXX,XX @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) | ||
67 | bs->explicit_options = reopen_state->explicit_options; | ||
68 | bs->options = reopen_state->options; | ||
69 | bs->open_flags = reopen_state->flags; | ||
70 | - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); | ||
71 | bs->detect_zeroes = reopen_state->detect_zeroes; | ||
72 | |||
73 | if (reopen_state->replace_backing_bs) { | ||
74 | diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tests/unit/test-block-iothread.c | ||
77 | +++ b/tests/unit/test-block-iothread.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c) | ||
79 | g_assert_cmpint(ret, ==, -EINVAL); | ||
80 | |||
81 | /* Error: Read-only image */ | ||
82 | - c->bs->read_only = true; | ||
83 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
84 | |||
85 | ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); | ||
86 | g_assert_cmpint(ret, ==, -EACCES); | ||
87 | |||
88 | - c->bs->read_only = false; | ||
89 | c->bs->open_flags |= BDRV_O_RDWR; | ||
90 | } | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_flush(BdrvChild *c) | ||
93 | g_assert_cmpint(ret, ==, 0); | ||
94 | |||
95 | /* Early success: Read-only image */ | ||
96 | - c->bs->read_only = true; | ||
97 | c->bs->open_flags &= ~BDRV_O_RDWR; | ||
98 | |||
99 | ret = bdrv_flush(c->bs); | ||
100 | g_assert_cmpint(ret, ==, 0); | ||
101 | |||
102 | - c->bs->read_only = false; | ||
103 | c->bs->open_flags |= BDRV_O_RDWR; | ||
104 | } | ||
105 | |||
106 | @@ -XXX,XX +XXX,XX @@ static void test_sync_op_blk_flush(BlockBackend *blk) | ||
107 | g_assert_cmpint(ret, ==, 0); | ||
108 | |||
109 | /* Early success: Read-only image */ | ||
110 | - bs->read_only = true; | ||
111 | bs->open_flags &= ~BDRV_O_RDWR; | ||
112 | |||
113 | ret = blk_flush(blk); | ||
114 | g_assert_cmpint(ret, ==, 0); | ||
115 | |||
116 | - bs->read_only = false; | ||
117 | bs->open_flags |= BDRV_O_RDWR; | ||
118 | } | ||
119 | |||
120 | -- | 277 | -- |
121 | 2.30.2 | 278 | 2.31.1 |
122 | 279 | ||
123 | 280 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Hanna Reitz <hreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Don't report successful progress on failure, when call_state->ret is | 3 | See the comment for why this is necessary. |
4 | set. | ||
5 | 4 | ||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 5 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
7 | Message-Id: <20210528141628.44287-2-vsementsov@virtuozzo.com> | 6 | Message-Id: <20211111120829.81329-11-hreitz@redhat.com> |
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 8 | --- |
11 | block/block-copy.c | 8 +++++--- | 9 | tests/qemu-iotests/030 | 11 ++++++++++- |
12 | 1 file changed, 5 insertions(+), 3 deletions(-) | 10 | 1 file changed, 10 insertions(+), 1 deletion(-) |
13 | 11 | ||
14 | diff --git a/block/block-copy.c b/block/block-copy.c | 12 | diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100755 |
16 | --- a/block/block-copy.c | 14 | --- a/tests/qemu-iotests/030 |
17 | +++ b/block/block-copy.c | 15 | +++ b/tests/qemu-iotests/030 |
18 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_entry(AioTask *task) | 16 | @@ -XXX,XX +XXX,XX @@ class TestParallelOps(iotests.QMPTestCase): |
19 | 17 | speed=1024) | |
20 | ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, | 18 | self.assert_qmp(result, 'return', {}) |
21 | &error_is_read); | 19 | |
22 | - if (ret < 0 && !t->call_state->ret) { | 20 | - for job in pending_jobs: |
23 | - t->call_state->ret = ret; | 21 | + # Do this in reverse: After unthrottling them, some jobs may finish |
24 | - t->call_state->error_is_read = error_is_read; | 22 | + # before we have unthrottled all of them. This will drain their |
25 | + if (ret < 0) { | 23 | + # subgraph, and this will make jobs above them advance (despite those |
26 | + if (!t->call_state->ret) { | 24 | + # jobs on top being throttled). In the worst case, all jobs below the |
27 | + t->call_state->ret = ret; | 25 | + # top one are finished before we can unthrottle it, and this makes it |
28 | + t->call_state->error_is_read = error_is_read; | 26 | + # advance so far that it completes before we can unthrottle it - which |
29 | + } | 27 | + # results in an error. |
30 | } else { | 28 | + # Starting from the top (i.e. in reverse) does not have this problem: |
31 | progress_work_done(t->s->progress, t->bytes); | 29 | + # When a job finishes, the ones below it are not advanced. |
32 | } | 30 | + for job in reversed(pending_jobs): |
31 | result = self.vm.qmp('block-job-set-speed', device=job, speed=0) | ||
32 | self.assert_qmp(result, 'return', {}) | ||
33 | |||
33 | -- | 34 | -- |
34 | 2.30.2 | 35 | 2.31.1 |
35 | 36 | ||
36 | 37 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | While introducing a non-QemuOpts code path for device creation for JSON |
---|---|---|---|
2 | -device, we noticed that QMP device_add doesn't check its input | ||
3 | correctly (accepting arguments that should have been rejected), and that | ||
4 | users may be relying on this behaviour (libvirt did until it was fixed | ||
5 | recently). | ||
2 | 6 | ||
3 | Recently we've fixed a crash by adding .get_parent_aio_context handler | 7 | Let's use a deprecation period before we fix this bug in QEMU to avoid |
4 | to child_vvfat_qcow. Now we want it to support .get_parent_desc as | 8 | nasty surprises for users. |
5 | well. child_vvfat_qcow wants to implement own .inherit_options, it's | ||
6 | not bad. But omitting all other handlers is a bad idea. Let's inherit | ||
7 | the class from child_of_bds instead, similar to chain_child_class and | ||
8 | detach_by_driver_cb_class in test-bdrv-drain.c. | ||
9 | 9 | ||
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
11 | Message-Id: <20210601075218.79249-5-vsementsov@virtuozzo.com> | 11 | Message-Id: <20211111143530.18985-1-kwolf@redhat.com> |
12 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | ||
13 | Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 15 | --- |
14 | block/vvfat.c | 8 +++----- | 16 | docs/about/deprecated.rst | 14 ++++++++++++++ |
15 | 1 file changed, 3 insertions(+), 5 deletions(-) | 17 | 1 file changed, 14 insertions(+) |
16 | 18 | ||
17 | diff --git a/block/vvfat.c b/block/vvfat.c | 19 | diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst |
18 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/vvfat.c | 21 | --- a/docs/about/deprecated.rst |
20 | +++ b/block/vvfat.c | 22 | +++ b/docs/about/deprecated.rst |
21 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, | 23 | @@ -XXX,XX +XXX,XX @@ options are removed in favor of using explicit ``blockdev-create`` and |
22 | qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); | 24 | ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for |
23 | } | 25 | details. |
24 | 26 | ||
25 | -static const BdrvChildClass child_vvfat_qcow = { | 27 | +Incorrectly typed ``device_add`` arguments (since 6.2) |
26 | - .parent_is_bds = true, | 28 | +'''''''''''''''''''''''''''''''''''''''''''''''''''''' |
27 | - .inherit_options = vvfat_qcow_options, | 29 | + |
28 | - .get_parent_aio_context = child_of_bds_get_parent_aio_context, | 30 | +Due to shortcomings in the internal implementation of ``device_add``, QEMU |
29 | -}; | 31 | +incorrectly accepts certain invalid arguments: Any object or list arguments are |
30 | +static BdrvChildClass child_vvfat_qcow; | 32 | +silently ignored. Other argument types are not checked, but an implicit |
31 | 33 | +conversion happens, so that e.g. string values can be assigned to integer | |
32 | static int enable_write_target(BlockDriverState *bs, Error **errp) | 34 | +device properties or vice versa. |
33 | { | 35 | + |
34 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vvfat = { | 36 | +This is a bug in QEMU that will be fixed in the future so that previously |
35 | 37 | +accepted incorrect commands will return an error. Users should make sure that | |
36 | static void bdrv_vvfat_init(void) | 38 | +all arguments passed to ``device_add`` are consistent with the documented |
37 | { | 39 | +property types. |
38 | + child_vvfat_qcow = child_of_bds; | 40 | + |
39 | + child_vvfat_qcow.inherit_options = vvfat_qcow_options; | 41 | System accelerators |
40 | bdrv_register(&bdrv_vvfat); | 42 | ------------------- |
41 | } | ||
42 | 43 | ||
43 | -- | 44 | -- |
44 | 2.30.2 | 45 | 2.31.1 |
45 | 46 | ||
46 | 47 | diff view generated by jsdifflib |
1 | From: Thomas Huth <thuth@redhat.com> | 1 | At the end of a reopen, we already call bdrv_refresh_limits(), which |
---|---|---|---|
2 | should update bs->request_alignment according to the new file | ||
3 | descriptor. However, raw_probe_alignment() relies on s->needs_alignment | ||
4 | and just uses 1 if it isn't set. We neglected to update this field, so | ||
5 | starting with cache=writeback and then reopening with cache=none means | ||
6 | that we get an incorrect bs->request_alignment == 1 and unaligned | ||
7 | requests fail instead of being automatically aligned. | ||
2 | 8 | ||
3 | If fallocate(... FALLOC_FL_ZERO_RANGE ...) returns EINVAL, it's likely | 9 | Fix this by recalculating s->needs_alignment in raw_refresh_limits() |
4 | an indication that the file system is buggy and does not implement | 10 | before calling raw_probe_alignment(). |
5 | unaligned accesses right. We still might be lucky with the other | ||
6 | fallback fallocate() calls later in this function, though, so we should | ||
7 | not return immediately and try the others first. | ||
8 | Since FALLOC_FL_ZERO_RANGE could also return EINVAL if the file descriptor | ||
9 | is not a regular file, we ignore this filesystem bug silently, without | ||
10 | printing an error message for the user. | ||
11 | 11 | ||
12 | Signed-off-by: Thomas Huth <thuth@redhat.com> | 12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | Message-Id: <20210527172020.847617-3-thuth@redhat.com> | 13 | Message-Id: <20211104113109.56336-1-kwolf@redhat.com> |
14 | Reviewed-by: Hanna Reitz <hreitz@redhat.com> | ||
15 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 17 | --- |
16 | block/file-posix.c | 18 +++++++++--------- | 18 | block/file-posix.c | 20 ++++++++++++++++---- |
17 | 1 file changed, 9 insertions(+), 9 deletions(-) | 19 | tests/qemu-iotests/142 | 22 ++++++++++++++++++++++ |
20 | tests/qemu-iotests/142.out | 15 +++++++++++++++ | ||
21 | 3 files changed, 53 insertions(+), 4 deletions(-) | ||
18 | 22 | ||
19 | diff --git a/block/file-posix.c b/block/file-posix.c | 23 | diff --git a/block/file-posix.c b/block/file-posix.c |
20 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/file-posix.c | 25 | --- a/block/file-posix.c |
22 | +++ b/block/file-posix.c | 26 | +++ b/block/file-posix.c |
23 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque) | 27 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState { |
24 | if (s->has_write_zeroes) { | 28 | int page_cache_inconsistent; /* errno from fdatasync failure */ |
25 | int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, | 29 | bool has_fallocate; |
26 | aiocb->aio_offset, aiocb->aio_nbytes); | 30 | bool needs_alignment; |
27 | - if (ret == -EINVAL) { | 31 | + bool force_alignment; |
28 | - /* | 32 | bool drop_cache; |
29 | - * Allow falling back to pwrite for file systems that | 33 | bool check_cache_dropped; |
30 | - * do not support fallocate() for an unaligned byte range. | 34 | struct { |
31 | - */ | 35 | @@ -XXX,XX +XXX,XX @@ static bool dio_byte_aligned(int fd) |
32 | - return -ENOTSUP; | 36 | return false; |
33 | - } | 37 | } |
34 | - if (ret == 0 || ret != -ENOTSUP) { | 38 | |
35 | + if (ret == -ENOTSUP) { | 39 | +static bool raw_needs_alignment(BlockDriverState *bs) |
36 | + s->has_write_zeroes = false; | 40 | +{ |
37 | + } else if (ret == 0 || ret != -EINVAL) { | 41 | + BDRVRawState *s = bs->opaque; |
38 | return ret; | 42 | + |
39 | } | 43 | + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { |
40 | - s->has_write_zeroes = false; | 44 | + return true; |
41 | + /* | 45 | + } |
42 | + * Note: Some file systems do not like unaligned byte ranges, and | 46 | + |
43 | + * return EINVAL in such a case, though they should not do it according | 47 | + return s->force_alignment; |
44 | + * to the man-page of fallocate(). Thus we simply ignore this return | 48 | +} |
45 | + * value and try the other fallbacks instead. | 49 | + |
46 | + */ | 50 | /* Check if read is allowed with given memory buffer and length. |
51 | * | ||
52 | * This function is used to check O_DIRECT memory buffer and request alignment. | ||
53 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
54 | |||
55 | s->has_discard = true; | ||
56 | s->has_write_zeroes = true; | ||
57 | - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { | ||
58 | - s->needs_alignment = true; | ||
59 | - } | ||
60 | |||
61 | if (fstat(s->fd, &st) < 0) { | ||
62 | ret = -errno; | ||
63 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
64 | * so QEMU makes sure all IO operations on the device are aligned | ||
65 | * to sector size, or else FreeBSD will reject them with EINVAL. | ||
66 | */ | ||
67 | - s->needs_alignment = true; | ||
68 | + s->force_alignment = true; | ||
47 | } | 69 | } |
48 | #endif | 70 | #endif |
49 | 71 | + s->needs_alignment = raw_needs_alignment(bs); | |
72 | |||
73 | #ifdef CONFIG_XFS | ||
74 | if (platform_test_xfs_fd(s->fd)) { | ||
75 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) | ||
76 | BDRVRawState *s = bs->opaque; | ||
77 | struct stat st; | ||
78 | |||
79 | + s->needs_alignment = raw_needs_alignment(bs); | ||
80 | raw_probe_alignment(bs, s->fd, errp); | ||
81 | + | ||
82 | bs->bl.min_mem_alignment = s->buf_align; | ||
83 | bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); | ||
84 | |||
85 | diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142 | ||
86 | index XXXXXXX..XXXXXXX 100755 | ||
87 | --- a/tests/qemu-iotests/142 | ||
88 | +++ b/tests/qemu-iotests/142 | ||
89 | @@ -XXX,XX +XXX,XX @@ info block backing-file" | ||
90 | |||
91 | echo "$hmp_cmds" | run_qemu -drive "$files","$ids" | grep "Cache" | ||
92 | |||
93 | +echo | ||
94 | +echo "--- Alignment after changing O_DIRECT ---" | ||
95 | +echo | ||
96 | + | ||
97 | +# Directly test the protocol level: Can unaligned requests succeed even if | ||
98 | +# O_DIRECT was only enabled through a reopen and vice versa? | ||
99 | + | ||
100 | +$QEMU_IO --cache=writeback -f file $TEST_IMG <<EOF | _filter_qemu_io | ||
101 | +read 42 42 | ||
102 | +reopen -o cache.direct=on | ||
103 | +read 42 42 | ||
104 | +reopen -o cache.direct=off | ||
105 | +read 42 42 | ||
106 | +EOF | ||
107 | +$QEMU_IO --cache=none -f file $TEST_IMG <<EOF | _filter_qemu_io | ||
108 | +read 42 42 | ||
109 | +reopen -o cache.direct=off | ||
110 | +read 42 42 | ||
111 | +reopen -o cache.direct=on | ||
112 | +read 42 42 | ||
113 | +EOF | ||
114 | + | ||
115 | # success, all done | ||
116 | echo "*** done" | ||
117 | rm -f $seq.full | ||
118 | diff --git a/tests/qemu-iotests/142.out b/tests/qemu-iotests/142.out | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/tests/qemu-iotests/142.out | ||
121 | +++ b/tests/qemu-iotests/142.out | ||
122 | @@ -XXX,XX +XXX,XX @@ cache.no-flush=on on backing-file | ||
123 | Cache mode: writeback | ||
124 | Cache mode: writeback, direct | ||
125 | Cache mode: writeback, ignore flushes | ||
126 | + | ||
127 | +--- Alignment after changing O_DIRECT --- | ||
128 | + | ||
129 | +read 42/42 bytes at offset 42 | ||
130 | +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
131 | +read 42/42 bytes at offset 42 | ||
132 | +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
133 | +read 42/42 bytes at offset 42 | ||
134 | +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
135 | +read 42/42 bytes at offset 42 | ||
136 | +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
137 | +read 42/42 bytes at offset 42 | ||
138 | +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
139 | +read 42/42 bytes at offset 42 | ||
140 | +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
141 | *** done | ||
50 | -- | 142 | -- |
51 | 2.30.2 | 143 | 2.31.1 |
52 | 144 | ||
53 | 145 | diff view generated by jsdifflib |
1 | From: Lukas Straub <lukasstraub2@web.de> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | The quorum block driver uses a custom flush callback to handle the | 3 | Reported by Coverity (CID 1465222). |
4 | case when some children return io errors. In that case it still | ||
5 | returns success if enough children are healthy. | ||
6 | However, it provides it as the .bdrv_co_flush_to_disk callback, not | ||
7 | as .bdrv_co_flush. This causes the block layer to do it's own | ||
8 | generic flushing for the children instead, which doesn't handle | ||
9 | errors properly. | ||
10 | 4 | ||
11 | Fix this by providing .bdrv_co_flush instead of | 5 | Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id") |
12 | .bdrv_co_flush_to_disk so the block layer uses the custom flush | 6 | Cc: Damien Hedde <damien.hedde@greensocs.com> |
13 | callback. | 7 | Cc: Kevin Wolf <kwolf@redhat.com> |
14 | 8 | Cc: Michael S. Tsirkin <mst@redhat.com> | |
15 | Signed-off-by: Lukas Straub <lukasstraub2@web.de> | 9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
16 | Reported-by: Minghao Yuan <meeho@qq.com> | 10 | Message-Id: <20211102163342.31162-1-stefanha@redhat.com> |
17 | Message-Id: <20210518134214.11ccf05f@gecko.fritz.box> | 11 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
18 | Tested-by: Zhang Chen <chen.zhang@intel.com> | 12 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> |
13 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
14 | Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> | ||
15 | Reviewed-by: Damien Hedde <damien.hedde@greensocs.com> | ||
16 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 18 | --- |
21 | block/quorum.c | 2 +- | 19 | softmmu/qdev-monitor.c | 2 +- |
22 | 1 file changed, 1 insertion(+), 1 deletion(-) | 20 | 1 file changed, 1 insertion(+), 1 deletion(-) |
23 | 21 | ||
24 | diff --git a/block/quorum.c b/block/quorum.c | 22 | diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c |
25 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/block/quorum.c | 24 | --- a/softmmu/qdev-monitor.c |
27 | +++ b/block/quorum.c | 25 | +++ b/softmmu/qdev-monitor.c |
28 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_quorum = { | 26 | @@ -XXX,XX +XXX,XX @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) |
29 | .bdrv_dirname = quorum_dirname, | 27 | if (prop) { |
30 | .bdrv_co_block_status = quorum_co_block_status, | 28 | dev->id = id; |
31 | 29 | } else { | |
32 | - .bdrv_co_flush_to_disk = quorum_co_flush, | 30 | - g_free(id); |
33 | + .bdrv_co_flush = quorum_co_flush, | 31 | error_setg(errp, "Duplicate device ID '%s'", id); |
34 | 32 | + g_free(id); | |
35 | .bdrv_getlength = quorum_getlength, | 33 | return NULL; |
36 | 34 | } | |
35 | } else { | ||
37 | -- | 36 | -- |
38 | 2.30.2 | 37 | 2.31.1 |
39 | 38 | ||
40 | 39 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Coverity thinks blk may be NULL. It's a false-positive, as described in | ||
4 | a new comment. | ||
5 | |||
6 | Fixes: Coverity CID 1453194 | ||
7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
8 | Message-Id: <20210519090532.3753-1-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | --- | ||
11 | qemu-io-cmds.c | 14 ++++++++++++-- | ||
12 | 1 file changed, 12 insertions(+), 2 deletions(-) | ||
13 | |||
14 | diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/qemu-io-cmds.c | ||
17 | +++ b/qemu-io-cmds.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, | ||
19 | return -EINVAL; | ||
20 | } | ||
21 | |||
22 | - /* Request additional permissions if necessary for this command. The caller | ||
23 | + /* | ||
24 | + * Request additional permissions if necessary for this command. The caller | ||
25 | * is responsible for restoring the original permissions afterwards if this | ||
26 | - * is what it wants. */ | ||
27 | + * is what it wants. | ||
28 | + * | ||
29 | + * Coverity thinks that blk may be NULL in the following if condition. It's | ||
30 | + * not so: in init_check_command() we fail if blk is NULL for command with | ||
31 | + * both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in | ||
32 | + * qemuio_add_command() we assert that command with non-zero .perm field | ||
33 | + * doesn't set this flags. So, the following assertion is to silence | ||
34 | + * Coverity: | ||
35 | + */ | ||
36 | + assert(blk || !ct->perm); | ||
37 | if (ct->perm && blk_is_available(blk)) { | ||
38 | uint64_t orig_perm, orig_shared_perm; | ||
39 | blk_get_perm(blk, &orig_perm, &orig_shared_perm); | ||
40 | -- | ||
41 | 2.30.2 | ||
42 | |||
43 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Commit 3ca1f3225727419ba573673b744edac10904276f | ||
4 | "block: BdrvChildClass: add .get_parent_aio_context handler" introduced | ||
5 | new handler and commit 228ca37e12f97788e05bd0c92f89b3e5e4019607 | ||
6 | "block: drop ctx argument from bdrv_root_attach_child" made a generic | ||
7 | use of it. But 3ca1f3225727419ba573673b744edac10904276f didn't update | ||
8 | child_vvfat_qcow. Fix that. | ||
9 | |||
10 | Before that fix the command | ||
11 | |||
12 | ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ | ||
13 | -drive file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none | ||
14 | |||
15 | crashes: | ||
16 | |||
17 | 1 bdrv_child_get_parent_aio_context (c=0x559d62426d20) | ||
18 | at ../block.c:1440 | ||
19 | 2 bdrv_attach_child_common | ||
20 | (child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target", | ||
21 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
22 | perm=3, shared_perm=4, opaque=0x559d62445690, | ||
23 | child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) | ||
24 | at ../block.c:2795 | ||
25 | 3 bdrv_attach_child_noperm | ||
26 | (parent_bs=0x559d62445690, child_bs=0x559d62468190, | ||
27 | child_name=0x559d606f9e3d "write-target", | ||
28 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
29 | child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) at | ||
30 | ../block.c:2855 | ||
31 | 4 bdrv_attach_child | ||
32 | (parent_bs=0x559d62445690, child_bs=0x559d62468190, | ||
33 | child_name=0x559d606f9e3d "write-target", | ||
34 | child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3, | ||
35 | errp=0x7ffc74c2ae60) at ../block.c:2953 | ||
36 | 5 bdrv_open_child | ||
37 | (filename=0x559d62464b80 "/var/tmp/vl.h3TIS4", | ||
38 | options=0x559d6246ec20, bdref_key=0x559d606f9e3d "write-target", | ||
39 | parent=0x559d62445690, child_class=0x559d60c58d20 | ||
40 | <child_vvfat_qcow>, child_role=3, allow_none=false, | ||
41 | errp=0x7ffc74c2ae60) at ../block.c:3351 | ||
42 | 6 enable_write_target (bs=0x559d62445690, errp=0x7ffc74c2ae60) at | ||
43 | ../block/vvfat.c:3176 | ||
44 | 7 vvfat_open (bs=0x559d62445690, options=0x559d6244adb0, flags=155650, | ||
45 | errp=0x7ffc74c2ae60) at ../block/vvfat.c:1236 | ||
46 | 8 bdrv_open_driver (bs=0x559d62445690, drv=0x559d60d4f7e0 | ||
47 | <bdrv_vvfat>, node_name=0x0, | ||
48 | options=0x559d6244adb0, open_flags=155650, | ||
49 | errp=0x7ffc74c2af70) at ../block.c:1557 | ||
50 | 9 bdrv_open_common (bs=0x559d62445690, file=0x0, | ||
51 | options=0x559d6244adb0, errp=0x7ffc74c2af70) at | ||
52 | ... | ||
53 | |||
54 | (gdb) fr 1 | ||
55 | #1 0x0000559d603ea3bf in bdrv_child_get_parent_aio_context | ||
56 | (c=0x559d62426d20) at ../block.c:1440 | ||
57 | 1440 return c->klass->get_parent_aio_context(c); | ||
58 | (gdb) p c->klass | ||
59 | $1 = (const BdrvChildClass *) 0x559d60c58d20 <child_vvfat_qcow> | ||
60 | (gdb) p c->klass->get_parent_aio_context | ||
61 | $2 = (AioContext *(*)(BdrvChild *)) 0x0 | ||
62 | |||
63 | Fixes: 3ca1f3225727419ba573673b744edac10904276f | ||
64 | Fixes: 228ca37e12f97788e05bd0c92f89b3e5e4019607 | ||
65 | Reported-by: John Arbuckle <programmingkidx@gmail.com> | ||
66 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
67 | Message-Id: <20210524101257.119377-2-vsementsov@virtuozzo.com> | ||
68 | Tested-by: John Arbuckle <programmingkidx@gmail.com> | ||
69 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
70 | --- | ||
71 | include/block/block.h | 1 + | ||
72 | block.c | 4 ++-- | ||
73 | block/vvfat.c | 1 + | ||
74 | 3 files changed, 4 insertions(+), 2 deletions(-) | ||
75 | |||
76 | diff --git a/include/block/block.h b/include/block/block.h | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/include/block/block.h | ||
79 | +++ b/include/block/block.h | ||
80 | @@ -XXX,XX +XXX,XX @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, | ||
81 | bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, | ||
82 | GSList **ignore, Error **errp); | ||
83 | AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); | ||
84 | +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); | ||
85 | |||
86 | int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); | ||
87 | int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); | ||
88 | diff --git a/block.c b/block.c | ||
89 | index XXXXXXX..XXXXXXX 100644 | ||
90 | --- a/block.c | ||
91 | +++ b/block.c | ||
92 | @@ -XXX,XX +XXX,XX @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | -static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c) | ||
97 | +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) | ||
98 | { | ||
99 | BlockDriverState *bs = c->opaque; | ||
100 | |||
101 | @@ -XXX,XX +XXX,XX @@ const BdrvChildClass child_of_bds = { | ||
102 | .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, | ||
103 | .set_aio_ctx = bdrv_child_cb_set_aio_ctx, | ||
104 | .update_filename = bdrv_child_cb_update_filename, | ||
105 | - .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context, | ||
106 | + .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
107 | }; | ||
108 | |||
109 | AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) | ||
110 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/block/vvfat.c | ||
113 | +++ b/block/vvfat.c | ||
114 | @@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, | ||
115 | static const BdrvChildClass child_vvfat_qcow = { | ||
116 | .parent_is_bds = true, | ||
117 | .inherit_options = vvfat_qcow_options, | ||
118 | + .get_parent_aio_context = child_of_bds_get_parent_aio_context, | ||
119 | }; | ||
120 | |||
121 | static int enable_write_target(BlockDriverState *bs, Error **errp) | ||
122 | -- | ||
123 | 2.30.2 | ||
124 | |||
125 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | It's wrong to rely on s->qcow in vvfat_child_perm, as on permission | ||
4 | update during bdrv_open_child() call this field is not set yet. | ||
5 | |||
6 | Still prior to aa5a04c7db27eea6b36de32f241b155f0d9ce34d, it didn't | ||
7 | crash, as bdrv_open_child passed NULL as child to bdrv_child_perm(), | ||
8 | and NULL was equal to NULL in assertion (still, it was bad guarantee | ||
9 | for child being s->qcow, not backing :). | ||
10 | |||
11 | Since aa5a04c7db27eea6b36de32f241b155f0d9ce34d | ||
12 | "add bdrv_attach_child_noperm" bdrv_refresh_perms called on parent node | ||
13 | when attaching child, and new correct child pointer is passed to | ||
14 | .bdrv_child_perm. Still, s->qcow is NULL at the moment. Let's rely only | ||
15 | on role instead. | ||
16 | |||
17 | Without that fix, | ||
18 | ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ | ||
19 | -drive \ | ||
20 | file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none | ||
21 | |||
22 | crashes: | ||
23 | (gdb) bt | ||
24 | 0 raise () at /lib64/libc.so.6 | ||
25 | 1 abort () at /lib64/libc.so.6 | ||
26 | 2 _nl_load_domain.cold () at /lib64/libc.so.6 | ||
27 | 3 annobin_assert.c_end () at /lib64/libc.so.6 | ||
28 | 4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, | ||
29 | reopen_queue=0x0, perm=0, shared=31, | ||
30 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at | ||
31 | ../block/vvfat.c:3214 | ||
32 | 5 bdrv_child_perm (bs=0x559186f3d690, child_bs=0x559186f60190, | ||
33 | c=0x559186f1ed20, role=3, reopen_queue=0x0, | ||
34 | parent_perm=0, parent_shared=31, | ||
35 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) | ||
36 | at ../block.c:2094 | ||
37 | 6 bdrv_node_refresh_perm (bs=0x559186f3d690, q=0x0, | ||
38 | tran=0x559186f65850, errp=0x7ffe56f28530) at | ||
39 | ../block.c:2336 | ||
40 | 7 bdrv_list_refresh_perms (list=0x559186db5b90 = {...}, q=0x0, | ||
41 | tran=0x559186f65850, errp=0x7ffe56f28530) | ||
42 | at ../block.c:2358 | ||
43 | 8 bdrv_refresh_perms (bs=0x559186f3d690, errp=0x7ffe56f28530) at | ||
44 | ../block.c:2419 | ||
45 | 9 bdrv_attach_child | ||
46 | (parent_bs=0x559186f3d690, child_bs=0x559186f60190, | ||
47 | child_name=0x559184d83e3d "write-target", | ||
48 | child_class=0x5591852f3b00 <child_vvfat_qcow>, child_role=3, | ||
49 | errp=0x7ffe56f28530) at ../block.c:2959 | ||
50 | 10 bdrv_open_child | ||
51 | (filename=0x559186f5cb80 "/var/tmp/vl.7WYmFU", | ||
52 | options=0x559186f66c20, bdref_key=0x559184d83e3d "write-target", | ||
53 | parent=0x559186f3d690, child_class=0x5591852f3b00 | ||
54 | <child_vvfat_qcow>, child_role=3, allow_none=false, | ||
55 | errp=0x7ffe56f28530) at ../block.c:3351 | ||
56 | 11 enable_write_target (bs=0x559186f3d690, errp=0x7ffe56f28530) at | ||
57 | ../block/vvfat.c:3177 | ||
58 | 12 vvfat_open (bs=0x559186f3d690, options=0x559186f42db0, flags=155650, | ||
59 | errp=0x7ffe56f28530) at ../block/vvfat.c:1236 | ||
60 | 13 bdrv_open_driver (bs=0x559186f3d690, drv=0x5591853d97e0 | ||
61 | <bdrv_vvfat>, node_name=0x0, | ||
62 | options=0x559186f42db0, open_flags=155650, | ||
63 | errp=0x7ffe56f28640) at ../block.c:1557 | ||
64 | 14 bdrv_open_common (bs=0x559186f3d690, file=0x0, | ||
65 | options=0x559186f42db0, errp=0x7ffe56f28640) at | ||
66 | ../block.c:1833 | ||
67 | ... | ||
68 | |||
69 | (gdb) fr 4 | ||
70 | #4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, | ||
71 | reopen_queue=0x0, perm=0, shared=31, | ||
72 | nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at | ||
73 | ../block/vvfat.c:3214 | ||
74 | 3214 assert(c == s->qcow || (role & BDRV_CHILD_COW)); | ||
75 | (gdb) p role | ||
76 | $1 = 3 # BDRV_CHILD_DATA | BDRV_CHILD_METADATA | ||
77 | (gdb) p *c | ||
78 | $2 = {bs = 0x559186f60190, name = 0x559186f669d0 "write-target", klass | ||
79 | = 0x5591852f3b00 <child_vvfat_qcow>, role = 3, opaque = | ||
80 | 0x559186f3d690, perm = 3, shared_perm = 4, frozen = false, | ||
81 | parent_quiesce_counter = 0, next = {le_next = 0x0, le_prev = | ||
82 | 0x559186f41818}, next_parent = {le_next = 0x0, le_prev = | ||
83 | 0x559186f64320}} | ||
84 | (gdb) p s->qcow | ||
85 | $3 = (BdrvChild *) 0x0 | ||
86 | |||
87 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
88 | Message-Id: <20210524101257.119377-3-vsementsov@virtuozzo.com> | ||
89 | Tested-by: John Arbuckle <programmingkidx@gmail.com> | ||
90 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
91 | --- | ||
92 | block/vvfat.c | 7 ++----- | ||
93 | 1 file changed, 2 insertions(+), 5 deletions(-) | ||
94 | |||
95 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/block/vvfat.c | ||
98 | +++ b/block/vvfat.c | ||
99 | @@ -XXX,XX +XXX,XX @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, | ||
100 | uint64_t perm, uint64_t shared, | ||
101 | uint64_t *nperm, uint64_t *nshared) | ||
102 | { | ||
103 | - BDRVVVFATState *s = bs->opaque; | ||
104 | - | ||
105 | - assert(c == s->qcow || (role & BDRV_CHILD_COW)); | ||
106 | - | ||
107 | - if (c == s->qcow) { | ||
108 | + if (role & BDRV_CHILD_DATA) { | ||
109 | /* This is a private node, nobody should try to attach to it */ | ||
110 | *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; | ||
111 | *nshared = BLK_PERM_WRITE_UNCHANGED; | ||
112 | } else { | ||
113 | + assert(role & BDRV_CHILD_COW); | ||
114 | /* The backing file is there so 'commit' can use it. vvfat doesn't | ||
115 | * access it in any way. */ | ||
116 | *nperm = 0; | ||
117 | -- | ||
118 | 2.30.2 | ||
119 | |||
120 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Instead of keeping additional boolean field, let's store the | ||
4 | information in BDRV_O_RDWR bit of BlockBackendRootState::open_flags. | ||
5 | |||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
7 | Message-Id: <20210527154056.70294-4-vsementsov@virtuozzo.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
9 | --- | ||
10 | include/block/block_int.h | 1 - | ||
11 | block/block-backend.c | 10 ++-------- | ||
12 | blockdev.c | 3 +-- | ||
13 | 3 files changed, 3 insertions(+), 11 deletions(-) | ||
14 | |||
15 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/block/block_int.h | ||
18 | +++ b/include/block/block_int.h | ||
19 | @@ -XXX,XX +XXX,XX @@ struct BlockDriverState { | ||
20 | |||
21 | struct BlockBackendRootState { | ||
22 | int open_flags; | ||
23 | - bool read_only; | ||
24 | BlockdevDetectZeroesOptions detect_zeroes; | ||
25 | }; | ||
26 | |||
27 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/block/block-backend.c | ||
30 | +++ b/block/block-backend.c | ||
31 | @@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk) | ||
32 | if (bs) { | ||
33 | return !bdrv_is_read_only(bs); | ||
34 | } else { | ||
35 | - return !blk->root_state.read_only; | ||
36 | + return blk->root_state.open_flags & BDRV_O_RDWR; | ||
37 | } | ||
38 | } | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk) | ||
41 | assert(blk->root); | ||
42 | |||
43 | blk->root_state.open_flags = blk->root->bs->open_flags; | ||
44 | - blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); | ||
45 | blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; | ||
46 | } | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) | ||
49 | */ | ||
50 | int blk_get_open_flags_from_root_state(BlockBackend *blk) | ||
51 | { | ||
52 | - int bs_flags; | ||
53 | - | ||
54 | - bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; | ||
55 | - bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; | ||
56 | - | ||
57 | - return bs_flags; | ||
58 | + return blk->root_state.open_flags; | ||
59 | } | ||
60 | |||
61 | BlockBackendRootState *blk_get_root_state(BlockBackend *blk) | ||
62 | diff --git a/blockdev.c b/blockdev.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/blockdev.c | ||
65 | +++ b/blockdev.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, | ||
67 | |||
68 | blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); | ||
69 | blk_rs = blk_get_root_state(blk); | ||
70 | - blk_rs->open_flags = bdrv_flags; | ||
71 | - blk_rs->read_only = read_only; | ||
72 | + blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR); | ||
73 | blk_rs->detect_zeroes = detect_zeroes; | ||
74 | |||
75 | qobject_unref(bs_opts); | ||
76 | -- | ||
77 | 2.30.2 | ||
78 | |||
79 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
2 | 1 | ||
3 | A customer reported that running | ||
4 | |||
5 | qemu-img convert -t none -O qcow2 -f qcow2 input.qcow2 output.qcow2 | ||
6 | |||
7 | fails for them with the following error message when the images are | ||
8 | stored on a GPFS file system : | ||
9 | |||
10 | qemu-img: error while writing sector 0: Invalid argument | ||
11 | |||
12 | After analyzing the strace output, it seems like the problem is in | ||
13 | handle_aiocb_write_zeroes(): The call to fallocate(FALLOC_FL_PUNCH_HOLE) | ||
14 | returns EINVAL, which can apparently happen if the file system has | ||
15 | a different idea of the granularity of the operation. It's arguably | ||
16 | a bug in GPFS, since the PUNCH_HOLE mode should not result in EINVAL | ||
17 | according to the man-page of fallocate(), but the file system is out | ||
18 | there in production and so we have to deal with it. In commit 294682cc3a | ||
19 | ("block: workaround for unaligned byte range in fallocate()") we also | ||
20 | already applied the a work-around for the same problem to the earlier | ||
21 | fallocate(FALLOC_FL_ZERO_RANGE) call, so do it now similar with the | ||
22 | PUNCH_HOLE call. But instead of silently catching and returning | ||
23 | -ENOTSUP (which causes the caller to fall back to writing zeroes), | ||
24 | let's rather inform the user once about the buggy file system and | ||
25 | try the other fallback instead. | ||
26 | |||
27 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
28 | Message-Id: <20210527172020.847617-2-thuth@redhat.com> | ||
29 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
30 | --- | ||
31 | block/file-posix.c | 11 +++++++++++ | ||
32 | 1 file changed, 11 insertions(+) | ||
33 | |||
34 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/block/file-posix.c | ||
37 | +++ b/block/file-posix.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque) | ||
39 | return ret; | ||
40 | } | ||
41 | s->has_fallocate = false; | ||
42 | + } else if (ret == -EINVAL) { | ||
43 | + /* | ||
44 | + * Some file systems like older versions of GPFS do not like un- | ||
45 | + * aligned byte ranges, and return EINVAL in such a case, though | ||
46 | + * they should not do it according to the man-page of fallocate(). | ||
47 | + * Warn about the bad filesystem and try the final fallback instead. | ||
48 | + */ | ||
49 | + warn_report_once("Your file system is misbehaving: " | ||
50 | + "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. " | ||
51 | + "Please report this bug to your file sytem " | ||
52 | + "vendor."); | ||
53 | } else if (ret != -ENOTSUP) { | ||
54 | return ret; | ||
55 | } else { | ||
56 | -- | ||
57 | 2.30.2 | ||
58 | |||
59 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | We have different types of parents: block nodes, block backends and | ||
4 | jobs. So, it makes sense to specify type together with name. | ||
5 | |||
6 | While being here also use g_autofree. | ||
7 | |||
8 | iotest 307 output is updated. | ||
9 | |||
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
12 | Message-Id: <20210601075218.79249-3-vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
14 | --- | ||
15 | block/block-backend.c | 9 ++++----- | ||
16 | tests/qemu-iotests/307.out | 2 +- | ||
17 | 2 files changed, 5 insertions(+), 6 deletions(-) | ||
18 | |||
19 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/block-backend.c | ||
22 | +++ b/block/block-backend.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, | ||
24 | static char *blk_root_get_parent_desc(BdrvChild *child) | ||
25 | { | ||
26 | BlockBackend *blk = child->opaque; | ||
27 | - char *dev_id; | ||
28 | + g_autofree char *dev_id = NULL; | ||
29 | |||
30 | if (blk->name) { | ||
31 | - return g_strdup(blk->name); | ||
32 | + return g_strdup_printf("block device '%s'", blk->name); | ||
33 | } | ||
34 | |||
35 | dev_id = blk_get_attached_dev_id(blk); | ||
36 | if (*dev_id) { | ||
37 | - return dev_id; | ||
38 | + return g_strdup_printf("block device '%s'", dev_id); | ||
39 | } else { | ||
40 | /* TODO Callback into the BB owner for something more detailed */ | ||
41 | - g_free(dev_id); | ||
42 | - return g_strdup("a block device"); | ||
43 | + return g_strdup("an unnamed block device"); | ||
44 | } | ||
45 | } | ||
46 | |||
47 | diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/tests/qemu-iotests/307.out | ||
50 | +++ b/tests/qemu-iotests/307.out | ||
51 | @@ -XXX,XX +XXX,XX @@ exports available: 1 | ||
52 | |||
53 | === Add a writable export === | ||
54 | {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} | ||
55 | -{"error": {"class": "GenericError", "desc": "Conflicts with use by sda as 'root', which does not allow 'write' on fmt"}} | ||
56 | +{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} | ||
57 | {"execute": "device_del", "arguments": {"id": "sda"}} | ||
58 | {"return": {}} | ||
59 | {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
60 | -- | ||
61 | 2.30.2 | ||
62 | |||
63 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Sergio Lopez <slp@redhat.com> | ||
2 | 1 | ||
3 | Allow block backends to poll their devices/users to check if they have | ||
4 | been quiesced when entering a drained section. | ||
5 | |||
6 | This will be used in the next patch to wait for the NBD server to be | ||
7 | completely quiesced. | ||
8 | |||
9 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | ||
10 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
13 | Message-Id: <20210602060552.17433-2-slp@redhat.com> | ||
14 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
16 | --- | ||
17 | include/sysemu/block-backend.h | 4 ++++ | ||
18 | block/block-backend.c | 7 ++++++- | ||
19 | 2 files changed, 10 insertions(+), 1 deletion(-) | ||
20 | |||
21 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/include/sysemu/block-backend.h | ||
24 | +++ b/include/sysemu/block-backend.h | ||
25 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps { | ||
26 | * Runs when the backend's last drain request ends. | ||
27 | */ | ||
28 | void (*drained_end)(void *opaque); | ||
29 | + /* | ||
30 | + * Is the device still busy? | ||
31 | + */ | ||
32 | + bool (*drained_poll)(void *opaque); | ||
33 | } BlockDevOps; | ||
34 | |||
35 | /* This struct is embedded in (the private) BlockBackend struct and contains | ||
36 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/block/block-backend.c | ||
39 | +++ b/block/block-backend.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child) | ||
41 | static bool blk_root_drained_poll(BdrvChild *child) | ||
42 | { | ||
43 | BlockBackend *blk = child->opaque; | ||
44 | + bool busy = false; | ||
45 | assert(blk->quiesce_counter); | ||
46 | - return !!blk->in_flight; | ||
47 | + | ||
48 | + if (blk->dev_ops && blk->dev_ops->drained_poll) { | ||
49 | + busy = blk->dev_ops->drained_poll(blk->dev_opaque); | ||
50 | + } | ||
51 | + return busy || !!blk->in_flight; | ||
52 | } | ||
53 | |||
54 | static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) | ||
55 | -- | ||
56 | 2.30.2 | ||
57 | |||
58 | diff view generated by jsdifflib |