1 | The following changes since commit 67c1115edd98f388ca89dd38322ea3fadf034523: | 1 | The following changes since commit 813bac3d8d70d85cb7835f7945eb9eed84c2d8d0: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/kraxel/tags/ui-20210323-pull-request' into staging (2021-03-23 23:47:30 +0000) | 3 | Merge tag '2023q3-bsd-user-pull-request' of https://gitlab.com/bsdimp/qemu into staging (2023-08-29 08:58:00 -0400) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 3460fd7f3959d1fa7bcc255796844aa261c805a4: | 9 | for you to fetch changes up to 87ec6f55af38e29be5b2b65a8acf84da73e06d06: |
10 | 10 | ||
11 | migrate-bitmaps-postcopy-test: check that we can't remove in-flight bitmaps (2021-03-24 13:41:19 +0000) | 11 | aio-posix: zero out io_uring sqe user_data (2023-08-30 07:39:59 -0400) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | This dirty bitmap fix solves a crash that can be triggered in the destination | 16 | v3: |
17 | QEMU process during live migration. | 17 | - Drop UFS emulation due to CI failures |
18 | - Add "aio-posix: zero out io_uring sqe user_data" | ||
18 | 19 | ||
19 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
20 | 21 | ||
21 | Vladimir Sementsov-Ogievskiy (2): | 22 | Andrey Drobyshev (3): |
22 | migration/block-dirty-bitmap: make incoming disabled bitmaps busy | 23 | block: add subcluster_size field to BlockDriverInfo |
23 | migrate-bitmaps-postcopy-test: check that we can't remove in-flight | 24 | block/io: align requests to subcluster_size |
24 | bitmaps | 25 | tests/qemu-iotests/197: add testcase for CoR with subclusters |
25 | 26 | ||
26 | migration/block-dirty-bitmap.c | 6 ++++++ | 27 | Fabiano Rosas (1): |
27 | tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test | 10 ++++++++++ | 28 | block-migration: Ensure we don't crash during migration cleanup |
28 | 2 files changed, 16 insertions(+) | 29 | |
30 | Stefan Hajnoczi (1): | ||
31 | aio-posix: zero out io_uring sqe user_data | ||
32 | |||
33 | include/block/block-common.h | 5 ++++ | ||
34 | include/block/block-io.h | 8 +++--- | ||
35 | block.c | 7 +++++ | ||
36 | block/io.c | 50 ++++++++++++++++++------------------ | ||
37 | block/mirror.c | 8 +++--- | ||
38 | block/qcow2.c | 1 + | ||
39 | migration/block.c | 11 ++++++-- | ||
40 | util/fdmon-io_uring.c | 2 ++ | ||
41 | tests/qemu-iotests/197 | 29 +++++++++++++++++++++ | ||
42 | tests/qemu-iotests/197.out | 24 +++++++++++++++++ | ||
43 | 10 files changed, 110 insertions(+), 35 deletions(-) | ||
29 | 44 | ||
30 | -- | 45 | -- |
31 | 2.30.2 | 46 | 2.41.0 |
32 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Fabiano Rosas <farosas@suse.de> | ||
1 | 2 | ||
3 | We can fail the blk_insert_bs() at init_blk_migration(), leaving the | ||
4 | BlkMigDevState without a dirty_bitmap and BlockDriverState. Account | ||
5 | for the possibly missing elements when doing cleanup. | ||
6 | |||
7 | Fix the following crashes: | ||
8 | |||
9 | Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault. | ||
10 | 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359 | ||
11 | 359 BlockDriverState *bs = bitmap->bs; | ||
12 | #0 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359 | ||
13 | #1 0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371 | ||
14 | #2 0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681 | ||
15 | |||
16 | Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault. | ||
17 | 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073 | ||
18 | 7073 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { | ||
19 | #0 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073 | ||
20 | #1 0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095 | ||
21 | #2 0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690 | ||
22 | |||
23 | Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||
24 | Message-id: 20230731203338.27581-1-farosas@suse.de | ||
25 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
26 | --- | ||
27 | migration/block.c | 11 +++++++++-- | ||
28 | 1 file changed, 9 insertions(+), 2 deletions(-) | ||
29 | |||
30 | diff --git a/migration/block.c b/migration/block.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/migration/block.c | ||
33 | +++ b/migration/block.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static void unset_dirty_tracking(void) | ||
35 | BlkMigDevState *bmds; | ||
36 | |||
37 | QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { | ||
38 | - bdrv_release_dirty_bitmap(bmds->dirty_bitmap); | ||
39 | + if (bmds->dirty_bitmap) { | ||
40 | + bdrv_release_dirty_bitmap(bmds->dirty_bitmap); | ||
41 | + } | ||
42 | } | ||
43 | } | ||
44 | |||
45 | @@ -XXX,XX +XXX,XX @@ static int64_t get_remaining_dirty(void) | ||
46 | static void block_migration_cleanup_bmds(void) | ||
47 | { | ||
48 | BlkMigDevState *bmds; | ||
49 | + BlockDriverState *bs; | ||
50 | AioContext *ctx; | ||
51 | |||
52 | unset_dirty_tracking(); | ||
53 | |||
54 | while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { | ||
55 | QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); | ||
56 | - bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker); | ||
57 | + | ||
58 | + bs = blk_bs(bmds->blk); | ||
59 | + if (bs) { | ||
60 | + bdrv_op_unblock_all(bs, bmds->blocker); | ||
61 | + } | ||
62 | error_free(bmds->blocker); | ||
63 | |||
64 | /* Save ctx, because bmds->blk can disappear during blk_unref. */ | ||
65 | -- | ||
66 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> | ||
1 | 2 | ||
3 | This is going to be used in the subsequent commit as the request alignment | ||
4 | (in particular, during copy-on-read). This value only makes sense for | ||
5 | the formats which support subclusters (currently QCOW2 only). If this | ||
6 | field isn't set by the driver's own bdrv_get_info() implementation, we | ||
7 | simply set it equal to the cluster size, thus treating each cluster as | ||
8 | having a single subcluster. | ||
9 | |||
10 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
11 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
12 | Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> | ||
13 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | Message-ID: <20230711172553.234055-2-andrey.drobyshev@virtuozzo.com> | ||
16 | --- | ||
17 | include/block/block-common.h | 5 +++++ | ||
18 | block.c | 7 +++++++ | ||
19 | block/qcow2.c | 1 + | ||
20 | 3 files changed, 13 insertions(+) | ||
21 | |||
22 | diff --git a/include/block/block-common.h b/include/block/block-common.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/block/block-common.h | ||
25 | +++ b/include/block/block-common.h | ||
26 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockZoneWps { | ||
27 | typedef struct BlockDriverInfo { | ||
28 | /* in bytes, 0 if irrelevant */ | ||
29 | int cluster_size; | ||
30 | + /* | ||
31 | + * A fraction of cluster_size, if supported (currently QCOW2 only); if | ||
32 | + * disabled or unsupported, set equal to cluster_size. | ||
33 | + */ | ||
34 | + int subcluster_size; | ||
35 | /* offset at which the VM state can be saved (0 if not possible) */ | ||
36 | int64_t vm_state_offset; | ||
37 | bool is_dirty; | ||
38 | diff --git a/block.c b/block.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/block.c | ||
41 | +++ b/block.c | ||
42 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) | ||
43 | } | ||
44 | memset(bdi, 0, sizeof(*bdi)); | ||
45 | ret = drv->bdrv_co_get_info(bs, bdi); | ||
46 | + if (bdi->subcluster_size == 0) { | ||
47 | + /* | ||
48 | + * If the driver left this unset, subclusters are not supported. | ||
49 | + * Then it is safe to treat each cluster as having only one subcluster. | ||
50 | + */ | ||
51 | + bdi->subcluster_size = bdi->cluster_size; | ||
52 | + } | ||
53 | if (ret < 0) { | ||
54 | return ret; | ||
55 | } | ||
56 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/block/qcow2.c | ||
59 | +++ b/block/qcow2.c | ||
60 | @@ -XXX,XX +XXX,XX @@ qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) | ||
61 | { | ||
62 | BDRVQcow2State *s = bs->opaque; | ||
63 | bdi->cluster_size = s->cluster_size; | ||
64 | + bdi->subcluster_size = s->subcluster_size; | ||
65 | bdi->vm_state_offset = qcow2_vm_state_offset(s); | ||
66 | bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY; | ||
67 | return 0; | ||
68 | -- | ||
69 | 2.41.0 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> | |
2 | |||
3 | When the target image is using subclusters, and we align the request during | ||
4 | copy-on-read, it makes sense to align to subcluster_size rather than | ||
5 | cluster_size. Otherwise we end up with unnecessary allocations. | ||
6 | |||
7 | This commit renames bdrv_round_to_clusters() to bdrv_round_to_subclusters() | ||
8 | and utilizes subcluster_size field of BlockDriverInfo to make necessary | ||
9 | alignments. It affects copy-on-read as well as mirror job (which is | ||
10 | using bdrv_round_to_clusters()). | ||
11 | |||
12 | This change also fixes the following bug with failing assert (covered by | ||
13 | the test in the subsequent commit): | ||
14 | |||
15 | qemu-img create -f qcow2 base.qcow2 64K | ||
16 | qemu-img create -f qcow2 -o extended_l2=on,backing_file=base.qcow2,backing_fmt=qcow2 img.qcow2 64K | ||
17 | qemu-io -c "write -P 0xaa 0 2K" img.qcow2 | ||
18 | qemu-io -C -c "read -P 0x00 2K 62K" img.qcow2 | ||
19 | |||
20 | qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed. | ||
21 | |||
22 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
23 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
24 | Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> | ||
25 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
26 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
27 | Message-ID: <20230711172553.234055-3-andrey.drobyshev@virtuozzo.com> | ||
28 | --- | ||
29 | include/block/block-io.h | 8 +++---- | ||
30 | block/io.c | 50 ++++++++++++++++++++-------------------- | ||
31 | block/mirror.c | 8 +++---- | ||
32 | 3 files changed, 33 insertions(+), 33 deletions(-) | ||
33 | |||
34 | diff --git a/include/block/block-io.h b/include/block/block-io.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/include/block/block-io.h | ||
37 | +++ b/include/block/block-io.h | ||
38 | @@ -XXX,XX +XXX,XX @@ bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); | ||
39 | ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, | ||
40 | Error **errp); | ||
41 | BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); | ||
42 | -void bdrv_round_to_clusters(BlockDriverState *bs, | ||
43 | - int64_t offset, int64_t bytes, | ||
44 | - int64_t *cluster_offset, | ||
45 | - int64_t *cluster_bytes); | ||
46 | +void bdrv_round_to_subclusters(BlockDriverState *bs, | ||
47 | + int64_t offset, int64_t bytes, | ||
48 | + int64_t *cluster_offset, | ||
49 | + int64_t *cluster_bytes); | ||
50 | |||
51 | void bdrv_get_backing_filename(BlockDriverState *bs, | ||
52 | char *filename, int filename_size); | ||
53 | diff --git a/block/io.c b/block/io.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/block/io.c | ||
56 | +++ b/block/io.c | ||
57 | @@ -XXX,XX +XXX,XX @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) | ||
58 | } | ||
59 | |||
60 | /** | ||
61 | - * Round a region to cluster boundaries | ||
62 | + * Round a region to subcluster (if supported) or cluster boundaries | ||
63 | */ | ||
64 | void coroutine_fn GRAPH_RDLOCK | ||
65 | -bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes, | ||
66 | - int64_t *cluster_offset, int64_t *cluster_bytes) | ||
67 | +bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes, | ||
68 | + int64_t *align_offset, int64_t *align_bytes) | ||
69 | { | ||
70 | BlockDriverInfo bdi; | ||
71 | IO_CODE(); | ||
72 | - if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { | ||
73 | - *cluster_offset = offset; | ||
74 | - *cluster_bytes = bytes; | ||
75 | + if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) { | ||
76 | + *align_offset = offset; | ||
77 | + *align_bytes = bytes; | ||
78 | } else { | ||
79 | - int64_t c = bdi.cluster_size; | ||
80 | - *cluster_offset = QEMU_ALIGN_DOWN(offset, c); | ||
81 | - *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c); | ||
82 | + int64_t c = bdi.subcluster_size; | ||
83 | + *align_offset = QEMU_ALIGN_DOWN(offset, c); | ||
84 | + *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes, | ||
89 | void *bounce_buffer = NULL; | ||
90 | |||
91 | BlockDriver *drv = bs->drv; | ||
92 | - int64_t cluster_offset; | ||
93 | - int64_t cluster_bytes; | ||
94 | + int64_t align_offset; | ||
95 | + int64_t align_bytes; | ||
96 | int64_t skip_bytes; | ||
97 | int ret; | ||
98 | int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, | ||
99 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes, | ||
100 | * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which | ||
101 | * is one reason we loop rather than doing it all at once. | ||
102 | */ | ||
103 | - bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); | ||
104 | - skip_bytes = offset - cluster_offset; | ||
105 | + bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes); | ||
106 | + skip_bytes = offset - align_offset; | ||
107 | |||
108 | trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, | ||
109 | - cluster_offset, cluster_bytes); | ||
110 | + align_offset, align_bytes); | ||
111 | |||
112 | - while (cluster_bytes) { | ||
113 | + while (align_bytes) { | ||
114 | int64_t pnum; | ||
115 | |||
116 | if (skip_write) { | ||
117 | ret = 1; /* "already allocated", so nothing will be copied */ | ||
118 | - pnum = MIN(cluster_bytes, max_transfer); | ||
119 | + pnum = MIN(align_bytes, max_transfer); | ||
120 | } else { | ||
121 | - ret = bdrv_is_allocated(bs, cluster_offset, | ||
122 | - MIN(cluster_bytes, max_transfer), &pnum); | ||
123 | + ret = bdrv_is_allocated(bs, align_offset, | ||
124 | + MIN(align_bytes, max_transfer), &pnum); | ||
125 | if (ret < 0) { | ||
126 | /* | ||
127 | * Safe to treat errors in querying allocation as if | ||
128 | * unallocated; we'll probably fail again soon on the | ||
129 | * read, but at least that will set a decent errno. | ||
130 | */ | ||
131 | - pnum = MIN(cluster_bytes, max_transfer); | ||
132 | + pnum = MIN(align_bytes, max_transfer); | ||
133 | } | ||
134 | |||
135 | /* Stop at EOF if the image ends in the middle of the cluster */ | ||
136 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes, | ||
137 | /* Must copy-on-read; use the bounce buffer */ | ||
138 | pnum = MIN(pnum, MAX_BOUNCE_BUFFER); | ||
139 | if (!bounce_buffer) { | ||
140 | - int64_t max_we_need = MAX(pnum, cluster_bytes - pnum); | ||
141 | + int64_t max_we_need = MAX(pnum, align_bytes - pnum); | ||
142 | int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER); | ||
143 | int64_t bounce_buffer_len = MIN(max_we_need, max_allowed); | ||
144 | |||
145 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes, | ||
146 | } | ||
147 | qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); | ||
148 | |||
149 | - ret = bdrv_driver_preadv(bs, cluster_offset, pnum, | ||
150 | + ret = bdrv_driver_preadv(bs, align_offset, pnum, | ||
151 | &local_qiov, 0, 0); | ||
152 | if (ret < 0) { | ||
153 | goto err; | ||
154 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes, | ||
155 | /* FIXME: Should we (perhaps conditionally) be setting | ||
156 | * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy | ||
157 | * that still correctly reads as zero? */ | ||
158 | - ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, | ||
159 | + ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum, | ||
160 | BDRV_REQ_WRITE_UNCHANGED); | ||
161 | } else { | ||
162 | /* This does not change the data on the disk, it is not | ||
163 | * necessary to flush even in cache=writethrough mode. | ||
164 | */ | ||
165 | - ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, | ||
166 | + ret = bdrv_driver_pwritev(bs, align_offset, pnum, | ||
167 | &local_qiov, 0, | ||
168 | BDRV_REQ_WRITE_UNCHANGED); | ||
169 | } | ||
170 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes, | ||
171 | } | ||
172 | } | ||
173 | |||
174 | - cluster_offset += pnum; | ||
175 | - cluster_bytes -= pnum; | ||
176 | + align_offset += pnum; | ||
177 | + align_bytes -= pnum; | ||
178 | progress += pnum - skip_bytes; | ||
179 | skip_bytes = 0; | ||
180 | } | ||
181 | diff --git a/block/mirror.c b/block/mirror.c | ||
182 | index XXXXXXX..XXXXXXX 100644 | ||
183 | --- a/block/mirror.c | ||
184 | +++ b/block/mirror.c | ||
185 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset, | ||
186 | need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity, | ||
187 | s->cow_bitmap); | ||
188 | if (need_cow) { | ||
189 | - bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes, | ||
190 | - &align_offset, &align_bytes); | ||
191 | + bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes, | ||
192 | + &align_offset, &align_bytes); | ||
193 | } | ||
194 | |||
195 | if (align_bytes > max_bytes) { | ||
196 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s) | ||
197 | int64_t target_offset; | ||
198 | int64_t target_bytes; | ||
199 | WITH_GRAPH_RDLOCK_GUARD() { | ||
200 | - bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes, | ||
201 | - &target_offset, &target_bytes); | ||
202 | + bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes, | ||
203 | + &target_offset, &target_bytes); | ||
204 | } | ||
205 | if (target_offset == offset && | ||
206 | target_bytes == io_bytes) { | ||
207 | -- | ||
208 | 2.41.0 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | Check that we can't remove bitmaps being migrated on destination vm. | 3 | Add testcase which checks that allocations during copy-on-read are |
4 | The new check proves that previous commit helps. | 4 | performed on a per-subcluster basis when subclusters are enabled in the target |
5 | image. | ||
5 | 6 | ||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 7 | This testcase also triggers the following assert with previous commit |
8 | not being applied, so we check that as well: | ||
9 | |||
10 | qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed. | ||
11 | |||
12 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
13 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
14 | Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> | ||
15 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
8 | Message-Id: <20210322094906.5079-3-vsementsov@virtuozzo.com> | 17 | Message-ID: <20230711172553.234055-4-andrey.drobyshev@virtuozzo.com> |
9 | --- | 18 | --- |
10 | tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test | 10 ++++++++++ | 19 | tests/qemu-iotests/197 | 29 +++++++++++++++++++++++++++++ |
11 | 1 file changed, 10 insertions(+) | 20 | tests/qemu-iotests/197.out | 24 ++++++++++++++++++++++++ |
21 | 2 files changed, 53 insertions(+) | ||
12 | 22 | ||
13 | diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test | 23 | diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 |
14 | index XXXXXXX..XXXXXXX 100755 | 24 | index XXXXXXX..XXXXXXX 100755 |
15 | --- a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test | 25 | --- a/tests/qemu-iotests/197 |
16 | +++ b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test | 26 | +++ b/tests/qemu-iotests/197 |
17 | @@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase): | 27 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -f qcow2 -C -c 'read 0 1024' "$TEST_WRAP" | _filter_qemu_io |
18 | self.start_postcopy() | 28 | $QEMU_IO -f qcow2 -c map "$TEST_WRAP" |
19 | 29 | _check_test_img | |
20 | self.vm_b_events += self.vm_b.get_qmp_events() | 30 | |
31 | +echo | ||
32 | +echo '=== Copy-on-read with subclusters ===' | ||
33 | +echo | ||
21 | + | 34 | + |
22 | + # While being here, let's check that we can't remove in-flight bitmaps. | 35 | +# Create base and top images 64K (1 cluster) each. Make subclusters enabled |
23 | + for vm in (self.vm_a, self.vm_b): | 36 | +# for the top image |
24 | + for i in range(0, nb_bitmaps): | 37 | +_make_test_img 64K |
25 | + result = vm.qmp('block-dirty-bitmap-remove', node='drive0', | 38 | +IMGPROTO=file IMGFMT=qcow2 TEST_IMG_FILE="$TEST_WRAP" \ |
26 | + name=f'bitmap{i}') | 39 | + _make_test_img --no-opts -o extended_l2=true -F "$IMGFMT" -b "$TEST_IMG" \ |
27 | + self.assert_qmp(result, 'error/desc', | 40 | + 64K | _filter_img_create |
28 | + f"Bitmap 'bitmap{i}' is currently in use by " | ||
29 | + "another operation and cannot be used") | ||
30 | + | 41 | + |
31 | self.vm_b.shutdown() | 42 | +$QEMU_IO -c "write -P 0xaa 0 64k" "$TEST_IMG" | _filter_qemu_io |
32 | # recreate vm_b, so there is no incoming option, which prevents | 43 | + |
33 | # loading bitmaps from disk | 44 | +# Allocate individual subclusters in the top image, and not the whole cluster |
45 | +$QEMU_IO -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \ | ||
46 | + | _filter_qemu_io | ||
47 | + | ||
48 | +# Only 2 subclusters should be allocated in the top image at this point | ||
49 | +$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map | ||
50 | + | ||
51 | +# Actual copy-on-read operation | ||
52 | +$QEMU_IO -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io | ||
53 | + | ||
54 | +# And here we should have 4 subclusters allocated right in the middle of the | ||
55 | +# top image. Make sure the whole cluster remains unallocated | ||
56 | +$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map | ||
57 | + | ||
58 | +_check_test_img | ||
59 | + | ||
60 | # success, all done | ||
61 | echo '*** done' | ||
62 | status=0 | ||
63 | diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/tests/qemu-iotests/197.out | ||
66 | +++ b/tests/qemu-iotests/197.out | ||
67 | @@ -XXX,XX +XXX,XX @@ read 1024/1024 bytes at offset 0 | ||
68 | 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
69 | 1 KiB (0x400) bytes allocated at offset 0 bytes (0x0) | ||
70 | No errors were found on the image. | ||
71 | + | ||
72 | +=== Copy-on-read with subclusters === | ||
73 | + | ||
74 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 | ||
75 | +Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=65536 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT | ||
76 | +wrote 65536/65536 bytes at offset 0 | ||
77 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
78 | +wrote 2048/2048 bytes at offset 28672 | ||
79 | +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
80 | +wrote 2048/2048 bytes at offset 34816 | ||
81 | +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
82 | +Offset Length File | ||
83 | +0 0x7000 TEST_DIR/t.IMGFMT | ||
84 | +0x7000 0x800 TEST_DIR/t.wrap.IMGFMT | ||
85 | +0x7800 0x1000 TEST_DIR/t.IMGFMT | ||
86 | +0x8800 0x800 TEST_DIR/t.wrap.IMGFMT | ||
87 | +0x9000 0x7000 TEST_DIR/t.IMGFMT | ||
88 | +read 4096/4096 bytes at offset 30720 | ||
89 | +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
90 | +Offset Length File | ||
91 | +0 0x7000 TEST_DIR/t.IMGFMT | ||
92 | +0x7000 0x2000 TEST_DIR/t.wrap.IMGFMT | ||
93 | +0x9000 0x7000 TEST_DIR/t.IMGFMT | ||
94 | +No errors were found on the image. | ||
95 | *** done | ||
34 | -- | 96 | -- |
35 | 2.30.2 | 97 | 2.41.0 |
36 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | liburing does not clear sqe->user_data. We must do it ourselves to avoid |
---|---|---|---|
2 | undefined behavior in process_cqe() when user_data is used. | ||
2 | 3 | ||
3 | Incoming enabled bitmaps are busy, because we do | 4 | Note that fdmon-io_uring is currently disabled, so this is a latent bug |
4 | bdrv_dirty_bitmap_create_successor() for them. But disabled bitmaps | 5 | that does not affect users. Let's merge this fix now to make it easier |
5 | being migrated are not marked busy, and user can remove them during the | 6 | to enable fdmon-io_uring in the future (and I'm working on that). |
6 | incoming migration. Then we may crash in cancel_incoming_locked() when | ||
7 | trying to remove the bitmap that was already removed by the user, like this: | ||
8 | 7 | ||
9 | #0 qemu_mutex_lock_impl (mutex=0x5593d88c50d1, file=0x559680554b20 | 8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | "../block/dirty-bitmap.c", line=64) at ../util/qemu-thread-posix.c:77 | 9 | Message-ID: <20230426212639.82310-1-stefanha@redhat.com> |
11 | #1 bdrv_dirty_bitmaps_lock (bs=0x5593d88c0ee9) | 10 | --- |
12 | at ../block/dirty-bitmap.c:64 | 11 | util/fdmon-io_uring.c | 2 ++ |
13 | #2 bdrv_release_dirty_bitmap (bitmap=0x5596810e9570) | 12 | 1 file changed, 2 insertions(+) |
14 | at ../block/dirty-bitmap.c:362 | ||
15 | #3 cancel_incoming_locked (s=0x559680be8208 <dbm_state+40>) | ||
16 | at ../migration/block-dirty-bitmap.c:918 | ||
17 | #4 dirty_bitmap_load (f=0x559681d02b10, opaque=0x559680be81e0 | ||
18 | <dbm_state>, version_id=1) at ../migration/block-dirty-bitmap.c:1194 | ||
19 | #5 vmstate_load (f=0x559681d02b10, se=0x559680fb5810) | ||
20 | at ../migration/savevm.c:908 | ||
21 | #6 qemu_loadvm_section_part_end (f=0x559681d02b10, | ||
22 | mis=0x559680fb4a30) at ../migration/savevm.c:2473 | ||
23 | #7 qemu_loadvm_state_main (f=0x559681d02b10, mis=0x559680fb4a30) | ||
24 | at ../migration/savevm.c:2626 | ||
25 | #8 postcopy_ram_listen_thread (opaque=0x0) | ||
26 | at ../migration/savevm.c:1871 | ||
27 | #9 qemu_thread_start (args=0x5596817ccd10) | ||
28 | at ../util/qemu-thread-posix.c:521 | ||
29 | #10 start_thread () at /lib64/libpthread.so.0 | ||
30 | #11 clone () at /lib64/libc.so.6 | ||
31 | 13 | ||
32 | Note the bs pointer taken from bitmap: it's definitely badly aligned. That's | 14 | diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c |
33 | because this is a use-after-free: the bitmap has already been freed. | ||
34 | |||
35 | So, let's make disabled bitmaps (being migrated) busy during incoming | ||
36 | migration. | ||
37 | |||
38 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
39 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
40 | Message-Id: <20210322094906.5079-2-vsementsov@virtuozzo.com> | ||
41 | --- | ||
42 | migration/block-dirty-bitmap.c | 6 ++++++ | ||
43 | 1 file changed, 6 insertions(+) | ||
44 | |||
45 | diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
47 | --- a/migration/block-dirty-bitmap.c | 16 | --- a/util/fdmon-io_uring.c |
48 | +++ b/migration/block-dirty-bitmap.c | 17 | +++ b/util/fdmon-io_uring.c |
49 | @@ -XXX,XX +XXX,XX @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) | 18 | @@ -XXX,XX +XXX,XX @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node) |
50 | error_report_err(local_err); | 19 | #else |
51 | return -EINVAL; | 20 | io_uring_prep_poll_remove(sqe, node); |
52 | } | 21 | #endif |
53 | + } else { | 22 | + io_uring_sqe_set_data(sqe, NULL); |
54 | + bdrv_dirty_bitmap_set_busy(s->bitmap, true); | 23 | } |
55 | } | 24 | |
56 | 25 | /* Add a timeout that self-cancels when another cqe becomes ready */ | |
57 | b = g_new(LoadBitmapState, 1); | 26 | @@ -XXX,XX +XXX,XX @@ static void add_timeout_sqe(AioContext *ctx, int64_t ns) |
58 | @@ -XXX,XX +XXX,XX @@ static void cancel_incoming_locked(DBMLoadState *s) | 27 | |
59 | assert(!s->before_vm_start_handled || !b->migrated); | 28 | sqe = get_sqe(ctx); |
60 | if (bdrv_dirty_bitmap_has_successor(b->bitmap)) { | 29 | io_uring_prep_timeout(sqe, &ts, 1, 0); |
61 | bdrv_reclaim_dirty_bitmap(b->bitmap, &error_abort); | 30 | + io_uring_sqe_set_data(sqe, NULL); |
62 | + } else { | 31 | } |
63 | + bdrv_dirty_bitmap_set_busy(b->bitmap, false); | 32 | |
64 | } | 33 | /* Add sqes from ctx->submit_list for submission */ |
65 | bdrv_release_dirty_bitmap(b->bitmap); | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) | ||
68 | |||
69 | if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { | ||
70 | bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort); | ||
71 | + } else { | ||
72 | + bdrv_dirty_bitmap_set_busy(s->bitmap, false); | ||
73 | } | ||
74 | |||
75 | for (item = s->bitmaps; item; item = g_slist_next(item)) { | ||
76 | -- | 34 | -- |
77 | 2.30.2 | 35 | 2.41.0 |
78 | diff view generated by jsdifflib |