1
The following changes since commit ab08440a4ee09032d1a9cb22fdcab23bc7e1c656:
1
The following changes since commit 813bac3d8d70d85cb7835f7945eb9eed84c2d8d0:
2
2
3
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180702' into staging (2018-07-02 17:57:46 +0100)
3
Merge tag '2023q3-bsd-user-pull-request' of https://gitlab.com/bsdimp/qemu into staging (2023-08-29 08:58:00 -0400)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 9ded4a0114968e98b41494fc035ba14f84cdf700:
9
for you to fetch changes up to 87ec6f55af38e29be5b2b65a8acf84da73e06d06:
10
10
11
backup: Use copy offloading (2018-07-02 23:23:45 -0400)
11
aio-posix: zero out io_uring sqe user_data (2023-08-30 07:39:59 -0400)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block backup patches
14
Pull request
15
16
v3:
17
- Drop UFS emulation due to CI failures
18
- Add "aio-posix: zero out io_uring sqe user_data"
19
15
----------------------------------------------------------------
20
----------------------------------------------------------------
16
21
17
Fam Zheng (3):
22
Andrey Drobyshev (3):
18
block: Fix parameter checking in bdrv_co_copy_range_internal
23
block: add subcluster_size field to BlockDriverInfo
19
block: Honour BDRV_REQ_NO_SERIALISING in copy range
24
block/io: align requests to subcluster_size
20
backup: Use copy offloading
25
tests/qemu-iotests/197: add testcase for CoR with subclusters
21
26
22
block/backup.c | 150 ++++++++++++++++++++++++++++++------------
27
Fabiano Rosas (1):
23
block/io.c | 35 +++++-----
28
block-migration: Ensure we don't crash during migration cleanup
24
block/trace-events | 1 +
29
25
include/block/block.h | 5 +-
30
Stefan Hajnoczi (1):
26
4 files changed, 132 insertions(+), 59 deletions(-)
31
aio-posix: zero out io_uring sqe user_data
32
33
include/block/block-common.h | 5 ++++
34
include/block/block-io.h | 8 +++---
35
block.c | 7 +++++
36
block/io.c | 50 ++++++++++++++++++------------------
37
block/mirror.c | 8 +++---
38
block/qcow2.c | 1 +
39
migration/block.c | 11 ++++++--
40
util/fdmon-io_uring.c | 2 ++
41
tests/qemu-iotests/197 | 29 +++++++++++++++++++++
42
tests/qemu-iotests/197.out | 24 +++++++++++++++++
43
10 files changed, 110 insertions(+), 35 deletions(-)
27
44
28
--
45
--
29
2.17.1
46
2.41.0
30
31
diff view generated by jsdifflib
New patch
1
From: Fabiano Rosas <farosas@suse.de>
1
2
3
We can fail the blk_insert_bs() at init_blk_migration(), leaving the
4
BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
5
for the possibly missing elements when doing cleanup.
6
7
Fix the following crashes:
8
9
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
10
0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
11
359 BlockDriverState *bs = bitmap->bs;
12
#0 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
13
#1 0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
14
#2 0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
15
16
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
17
0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
18
7073 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
19
#0 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
20
#1 0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
21
#2 0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
22
23
Signed-off-by: Fabiano Rosas <farosas@suse.de>
24
Message-id: 20230731203338.27581-1-farosas@suse.de
25
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
26
---
27
migration/block.c | 11 +++++++++--
28
1 file changed, 9 insertions(+), 2 deletions(-)
29
30
diff --git a/migration/block.c b/migration/block.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/migration/block.c
33
+++ b/migration/block.c
34
@@ -XXX,XX +XXX,XX @@ static void unset_dirty_tracking(void)
35
BlkMigDevState *bmds;
36
37
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
38
- bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
39
+ if (bmds->dirty_bitmap) {
40
+ bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
41
+ }
42
}
43
}
44
45
@@ -XXX,XX +XXX,XX @@ static int64_t get_remaining_dirty(void)
46
static void block_migration_cleanup_bmds(void)
47
{
48
BlkMigDevState *bmds;
49
+ BlockDriverState *bs;
50
AioContext *ctx;
51
52
unset_dirty_tracking();
53
54
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
55
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
56
- bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker);
57
+
58
+ bs = blk_bs(bmds->blk);
59
+ if (bs) {
60
+ bdrv_op_unblock_all(bs, bmds->blocker);
61
+ }
62
error_free(bmds->blocker);
63
64
/* Save ctx, because bmds->blk can disappear during blk_unref. */
65
--
66
2.41.0
diff view generated by jsdifflib
New patch
1
From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
1
2
3
This is going to be used in the subsequent commit as requests alignment
4
(in particular, during copy-on-read). This value only makes sense for
5
the formats which support subclusters (currently QCOW2 only). If this
6
field isn't set by driver's own bdrv_get_info() implementation, we
7
simply set it equal to the cluster size thus treating each cluster as
8
having a single subcluster.
9
10
Reviewed-by: Eric Blake <eblake@redhat.com>
11
Reviewed-by: Denis V. Lunev <den@openvz.org>
12
Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
13
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-ID: <20230711172553.234055-2-andrey.drobyshev@virtuozzo.com>
16
---
17
include/block/block-common.h | 5 +++++
18
block.c | 7 +++++++
19
block/qcow2.c | 1 +
20
3 files changed, 13 insertions(+)
21
22
diff --git a/include/block/block-common.h b/include/block/block-common.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/block/block-common.h
25
+++ b/include/block/block-common.h
26
@@ -XXX,XX +XXX,XX @@ typedef struct BlockZoneWps {
27
typedef struct BlockDriverInfo {
28
/* in bytes, 0 if irrelevant */
29
int cluster_size;
30
+ /*
31
+ * A fraction of cluster_size, if supported (currently QCOW2 only); if
32
+ * disabled or unsupported, set equal to cluster_size.
33
+ */
34
+ int subcluster_size;
35
/* offset at which the VM state can be saved (0 if not possible) */
36
int64_t vm_state_offset;
37
bool is_dirty;
38
diff --git a/block.c b/block.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/block.c
41
+++ b/block.c
42
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
43
}
44
memset(bdi, 0, sizeof(*bdi));
45
ret = drv->bdrv_co_get_info(bs, bdi);
46
+ if (bdi->subcluster_size == 0) {
47
+ /*
48
+ * If the driver left this unset, subclusters are not supported.
49
+ * Then it is safe to treat each cluster as having only one subcluster.
50
+ */
51
+ bdi->subcluster_size = bdi->cluster_size;
52
+ }
53
if (ret < 0) {
54
return ret;
55
}
56
diff --git a/block/qcow2.c b/block/qcow2.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/block/qcow2.c
59
+++ b/block/qcow2.c
60
@@ -XXX,XX +XXX,XX @@ qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
61
{
62
BDRVQcow2State *s = bs->opaque;
63
bdi->cluster_size = s->cluster_size;
64
+ bdi->subcluster_size = s->subcluster_size;
65
bdi->vm_state_offset = qcow2_vm_state_offset(s);
66
bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
67
return 0;
68
--
69
2.41.0
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
2
2
3
src may be NULL if BDRV_REQ_ZERO_WRITE flag is set, in this case only
3
When target image is using subclusters, and we align the request during
4
check dst and dst->bs. This bug was introduced when moving in the
4
copy-on-read, it makes sense to align to subcluster_size rather than
5
request tracking code from bdrv_co_copy_range, in 37aec7d75eb.
5
cluster_size. Otherwise we end up with unnecessary allocations.
6
6
7
This especially fixes the possible segfault when initializing src_bs
7
This commit renames bdrv_round_to_clusters() to bdrv_round_to_subclusters()
8
with a NULL src.
8
and utilizes subcluster_size field of BlockDriverInfo to make necessary
9
9
alignments. It affects copy-on-read as well as mirror job (which is
10
Signed-off-by: Fam Zheng <famz@redhat.com>
10
using bdrv_round_to_clusters()).
11
Message-id: 20180703023758.14422-2-famz@redhat.com
11
12
Reviewed-by: Jeff Cody <jcody@redhat.com>
12
This change also fixes the following bug with failing assert (covered by
13
Signed-off-by: Jeff Cody <jcody@redhat.com>
13
the test in the subsequent commit):
14
15
qemu-img create -f qcow2 base.qcow2 64K
16
qemu-img create -f qcow2 -o extended_l2=on,backing_file=base.qcow2,backing_fmt=qcow2 img.qcow2 64K
17
qemu-io -c "write -P 0xaa 0 2K" img.qcow2
18
qemu-io -C -c "read -P 0x00 2K 62K" img.qcow2
19
20
qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.
21
22
Reviewed-by: Eric Blake <eblake@redhat.com>
23
Reviewed-by: Denis V. Lunev <den@openvz.org>
24
Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
25
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
26
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
27
Message-ID: <20230711172553.234055-3-andrey.drobyshev@virtuozzo.com>
14
---
28
---
15
block/io.c | 29 +++++++++++++++--------------
29
include/block/block-io.h | 8 +++----
16
1 file changed, 15 insertions(+), 14 deletions(-)
30
block/io.c | 50 ++++++++++++++++++++--------------------
17
31
block/mirror.c | 8 +++----
32
3 files changed, 33 insertions(+), 33 deletions(-)
33
34
diff --git a/include/block/block-io.h b/include/block/block-io.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/block/block-io.h
37
+++ b/include/block/block-io.h
38
@@ -XXX,XX +XXX,XX @@ bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
39
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
40
Error **errp);
41
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
42
-void bdrv_round_to_clusters(BlockDriverState *bs,
43
- int64_t offset, int64_t bytes,
44
- int64_t *cluster_offset,
45
- int64_t *cluster_bytes);
46
+void bdrv_round_to_subclusters(BlockDriverState *bs,
47
+ int64_t offset, int64_t bytes,
48
+ int64_t *cluster_offset,
49
+ int64_t *cluster_bytes);
50
51
void bdrv_get_backing_filename(BlockDriverState *bs,
52
char *filename, int filename_size);
18
diff --git a/block/io.c b/block/io.c
53
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
54
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
55
--- a/block/io.c
21
+++ b/block/io.c
56
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
57
@@ -XXX,XX +XXX,XX @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
23
bool recurse_src)
58
}
59
60
/**
61
- * Round a region to cluster boundaries
62
+ * Round a region to subcluster (if supported) or cluster boundaries
63
*/
64
void coroutine_fn GRAPH_RDLOCK
65
-bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
66
- int64_t *cluster_offset, int64_t *cluster_bytes)
67
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
68
+ int64_t *align_offset, int64_t *align_bytes)
24
{
69
{
25
BdrvTrackedRequest src_req, dst_req;
70
BlockDriverInfo bdi;
26
- BlockDriverState *src_bs = src->bs;
71
IO_CODE();
27
- BlockDriverState *dst_bs = dst->bs;
72
- if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
73
- *cluster_offset = offset;
74
- *cluster_bytes = bytes;
75
+ if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
76
+ *align_offset = offset;
77
+ *align_bytes = bytes;
78
} else {
79
- int64_t c = bdi.cluster_size;
80
- *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
81
- *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
82
+ int64_t c = bdi.subcluster_size;
83
+ *align_offset = QEMU_ALIGN_DOWN(offset, c);
84
+ *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
85
}
86
}
87
88
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
89
void *bounce_buffer = NULL;
90
91
BlockDriver *drv = bs->drv;
92
- int64_t cluster_offset;
93
- int64_t cluster_bytes;
94
+ int64_t align_offset;
95
+ int64_t align_bytes;
96
int64_t skip_bytes;
28
int ret;
97
int ret;
29
98
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
30
- if (!src || !dst || !src->bs || !dst->bs) {
99
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
31
+ if (!dst || !dst->bs) {
100
* BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
32
return -ENOMEDIUM;
101
* is one reason we loop rather than doing it all at once.
102
*/
103
- bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
104
- skip_bytes = offset - cluster_offset;
105
+ bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
106
+ skip_bytes = offset - align_offset;
107
108
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
109
- cluster_offset, cluster_bytes);
110
+ align_offset, align_bytes);
111
112
- while (cluster_bytes) {
113
+ while (align_bytes) {
114
int64_t pnum;
115
116
if (skip_write) {
117
ret = 1; /* "already allocated", so nothing will be copied */
118
- pnum = MIN(cluster_bytes, max_transfer);
119
+ pnum = MIN(align_bytes, max_transfer);
120
} else {
121
- ret = bdrv_is_allocated(bs, cluster_offset,
122
- MIN(cluster_bytes, max_transfer), &pnum);
123
+ ret = bdrv_is_allocated(bs, align_offset,
124
+ MIN(align_bytes, max_transfer), &pnum);
125
if (ret < 0) {
126
/*
127
* Safe to treat errors in querying allocation as if
128
* unallocated; we'll probably fail again soon on the
129
* read, but at least that will set a decent errno.
130
*/
131
- pnum = MIN(cluster_bytes, max_transfer);
132
+ pnum = MIN(align_bytes, max_transfer);
133
}
134
135
/* Stop at EOF if the image ends in the middle of the cluster */
136
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
137
/* Must copy-on-read; use the bounce buffer */
138
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
139
if (!bounce_buffer) {
140
- int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
141
+ int64_t max_we_need = MAX(pnum, align_bytes - pnum);
142
int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
143
int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
144
145
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
146
}
147
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
148
149
- ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
150
+ ret = bdrv_driver_preadv(bs, align_offset, pnum,
151
&local_qiov, 0, 0);
152
if (ret < 0) {
153
goto err;
154
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
155
/* FIXME: Should we (perhaps conditionally) be setting
156
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
157
* that still correctly reads as zero? */
158
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
159
+ ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
160
BDRV_REQ_WRITE_UNCHANGED);
161
} else {
162
/* This does not change the data on the disk, it is not
163
* necessary to flush even in cache=writethrough mode.
164
*/
165
- ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
166
+ ret = bdrv_driver_pwritev(bs, align_offset, pnum,
167
&local_qiov, 0,
168
BDRV_REQ_WRITE_UNCHANGED);
169
}
170
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
171
}
172
}
173
174
- cluster_offset += pnum;
175
- cluster_bytes -= pnum;
176
+ align_offset += pnum;
177
+ align_bytes -= pnum;
178
progress += pnum - skip_bytes;
179
skip_bytes = 0;
33
}
180
}
34
- ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
181
diff --git a/block/mirror.c b/block/mirror.c
35
- if (ret) {
182
index XXXXXXX..XXXXXXX 100644
36
- return ret;
183
--- a/block/mirror.c
37
- }
184
+++ b/block/mirror.c
38
-
185
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
39
ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
186
need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
40
if (ret) {
187
s->cow_bitmap);
41
return ret;
188
if (need_cow) {
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
189
- bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
43
return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
190
- &align_offset, &align_bytes);
191
+ bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
192
+ &align_offset, &align_bytes);
44
}
193
}
45
194
46
+ if (!src || !src->bs) {
195
if (align_bytes > max_bytes) {
47
+ return -ENOMEDIUM;
196
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
48
+ }
197
int64_t target_offset;
49
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
198
int64_t target_bytes;
50
+ if (ret) {
199
WITH_GRAPH_RDLOCK_GUARD() {
51
+ return ret;
200
- bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
52
+ }
201
- &target_offset, &target_bytes);
53
+
202
+ bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
54
if (!src->bs->drv->bdrv_co_copy_range_from
203
+ &target_offset, &target_bytes);
55
|| !dst->bs->drv->bdrv_co_copy_range_to
204
}
56
|| src->bs->encrypted || dst->bs->encrypted) {
205
if (target_offset == offset &&
57
return -ENOTSUP;
206
target_bytes == io_bytes) {
58
}
59
- bdrv_inc_in_flight(src_bs);
60
- bdrv_inc_in_flight(dst_bs);
61
- tracked_request_begin(&src_req, src_bs, src_offset,
62
+ bdrv_inc_in_flight(src->bs);
63
+ bdrv_inc_in_flight(dst->bs);
64
+ tracked_request_begin(&src_req, src->bs, src_offset,
65
bytes, BDRV_TRACKED_READ);
66
- tracked_request_begin(&dst_req, dst_bs, dst_offset,
67
+ tracked_request_begin(&dst_req, dst->bs, dst_offset,
68
bytes, BDRV_TRACKED_WRITE);
69
70
wait_serialising_requests(&src_req);
71
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
72
}
73
tracked_request_end(&src_req);
74
tracked_request_end(&dst_req);
75
- bdrv_dec_in_flight(src_bs);
76
- bdrv_dec_in_flight(dst_bs);
77
+ bdrv_dec_in_flight(src->bs);
78
+ bdrv_dec_in_flight(dst->bs);
79
return ret;
80
}
81
82
--
207
--
83
2.17.1
208
2.41.0
84
85
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
2
2
3
This semantics is needed by drive-backup so implement it before using
3
Add testcase which checks that allocations during copy-on-read are
4
this API there.
4
performed on the subcluster basis when subclusters are enabled in target
5
image.
5
6
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
This testcase also triggers the following assert with previous commit
7
Signed-off-by: Fam Zheng <famz@redhat.com>
8
not being applied, so we check that as well:
8
Message-id: 20180703023758.14422-3-famz@redhat.com
9
9
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.
11
12
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Reviewed-by: Denis V. Lunev <den@openvz.org>
14
Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
15
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Message-ID: <20230711172553.234055-4-andrey.drobyshev@virtuozzo.com>
10
---
18
---
11
block/io.c | 6 ++++--
19
tests/qemu-iotests/197 | 29 +++++++++++++++++++++++++++++
12
include/block/block.h | 5 +++--
20
tests/qemu-iotests/197.out | 24 ++++++++++++++++++++++++
13
2 files changed, 7 insertions(+), 4 deletions(-)
21
2 files changed, 53 insertions(+)
14
22
15
diff --git a/block/io.c b/block/io.c
23
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
24
index XXXXXXX..XXXXXXX 100755
25
--- a/tests/qemu-iotests/197
26
+++ b/tests/qemu-iotests/197
27
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -f qcow2 -C -c 'read 0 1024' "$TEST_WRAP" | _filter_qemu_io
28
$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
29
_check_test_img
30
31
+echo
32
+echo '=== Copy-on-read with subclusters ==='
33
+echo
34
+
35
+# Create base and top images 64K (1 cluster) each. Make subclusters enabled
36
+# for the top image
37
+_make_test_img 64K
38
+IMGPROTO=file IMGFMT=qcow2 TEST_IMG_FILE="$TEST_WRAP" \
39
+ _make_test_img --no-opts -o extended_l2=true -F "$IMGFMT" -b "$TEST_IMG" \
40
+ 64K | _filter_img_create
41
+
42
+$QEMU_IO -c "write -P 0xaa 0 64k" "$TEST_IMG" | _filter_qemu_io
43
+
44
+# Allocate individual subclusters in the top image, and not the whole cluster
45
+$QEMU_IO -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \
46
+ | _filter_qemu_io
47
+
48
+# Only 2 subclusters should be allocated in the top image at this point
49
+$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map
50
+
51
+# Actual copy-on-read operation
52
+$QEMU_IO -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io
53
+
54
+# And here we should have 4 subclusters allocated right in the middle of the
55
+# top image. Make sure the whole cluster remains unallocated
56
+$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map
57
+
58
+_check_test_img
59
+
60
# success, all done
61
echo '*** done'
62
status=0
63
diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out
16
index XXXXXXX..XXXXXXX 100644
64
index XXXXXXX..XXXXXXX 100644
17
--- a/block/io.c
65
--- a/tests/qemu-iotests/197.out
18
+++ b/block/io.c
66
+++ b/tests/qemu-iotests/197.out
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
67
@@ -XXX,XX +XXX,XX @@ read 1024/1024 bytes at offset 0
20
tracked_request_begin(&dst_req, dst->bs, dst_offset,
68
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
21
bytes, BDRV_TRACKED_WRITE);
69
1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
22
70
No errors were found on the image.
23
- wait_serialising_requests(&src_req);
71
+
24
- wait_serialising_requests(&dst_req);
72
+=== Copy-on-read with subclusters ===
25
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
73
+
26
+ wait_serialising_requests(&src_req);
74
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
27
+ wait_serialising_requests(&dst_req);
75
+Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=65536 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
28
+ }
76
+wrote 65536/65536 bytes at offset 0
29
if (recurse_src) {
77
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
30
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
78
+wrote 2048/2048 bytes at offset 28672
31
src, src_offset,
79
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
32
diff --git a/include/block/block.h b/include/block/block.h
80
+wrote 2048/2048 bytes at offset 34816
33
index XXXXXXX..XXXXXXX 100644
81
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
34
--- a/include/block/block.h
82
+Offset Length File
35
+++ b/include/block/block.h
83
+0 0x7000 TEST_DIR/t.IMGFMT
36
@@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
84
+0x7000 0x800 TEST_DIR/t.wrap.IMGFMT
37
* @dst: Destination child to copy data to
85
+0x7800 0x1000 TEST_DIR/t.IMGFMT
38
* @dst_offset: offset in @dst image to write data
86
+0x8800 0x800 TEST_DIR/t.wrap.IMGFMT
39
* @bytes: number of bytes to copy
87
+0x9000 0x7000 TEST_DIR/t.IMGFMT
40
- * @flags: request flags. Must be one of:
88
+read 4096/4096 bytes at offset 30720
41
- * 0 - actually read data from src;
89
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
42
+ * @flags: request flags. Supported flags:
90
+Offset Length File
43
* BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
91
+0 0x7000 TEST_DIR/t.IMGFMT
44
* write on @dst as if bdrv_co_pwrite_zeroes is
92
+0x7000 0x2000 TEST_DIR/t.wrap.IMGFMT
45
* called. Used to simplify caller code, or
93
+0x9000 0x7000 TEST_DIR/t.IMGFMT
46
* during BlockDriver.bdrv_co_copy_range_from()
94
+No errors were found on the image.
47
* recursion.
95
*** done
48
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
49
+ * requests currently in flight.
50
*
51
* Returns: 0 if succeeded; negative error code if failed.
52
**/
53
--
96
--
54
2.17.1
97
2.41.0
55
56
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
liburing does not clear sqe->user_data. We must do it ourselves to avoid
2
undefined behavior in process_cqe() when user_data is used.
2
3
3
The implementation is similar to the 'qemu-img convert'. In the
4
Note that fdmon-io_uring is currently disabled, so this is a latent bug
4
beginning of the job, offloaded copy is attempted. If it fails, further
5
that does not affect users. Let's merge this fix now to make it easier
5
I/O will go through the existing bounce buffer code path.
6
to enable fdmon-io_uring in the future (and I'm working on that).
6
7
7
Then, as Kevin pointed out, both this and qemu-img convert can benefit
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
from a local check if one request fails because of, for example, the
9
Message-ID: <20230426212639.82310-1-stefanha@redhat.com>
9
offset is beyond EOF, but another may well be accepted by the protocol
10
---
10
layer. This will be implemented separately.
11
util/fdmon-io_uring.c | 2 ++
12
1 file changed, 2 insertions(+)
11
13
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
13
Signed-off-by: Fam Zheng <famz@redhat.com>
14
Message-id: 20180703023758.14422-4-famz@redhat.com
15
Signed-off-by: Jeff Cody <jcody@redhat.com>
16
---
17
block/backup.c | 150 ++++++++++++++++++++++++++++++++-------------
18
block/trace-events | 1 +
19
2 files changed, 110 insertions(+), 41 deletions(-)
20
21
diff --git a/block/backup.c b/block/backup.c
22
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
23
--- a/block/backup.c
16
--- a/util/fdmon-io_uring.c
24
+++ b/block/backup.c
17
+++ b/util/fdmon-io_uring.c
25
@@ -XXX,XX +XXX,XX @@ typedef struct BackupBlockJob {
18
@@ -XXX,XX +XXX,XX @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
26
QLIST_HEAD(, CowRequest) inflight_reqs;
19
#else
27
20
io_uring_prep_poll_remove(sqe, node);
28
HBitmap *copy_bitmap;
21
#endif
29
+ bool use_copy_range;
22
+ io_uring_sqe_set_data(sqe, NULL);
30
+ int64_t copy_range_size;
31
} BackupBlockJob;
32
33
static const BlockJobDriver backup_job_driver;
34
@@ -XXX,XX +XXX,XX @@ static void cow_request_end(CowRequest *req)
35
qemu_co_queue_restart_all(&req->wait_queue);
36
}
23
}
37
24
38
+/* Copy range to target with a bounce buffer and return the bytes copied. If
25
/* Add a timeout that self-cancels when another cqe becomes ready */
39
+ * error occurred, return a negative error number */
26
@@ -XXX,XX +XXX,XX @@ static void add_timeout_sqe(AioContext *ctx, int64_t ns)
40
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
27
41
+ int64_t start,
28
sqe = get_sqe(ctx);
42
+ int64_t end,
29
io_uring_prep_timeout(sqe, &ts, 1, 0);
43
+ bool is_write_notifier,
30
+ io_uring_sqe_set_data(sqe, NULL);
44
+ bool *error_is_read,
31
}
45
+ void **bounce_buffer)
32
46
+{
33
/* Add sqes from ctx->submit_list for submission */
47
+ int ret;
48
+ struct iovec iov;
49
+ QEMUIOVector qiov;
50
+ BlockBackend *blk = job->common.blk;
51
+ int nbytes;
52
+
53
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
54
+ nbytes = MIN(job->cluster_size, job->len - start);
55
+ if (!*bounce_buffer) {
56
+ *bounce_buffer = blk_blockalign(blk, job->cluster_size);
57
+ }
58
+ iov.iov_base = *bounce_buffer;
59
+ iov.iov_len = nbytes;
60
+ qemu_iovec_init_external(&qiov, &iov, 1);
61
+
62
+ ret = blk_co_preadv(blk, start, qiov.size, &qiov,
63
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
64
+ if (ret < 0) {
65
+ trace_backup_do_cow_read_fail(job, start, ret);
66
+ if (error_is_read) {
67
+ *error_is_read = true;
68
+ }
69
+ goto fail;
70
+ }
71
+
72
+ if (qemu_iovec_is_zero(&qiov)) {
73
+ ret = blk_co_pwrite_zeroes(job->target, start,
74
+ qiov.size, BDRV_REQ_MAY_UNMAP);
75
+ } else {
76
+ ret = blk_co_pwritev(job->target, start,
77
+ qiov.size, &qiov,
78
+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
79
+ }
80
+ if (ret < 0) {
81
+ trace_backup_do_cow_write_fail(job, start, ret);
82
+ if (error_is_read) {
83
+ *error_is_read = false;
84
+ }
85
+ goto fail;
86
+ }
87
+
88
+ return nbytes;
89
+fail:
90
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
91
+ return ret;
92
+
93
+}
94
+
95
+/* Copy range to target and return the bytes copied. If error occurred, return a
96
+ * negative error number. */
97
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
98
+ int64_t start,
99
+ int64_t end,
100
+ bool is_write_notifier)
101
+{
102
+ int ret;
103
+ int nr_clusters;
104
+ BlockBackend *blk = job->common.blk;
105
+ int nbytes;
106
+
107
+ assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
108
+ nbytes = MIN(job->copy_range_size, end - start);
109
+ nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
110
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
111
+ nr_clusters);
112
+ ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
113
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
114
+ if (ret < 0) {
115
+ trace_backup_do_cow_copy_range_fail(job, start, ret);
116
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size,
117
+ nr_clusters);
118
+ return ret;
119
+ }
120
+
121
+ return nbytes;
122
+}
123
+
124
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
125
int64_t offset, uint64_t bytes,
126
bool *error_is_read,
127
bool is_write_notifier)
128
{
129
- BlockBackend *blk = job->common.blk;
130
CowRequest cow_request;
131
- struct iovec iov;
132
- QEMUIOVector bounce_qiov;
133
- void *bounce_buffer = NULL;
134
int ret = 0;
135
int64_t start, end; /* bytes */
136
- int n; /* bytes */
137
+ void *bounce_buffer = NULL;
138
139
qemu_co_rwlock_rdlock(&job->flush_rwlock);
140
141
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
142
wait_for_overlapping_requests(job, start, end);
143
cow_request_begin(&cow_request, job, start, end);
144
145
- for (; start < end; start += job->cluster_size) {
146
+ while (start < end) {
147
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
148
trace_backup_do_cow_skip(job, start);
149
+ start += job->cluster_size;
150
continue; /* already copied */
151
}
152
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
153
154
trace_backup_do_cow_process(job, start);
155
156
- n = MIN(job->cluster_size, job->len - start);
157
-
158
- if (!bounce_buffer) {
159
- bounce_buffer = blk_blockalign(blk, job->cluster_size);
160
- }
161
- iov.iov_base = bounce_buffer;
162
- iov.iov_len = n;
163
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
164
-
165
- ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
166
- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
167
- if (ret < 0) {
168
- trace_backup_do_cow_read_fail(job, start, ret);
169
- if (error_is_read) {
170
- *error_is_read = true;
171
+ if (job->use_copy_range) {
172
+ ret = backup_cow_with_offload(job, start, end, is_write_notifier);
173
+ if (ret < 0) {
174
+ job->use_copy_range = false;
175
}
176
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
177
- goto out;
178
}
179
-
180
- if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
181
- ret = blk_co_pwrite_zeroes(job->target, start,
182
- bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
183
- } else {
184
- ret = blk_co_pwritev(job->target, start,
185
- bounce_qiov.size, &bounce_qiov,
186
- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
187
+ if (!job->use_copy_range) {
188
+ ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
189
+ error_is_read, &bounce_buffer);
190
}
191
if (ret < 0) {
192
- trace_backup_do_cow_write_fail(job, start, ret);
193
- if (error_is_read) {
194
- *error_is_read = false;
195
- }
196
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
197
- goto out;
198
+ break;
199
}
200
201
/* Publish progress, guest I/O counts as progress too. Note that the
202
* offset field is an opaque progress value, it is not a disk offset.
203
*/
204
- job->bytes_read += n;
205
- job_progress_update(&job->common.job, n);
206
+ start += ret;
207
+ job->bytes_read += ret;
208
+ job_progress_update(&job->common.job, ret);
209
+ ret = 0;
210
}
211
212
-out:
213
if (bounce_buffer) {
214
qemu_vfree(bounce_buffer);
215
}
216
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
217
} else {
218
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
219
}
220
+ job->use_copy_range = true;
221
+ job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
222
+ blk_get_max_transfer(job->target));
223
+ job->copy_range_size = MAX(job->cluster_size,
224
+ QEMU_ALIGN_UP(job->copy_range_size,
225
+ job->cluster_size));
226
227
/* Required permissions are already taken with target's blk_new() */
228
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
229
diff --git a/block/trace-events b/block/trace-events
230
index XXXXXXX..XXXXXXX 100644
231
--- a/block/trace-events
232
+++ b/block/trace-events
233
@@ -XXX,XX +XXX,XX @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
234
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
235
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
236
backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
237
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
238
239
# blockdev.c
240
qmp_block_job_cancel(void *job) "job %p"
241
--
34
--
242
2.17.1
35
2.41.0
243
244
diff view generated by jsdifflib