1
The following changes since commit ab08440a4ee09032d1a9cb22fdcab23bc7e1c656:
1
The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935:
2
2
3
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180702' into staging (2018-07-02 17:57:46 +0100)
3
Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 9ded4a0114968e98b41494fc035ba14f84cdf700:
9
for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a:
10
10
11
backup: Use copy offloading (2018-07-02 23:23:45 -0400)
11
MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block backup patches
14
Pull request
15
16
The main fix here is for io_uring. Spurious -EAGAIN errors can happen and the
17
request needs to be resubmitted.
18
19
The MAINTAINERS changes carry no risk and we might as well include them in QEMU
20
6.1.
21
15
----------------------------------------------------------------
22
----------------------------------------------------------------
16
23
17
Fam Zheng (3):
24
Fabian Ebner (1):
18
block: Fix parameter checking in bdrv_co_copy_range_internal
25
block/io_uring: resubmit when result is -EAGAIN
19
block: Honour BDRV_REQ_NO_SERIALISING in copy range
20
backup: Use copy offloading
21
26
22
block/backup.c | 150 ++++++++++++++++++++++++++++++------------
27
Philippe Mathieu-Daudé (1):
23
block/io.c | 35 +++++-----
28
MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver
24
block/trace-events | 1 +
29
25
include/block/block.h | 5 +-
30
Stefano Garzarella (1):
26
4 files changed, 132 insertions(+), 59 deletions(-)
31
MAINTAINERS: add Stefano Garzarella as io_uring reviewer
32
33
MAINTAINERS | 2 ++
34
block/io_uring.c | 16 +++++++++++++++-
35
2 files changed, 17 insertions(+), 1 deletion(-)
27
36
28
--
37
--
29
2.17.1
38
2.31.1
30
39
31
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
2
3
The implementation is similar to the 'qemu-img convert'. In the
3
I've been working with io_uring for a while so I'd like to help
4
beginning of the job, offloaded copy is attempted. If it fails, further
4
with reviews.
5
I/O will go through the existing bounce buffer code path.
6
5
7
Then, as Kevin pointed out, both this and qemu-img convert can benefit
6
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
8
from a local check if one request fails because, for example, the
7
Message-Id: <20210728131515.131045-1-sgarzare@redhat.com>
9
offset is beyond EOF, but another may well be accepted by the protocol
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
layer. This will be implemented separately.
9
---
10
MAINTAINERS | 1 +
11
1 file changed, 1 insertion(+)
11
12
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
diff --git a/MAINTAINERS b/MAINTAINERS
13
Signed-off-by: Fam Zheng <famz@redhat.com>
14
index XXXXXXX..XXXXXXX 100644
14
Message-id: 20180703023758.14422-4-famz@redhat.com
15
--- a/MAINTAINERS
15
Signed-off-by: Jeff Cody <jcody@redhat.com>
16
+++ b/MAINTAINERS
16
---
17
@@ -XXX,XX +XXX,XX @@ Linux io_uring
17
block/backup.c | 150 ++++++++++++++++++++++++++++++++-------------
18
M: Aarushi Mehta <mehta.aaru20@gmail.com>
18
block/trace-events | 1 +
19
M: Julia Suvorova <jusual@redhat.com>
19
2 files changed, 110 insertions(+), 41 deletions(-)
20
M: Stefan Hajnoczi <stefanha@redhat.com>
21
+R: Stefano Garzarella <sgarzare@redhat.com>
22
L: qemu-block@nongnu.org
23
S: Maintained
24
F: block/io_uring.c
25
--
26
2.31.1
20
27
21
diff --git a/block/backup.c b/block/backup.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/block/backup.c
24
+++ b/block/backup.c
25
@@ -XXX,XX +XXX,XX @@ typedef struct BackupBlockJob {
26
QLIST_HEAD(, CowRequest) inflight_reqs;
27
28
HBitmap *copy_bitmap;
29
+ bool use_copy_range;
30
+ int64_t copy_range_size;
31
} BackupBlockJob;
32
33
static const BlockJobDriver backup_job_driver;
34
@@ -XXX,XX +XXX,XX @@ static void cow_request_end(CowRequest *req)
35
qemu_co_queue_restart_all(&req->wait_queue);
36
}
37
38
+/* Copy range to target with a bounce buffer and return the bytes copied. If
39
+ * error occurred, return a negative error number */
40
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
41
+ int64_t start,
42
+ int64_t end,
43
+ bool is_write_notifier,
44
+ bool *error_is_read,
45
+ void **bounce_buffer)
46
+{
47
+ int ret;
48
+ struct iovec iov;
49
+ QEMUIOVector qiov;
50
+ BlockBackend *blk = job->common.blk;
51
+ int nbytes;
52
+
53
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
54
+ nbytes = MIN(job->cluster_size, job->len - start);
55
+ if (!*bounce_buffer) {
56
+ *bounce_buffer = blk_blockalign(blk, job->cluster_size);
57
+ }
58
+ iov.iov_base = *bounce_buffer;
59
+ iov.iov_len = nbytes;
60
+ qemu_iovec_init_external(&qiov, &iov, 1);
61
+
62
+ ret = blk_co_preadv(blk, start, qiov.size, &qiov,
63
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
64
+ if (ret < 0) {
65
+ trace_backup_do_cow_read_fail(job, start, ret);
66
+ if (error_is_read) {
67
+ *error_is_read = true;
68
+ }
69
+ goto fail;
70
+ }
71
+
72
+ if (qemu_iovec_is_zero(&qiov)) {
73
+ ret = blk_co_pwrite_zeroes(job->target, start,
74
+ qiov.size, BDRV_REQ_MAY_UNMAP);
75
+ } else {
76
+ ret = blk_co_pwritev(job->target, start,
77
+ qiov.size, &qiov,
78
+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
79
+ }
80
+ if (ret < 0) {
81
+ trace_backup_do_cow_write_fail(job, start, ret);
82
+ if (error_is_read) {
83
+ *error_is_read = false;
84
+ }
85
+ goto fail;
86
+ }
87
+
88
+ return nbytes;
89
+fail:
90
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
91
+ return ret;
92
+
93
+}
94
+
95
+/* Copy range to target and return the bytes copied. If error occurred, return a
96
+ * negative error number. */
97
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
98
+ int64_t start,
99
+ int64_t end,
100
+ bool is_write_notifier)
101
+{
102
+ int ret;
103
+ int nr_clusters;
104
+ BlockBackend *blk = job->common.blk;
105
+ int nbytes;
106
+
107
+ assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
108
+ nbytes = MIN(job->copy_range_size, end - start);
109
+ nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
110
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
111
+ nr_clusters);
112
+ ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
113
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
114
+ if (ret < 0) {
115
+ trace_backup_do_cow_copy_range_fail(job, start, ret);
116
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size,
117
+ nr_clusters);
118
+ return ret;
119
+ }
120
+
121
+ return nbytes;
122
+}
123
+
124
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
125
int64_t offset, uint64_t bytes,
126
bool *error_is_read,
127
bool is_write_notifier)
128
{
129
- BlockBackend *blk = job->common.blk;
130
CowRequest cow_request;
131
- struct iovec iov;
132
- QEMUIOVector bounce_qiov;
133
- void *bounce_buffer = NULL;
134
int ret = 0;
135
int64_t start, end; /* bytes */
136
- int n; /* bytes */
137
+ void *bounce_buffer = NULL;
138
139
qemu_co_rwlock_rdlock(&job->flush_rwlock);
140
141
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
142
wait_for_overlapping_requests(job, start, end);
143
cow_request_begin(&cow_request, job, start, end);
144
145
- for (; start < end; start += job->cluster_size) {
146
+ while (start < end) {
147
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
148
trace_backup_do_cow_skip(job, start);
149
+ start += job->cluster_size;
150
continue; /* already copied */
151
}
152
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
153
154
trace_backup_do_cow_process(job, start);
155
156
- n = MIN(job->cluster_size, job->len - start);
157
-
158
- if (!bounce_buffer) {
159
- bounce_buffer = blk_blockalign(blk, job->cluster_size);
160
- }
161
- iov.iov_base = bounce_buffer;
162
- iov.iov_len = n;
163
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
164
-
165
- ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
166
- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
167
- if (ret < 0) {
168
- trace_backup_do_cow_read_fail(job, start, ret);
169
- if (error_is_read) {
170
- *error_is_read = true;
171
+ if (job->use_copy_range) {
172
+ ret = backup_cow_with_offload(job, start, end, is_write_notifier);
173
+ if (ret < 0) {
174
+ job->use_copy_range = false;
175
}
176
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
177
- goto out;
178
}
179
-
180
- if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
181
- ret = blk_co_pwrite_zeroes(job->target, start,
182
- bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
183
- } else {
184
- ret = blk_co_pwritev(job->target, start,
185
- bounce_qiov.size, &bounce_qiov,
186
- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
187
+ if (!job->use_copy_range) {
188
+ ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
189
+ error_is_read, &bounce_buffer);
190
}
191
if (ret < 0) {
192
- trace_backup_do_cow_write_fail(job, start, ret);
193
- if (error_is_read) {
194
- *error_is_read = false;
195
- }
196
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
197
- goto out;
198
+ break;
199
}
200
201
/* Publish progress, guest I/O counts as progress too. Note that the
202
* offset field is an opaque progress value, it is not a disk offset.
203
*/
204
- job->bytes_read += n;
205
- job_progress_update(&job->common.job, n);
206
+ start += ret;
207
+ job->bytes_read += ret;
208
+ job_progress_update(&job->common.job, ret);
209
+ ret = 0;
210
}
211
212
-out:
213
if (bounce_buffer) {
214
qemu_vfree(bounce_buffer);
215
}
216
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
217
} else {
218
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
219
}
220
+ job->use_copy_range = true;
221
+ job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
222
+ blk_get_max_transfer(job->target));
223
+ job->copy_range_size = MAX(job->cluster_size,
224
+ QEMU_ALIGN_UP(job->copy_range_size,
225
+ job->cluster_size));
226
227
/* Required permissions are already taken with target's blk_new() */
228
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
229
diff --git a/block/trace-events b/block/trace-events
230
index XXXXXXX..XXXXXXX 100644
231
--- a/block/trace-events
232
+++ b/block/trace-events
233
@@ -XXX,XX +XXX,XX @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
234
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
235
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
236
backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
237
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
238
239
# blockdev.c
240
qmp_block_job_cancel(void *job) "job %p"
241
--
242
2.17.1
243
244
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Fabian Ebner <f.ebner@proxmox.com>
2
2
3
These semantics are needed by drive-backup, so implement them before using
3
Linux SCSI can throw spurious -EAGAIN in some corner cases in its
4
this API there.
4
completion path, which will end up being the result in the completed
5
io_uring request.
5
6
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Resubmitting such requests should allow block jobs to complete, even
7
Signed-off-by: Fam Zheng <famz@redhat.com>
8
if such spurious errors are encountered.
8
Message-id: 20180703023758.14422-3-famz@redhat.com
9
9
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com>
11
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
12
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
13
Message-id: 20210729091029.65369-1-f.ebner@proxmox.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
15
---
11
block/io.c | 6 ++++--
16
block/io_uring.c | 16 +++++++++++++++-
12
include/block/block.h | 5 +++--
17
1 file changed, 15 insertions(+), 1 deletion(-)
13
2 files changed, 7 insertions(+), 4 deletions(-)
14
18
15
diff --git a/block/io.c b/block/io.c
19
diff --git a/block/io_uring.c b/block/io_uring.c
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/block/io.c
21
--- a/block/io_uring.c
18
+++ b/block/io.c
22
+++ b/block/io_uring.c
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
23
@@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s)
20
tracked_request_begin(&dst_req, dst->bs, dst_offset,
24
total_bytes = ret + luringcb->total_read;
21
bytes, BDRV_TRACKED_WRITE);
25
22
26
if (ret < 0) {
23
- wait_serialising_requests(&src_req);
27
- if (ret == -EINTR) {
24
- wait_serialising_requests(&dst_req);
28
+ /*
25
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
29
+ * Only writev/readv/fsync requests on regular files or host block
26
+ wait_serialising_requests(&src_req);
30
+ * devices are submitted. Therefore -EAGAIN is not expected but it's
27
+ wait_serialising_requests(&dst_req);
31
+ * known to happen sometimes with Linux SCSI. Submit again and hope
28
+ }
32
+ * the request completes successfully.
29
if (recurse_src) {
33
+ *
30
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
34
+ * For more information, see:
31
src, src_offset,
35
+ * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
32
diff --git a/include/block/block.h b/include/block/block.h
36
+ *
33
index XXXXXXX..XXXXXXX 100644
37
+ * If the code is changed to submit other types of requests in the
34
--- a/include/block/block.h
38
+ * future, then this workaround may need to be extended to deal with
35
+++ b/include/block/block.h
39
+ * genuine -EAGAIN results that should not be resubmitted
36
@@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
40
+ * immediately.
37
* @dst: Destination child to copy data to
41
+ */
38
* @dst_offset: offset in @dst image to write data
42
+ if (ret == -EINTR || ret == -EAGAIN) {
39
* @bytes: number of bytes to copy
43
luring_resubmit(s, luringcb);
40
- * @flags: request flags. Must be one of:
44
continue;
41
- * 0 - actually read data from src;
45
}
42
+ * @flags: request flags. Supported flags:
43
* BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
44
* write on @dst as if bdrv_co_pwrite_zeroes is
45
* called. Used to simplify caller code, or
46
* during BlockDriver.bdrv_co_copy_range_from()
47
* recursion.
48
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
49
+ * requests currently in flight.
50
*
51
* Returns: 0 if succeeded; negative error code if failed.
52
**/
53
--
46
--
54
2.17.1
47
2.31.1
55
48
56
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
src may be NULL if BDRV_REQ_ZERO_WRITE flag is set, in this case only
3
I'm interested in following the activity around the NVMe bdrv.
4
check dst and dst->bs. This bug was introduced when moving in the
5
request tracking code from bdrv_co_copy_range, in 37aec7d75eb.
6
4
7
This especially fixes the possible segfault when initializing src_bs
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
with a NULL src.
6
Message-id: 20210728183340.2018313-1-philmd@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
MAINTAINERS | 1 +
10
1 file changed, 1 insertion(+)
9
11
10
Signed-off-by: Fam Zheng <famz@redhat.com>
12
diff --git a/MAINTAINERS b/MAINTAINERS
11
Message-id: 20180703023758.14422-2-famz@redhat.com
13
index XXXXXXX..XXXXXXX 100644
12
Reviewed-by: Jeff Cody <jcody@redhat.com>
14
--- a/MAINTAINERS
13
Signed-off-by: Jeff Cody <jcody@redhat.com>
15
+++ b/MAINTAINERS
14
---
16
@@ -XXX,XX +XXX,XX @@ F: block/null.c
15
block/io.c | 29 +++++++++++++++--------------
17
NVMe Block Driver
16
1 file changed, 15 insertions(+), 14 deletions(-)
18
M: Stefan Hajnoczi <stefanha@redhat.com>
19
R: Fam Zheng <fam@euphon.net>
20
+R: Philippe Mathieu-Daudé <philmd@redhat.com>
21
L: qemu-block@nongnu.org
22
S: Supported
23
F: block/nvme*
24
--
25
2.31.1
17
26
18
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
21
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
23
bool recurse_src)
24
{
25
BdrvTrackedRequest src_req, dst_req;
26
- BlockDriverState *src_bs = src->bs;
27
- BlockDriverState *dst_bs = dst->bs;
28
int ret;
29
30
- if (!src || !dst || !src->bs || !dst->bs) {
31
+ if (!dst || !dst->bs) {
32
return -ENOMEDIUM;
33
}
34
- ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
35
- if (ret) {
36
- return ret;
37
- }
38
-
39
ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
40
if (ret) {
41
return ret;
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
43
return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
44
}
45
46
+ if (!src || !src->bs) {
47
+ return -ENOMEDIUM;
48
+ }
49
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
50
+ if (ret) {
51
+ return ret;
52
+ }
53
+
54
if (!src->bs->drv->bdrv_co_copy_range_from
55
|| !dst->bs->drv->bdrv_co_copy_range_to
56
|| src->bs->encrypted || dst->bs->encrypted) {
57
return -ENOTSUP;
58
}
59
- bdrv_inc_in_flight(src_bs);
60
- bdrv_inc_in_flight(dst_bs);
61
- tracked_request_begin(&src_req, src_bs, src_offset,
62
+ bdrv_inc_in_flight(src->bs);
63
+ bdrv_inc_in_flight(dst->bs);
64
+ tracked_request_begin(&src_req, src->bs, src_offset,
65
bytes, BDRV_TRACKED_READ);
66
- tracked_request_begin(&dst_req, dst_bs, dst_offset,
67
+ tracked_request_begin(&dst_req, dst->bs, dst_offset,
68
bytes, BDRV_TRACKED_WRITE);
69
70
wait_serialising_requests(&src_req);
71
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
72
}
73
tracked_request_end(&src_req);
74
tracked_request_end(&dst_req);
75
- bdrv_dec_in_flight(src_bs);
76
- bdrv_dec_in_flight(dst_bs);
77
+ bdrv_dec_in_flight(src->bs);
78
+ bdrv_dec_in_flight(dst->bs);
79
return ret;
80
}
81
82
--
83
2.17.1
84
85
diff view generated by jsdifflib