1
The following changes since commit ab08440a4ee09032d1a9cb22fdcab23bc7e1c656:
1
The following changes since commit a3607def89f9cd68c1b994e1030527df33aa91d0:
2
2
3
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180702' into staging (2018-07-02 17:57:46 +0100)
3
Update version for v6.2.0-rc4 release (2021-12-07 17:51:38 -0800)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 9ded4a0114968e98b41494fc035ba14f84cdf700:
9
for you to fetch changes up to cf4fbc3030c974fff726756a7ceef8386cdf500b:
10
10
11
backup: Use copy offloading (2018-07-02 23:23:45 -0400)
11
block/nvme: fix infinite loop in nvme_free_req_queue_cb() (2021-12-09 09:19:49 +0000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block backup patches
14
Pull request
15
16
An infinite loop fix for the userspace NVMe driver.
17
15
----------------------------------------------------------------
18
----------------------------------------------------------------
16
19
17
Fam Zheng (3):
20
Stefan Hajnoczi (1):
18
block: Fix parameter checking in bdrv_co_copy_range_internal
21
block/nvme: fix infinite loop in nvme_free_req_queue_cb()
19
block: Honour BDRV_REQ_NO_SERIALISING in copy range
20
backup: Use copy offloading
21
22
22
block/backup.c | 150 ++++++++++++++++++++++++++++++------------
23
block/nvme.c | 5 +++--
23
block/io.c | 35 +++++-----
24
1 file changed, 3 insertions(+), 2 deletions(-)
24
block/trace-events | 1 +
25
include/block/block.h | 5 +-
26
4 files changed, 132 insertions(+), 59 deletions(-)
27
25
28
--
26
--
29
2.17.1
27
2.33.1
30
28
31
29
30
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
src may be NULL if the BDRV_REQ_ZERO_WRITE flag is set; in this case only
4
check dst and dst->bs. This bug was introduced when moving in the
5
request tracking code from bdrv_co_copy_range, in 37aec7d75eb.
6
7
This especially fixes the possible segfault when initializing src_bs
8
with a NULL src.
9
10
Signed-off-by: Fam Zheng <famz@redhat.com>
11
Message-id: 20180703023758.14422-2-famz@redhat.com
12
Reviewed-by: Jeff Cody <jcody@redhat.com>
13
Signed-off-by: Jeff Cody <jcody@redhat.com>
14
---
15
block/io.c | 29 +++++++++++++++--------------
16
1 file changed, 15 insertions(+), 14 deletions(-)
17
18
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
21
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
23
bool recurse_src)
24
{
25
BdrvTrackedRequest src_req, dst_req;
26
- BlockDriverState *src_bs = src->bs;
27
- BlockDriverState *dst_bs = dst->bs;
28
int ret;
29
30
- if (!src || !dst || !src->bs || !dst->bs) {
31
+ if (!dst || !dst->bs) {
32
return -ENOMEDIUM;
33
}
34
- ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
35
- if (ret) {
36
- return ret;
37
- }
38
-
39
ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
40
if (ret) {
41
return ret;
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
43
return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
44
}
45
46
+ if (!src || !src->bs) {
47
+ return -ENOMEDIUM;
48
+ }
49
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
50
+ if (ret) {
51
+ return ret;
52
+ }
53
+
54
if (!src->bs->drv->bdrv_co_copy_range_from
55
|| !dst->bs->drv->bdrv_co_copy_range_to
56
|| src->bs->encrypted || dst->bs->encrypted) {
57
return -ENOTSUP;
58
}
59
- bdrv_inc_in_flight(src_bs);
60
- bdrv_inc_in_flight(dst_bs);
61
- tracked_request_begin(&src_req, src_bs, src_offset,
62
+ bdrv_inc_in_flight(src->bs);
63
+ bdrv_inc_in_flight(dst->bs);
64
+ tracked_request_begin(&src_req, src->bs, src_offset,
65
bytes, BDRV_TRACKED_READ);
66
- tracked_request_begin(&dst_req, dst_bs, dst_offset,
67
+ tracked_request_begin(&dst_req, dst->bs, dst_offset,
68
bytes, BDRV_TRACKED_WRITE);
69
70
wait_serialising_requests(&src_req);
71
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
72
}
73
tracked_request_end(&src_req);
74
tracked_request_end(&dst_req);
75
- bdrv_dec_in_flight(src_bs);
76
- bdrv_dec_in_flight(dst_bs);
77
+ bdrv_dec_in_flight(src->bs);
78
+ bdrv_dec_in_flight(dst->bs);
79
return ret;
80
}
81
82
--
83
2.17.1
84
85
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
These semantics are needed by drive-backup, so implement them before using
4
this API there.
5
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Fam Zheng <famz@redhat.com>
8
Message-id: 20180703023758.14422-3-famz@redhat.com
9
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
---
11
block/io.c | 6 ++++--
12
include/block/block.h | 5 +++--
13
2 files changed, 7 insertions(+), 4 deletions(-)
14
15
diff --git a/block/io.c b/block/io.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/io.c
18
+++ b/block/io.c
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
20
tracked_request_begin(&dst_req, dst->bs, dst_offset,
21
bytes, BDRV_TRACKED_WRITE);
22
23
- wait_serialising_requests(&src_req);
24
- wait_serialising_requests(&dst_req);
25
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
26
+ wait_serialising_requests(&src_req);
27
+ wait_serialising_requests(&dst_req);
28
+ }
29
if (recurse_src) {
30
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
31
src, src_offset,
32
diff --git a/include/block/block.h b/include/block/block.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block.h
35
+++ b/include/block/block.h
36
@@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
37
* @dst: Destination child to copy data to
38
* @dst_offset: offset in @dst image to write data
39
* @bytes: number of bytes to copy
40
- * @flags: request flags. Must be one of:
41
- * 0 - actually read data from src;
42
+ * @flags: request flags. Supported flags:
43
* BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
44
* write on @dst as if bdrv_co_pwrite_zeroes is
45
* called. Used to simplify caller code, or
46
* during BlockDriver.bdrv_co_copy_range_from()
47
* recursion.
48
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
49
+ * requests currently in flight.
50
*
51
* Returns: 0 if succeeded; negative error code if failed.
52
**/
53
--
54
2.17.1
55
56
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
When the request free list is exhausted the coroutine waits on
2
q->free_req_queue for the next free request. Whenever a request is
3
completed a BH is scheduled to invoke nvme_free_req_queue_cb() and wake
4
up waiting coroutines.
2
5
3
The implementation is similar to that of 'qemu-img convert'. In the
6
1. nvme_get_free_req() waits for a free request:
4
beginning of the job, offloaded copy is attempted. If it fails, further
5
I/O will go through the existing bounce buffer code path.
6
7
7
Then, as Kevin pointed out, both this and qemu-img convert can benefit
8
while (q->free_req_head == -1) {
8
from a local check if one request fails because, for example, the
9
...
9
offset is beyond EOF, but another may well be accepted by the protocol
10
trace_nvme_free_req_queue_wait(q->s, q->index);
10
layer. This will be implemented separately.
11
qemu_co_queue_wait(&q->free_req_queue, &q->lock);
12
...
13
}
11
14
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
2. nvme_free_req_queue_cb() wakes up the coroutine:
13
Signed-off-by: Fam Zheng <famz@redhat.com>
16
14
Message-id: 20180703023758.14422-4-famz@redhat.com
17
while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
15
Signed-off-by: Jeff Cody <jcody@redhat.com>
18
^--- infinite loop when free_req_head == -1
19
}
20
21
nvme_free_req_queue_cb() and the coroutine form an infinite loop when
22
q->free_req_head == -1. Fix this by checking q->free_req_head in
23
nvme_free_req_queue_cb(). If the free request list is exhausted, don't
24
wake waiting coroutines. Eventually an in-flight request will complete
25
and the BH will be scheduled again, guaranteeing forward progress.
26
27
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
28
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
29
Message-id: 20211208152246.244585-1-stefanha@redhat.com
30
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
31
---
17
block/backup.c | 150 ++++++++++++++++++++++++++++++++-------------
32
block/nvme.c | 5 +++--
18
block/trace-events | 1 +
33
1 file changed, 3 insertions(+), 2 deletions(-)
19
2 files changed, 110 insertions(+), 41 deletions(-)
20
34
21
diff --git a/block/backup.c b/block/backup.c
35
diff --git a/block/nvme.c b/block/nvme.c
22
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
23
--- a/block/backup.c
37
--- a/block/nvme.c
24
+++ b/block/backup.c
38
+++ b/block/nvme.c
25
@@ -XXX,XX +XXX,XX @@ typedef struct BackupBlockJob {
39
@@ -XXX,XX +XXX,XX @@ static void nvme_free_req_queue_cb(void *opaque)
26
QLIST_HEAD(, CowRequest) inflight_reqs;
40
NVMeQueuePair *q = opaque;
27
41
28
HBitmap *copy_bitmap;
42
qemu_mutex_lock(&q->lock);
29
+ bool use_copy_range;
43
- while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
30
+ int64_t copy_range_size;
44
- /* Retry all pending requests */
31
} BackupBlockJob;
45
+ while (q->free_req_head != -1 &&
32
46
+ qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
33
static const BlockJobDriver backup_job_driver;
47
+ /* Retry waiting requests */
34
@@ -XXX,XX +XXX,XX @@ static void cow_request_end(CowRequest *req)
48
}
35
qemu_co_queue_restart_all(&req->wait_queue);
49
qemu_mutex_unlock(&q->lock);
36
}
50
}
37
38
+/* Copy range to target with a bounce buffer and return the bytes copied. If
39
+ * error occurred, return a negative error number */
40
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
41
+ int64_t start,
42
+ int64_t end,
43
+ bool is_write_notifier,
44
+ bool *error_is_read,
45
+ void **bounce_buffer)
46
+{
47
+ int ret;
48
+ struct iovec iov;
49
+ QEMUIOVector qiov;
50
+ BlockBackend *blk = job->common.blk;
51
+ int nbytes;
52
+
53
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
54
+ nbytes = MIN(job->cluster_size, job->len - start);
55
+ if (!*bounce_buffer) {
56
+ *bounce_buffer = blk_blockalign(blk, job->cluster_size);
57
+ }
58
+ iov.iov_base = *bounce_buffer;
59
+ iov.iov_len = nbytes;
60
+ qemu_iovec_init_external(&qiov, &iov, 1);
61
+
62
+ ret = blk_co_preadv(blk, start, qiov.size, &qiov,
63
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
64
+ if (ret < 0) {
65
+ trace_backup_do_cow_read_fail(job, start, ret);
66
+ if (error_is_read) {
67
+ *error_is_read = true;
68
+ }
69
+ goto fail;
70
+ }
71
+
72
+ if (qemu_iovec_is_zero(&qiov)) {
73
+ ret = blk_co_pwrite_zeroes(job->target, start,
74
+ qiov.size, BDRV_REQ_MAY_UNMAP);
75
+ } else {
76
+ ret = blk_co_pwritev(job->target, start,
77
+ qiov.size, &qiov,
78
+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
79
+ }
80
+ if (ret < 0) {
81
+ trace_backup_do_cow_write_fail(job, start, ret);
82
+ if (error_is_read) {
83
+ *error_is_read = false;
84
+ }
85
+ goto fail;
86
+ }
87
+
88
+ return nbytes;
89
+fail:
90
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
91
+ return ret;
92
+
93
+}
94
+
95
+/* Copy range to target and return the bytes copied. If error occurred, return a
96
+ * negative error number. */
97
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
98
+ int64_t start,
99
+ int64_t end,
100
+ bool is_write_notifier)
101
+{
102
+ int ret;
103
+ int nr_clusters;
104
+ BlockBackend *blk = job->common.blk;
105
+ int nbytes;
106
+
107
+ assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
108
+ nbytes = MIN(job->copy_range_size, end - start);
109
+ nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
110
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
111
+ nr_clusters);
112
+ ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
113
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
114
+ if (ret < 0) {
115
+ trace_backup_do_cow_copy_range_fail(job, start, ret);
116
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size,
117
+ nr_clusters);
118
+ return ret;
119
+ }
120
+
121
+ return nbytes;
122
+}
123
+
124
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
125
int64_t offset, uint64_t bytes,
126
bool *error_is_read,
127
bool is_write_notifier)
128
{
129
- BlockBackend *blk = job->common.blk;
130
CowRequest cow_request;
131
- struct iovec iov;
132
- QEMUIOVector bounce_qiov;
133
- void *bounce_buffer = NULL;
134
int ret = 0;
135
int64_t start, end; /* bytes */
136
- int n; /* bytes */
137
+ void *bounce_buffer = NULL;
138
139
qemu_co_rwlock_rdlock(&job->flush_rwlock);
140
141
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
142
wait_for_overlapping_requests(job, start, end);
143
cow_request_begin(&cow_request, job, start, end);
144
145
- for (; start < end; start += job->cluster_size) {
146
+ while (start < end) {
147
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
148
trace_backup_do_cow_skip(job, start);
149
+ start += job->cluster_size;
150
continue; /* already copied */
151
}
152
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
153
154
trace_backup_do_cow_process(job, start);
155
156
- n = MIN(job->cluster_size, job->len - start);
157
-
158
- if (!bounce_buffer) {
159
- bounce_buffer = blk_blockalign(blk, job->cluster_size);
160
- }
161
- iov.iov_base = bounce_buffer;
162
- iov.iov_len = n;
163
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
164
-
165
- ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
166
- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
167
- if (ret < 0) {
168
- trace_backup_do_cow_read_fail(job, start, ret);
169
- if (error_is_read) {
170
- *error_is_read = true;
171
+ if (job->use_copy_range) {
172
+ ret = backup_cow_with_offload(job, start, end, is_write_notifier);
173
+ if (ret < 0) {
174
+ job->use_copy_range = false;
175
}
176
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
177
- goto out;
178
}
179
-
180
- if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
181
- ret = blk_co_pwrite_zeroes(job->target, start,
182
- bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
183
- } else {
184
- ret = blk_co_pwritev(job->target, start,
185
- bounce_qiov.size, &bounce_qiov,
186
- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
187
+ if (!job->use_copy_range) {
188
+ ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
189
+ error_is_read, &bounce_buffer);
190
}
191
if (ret < 0) {
192
- trace_backup_do_cow_write_fail(job, start, ret);
193
- if (error_is_read) {
194
- *error_is_read = false;
195
- }
196
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
197
- goto out;
198
+ break;
199
}
200
201
/* Publish progress, guest I/O counts as progress too. Note that the
202
* offset field is an opaque progress value, it is not a disk offset.
203
*/
204
- job->bytes_read += n;
205
- job_progress_update(&job->common.job, n);
206
+ start += ret;
207
+ job->bytes_read += ret;
208
+ job_progress_update(&job->common.job, ret);
209
+ ret = 0;
210
}
211
212
-out:
213
if (bounce_buffer) {
214
qemu_vfree(bounce_buffer);
215
}
216
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
217
} else {
218
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
219
}
220
+ job->use_copy_range = true;
221
+ job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
222
+ blk_get_max_transfer(job->target));
223
+ job->copy_range_size = MAX(job->cluster_size,
224
+ QEMU_ALIGN_UP(job->copy_range_size,
225
+ job->cluster_size));
226
227
/* Required permissions are already taken with target's blk_new() */
228
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
229
diff --git a/block/trace-events b/block/trace-events
230
index XXXXXXX..XXXXXXX 100644
231
--- a/block/trace-events
232
+++ b/block/trace-events
233
@@ -XXX,XX +XXX,XX @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
234
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
235
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
236
backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
237
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
238
239
# blockdev.c
240
qmp_block_job_cancel(void *job) "job %p"
241
--
51
--
242
2.17.1
52
2.33.1
243
53
244
54
diff view generated by jsdifflib