1
The following changes since commit ab08440a4ee09032d1a9cb22fdcab23bc7e1c656:
1
The following changes since commit afc9fcde55296b83f659de9da3cdf044812a6eeb:
2
2
3
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180702' into staging (2018-07-02 17:57:46 +0100)
3
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging (2021-10-20 06:10:51 -0700)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 9ded4a0114968e98b41494fc035ba14f84cdf700:
9
for you to fetch changes up to 4b2b3d2653f255ef4259a7689af1956536565901:
10
10
11
backup: Use copy offloading (2018-07-02 23:23:45 -0400)
11
coroutine: resize pool periodically instead of limiting size (2021-10-21 18:40:07 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block backup patches
14
Pull request
15
16
Performance optimization when guest applications submit a lot of parallel I/O.
17
This has also been found to improve clang SafeStack performance.
18
15
----------------------------------------------------------------
19
----------------------------------------------------------------
16
20
17
Fam Zheng (3):
21
Stefan Hajnoczi (1):
18
block: Fix parameter checking in bdrv_co_copy_range_internal
22
coroutine: resize pool periodically instead of limiting size
19
block: Honour BDRV_REQ_NO_SERIALISING in copy range
20
backup: Use copy offloading
21
23
22
block/backup.c | 150 ++++++++++++++++++++++++++++++------------
24
include/qemu/coroutine-pool-timer.h | 36 ++++++++++++++++
23
block/io.c | 35 +++++-----
25
include/qemu/coroutine.h | 7 ++++
24
block/trace-events | 1 +
26
iothread.c | 6 +++
25
include/block/block.h | 5 +-
27
util/coroutine-pool-timer.c | 35 ++++++++++++++++
26
4 files changed, 132 insertions(+), 59 deletions(-)
28
util/main-loop.c | 5 +++
29
util/qemu-coroutine.c | 64 ++++++++++++++++-------------
30
util/meson.build | 1 +
31
7 files changed, 125 insertions(+), 29 deletions(-)
32
create mode 100644 include/qemu/coroutine-pool-timer.h
33
create mode 100644 util/coroutine-pool-timer.c
27
34
28
--
35
--
29
2.17.1
36
2.31.1
30
37
31
38
39
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
src may be NULL if BDRV_REQ_ZERO_WRITE flag is set, in this case only
4
check dst and dst->bs. This bug was introduced when moving in the
5
request tracking code from bdrv_co_copy_range, in 37aec7d75eb.
6
7
This especially fixes the possible segfault when initializing src_bs
8
with a NULL src.
9
10
Signed-off-by: Fam Zheng <famz@redhat.com>
11
Message-id: 20180703023758.14422-2-famz@redhat.com
12
Reviewed-by: Jeff Cody <jcody@redhat.com>
13
Signed-off-by: Jeff Cody <jcody@redhat.com>
14
---
15
block/io.c | 29 +++++++++++++++--------------
16
1 file changed, 15 insertions(+), 14 deletions(-)
17
18
diff --git a/block/io.c b/block/io.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/io.c
21
+++ b/block/io.c
22
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
23
bool recurse_src)
24
{
25
BdrvTrackedRequest src_req, dst_req;
26
- BlockDriverState *src_bs = src->bs;
27
- BlockDriverState *dst_bs = dst->bs;
28
int ret;
29
30
- if (!src || !dst || !src->bs || !dst->bs) {
31
+ if (!dst || !dst->bs) {
32
return -ENOMEDIUM;
33
}
34
- ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
35
- if (ret) {
36
- return ret;
37
- }
38
-
39
ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
40
if (ret) {
41
return ret;
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
43
return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
44
}
45
46
+ if (!src || !src->bs) {
47
+ return -ENOMEDIUM;
48
+ }
49
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
50
+ if (ret) {
51
+ return ret;
52
+ }
53
+
54
if (!src->bs->drv->bdrv_co_copy_range_from
55
|| !dst->bs->drv->bdrv_co_copy_range_to
56
|| src->bs->encrypted || dst->bs->encrypted) {
57
return -ENOTSUP;
58
}
59
- bdrv_inc_in_flight(src_bs);
60
- bdrv_inc_in_flight(dst_bs);
61
- tracked_request_begin(&src_req, src_bs, src_offset,
62
+ bdrv_inc_in_flight(src->bs);
63
+ bdrv_inc_in_flight(dst->bs);
64
+ tracked_request_begin(&src_req, src->bs, src_offset,
65
bytes, BDRV_TRACKED_READ);
66
- tracked_request_begin(&dst_req, dst_bs, dst_offset,
67
+ tracked_request_begin(&dst_req, dst->bs, dst_offset,
68
bytes, BDRV_TRACKED_WRITE);
69
70
wait_serialising_requests(&src_req);
71
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
72
}
73
tracked_request_end(&src_req);
74
tracked_request_end(&dst_req);
75
- bdrv_dec_in_flight(src_bs);
76
- bdrv_dec_in_flight(dst_bs);
77
+ bdrv_dec_in_flight(src->bs);
78
+ bdrv_dec_in_flight(dst->bs);
79
return ret;
80
}
81
82
--
83
2.17.1
84
85
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
These semantics are needed by drive-backup, so implement them before using
4
this API there.
5
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Fam Zheng <famz@redhat.com>
8
Message-id: 20180703023758.14422-3-famz@redhat.com
9
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
---
11
block/io.c | 6 ++++--
12
include/block/block.h | 5 +++--
13
2 files changed, 7 insertions(+), 4 deletions(-)
14
15
diff --git a/block/io.c b/block/io.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/io.c
18
+++ b/block/io.c
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
20
tracked_request_begin(&dst_req, dst->bs, dst_offset,
21
bytes, BDRV_TRACKED_WRITE);
22
23
- wait_serialising_requests(&src_req);
24
- wait_serialising_requests(&dst_req);
25
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
26
+ wait_serialising_requests(&src_req);
27
+ wait_serialising_requests(&dst_req);
28
+ }
29
if (recurse_src) {
30
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
31
src, src_offset,
32
diff --git a/include/block/block.h b/include/block/block.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block.h
35
+++ b/include/block/block.h
36
@@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
37
* @dst: Destination child to copy data to
38
* @dst_offset: offset in @dst image to write data
39
* @bytes: number of bytes to copy
40
- * @flags: request flags. Must be one of:
41
- * 0 - actually read data from src;
42
+ * @flags: request flags. Supported flags:
43
* BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
44
* write on @dst as if bdrv_co_pwrite_zeroes is
45
* called. Used to simplify caller code, or
46
* during BlockDriver.bdrv_co_copy_range_from()
47
* recursion.
48
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
49
+ * requests currently in flight.
50
*
51
* Returns: 0 if succeeded; negative error code if failed.
52
**/
53
--
54
2.17.1
55
56
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
It was reported that enabling SafeStack reduces IOPS significantly
2
2
(>25%) with the following fio benchmark on virtio-blk using an NVMe host
3
The implementation is similar to that of 'qemu-img convert'. In the
3
block device:
4
beginning of the job, offloaded copy is attempted. If it fails, further
4
5
I/O will go through the existing bounce buffer code path.
5
# fio --rw=randrw --bs=4k --iodepth=64 --runtime=1m --direct=1 \
6
6
    --filename=/dev/vdb --name=job1 --ioengine=libaio --thread \
7
Then, as Kevin pointed out, both this and qemu-img convert can benefit
7
    --group_reporting --numjobs=16 --time_based \
8
from a local check if one request fails because, for example, the
8
--output=/tmp/fio_result
9
offset is beyond EOF, but another may well be accepted by the protocol
9
10
layer. This will be implemented separately.
10
Serge Guelton and I found that SafeStack is not really at fault, it just
11
11
increases the cost of coroutine creation. This fio workload exhausts the
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
coroutine pool and coroutine creation becomes a bottleneck. Previous
13
Signed-off-by: Fam Zheng <famz@redhat.com>
13
work by Honghao Wang also pointed to excessive coroutine creation.
14
Message-id: 20180703023758.14422-4-famz@redhat.com
14
15
Signed-off-by: Jeff Cody <jcody@redhat.com>
15
Creating new coroutines is expensive due to allocating new stacks with
16
mmap(2) and mprotect(2). Currently there are thread-local and global
17
pools that recycle old Coroutine objects and their stacks but the
18
hardcoded size limit of 64 for thread-local pools and 128 for the global
19
pool is insufficient for the fio benchmark shown above.
20
21
This patch changes the coroutine pool algorithm to a simple thread-local
22
pool without a maximum size limit. Threads periodically shrink the pool
23
down to a size sufficient for the maximum observed number of coroutines.
24
25
The global pool is removed by this patch. It can help to hide the fact
26
that local pools are easily exhausted, but it doesn't fix the root
27
cause. I don't think there is a need for a global pool because QEMU's
28
threads are long-lived, so let's keep things simple.
29
30
Performance of the above fio benchmark is as follows:
31
32
Before After
33
IOPS 60k 97k
34
35
Memory usage varies over time as needed by the workload:
36
37
VSZ (KB) RSS (KB)
38
Before fio 4705248 843128
39
During fio 5747668 (+ ~100 MB) 849280
40
After fio 4694996 (- ~100 MB) 845184
41
42
This confirms that coroutines are indeed being freed when no longer
43
needed.
44
45
Thanks to Serge Guelton for working on identifying the bottleneck with
46
me!
47
48
Reported-by: Tingting Mao <timao@redhat.com>
49
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
50
Message-id: 20210913153524.1190696-1-stefanha@redhat.com
51
Cc: Serge Guelton <sguelton@redhat.com>
52
Cc: Honghao Wang <wanghonghao@bytedance.com>
53
Cc: Paolo Bonzini <pbonzini@redhat.com>
54
Cc: Daniele Buono <dbuono@linux.vnet.ibm.com>
55
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
56
57
[Moved atexit notifier to coroutine_delete() after GitLab CI reported a
58
memory leak in tests/unit/test-aio-multithread because the Coroutine
59
object was created in the main thread but runs in an IOThread (where
60
it's also deleted).
61
--Stefan]
16
---
62
---
17
block/backup.c | 150 ++++++++++++++++++++++++++++++++-------------
63
include/qemu/coroutine-pool-timer.h | 36 ++++++++++++++++
18
block/trace-events | 1 +
64
include/qemu/coroutine.h | 7 ++++
19
2 files changed, 110 insertions(+), 41 deletions(-)
65
iothread.c | 6 +++
20
66
util/coroutine-pool-timer.c | 35 ++++++++++++++++
21
diff --git a/block/backup.c b/block/backup.c
67
util/main-loop.c | 5 +++
22
index XXXXXXX..XXXXXXX 100644
68
util/qemu-coroutine.c | 64 ++++++++++++++++-------------
23
--- a/block/backup.c
69
util/meson.build | 1 +
24
+++ b/block/backup.c
70
7 files changed, 125 insertions(+), 29 deletions(-)
25
@@ -XXX,XX +XXX,XX @@ typedef struct BackupBlockJob {
71
create mode 100644 include/qemu/coroutine-pool-timer.h
26
QLIST_HEAD(, CowRequest) inflight_reqs;
72
create mode 100644 util/coroutine-pool-timer.c
27
73
28
HBitmap *copy_bitmap;
74
diff --git a/include/qemu/coroutine-pool-timer.h b/include/qemu/coroutine-pool-timer.h
29
+ bool use_copy_range;
75
new file mode 100644
30
+ int64_t copy_range_size;
76
index XXXXXXX..XXXXXXX
31
} BackupBlockJob;
77
--- /dev/null
32
78
+++ b/include/qemu/coroutine-pool-timer.h
33
static const BlockJobDriver backup_job_driver;
79
@@ -XXX,XX +XXX,XX @@
34
@@ -XXX,XX +XXX,XX @@ static void cow_request_end(CowRequest *req)
80
+/*
35
qemu_co_queue_restart_all(&req->wait_queue);
81
+ * QEMU coroutine pool timer
82
+ *
83
+ * Copyright (c) 2021 Red Hat, Inc.
84
+ *
85
+ * SPDX-License-Identifier: LGPL-2.1-or-later
86
+ *
87
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
88
+ * See the COPYING.LIB file in the top-level directory.
89
+ *
90
+ */
91
+#ifndef COROUTINE_POOL_TIMER_H
92
+#define COROUTINE_POOL_TIMER_H
93
+
94
+#include "qemu/osdep.h"
95
+#include "block/aio.h"
96
+
97
+/**
98
+ * A timer that periodically resizes this thread's coroutine pool, freeing
99
+ * memory if there are too many unused coroutines.
100
+ *
101
+ * Threads that make heavy use of coroutines should use this. Failure to resize
102
+ * the coroutine pool can lead to large amounts of memory sitting idle and
103
+ * never being used after the first time.
104
+ */
105
+typedef struct {
106
+ QEMUTimer *timer;
107
+} CoroutinePoolTimer;
108
+
109
+/* Call this before the thread runs the AioContext */
110
+void coroutine_pool_timer_init(CoroutinePoolTimer *pt, AioContext *ctx);
111
+
112
+/* Call this before the AioContext from the init function is destroyed */
113
+void coroutine_pool_timer_cleanup(CoroutinePoolTimer *pt);
114
+
115
+#endif /* COROUTINE_POOL_TIMER_H */
116
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/include/qemu/coroutine.h
119
+++ b/include/qemu/coroutine.h
120
@@ -XXX,XX +XXX,XX @@ bool qemu_in_coroutine(void);
121
*/
122
bool qemu_coroutine_entered(Coroutine *co);
123
124
+/**
125
+ * Optionally call this function periodically to shrink the thread-local pool
126
+ * down. Spiky workloads can create many coroutines and then never reach that
127
+ * level again. Shrinking the pool reclaims memory in this case.
128
+ */
129
+void qemu_coroutine_pool_periodic_resize(void);
130
+
131
/**
132
* Provides a mutex that can be used to synchronise coroutines
133
*/
134
diff --git a/iothread.c b/iothread.c
135
index XXXXXXX..XXXXXXX 100644
136
--- a/iothread.c
137
+++ b/iothread.c
138
@@ -XXX,XX +XXX,XX @@
139
#include "qemu/error-report.h"
140
#include "qemu/rcu.h"
141
#include "qemu/main-loop.h"
142
+#include "qemu/coroutine-pool-timer.h"
143
144
typedef ObjectClass IOThreadClass;
145
146
@@ -XXX,XX +XXX,XX @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
147
static void *iothread_run(void *opaque)
148
{
149
IOThread *iothread = opaque;
150
+ CoroutinePoolTimer co_pool_timer;
151
152
rcu_register_thread();
153
/*
154
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
155
iothread->thread_id = qemu_get_thread_id();
156
qemu_sem_post(&iothread->init_done_sem);
157
158
+ coroutine_pool_timer_init(&co_pool_timer, iothread->ctx);
159
+
160
while (iothread->running) {
161
/*
162
* Note: from functional-wise the g_main_loop_run() below can
163
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
164
}
165
}
166
167
+ coroutine_pool_timer_cleanup(&co_pool_timer);
168
+
169
g_main_context_pop_thread_default(iothread->worker_context);
170
rcu_unregister_thread();
171
return NULL;
172
diff --git a/util/coroutine-pool-timer.c b/util/coroutine-pool-timer.c
173
new file mode 100644
174
index XXXXXXX..XXXXXXX
175
--- /dev/null
176
+++ b/util/coroutine-pool-timer.c
177
@@ -XXX,XX +XXX,XX @@
178
+/*
179
+ * QEMU coroutine pool timer
180
+ *
181
+ * Copyright (c) 2021 Red Hat, Inc.
182
+ *
183
+ * SPDX-License-Identifier: LGPL-2.1-or-later
184
+ *
185
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
186
+ * See the COPYING.LIB file in the top-level directory.
187
+ *
188
+ */
189
+#include "qemu/coroutine-pool-timer.h"
190
+
191
+static void coroutine_pool_timer_cb(void *opaque)
192
+{
193
+ CoroutinePoolTimer *pt = opaque;
194
+ int64_t expiry_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
195
+ 15 * NANOSECONDS_PER_SECOND;
196
+
197
+ qemu_coroutine_pool_periodic_resize();
198
+ timer_mod(pt->timer, expiry_time_ns);
199
+}
200
+
201
+void coroutine_pool_timer_init(CoroutinePoolTimer *pt, AioContext *ctx)
202
+{
203
+ pt->timer = aio_timer_new(ctx, QEMU_CLOCK_REALTIME, SCALE_NS,
204
+ coroutine_pool_timer_cb, pt);
205
+ coroutine_pool_timer_cb(pt);
206
+}
207
+
208
+void coroutine_pool_timer_cleanup(CoroutinePoolTimer *pt)
209
+{
210
+ timer_free(pt->timer);
211
+ pt->timer = NULL;
212
+}
213
diff --git a/util/main-loop.c b/util/main-loop.c
214
index XXXXXXX..XXXXXXX 100644
215
--- a/util/main-loop.c
216
+++ b/util/main-loop.c
217
@@ -XXX,XX +XXX,XX @@
218
#include "qemu/error-report.h"
219
#include "qemu/queue.h"
220
#include "qemu/compiler.h"
221
+#include "qemu/coroutine-pool-timer.h"
222
223
#ifndef _WIN32
224
#include <sys/wait.h>
225
@@ -XXX,XX +XXX,XX @@ static int qemu_signal_init(Error **errp)
226
227
static AioContext *qemu_aio_context;
228
static QEMUBH *qemu_notify_bh;
229
+static CoroutinePoolTimer main_loop_co_pool_timer;
230
231
static void notify_event_cb(void *opaque)
232
{
233
@@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp)
234
g_source_set_name(src, "io-handler");
235
g_source_attach(src, NULL);
236
g_source_unref(src);
237
+
238
+ coroutine_pool_timer_init(&main_loop_co_pool_timer, qemu_aio_context);
239
+
240
return 0;
36
}
241
}
37
242
38
+/* Copy range to target with a bounce buffer and return the bytes copied. If
243
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
39
+ * error occurred, return a negative error number */
244
index XXXXXXX..XXXXXXX 100644
40
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
245
--- a/util/qemu-coroutine.c
41
+ int64_t start,
246
+++ b/util/qemu-coroutine.c
42
+ int64_t end,
247
@@ -XXX,XX +XXX,XX @@
43
+ bool is_write_notifier,
248
#include "block/aio.h"
44
+ bool *error_is_read,
249
45
+ void **bounce_buffer)
250
enum {
251
- POOL_BATCH_SIZE = 64,
252
+ /*
253
+ * qemu_coroutine_pool_periodic_resize() keeps at least this many
254
+ * coroutines around.
255
+ */
256
+ ALLOC_POOL_MIN = 64,
257
};
258
259
+
260
/** Free list to speed up creation */
261
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
262
-static unsigned int release_pool_size;
263
static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
264
static __thread unsigned int alloc_pool_size;
265
+static __thread unsigned int num_coroutines;
266
+static __thread unsigned int max_coroutines_this_slice;
267
static __thread Notifier coroutine_pool_cleanup_notifier;
268
269
static void coroutine_pool_cleanup(Notifier *n, void *value)
270
@@ -XXX,XX +XXX,XX @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
271
272
if (CONFIG_COROUTINE_POOL) {
273
co = QSLIST_FIRST(&alloc_pool);
274
- if (!co) {
275
- if (release_pool_size > POOL_BATCH_SIZE) {
276
- /* Slow path; a good place to register the destructor, too. */
277
- if (!coroutine_pool_cleanup_notifier.notify) {
278
- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
279
- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
280
- }
281
-
282
- /* This is not exact; there could be a little skew between
283
- * release_pool_size and the actual size of release_pool. But
284
- * it is just a heuristic, it does not need to be perfect.
285
- */
286
- alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
287
- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
288
- co = QSLIST_FIRST(&alloc_pool);
289
- }
290
- }
291
if (co) {
292
QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
293
alloc_pool_size--;
294
}
295
+
296
+ num_coroutines++;
297
+ if (num_coroutines > max_coroutines_this_slice) {
298
+ max_coroutines_this_slice = num_coroutines;
299
+ }
300
}
301
302
if (!co) {
303
@@ -XXX,XX +XXX,XX @@ static void coroutine_delete(Coroutine *co)
304
co->caller = NULL;
305
306
if (CONFIG_COROUTINE_POOL) {
307
- if (release_pool_size < POOL_BATCH_SIZE * 2) {
308
- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
309
- qatomic_inc(&release_pool_size);
310
- return;
311
- }
312
- if (alloc_pool_size < POOL_BATCH_SIZE) {
313
- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
314
- alloc_pool_size++;
315
- return;
316
+ if (!coroutine_pool_cleanup_notifier.notify) {
317
+ coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
318
+ qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
319
}
320
+
321
+ num_coroutines--;
322
+ QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
323
+ alloc_pool_size++;
324
+ return;
325
}
326
327
qemu_coroutine_delete(co);
328
}
329
330
+void qemu_coroutine_pool_periodic_resize(void)
46
+{
331
+{
47
+ int ret;
332
+ unsigned pool_size_target =
48
+ struct iovec iov;
333
+ MAX(ALLOC_POOL_MIN, max_coroutines_this_slice) - num_coroutines;
49
+ QEMUIOVector qiov;
334
+ max_coroutines_this_slice = num_coroutines;
50
+ BlockBackend *blk = job->common.blk;
335
+
51
+ int nbytes;
336
+ while (alloc_pool_size > pool_size_target) {
52
+
337
+ Coroutine *co = QSLIST_FIRST(&alloc_pool);
53
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
338
+ QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
54
+ nbytes = MIN(job->cluster_size, job->len - start);
339
+ qemu_coroutine_delete(co);
55
+ if (!*bounce_buffer) {
340
+ alloc_pool_size--;
56
+ *bounce_buffer = blk_blockalign(blk, job->cluster_size);
57
+ }
341
+ }
58
+ iov.iov_base = *bounce_buffer;
59
+ iov.iov_len = nbytes;
60
+ qemu_iovec_init_external(&qiov, &iov, 1);
61
+
62
+ ret = blk_co_preadv(blk, start, qiov.size, &qiov,
63
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
64
+ if (ret < 0) {
65
+ trace_backup_do_cow_read_fail(job, start, ret);
66
+ if (error_is_read) {
67
+ *error_is_read = true;
68
+ }
69
+ goto fail;
70
+ }
71
+
72
+ if (qemu_iovec_is_zero(&qiov)) {
73
+ ret = blk_co_pwrite_zeroes(job->target, start,
74
+ qiov.size, BDRV_REQ_MAY_UNMAP);
75
+ } else {
76
+ ret = blk_co_pwritev(job->target, start,
77
+ qiov.size, &qiov,
78
+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
79
+ }
80
+ if (ret < 0) {
81
+ trace_backup_do_cow_write_fail(job, start, ret);
82
+ if (error_is_read) {
83
+ *error_is_read = false;
84
+ }
85
+ goto fail;
86
+ }
87
+
88
+ return nbytes;
89
+fail:
90
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
91
+ return ret;
92
+
93
+}
342
+}
94
+
343
+
95
+/* Copy range to target and return the bytes copied. If error occurred, return a
344
void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
96
+ * negative error number. */
97
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
98
+ int64_t start,
99
+ int64_t end,
100
+ bool is_write_notifier)
101
+{
102
+ int ret;
103
+ int nr_clusters;
104
+ BlockBackend *blk = job->common.blk;
105
+ int nbytes;
106
+
107
+ assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
108
+ nbytes = MIN(job->copy_range_size, end - start);
109
+ nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
110
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
111
+ nr_clusters);
112
+ ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
113
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
114
+ if (ret < 0) {
115
+ trace_backup_do_cow_copy_range_fail(job, start, ret);
116
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size,
117
+ nr_clusters);
118
+ return ret;
119
+ }
120
+
121
+ return nbytes;
122
+}
123
+
124
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
125
int64_t offset, uint64_t bytes,
126
bool *error_is_read,
127
bool is_write_notifier)
128
{
345
{
129
- BlockBackend *blk = job->common.blk;
346
QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
130
CowRequest cow_request;
347
diff --git a/util/meson.build b/util/meson.build
131
- struct iovec iov;
348
index XXXXXXX..XXXXXXX 100644
132
- QEMUIOVector bounce_qiov;
349
--- a/util/meson.build
133
- void *bounce_buffer = NULL;
350
+++ b/util/meson.build
134
int ret = 0;
351
@@ -XXX,XX +XXX,XX @@ if have_block
135
int64_t start, end; /* bytes */
352
util_ss.add(files('buffer.c'))
136
- int n; /* bytes */
353
util_ss.add(files('bufferiszero.c'))
137
+ void *bounce_buffer = NULL;
354
util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
138
355
+ util_ss.add(files('coroutine-pool-timer.c'))
139
qemu_co_rwlock_rdlock(&job->flush_rwlock);
356
util_ss.add(files('hbitmap.c'))
140
357
util_ss.add(files('hexdump.c'))
141
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
358
util_ss.add(files('iova-tree.c'))
142
wait_for_overlapping_requests(job, start, end);
143
cow_request_begin(&cow_request, job, start, end);
144
145
- for (; start < end; start += job->cluster_size) {
146
+ while (start < end) {
147
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
148
trace_backup_do_cow_skip(job, start);
149
+ start += job->cluster_size;
150
continue; /* already copied */
151
}
152
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
153
154
trace_backup_do_cow_process(job, start);
155
156
- n = MIN(job->cluster_size, job->len - start);
157
-
158
- if (!bounce_buffer) {
159
- bounce_buffer = blk_blockalign(blk, job->cluster_size);
160
- }
161
- iov.iov_base = bounce_buffer;
162
- iov.iov_len = n;
163
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
164
-
165
- ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
166
- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
167
- if (ret < 0) {
168
- trace_backup_do_cow_read_fail(job, start, ret);
169
- if (error_is_read) {
170
- *error_is_read = true;
171
+ if (job->use_copy_range) {
172
+ ret = backup_cow_with_offload(job, start, end, is_write_notifier);
173
+ if (ret < 0) {
174
+ job->use_copy_range = false;
175
}
176
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
177
- goto out;
178
}
179
-
180
- if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
181
- ret = blk_co_pwrite_zeroes(job->target, start,
182
- bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
183
- } else {
184
- ret = blk_co_pwritev(job->target, start,
185
- bounce_qiov.size, &bounce_qiov,
186
- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
187
+ if (!job->use_copy_range) {
188
+ ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
189
+ error_is_read, &bounce_buffer);
190
}
191
if (ret < 0) {
192
- trace_backup_do_cow_write_fail(job, start, ret);
193
- if (error_is_read) {
194
- *error_is_read = false;
195
- }
196
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
197
- goto out;
198
+ break;
199
}
200
201
/* Publish progress, guest I/O counts as progress too. Note that the
202
* offset field is an opaque progress value, it is not a disk offset.
203
*/
204
- job->bytes_read += n;
205
- job_progress_update(&job->common.job, n);
206
+ start += ret;
207
+ job->bytes_read += ret;
208
+ job_progress_update(&job->common.job, ret);
209
+ ret = 0;
210
}
211
212
-out:
213
if (bounce_buffer) {
214
qemu_vfree(bounce_buffer);
215
}
216
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
217
} else {
218
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
219
}
220
+ job->use_copy_range = true;
221
+ job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
222
+ blk_get_max_transfer(job->target));
223
+ job->copy_range_size = MAX(job->cluster_size,
224
+ QEMU_ALIGN_UP(job->copy_range_size,
225
+ job->cluster_size));
226
227
/* Required permissions are already taken with target's blk_new() */
228
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
229
diff --git a/block/trace-events b/block/trace-events
230
index XXXXXXX..XXXXXXX 100644
231
--- a/block/trace-events
232
+++ b/block/trace-events
233
@@ -XXX,XX +XXX,XX @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
234
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
235
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
236
backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
237
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
238
239
# blockdev.c
240
qmp_block_job_cancel(void *job) "job %p"
241
--
359
--
242
2.17.1
360
2.31.1
243
361
244
362
diff view generated by jsdifflib