The following changes since commit afc9fcde55296b83f659de9da3cdf044812a6eeb:

  Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging (2021-10-20 06:10:51 -0700)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 4b2b3d2653f255ef4259a7689af1956536565901:

  coroutine: resize pool periodically instead of limiting size (2021-10-21 18:40:07 +0100)

----------------------------------------------------------------
Pull request

Performance optimization when guest applications submit a lot of parallel I/O.
This has also been found to improve clang SafeStack performance.

----------------------------------------------------------------

Stefan Hajnoczi (1):
  coroutine: resize pool periodically instead of limiting size

 include/qemu/coroutine-pool-timer.h | 36 ++++++++++++++++
 include/qemu/coroutine.h            |  7 ++++
 iothread.c                          |  6 +++
 util/coroutine-pool-timer.c         | 35 ++++++++++++++++
 util/main-loop.c                    |  5 +++
 util/qemu-coroutine.c               | 64 ++++++++++++++++-------------
 util/meson.build                    |  1 +
 7 files changed, 125 insertions(+), 29 deletions(-)
 create mode 100644 include/qemu/coroutine-pool-timer.h
 create mode 100644 util/coroutine-pool-timer.c

--
2.31.1


The following changes since commit fd87be1dada5672f877e03c2ca8504458292c479:

  Merge tag 'accel-20240426' of https://github.com/philmd/qemu into staging (2024-04-26 15:28:13 -0700)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to d1c4580662bf75bf6875bb5e1ad446b300816ac7:

  hw/ufs: Fix buffer overflow bug (2024-04-29 09:33:06 -0400)

----------------------------------------------------------------
Pull request

Buffer overflow fix for Universal Flash Storage (UFS) emulation.

----------------------------------------------------------------

Jeuk Kim (1):
  hw/ufs: Fix buffer overflow bug

 hw/ufs/ufs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

--
2.44.0

It was reported that enabling SafeStack reduces IOPS significantly
(>25%) with the following fio benchmark on virtio-blk using an NVMe host
block device:

  # fio --rw=randrw --bs=4k --iodepth=64 --runtime=1m --direct=1 \
        --filename=/dev/vdb --name=job1 --ioengine=libaio --thread \
        --group_reporting --numjobs=16 --time_based \
        --output=/tmp/fio_result

Serge Guelton and I found that SafeStack is not really at fault, it just
increases the cost of coroutine creation. This fio workload exhausts the
coroutine pool and coroutine creation becomes a bottleneck. Previous
work by Honghao Wang also pointed to excessive coroutine creation.

Creating new coroutines is expensive due to allocating new stacks with
mmap(2) and mprotect(2). Currently there are thread-local and global
pools that recycle old Coroutine objects and their stacks but the
hardcoded size limit of 64 for thread-local pools and 128 for the global
pool is insufficient for the fio benchmark shown above.

This patch changes the coroutine pool algorithm to a simple thread-local
pool without a maximum size limit. Threads periodically shrink the pool
down to a size sufficient for the maximum observed number of coroutines.

The global pool is removed by this patch. It can help to hide the fact
that local pools are easily exhausted, but it doesn't fix the root
cause. I don't think there is a need for a global pool because QEMU's
threads are long-lived, so let's keep things simple.
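
In short, the heuristic implemented by qemu_coroutine_pool_periodic_resize()
in the patch below is: remember the peak number of coroutines in use since
the last timer tick, keep enough pooled coroutines around to reach that peak
again (but at least ALLOC_POOL_MIN), and free the rest. Condensed from
util/qemu-coroutine.c:

  /* Runs every 15 seconds in each thread that registers the pool timer */
  void qemu_coroutine_pool_periodic_resize(void)
  {
      /* Keep enough free coroutines to reach the recent peak again */
      unsigned pool_size_target =
          MAX(ALLOC_POOL_MIN, max_coroutines_this_slice) - num_coroutines;

      max_coroutines_this_slice = num_coroutines;

      while (alloc_pool_size > pool_size_target) {
          Coroutine *co = QSLIST_FIRST(&alloc_pool);

          QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
          qemu_coroutine_delete(co);
          alloc_pool_size--;
      }
  }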

Performance of the above fio benchmark is as follows:

       Before   After
IOPS      60k     97k

Memory usage varies over time as needed by the workload:

            VSZ (KB)             RSS (KB)
Before fio  4705248              843128
During fio  5747668 (+ ~100 MB)  849280
After fio   4694996 (- ~100 MB)  845184

This confirms that coroutines are indeed being freed when no longer
needed.

Thanks to Serge Guelton for working on identifying the bottleneck with
me!

Reported-by: Tingting Mao <timao@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20210913153524.1190696-1-stefanha@redhat.com
Cc: Serge Guelton <sguelton@redhat.com>
Cc: Honghao Wang <wanghonghao@bytedance.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Daniele Buono <dbuono@linux.vnet.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

[Moved atexit notifier to coroutine_delete() after GitLab CI reported a
memory leak in tests/unit/test-aio-multithread because the Coroutine
object was created in the main thread but runs in an IOThread (where
it's also deleted).
--Stefan]
---
 include/qemu/coroutine-pool-timer.h | 36 ++++++++++++++++
 include/qemu/coroutine.h            |  7 ++++
 iothread.c                          |  6 +++
 util/coroutine-pool-timer.c         | 35 ++++++++++++++++
 util/main-loop.c                    |  5 +++
 util/qemu-coroutine.c               | 64 ++++++++++++++++-------------
 util/meson.build                    |  1 +
 7 files changed, 125 insertions(+), 29 deletions(-)
 create mode 100644 include/qemu/coroutine-pool-timer.h
 create mode 100644 util/coroutine-pool-timer.c

diff --git a/include/qemu/coroutine-pool-timer.h b/include/qemu/coroutine-pool-timer.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/qemu/coroutine-pool-timer.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU coroutine pool timer
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+#ifndef COROUTINE_POOL_TIMER_H
+#define COROUTINE_POOL_TIMER_H
+
+#include "qemu/osdep.h"
+#include "block/aio.h"
+
+/**
+ * A timer that periodically resizes this thread's coroutine pool, freeing
+ * memory if there are too many unused coroutines.
+ *
+ * Threads that make heavy use of coroutines should use this. Failure to resize
+ * the coroutine pool can lead to large amounts of memory sitting idle and
+ * never being used after the first time.
+ */
+typedef struct {
+    QEMUTimer *timer;
+} CoroutinePoolTimer;
+
+/* Call this before the thread runs the AioContext */
+void coroutine_pool_timer_init(CoroutinePoolTimer *pt, AioContext *ctx);
+
+/* Call this before the AioContext from the init function is destroyed */
+void coroutine_pool_timer_cleanup(CoroutinePoolTimer *pt);
+
+#endif /* COROUTINE_POOL_TIMER_H */
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -XXX,XX +XXX,XX @@ bool qemu_in_coroutine(void);
  */
 bool qemu_coroutine_entered(Coroutine *co);
 
+/**
+ * Optionally call this function periodically to shrink the thread-local pool
+ * down. Spiky workloads can create many coroutines and then never reach that
+ * level again. Shrinking the pool reclaims memory in this case.
+ */
+void qemu_coroutine_pool_periodic_resize(void);
+
 /**
  * Provides a mutex that can be used to synchronise coroutines
  */
diff --git a/iothread.c b/iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/iothread.c
+++ b/iothread.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/error-report.h"
 #include "qemu/rcu.h"
 #include "qemu/main-loop.h"
+#include "qemu/coroutine-pool-timer.h"
 
 typedef ObjectClass IOThreadClass;
 
@@ -XXX,XX +XXX,XX @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
 static void *iothread_run(void *opaque)
 {
     IOThread *iothread = opaque;
+    CoroutinePoolTimer co_pool_timer;
 
     rcu_register_thread();
     /*
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
     iothread->thread_id = qemu_get_thread_id();
     qemu_sem_post(&iothread->init_done_sem);
 
+    coroutine_pool_timer_init(&co_pool_timer, iothread->ctx);
+
     while (iothread->running) {
         /*
          * Note: from functional-wise the g_main_loop_run() below can
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
         }
     }
 
+    coroutine_pool_timer_cleanup(&co_pool_timer);
+
     g_main_context_pop_thread_default(iothread->worker_context);
     rcu_unregister_thread();
     return NULL;
diff --git a/util/coroutine-pool-timer.c b/util/coroutine-pool-timer.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/util/coroutine-pool-timer.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * QEMU coroutine pool timer
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+#include "qemu/coroutine-pool-timer.h"
+
+static void coroutine_pool_timer_cb(void *opaque)
+{
+    CoroutinePoolTimer *pt = opaque;
+    int64_t expiry_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+                             15 * NANOSECONDS_PER_SECOND;
+
+    qemu_coroutine_pool_periodic_resize();
+    timer_mod(pt->timer, expiry_time_ns);
+}
+
+void coroutine_pool_timer_init(CoroutinePoolTimer *pt, AioContext *ctx)
+{
+    pt->timer = aio_timer_new(ctx, QEMU_CLOCK_REALTIME, SCALE_NS,
+                              coroutine_pool_timer_cb, pt);
+    coroutine_pool_timer_cb(pt);
+}
+
+void coroutine_pool_timer_cleanup(CoroutinePoolTimer *pt)
+{
+    timer_free(pt->timer);
+    pt->timer = NULL;
+}
diff --git a/util/main-loop.c b/util/main-loop.c
index XXXXXXX..XXXXXXX 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/compiler.h"
+#include "qemu/coroutine-pool-timer.h"
 
 #ifndef _WIN32
 #include <sys/wait.h>
@@ -XXX,XX +XXX,XX @@ static int qemu_signal_init(Error **errp)
 
 static AioContext *qemu_aio_context;
 static QEMUBH *qemu_notify_bh;
+static CoroutinePoolTimer main_loop_co_pool_timer;
 
 static void notify_event_cb(void *opaque)
 {
@@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp)
     g_source_set_name(src, "io-handler");
     g_source_attach(src, NULL);
     g_source_unref(src);
+
+    coroutine_pool_timer_init(&main_loop_co_pool_timer, qemu_aio_context);
+
     return 0;
 }
 
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -XXX,XX +XXX,XX @@
 #include "block/aio.h"
 
 enum {
-    POOL_BATCH_SIZE = 64,
+    /*
+     * qemu_coroutine_pool_periodic_resize() keeps at least this many
+     * coroutines around.
+     */
+    ALLOC_POOL_MIN = 64,
 };
 
+
 /** Free list to speed up creation */
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
 static __thread unsigned int alloc_pool_size;
+static __thread unsigned int num_coroutines;
+static __thread unsigned int max_coroutines_this_slice;
 static __thread Notifier coroutine_pool_cleanup_notifier;
 
 static void coroutine_pool_cleanup(Notifier *n, void *value)
@@ -XXX,XX +XXX,XX @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
 
     if (CONFIG_COROUTINE_POOL) {
         co = QSLIST_FIRST(&alloc_pool);
-        if (!co) {
-            if (release_pool_size > POOL_BATCH_SIZE) {
-                /* Slow path; a good place to register the destructor, too. */
-                if (!coroutine_pool_cleanup_notifier.notify) {
-                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
-                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
-                }
-
-                /* This is not exact; there could be a little skew between
-                 * release_pool_size and the actual size of release_pool. But
-                 * it is just a heuristic, it does not need to be perfect.
-                 */
-                alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
-                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
-                co = QSLIST_FIRST(&alloc_pool);
-            }
-        }
         if (co) {
             QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
             alloc_pool_size--;
         }
+
+        num_coroutines++;
+        if (num_coroutines > max_coroutines_this_slice) {
+            max_coroutines_this_slice = num_coroutines;
+        }
     }
 
     if (!co) {
@@ -XXX,XX +XXX,XX @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < POOL_BATCH_SIZE * 2) {
-            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
-            qatomic_inc(&release_pool_size);
-            return;
-        }
-        if (alloc_pool_size < POOL_BATCH_SIZE) {
-            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
-            alloc_pool_size++;
-            return;
+        if (!coroutine_pool_cleanup_notifier.notify) {
+            coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+            qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
         }
+
+        num_coroutines--;
+        QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+        alloc_pool_size++;
+        return;
     }
 
     qemu_coroutine_delete(co);
 }
 
+void qemu_coroutine_pool_periodic_resize(void)
+{
+    unsigned pool_size_target =
+        MAX(ALLOC_POOL_MIN, max_coroutines_this_slice) - num_coroutines;
+    max_coroutines_this_slice = num_coroutines;
+
+    while (alloc_pool_size > pool_size_target) {
+        Coroutine *co = QSLIST_FIRST(&alloc_pool);
+        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+        qemu_coroutine_delete(co);
+        alloc_pool_size--;
+    }
+}
+
 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
 {
     QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
diff --git a/util/meson.build b/util/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -XXX,XX +XXX,XX @@ if have_block
   util_ss.add(files('buffer.c'))
   util_ss.add(files('bufferiszero.c'))
   util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
+  util_ss.add(files('coroutine-pool-timer.c'))
   util_ss.add(files('hbitmap.c'))
   util_ss.add(files('hexdump.c'))
   util_ss.add(files('iova-tree.c'))
--
2.31.1


From: Jeuk Kim <jeuk20.kim@samsung.com>

This fixes a buffer overflow vulnerability in the UFS device.
The bug was detected by sanitizers.

You can reproduce it by:

cat << EOF |\
qemu-system-x86_64 \
-display none -machine accel=qtest -m 512M -M q35 -nodefaults -drive \
file=null-co://,if=none,id=disk0 -device ufs,id=ufs_bus -device \
ufs-lu,drive=disk0,bus=ufs_bus -qtest stdio
outl 0xcf8 0x80000810
outl 0xcfc 0xe0000000
outl 0xcf8 0x80000804
outw 0xcfc 0x06
write 0xe0000058 0x1 0xa7
write 0xa 0x1 0x50
EOF
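
In brief (assuming the standard 0xcf8/0xcfc PCI configuration mechanism and
the UFSHCI register layout): the outl/outw commands map the UFS controller's
BAR0 at 0xe0000000 and enable memory space and bus mastering, and the two
write commands then poke the controller's MMIO region and guest RAM so that
the device fetches a transfer request whose guest-controlled data segment
length pushes copy_size past the UPIU buffer, which is exactly what the
patch below clamps.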

Resolves: #2299
Fixes: 329f16624499 ("hw/ufs: Support for Query Transfer Requests")
Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Jeuk Kim <jeuk20.kim@samsung.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <f2c8aeb1afefcda92054c448b21fc59cdd99db30.1714360640.git.jeuk20.kim@samsung.com>
---
 hw/ufs/ufs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -XXX,XX +XXX,XX @@ static MemTxResult ufs_dma_read_req_upiu(UfsRequest *req)
     copy_size = sizeof(UtpUpiuHeader) + UFS_TRANSACTION_SPECIFIC_FIELD_SIZE +
                 data_segment_length;
 
+    if (copy_size > sizeof(req->req_upiu)) {
+        copy_size = sizeof(req->req_upiu);
+    }
+
     ret = ufs_addr_read(u, req_upiu_base_addr, &req->req_upiu, copy_size);
     if (ret) {
         trace_ufs_err_dma_read_req_upiu(req->slot, req_upiu_base_addr);
@@ -XXX,XX +XXX,XX @@ static MemTxResult ufs_dma_write_rsp_upiu(UfsRequest *req)
         copy_size = rsp_upiu_byte_len;
     }
 
+    if (copy_size > sizeof(req->rsp_upiu)) {
+        copy_size = sizeof(req->rsp_upiu);
+    }
+
     ret = ufs_addr_write(u, rsp_upiu_base_addr, &req->rsp_upiu, copy_size);
     if (ret) {
         trace_ufs_err_dma_write_rsp_upiu(req->slot, rsp_upiu_base_addr);
--
2.44.0