The following changes since commit ddc27d2ad9361a81c2b3800d14143bf420dae172:

  Merge tag 'pull-request-2024-03-18' of https://gitlab.com/thuth/qemu into staging (2024-03-19 10:25:25 +0000)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 86a637e48104ae74d8be53bed6441ce32be33433:

  coroutine: cap per-thread local pool size (2024-03-19 10:49:31 -0400)

----------------------------------------------------------------
Pull request

This fix solves the "failed to set up stack guard page" error that has been
reported on Linux hosts where the QEMU coroutine pool exceeds the
vm.max_map_count limit.
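
The limit on a given host can be checked with, for example:

  $ sysctl vm.max_map_count
  vm.max_map_count = 65530

(65530 is the Linux default; each pooled coroutine consumes two virtual
memory areas, so tens of thousands of pooled coroutines can exhaust it.)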

----------------------------------------------------------------

Stefan Hajnoczi (1):
  coroutine: cap per-thread local pool size

 util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++---------
 1 file changed, 223 insertions(+), 59 deletions(-)

-- 
2.44.0

The coroutine pool implementation can hit the Linux vm.max_map_count
limit, causing QEMU to abort with "failed to allocate memory for stack"
or "failed to set up stack guard page" during coroutine creation.

This happens because per-thread pools can grow to tens of thousands of
coroutines. Each coroutine causes 2 virtual memory areas to be created.
Eventually vm.max_map_count is reached and memory-related syscalls fail.
The per-thread pool sizes are non-uniform and depend on past coroutine
usage in each thread, so it's possible for one thread to have a large
pool while another thread's pool is empty.
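
As a rough worked example (assuming the Linux default vm.max_map_count
of 65530): at 2 VMAs per coroutine, a pool of 32768 coroutines by itself
requires 65536 VMAs, already over the limit before counting the mappings
for guest RAM, shared libraries, and the rest of the process.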

Switch to a new coroutine pool implementation with a global pool that
grows to a maximum number of coroutines and per-thread local pools that
are capped at a hardcoded small number of coroutines.

This approach does not leave large numbers of coroutines pooled in a
thread that may not use them again. In order to perform well it
amortizes the cost of global pool accesses by working in batches of
coroutines instead of individual coroutines.

The global pool is a list. Threads donate batches of coroutines to it
when they have too many and take batches from it when they have too few:

  .-----------------------------------.
  | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
  `-----------------------------------'

Each thread has up to 2 batches of coroutines:

  .-------------------.
  | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
  `-------------------'
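
(With COROUTINE_POOL_BATCH_MAX_SIZE set to 128 below, a thread therefore
caches at most 2 * 128 = 256 coroutines, no matter how many it used in
the past.)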

The goal of this change is to reduce the excessive number of pooled
coroutines that cause QEMU to abort when vm.max_map_count is reached
without losing the performance of an adequately sized coroutine pool.

Here are virtio-blk disk I/O benchmark results:

RW       BLKSIZE IODEPTH    OLD    NEW  CHANGE
randread    4k      1     113725 117451  +3.3%
randread    4k      8     192968 198510  +2.9%
randread    4k     16     207138 209429  +1.1%
randread    4k     32     212399 215145  +1.3%
randread    4k     64     218319 221277  +1.4%
randread  128k      1      17587  17535  -0.3%
randread  128k      8      17614  17616  +0.0%
randread  128k     16      17608  17609  +0.0%
randread  128k     32      17552  17553  +0.0%
randread  128k     64      17484  17484  +0.0%

See files/{fio.sh,test.xml.j2} for the benchmark configuration:
https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing

Buglink: https://issues.redhat.com/browse/RHEL-28947
Reported-by: Sanjay Rao <srao@redhat.com>
Reported-by: Boaz Ben Shabat <bbenshab@redhat.com>
Reported-by: Joe Mario <jmario@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20240318183429.1039340-1-stefanha@redhat.com>
---
 util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++---------
 1 file changed, 223 insertions(+), 59 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/atomic.h"
 #include "qemu/coroutine_int.h"
 #include "qemu/coroutine-tls.h"
+#include "qemu/cutils.h"
 #include "block/aio.h"
 
-/**
- * The minimal batch size is always 64, coroutines from the release_pool are
- * reused as soon as there are 64 coroutines in it. The maximum pool size starts
- * with 64 and is increased on demand so that coroutines are not deleted even if
- * they are not immediately reused.
- */
 enum {
-    POOL_MIN_BATCH_SIZE = 64,
-    POOL_INITIAL_MAX_SIZE = 64,
+    COROUTINE_POOL_BATCH_MAX_SIZE = 128,
 };
 
-/** Free list to speed up creation */
-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
-static unsigned int release_pool_size;
+/*
+ * Coroutine creation and deletion is expensive so a pool of unused coroutines
+ * is kept as a cache. When the pool has coroutines available, they are
+ * recycled instead of creating new ones from scratch. Coroutines are added to
+ * the pool upon termination.
+ *
+ * The pool is global but each thread maintains a small local pool to avoid
+ * global pool contention. Threads fetch and return batches of coroutines from
+ * the global pool to maintain their local pool. The local pool holds up to two
+ * batches whereas the maximum size of the global pool is controlled by the
+ * qemu_coroutine_inc_pool_size() API.
+ *
+ * .-----------------------------------.
+ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
+ * `-----------------------------------'
+ *
+ * .-------------------.
+ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
+ * `-------------------'
+ */
+typedef struct CoroutinePoolBatch {
+    /* Batches are kept in a list */
+    QSLIST_ENTRY(CoroutinePoolBatch) next;
 
-typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
-QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
-QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
-QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
+    /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
+    QSLIST_HEAD(, Coroutine) list;
+    unsigned int size;
+} CoroutinePoolBatch;
 
-static void coroutine_pool_cleanup(Notifier *n, void *value)
+typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
+
+/* Host operating system limit on number of pooled coroutines */
+static unsigned int global_pool_hard_max_size;
+
+static QemuMutex global_pool_lock; /* protects the following variables */
+static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
+static unsigned int global_pool_size;
+static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
+
+QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
+
+static CoroutinePoolBatch *coroutine_pool_batch_new(void)
+{
+    CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
+
+    QSLIST_INIT(&batch->list);
+    batch->size = 0;
+    return batch;
+}
+
+static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
 {
     Coroutine *co;
     Coroutine *tmp;
-    CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
 
-    QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
-        QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
+    QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(&batch->list, pool_next);
         qemu_coroutine_delete(co);
     }
+    g_free(batch);
+}
+
+static void local_pool_cleanup(Notifier *n, void *value)
+{
+    CoroutinePool *local_pool = get_ptr_local_pool();
+    CoroutinePoolBatch *batch;
+    CoroutinePoolBatch *tmp;
+
+    QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
+        QSLIST_REMOVE_HEAD(local_pool, next);
+        coroutine_pool_batch_delete(batch);
+    }
+}
+
+/* Ensure the atexit notifier is registered */
+static void local_pool_cleanup_init_once(void)
+{
+    Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
+    if (!notifier->notify) {
+        notifier->notify = local_pool_cleanup;
+        qemu_thread_atexit_add(notifier);
+    }
+}
+
+/* Helper to get the next unused coroutine from the local pool */
+static Coroutine *coroutine_pool_get_local(void)
+{
+    CoroutinePool *local_pool = get_ptr_local_pool();
+    CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
+    Coroutine *co;
+
+    if (unlikely(!batch)) {
+        return NULL;
+    }
+
+    co = QSLIST_FIRST(&batch->list);
+    QSLIST_REMOVE_HEAD(&batch->list, pool_next);
+    batch->size--;
+
+    if (batch->size == 0) {
+        QSLIST_REMOVE_HEAD(local_pool, next);
+        coroutine_pool_batch_delete(batch);
+    }
+    return co;
+}
+
+/* Get the next batch from the global pool */
+static void coroutine_pool_refill_local(void)
+{
+    CoroutinePool *local_pool = get_ptr_local_pool();
+    CoroutinePoolBatch *batch;
+
+    WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
+        batch = QSLIST_FIRST(&global_pool);
+
+        if (batch) {
+            QSLIST_REMOVE_HEAD(&global_pool, next);
+            global_pool_size -= batch->size;
+        }
+    }
+
+    if (batch) {
+        QSLIST_INSERT_HEAD(local_pool, batch, next);
+        local_pool_cleanup_init_once();
+    }
+}
+
+/* Add a batch of coroutines to the global pool */
+static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
+{
+    WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
+        unsigned int max = MIN(global_pool_max_size,
+                               global_pool_hard_max_size);
+
+        if (global_pool_size < max) {
+            QSLIST_INSERT_HEAD(&global_pool, batch, next);
+
+            /* Overshooting the max pool size is allowed */
+            global_pool_size += batch->size;
+            return;
+        }
+    }
+
+    /* The global pool was full, so throw away this batch */
+    coroutine_pool_batch_delete(batch);
+}
+
+/* Get the next unused coroutine from the pool or return NULL */
+static Coroutine *coroutine_pool_get(void)
+{
+    Coroutine *co;
+
+    co = coroutine_pool_get_local();
+    if (!co) {
+        coroutine_pool_refill_local();
+        co = coroutine_pool_get_local();
+    }
+    return co;
+}
+
+static void coroutine_pool_put(Coroutine *co)
+{
+    CoroutinePool *local_pool = get_ptr_local_pool();
+    CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
+
+    if (unlikely(!batch)) {
+        batch = coroutine_pool_batch_new();
+        QSLIST_INSERT_HEAD(local_pool, batch, next);
+        local_pool_cleanup_init_once();
+    }
+
+    if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
+        CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);
+
+        /* Is the local pool full? */
+        if (next) {
+            QSLIST_REMOVE_HEAD(local_pool, next);
+            coroutine_pool_put_global(batch);
+        }
+
+        batch = coroutine_pool_batch_new();
+        QSLIST_INSERT_HEAD(local_pool, batch, next);
+    }
+
+    QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
+    batch->size++;
 }
 
 Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
@@ -XXX,XX +XXX,XX @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     Coroutine *co = NULL;
 
     if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
-        CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
-
-        co = QSLIST_FIRST(alloc_pool);
-        if (!co) {
-            if (release_pool_size > POOL_MIN_BATCH_SIZE) {
-                /* Slow path; a good place to register the destructor, too. */
-                Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
-                if (!notifier->notify) {
-                    notifier->notify = coroutine_pool_cleanup;
-                    qemu_thread_atexit_add(notifier);
-                }
-
-                /* This is not exact; there could be a little skew between
-                 * release_pool_size and the actual size of release_pool. But
-                 * it is just a heuristic, it does not need to be perfect.
-                 */
-                set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
-                QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
-                co = QSLIST_FIRST(alloc_pool);
-            }
-        }
-        if (co) {
-            QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
-            set_alloc_pool_size(get_alloc_pool_size() - 1);
-        }
+        co = coroutine_pool_get();
     }
 
     if (!co) {
@@ -XXX,XX +XXX,XX @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;
 
     if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
-        if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
-            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
-            qatomic_inc(&release_pool_size);
-            return;
-        }
-        if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
-            QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
-            set_alloc_pool_size(get_alloc_pool_size() + 1);
-            return;
-        }
+        coroutine_pool_put(co);
+    } else {
+        qemu_coroutine_delete(co);
     }
-
-    qemu_coroutine_delete(co);
 }
 
 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
@@ -XXX,XX +XXX,XX @@ AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
 
 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
-    qatomic_add(&pool_max_size, additional_pool_size);
+    QEMU_LOCK_GUARD(&global_pool_lock);
+    global_pool_max_size += additional_pool_size;
 }
 
 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 {
-    qatomic_sub(&pool_max_size, removing_pool_size);
+    QEMU_LOCK_GUARD(&global_pool_lock);
+    global_pool_max_size -= removing_pool_size;
+}
+
+static unsigned int get_global_pool_hard_max_size(void)
+{
+#ifdef __linux__
+    g_autofree char *contents = NULL;
+    int max_map_count;
+
+    /*
+     * Linux processes can have up to max_map_count virtual memory areas
+     * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
+     * must limit the coroutine pool to a safe size to avoid running out of
+     * VMAs.
+     */
+    if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
+                            NULL) &&
+        qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
+        /*
+         * This is a conservative upper bound that avoids exceeding
+         * max_map_count. Leave half for non-coroutine users like library
+         * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so
+         * halve the amount again.
384 | + return max_map_count / 4; | ||
385 | + } | ||
386 | +#endif | ||
387 | + | ||
388 | + return UINT_MAX; | ||
389 | +} | ||
390 | + | ||
391 | +static void __attribute__((constructor)) qemu_coroutine_init(void) | ||
392 | +{ | ||
393 | + qemu_mutex_init(&global_pool_lock); | ||
394 | + global_pool_hard_max_size = get_global_pool_hard_max_size(); | ||
395 | } | ||
24 | -- | 396 | -- |
25 | 2.31.1 | 397 | 2.44.0 |
26 | diff view generated by jsdifflib |