The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:

  configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:

  virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400)

----------------------------------------------------------------
Pull request
----------------------------------------------------------------

Stefan Hajnoczi (1):
  virtio-blk: fix host notifier issues during dataplane start/stop

 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

--
2.40.1

The main loop thread can consume 100% CPU when using --device
virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
are stale and remain registered with the AioContext because of bugs in
the virtio-blk dataplane start/stop code.

The problem is that the dataplane start/stop code involves drain
operations, which call virtio_blk_drained_begin() and
virtio_blk_drained_end() at points where the host notifier is not
operational:
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
  vblk->dataplane_started has been set to true but the host notifier has
  not been attached yet.
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
  drain after the host notifier has already been detached but with
  vblk->dataplane_started still set to true.

I would like to simplify ->ioeventfd_start/stop() to avoid interactions
with drain entirely, but couldn't find a way to do that. Instead, this
patch accepts the fragile nature of the code and reorders it so that
vblk->dataplane_started is false during drain operations. This way the
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
touch the host notifier. The result is that
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
complete control over the host notifier and stale file descriptors are
no longer left in the AioContext.

This patch fixes the 100% CPU consumption in the main loop thread and
correctly moves host notifier processing to the IOThread.

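As an illustration of the ordering this relies on, here is a minimal,
self-contained sketch. It uses C11 atomics in place of QEMU's smp_wmb()
and the barrier inside aio_notify_accept(), and all names below are
hypothetical, not QEMU code:

    #include <stdatomic.h>
    #include <stdbool.h>

    static _Atomic bool dataplane_started;

    /* Start side: publish state before signalling the IOThread. The
     * release store plays the role of smp_wmb() in the patch. */
    static void start_side(void)
    {
        /* ... attach host notifiers, set up virtqueue state ... */
        atomic_store_explicit(&dataplane_started, true,
                              memory_order_release);
        /* ... kick the event notifier so the IOThread wakes up ... */
    }

    /* IOThread side: the acquire load pairs with the release store
     * above, mirroring aio_notify_accept() on the read side. */
    static void handle_notifier(void)
    {
        if (!atomic_load_explicit(&dataplane_started,
                                  memory_order_acquire)) {
            return; /* not started yet: treat as a spurious wakeup */
        }
        /* ... process the virtqueue ... */
    }
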
Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Lukas Doktor <ldoktor@redhat.com>
Message-id: 20230704151527.193586-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)

     memory_region_transaction_commit();

-    /*
-     * These fields are visible to the IOThread so we rely on implicit barriers
-     * in aio_context_acquire() on the write side and aio_notify_accept() on
-     * the read side.
-     */
-    s->starting = false;
-    vblk->dataplane_started = true;
     trace_virtio_blk_data_plane_start(s);

     old_context = blk_get_aio_context(s->conf->conf.blk);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
         event_notifier_set(virtio_queue_get_host_notifier(vq));
     }

+    /*
+     * These fields must be visible to the IOThread when it processes the
+     * virtqueue, otherwise it will think dataplane has not started yet.
+     *
+     * Make sure ->dataplane_started is false when blk_set_aio_context() is
+     * called above so that draining does not cause the host notifier to be
+     * detached/attached prematurely.
+     */
+    s->starting = false;
+    vblk->dataplane_started = true;
+    smp_wmb(); /* paired with aio_notify_accept() on the read side */
+
     /* Get this show started by hooking up our callbacks */
     if (!blk_in_drain(s->conf->conf.blk)) {
         aio_context_acquire(s->ctx);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
  fail_guest_notifiers:
     vblk->dataplane_disabled = true;
     s->starting = false;
-    vblk->dataplane_started = true;
     return -ENOSYS;
 }

@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
         aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
     }

+    /*
+     * Batch all the host notifiers in a single transaction to avoid
+     * quadratic time complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
+    }
+
+    /*
+     * The transaction expects the ioeventfds to be open when it
+     * commits. Do it now, before the cleanup loop.
+     */
+    memory_region_transaction_commit();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
+    }
+
+    /*
+     * Set ->dataplane_started to false before draining so that host notifiers
+     * are not detached/attached anymore.
+     */
+    vblk->dataplane_started = false;
+
     aio_context_acquire(s->ctx);

     /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)

     aio_context_release(s->ctx);

-    /*
-     * Batch all the host notifiers in a single transaction to avoid
-     * quadratic time complexity in address_space_update_ioeventfds().
-     */
-    memory_region_transaction_begin();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
-    }
-
-    /*
-     * The transaction expects the ioeventfds to be open when it
-     * commits. Do it now, before the cleanup loop.
-     */
-    memory_region_transaction_commit();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
-    }
-
     qemu_bh_cancel(s->bh);
     notify_guest_bh(s); /* final chance to notify guest */

     /* Clean up guest notifier (irq) */
     k->set_guest_notifiers(qbus->parent, nvqs, false);

-    vblk->dataplane_started = false;
     s->stopping = false;
 }
--
2.40.1

From: Stefano Garzarella <sgarzare@redhat.com>

Changes in preparation for the next patches, where we add a new
parameter not related to the poll mechanism.

Let's add two new generic functions (iothread_set_param and
iothread_get_param) that we use to set and get IOThread
parameters.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20210721094211.69853-2-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 iothread.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/iothread.c b/iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/iothread.c
+++ b/iothread.c
@@ -XXX,XX +XXX,XX @@ static PollParamInfo poll_shrink_info = {
     "poll-shrink", offsetof(IOThread, poll_shrink),
 };

-static void iothread_get_poll_param(Object *obj, Visitor *v,
+static void iothread_get_param(Object *obj, Visitor *v,
         const char *name, void *opaque, Error **errp)
 {
     IOThread *iothread = IOTHREAD(obj);
@@ -XXX,XX +XXX,XX @@ static void iothread_get_poll_param(Object *obj, Visitor *v,
     visit_type_int64(v, name, field, errp);
 }

-static void iothread_set_poll_param(Object *obj, Visitor *v,
+static bool iothread_set_param(Object *obj, Visitor *v,
         const char *name, void *opaque, Error **errp)
 {
     IOThread *iothread = IOTHREAD(obj);
@@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
     int64_t value;

     if (!visit_type_int64(v, name, &value, errp)) {
-        return;
+        return false;
     }

     if (value < 0) {
         error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
                    info->name, INT64_MAX);
-        return;
+        return false;
     }

     *field = value;

+    return true;
+}
+
+static void iothread_get_poll_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+
+    iothread_get_param(obj, v, name, opaque, errp);
+}
+
+static void iothread_set_poll_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    IOThread *iothread = IOTHREAD(obj);
+
+    if (!iothread_set_param(obj, v, name, opaque, errp)) {
+        return;
+    }
+
     if (iothread->ctx) {
         aio_context_set_poll_params(iothread->ctx,
                                     iothread->poll_max_ns,
--
2.31.1

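The iothread_set_param()/iothread_get_param() split above can be
sketched in isolation as follows. This is a minimal stand-in with
hypothetical names, not the QEMU code (which goes through the Visitor
and Error APIs):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Generic step: validate and store a value, reporting success so
     * the caller knows whether to apply side effects. */
    static bool set_param(int64_t *field, int64_t value)
    {
        if (value < 0) {
            fprintf(stderr, "value must be non-negative\n");
            return false;
        }
        *field = value;
        return true;
    }

    /* Poll-specific setter: reuse the generic step, then apply the
     * poll-related side effect only if the store succeeded. */
    static void set_poll_param(int64_t *poll_max_ns, int64_t value,
                               bool ctx_active)
    {
        if (!set_param(poll_max_ns, value)) {
            return;
        }
        if (ctx_active) {
            /* ... push the new value into the running event loop ... */
        }
    }
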
Deleted patch

From: Stefano Garzarella <sgarzare@redhat.com>

The `aio-max-batch` parameter will be propagated to AIO engines
and it will be used to control the maximum number of queued requests.

When the number of queued requests reaches `aio-max-batch`, the engine
invokes the system call that forwards the requests to the kernel.

This parameter allows us to control the maximum batch size and so
reduce the latency that requests can accumulate while queued in the
AIO engine.

If `aio-max-batch` is equal to 0 (the default value), the AIO engine
will use its default maximum batch size value.

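As a usage sketch (the values here are arbitrary examples, not tuning
advice), the parameter is set when the IOThread is created and can be
changed at run time with qom-set, mirroring the documented usage of the
polling parameters:

    qemu-system-x86_64 ... -object iothread,id=iothread1,aio-max-batch=16

    (qemu) qom-set /objects/iothread1 aio-max-batch 32
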
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20210721094211.69853-3-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/misc.json            |  6 ++++-
 qapi/qom.json             |  7 ++++-
 include/block/aio.h       | 12 +++++++++
 include/sysemu/iothread.h |  3 +++
 iothread.c                | 55 +++++++++++++++++++++++++++++++++++----
 monitor/hmp-cmds.c        |  2 ++
 util/aio-posix.c          | 12 +++++++
 util/aio-win32.c          |  5 ++++
 util/async.c              |  2 ++
 qemu-options.hx           |  8 ++++--
 10 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/qapi/misc.json b/qapi/misc.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -XXX,XX +XXX,XX @@
 # @poll-shrink: how many ns will be removed from polling time, 0 means that
 #               it's not configured (since 2.9)
 #
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
+#                 0 means that the engine will use its default (since 6.1)
+#
 # Since: 2.0
 ##
 { 'struct': 'IOThreadInfo',
@@ -XXX,XX +XXX,XX @@
            'thread-id': 'int',
            'poll-max-ns': 'int',
            'poll-grow': 'int',
-           'poll-shrink': 'int' } }
+           'poll-shrink': 'int',
+           'aio-max-batch': 'int' } }

 ##
 # @query-iothreads:
diff --git a/qapi/qom.json b/qapi/qom.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -XXX,XX +XXX,XX @@
 #                algorithm detects it is spending too long polling without
 #                encountering events. 0 selects a default behaviour (default: 0)
 #
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
+#                 0 means that the engine will use its default
+#                 (default:0, since 6.1)
+#
 # Since: 2.0
 ##
 { 'struct': 'IothreadProperties',
   'data': { '*poll-max-ns': 'int',
             '*poll-grow': 'int',
-            '*poll-shrink': 'int' } }
+            '*poll-shrink': 'int',
+            '*aio-max-batch': 'int' } }

 ##
 # @MemoryBackendProperties:
diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct AioContext {
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */

+    /* AIO engine parameters */
+    int64_t aio_max_batch;  /* maximum number of requests in a batch */
+
     /*
      * List of handlers participating in userspace polling. Protected by
      * ctx->list_lock. Iterated and modified mostly by the event loop thread
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                  int64_t grow, int64_t shrink,
                                  Error **errp);

+/**
+ * aio_context_set_aio_params:
+ * @ctx: the aio context
+ * @max_batch: maximum number of requests in a batch, 0 means that the
+ *             engine will use its default
+ */
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp);
+
 #endif
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -XXX,XX +XXX,XX @@ struct IOThread {
     int64_t poll_max_ns;
     int64_t poll_grow;
     int64_t poll_shrink;
+
+    /* AioContext AIO engine parameters */
+    int64_t aio_max_batch;
 };
 typedef struct IOThread IOThread;

diff --git a/iothread.c b/iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/iothread.c
+++ b/iothread.c
@@ -XXX,XX +XXX,XX @@ static void iothread_init_gcontext(IOThread *iothread)
     iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
 }

+static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
+{
+    ERRP_GUARD();
+
+    aio_context_set_poll_params(iothread->ctx,
+                                iothread->poll_max_ns,
+                                iothread->poll_grow,
+                                iothread->poll_shrink,
+                                errp);
+    if (*errp) {
+        return;
+    }
+
+    aio_context_set_aio_params(iothread->ctx,
+                               iothread->aio_max_batch,
+                               errp);
+}
+
 static void iothread_complete(UserCreatable *obj, Error **errp)
 {
     Error *local_error = NULL;
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
      */
     iothread_init_gcontext(iothread);

-    aio_context_set_poll_params(iothread->ctx,
-                                iothread->poll_max_ns,
-                                iothread->poll_grow,
-                                iothread->poll_shrink,
-                                &local_error);
+    iothread_set_aio_context_params(iothread, &local_error);
     if (local_error) {
         error_propagate(errp, local_error);
         aio_context_unref(iothread->ctx);
@@ -XXX,XX +XXX,XX @@ static PollParamInfo poll_grow_info = {
 static PollParamInfo poll_shrink_info = {
     "poll-shrink", offsetof(IOThread, poll_shrink),
 };
+static PollParamInfo aio_max_batch_info = {
+    "aio-max-batch", offsetof(IOThread, aio_max_batch),
+};

 static void iothread_get_param(Object *obj, Visitor *v,
         const char *name, void *opaque, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
     }
 }

+static void iothread_get_aio_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+
+    iothread_get_param(obj, v, name, opaque, errp);
+}
+
+static void iothread_set_aio_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    IOThread *iothread = IOTHREAD(obj);
+
+    if (!iothread_set_param(obj, v, name, opaque, errp)) {
+        return;
+    }
+
+    if (iothread->ctx) {
+        aio_context_set_aio_params(iothread->ctx,
+                                   iothread->aio_max_batch,
+                                   errp);
+    }
+}
+
 static void iothread_class_init(ObjectClass *klass, void *class_data)
 {
     UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
@@ -XXX,XX +XXX,XX @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
                               iothread_get_poll_param,
                               iothread_set_poll_param,
                               NULL, &poll_shrink_info);
+    object_class_property_add(klass, "aio-max-batch", "int",
+                              iothread_get_aio_param,
+                              iothread_set_aio_param,
+                              NULL, &aio_max_batch_info);
 }

 static const TypeInfo iothread_info = {
@@ -XXX,XX +XXX,XX @@ static int query_one_iothread(Object *object, void *opaque)
     info->poll_max_ns = iothread->poll_max_ns;
     info->poll_grow = iothread->poll_grow;
     info->poll_shrink = iothread->poll_shrink;
+    info->aio_max_batch = iothread->aio_max_batch;

     QAPI_LIST_APPEND(*tail, info);
     return 0;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -XXX,XX +XXX,XX @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "  poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
         monitor_printf(mon, "  poll-grow=%" PRId64 "\n", value->poll_grow);
         monitor_printf(mon, "  poll-shrink=%" PRId64 "\n", value->poll_shrink);
+        monitor_printf(mon, "  aio-max-batch=%" PRId64 "\n",
+                       value->aio_max_batch);
     }

     qapi_free_IOThreadInfoList(info_list);
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,

     aio_notify(ctx);
 }
+
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp)
+{
+    /*
+     * No thread synchronization here, it doesn't matter if an incorrect value
+     * is used once.
+     */
+    ctx->aio_max_batch = max_batch;
+
+    aio_notify(ctx);
+}
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
         error_setg(errp, "AioContext polling is not implemented on Windows");
     }
 }
+
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp)
+{
+}
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;

+    ctx->aio_max_batch = 0;
+
     return ctx;
 fail:
     g_source_destroy(&ctx->source);
diff --git a/qemu-options.hx b/qemu-options.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -XXX,XX +XXX,XX @@ SRST

             CN=laptop.example.com,O=Example Home,L=London,ST=London,C=GB

-    ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink``
+    ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink,aio-max-batch=aio-max-batch``
         Creates a dedicated event loop thread that devices can be
         assigned to. This is known as an IOThread. By default device
         emulation happens in vCPU threads or the main event loop thread.
@@ -XXX,XX +XXX,XX @@ SRST
         the polling time when the algorithm detects it is spending too
         long polling without encountering events.

-        The polling parameters can be modified at run-time using the
+        The ``aio-max-batch`` parameter is the maximum number of requests
+        in a batch for the AIO engine, 0 means that the engine will use
+        its default.
+
+        The IOThread parameters can be modified at run-time using the
         ``qom-set`` command (where ``iothread1`` is the IOThread's
         ``id``):

--
2.31.1

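After this patch the new value is also visible from the monitor; an
abridged `info iothreads` session could look like the following (the
values are illustrative, and lines not added by this patch are elided):

    (qemu) info iothreads
    iothread1:
      ...
      poll-max-ns=32768
      poll-grow=0
      poll-shrink=0
      aio-max-batch=0
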
Deleted patch

From: Stefano Garzarella <sgarzare@redhat.com>

When there are multiple queues attached to the same AIO context, some
requests may experience high latency, since in the worst case the AIO
engine queue is only flushed when it is full (MAX_EVENTS) or there are
no more queues plugged.

Commit 2558cb8dd4 ("linux-aio: increasing MAX_EVENTS to a larger
hardcoded value") changed MAX_EVENTS from 128 to 1024 to increase the
number of in-flight requests. But this change also increased the
potential maximum batch to 1024 elements.

When there is a single queue attached to the AIO context, the issue is
mitigated by laio_io_unplug(), which flushes the queue every time it is
invoked, since no other queue can be plugged.

Let's use the new `aio-max-batch` IOThread parameter to mitigate this
issue, limiting the number of requests in a batch.

We also define a default value (32): this value was obtained by running
some benchmarks and represents a good tradeoff between the latency a
queued request can accumulate and the cost of the io_submit(2) system
call.

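A self-contained sketch of the resulting submission policy (simplified;
MIN_NON_ZERO mirrors QEMU's helper of the same name, and
effective_batch() is a hypothetical stand-in for the logic added to
laio_do_submit() below):

    #include <stdint.h>

    #define MAX_EVENTS        1024 /* kernel AIO ring size */
    #define DEFAULT_MAX_BATCH 32   /* default batch limit */

    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    /* Like QEMU's MIN_NON_ZERO(): treat 0 as "no limit" on either side. */
    #define MIN_NON_ZERO(a, b) \
        ((a) == 0 ? (b) : ((b) == 0 ? (a) : MIN((a), (b))))

    /*
     * Effective batch size for the next submission: the configured
     * aio-max-batch (0 selects the default), further limited by how
     * many events are still free in the kernel ring.
     */
    static int64_t effective_batch(int64_t aio_max_batch, unsigned in_flight)
    {
        int64_t max_batch = aio_max_batch ? aio_max_batch
                                          : DEFAULT_MAX_BATCH;
        return MIN_NON_ZERO((int64_t)(MAX_EVENTS - in_flight), max_batch);
    }
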
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20210721094211.69853-4-sgarzare@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/linux-aio.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@
  */
 #define MAX_EVENTS 1024

+/* Maximum number of requests in a batch. (default value) */
+#define DEFAULT_MAX_BATCH 32
+
 struct qemu_laiocb {
     Coroutine *co;
     LinuxAioState *ctx;
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
     QEMUIOVector *qiov = laiocb->qiov;
+    int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
+
+    /* limit the batch with the number of available events */
+    max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);

     switch (type) {
     case QEMU_AIO_WRITE:
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     s->io_q.in_queue++;
     if (!s->io_q.blocked &&
         (!s->io_q.plugged ||
-         s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
+         s->io_q.in_queue >= max_batch)) {
         ioq_submit(s);
     }

--
2.31.1