1
The following changes since commit 171199f56f5f9bdf1e5d670d09ef1351d8f01bae:
1
The following changes since commit 9cf289af47bcfae5c75de37d8e5d6fd23705322c:
2
2
3
Merge remote-tracking branch 'remotes/alistair/tags/pull-riscv-to-apply-20200619-3' into staging (2020-06-22 14:45:25 +0100)
3
Merge tag 'qga-pull-request' of gitlab.com:marcandre.lureau/qemu into staging (2022-05-04 03:42:49 -0700)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 7838c67f22a81fcf669785cd6c0876438422071a:
9
for you to fetch changes up to bef2e050d6a7feb865854c65570c496ac5a8cf53:
10
10
11
block/nvme: support nested aio_poll() (2020-06-23 15:46:08 +0100)
11
util/event-loop-base: Introduce options to set the thread pool size (2022-05-04 17:02:19 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Pull request
15
15
16
Add new thread-pool-min/thread-pool-max parameters to control the thread pool
17
used for async I/O.
18
16
----------------------------------------------------------------
19
----------------------------------------------------------------
17
20
18
Daniele Buono (4):
21
Nicolas Saenz Julienne (3):
19
coroutine: support SafeStack in ucontext backend
22
Introduce event-loop-base abstract class
20
coroutine: add check for SafeStack in sigaltstack
23
util/main-loop: Introduce the main loop into QOM
21
configure: add flags to support SafeStack
24
util/event-loop-base: Introduce options to set the thread pool size
22
check-block: enable iotests with SafeStack
23
25
24
Stefan Hajnoczi (8):
26
qapi/qom.json | 43 ++++++++--
25
minikconf: explicitly set encoding to UTF-8
27
meson.build | 26 +++---
26
block/nvme: poll queues without q->lock
28
include/block/aio.h | 10 +++
27
block/nvme: drop tautologous assertion
29
include/block/thread-pool.h | 3 +
28
block/nvme: don't access CQE after moving cq.head
30
include/qemu/main-loop.h | 10 +++
29
block/nvme: switch to a NVMeRequest freelist
31
include/sysemu/event-loop-base.h | 41 +++++++++
30
block/nvme: clarify that free_req_queue is protected by q->lock
32
include/sysemu/iothread.h | 6 +-
31
block/nvme: keep BDRVNVMeState pointer in NVMeQueuePair
33
event-loop-base.c | 140 +++++++++++++++++++++++++++++++
32
block/nvme: support nested aio_poll()
34
iothread.c | 68 +++++----------
33
35
util/aio-posix.c | 1 +
34
configure | 73 ++++++++++++
36
util/async.c | 20 +++++
35
include/qemu/coroutine_int.h | 5 +
37
util/main-loop.c | 65 ++++++++++++++
36
block/nvme.c | 220 +++++++++++++++++++++++++----------
38
util/thread-pool.c | 55 +++++++++++-
37
util/coroutine-sigaltstack.c | 4 +
39
13 files changed, 419 insertions(+), 69 deletions(-)
38
util/coroutine-ucontext.c | 28 +++++
40
create mode 100644 include/sysemu/event-loop-base.h
39
block/trace-events | 2 +-
41
create mode 100644 event-loop-base.c
40
scripts/minikconf.py | 6 +-
41
tests/check-block.sh | 12 +-
42
8 files changed, 284 insertions(+), 66 deletions(-)
43
42
44
--
43
--
45
2.26.2
44
2.35.1
46
diff view generated by jsdifflib
Deleted patch
1
QEMU currently only has ASCII Kconfig files but Linux actually uses
2
UTF-8. Explicitly specify the encoding and that we're doing text file
3
I/O.
4
1
5
It's unclear whether or not QEMU will ever need Unicode in its Kconfig
6
files. If we start using the help text then it will become an issue
7
sooner or later. Make this change now for consistency with Linux
8
Kconfig.
9
10
Reported-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Message-id: 20200521153616.307100-1-stefanha@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
scripts/minikconf.py | 6 +++---
17
1 file changed, 3 insertions(+), 3 deletions(-)
18
19
diff --git a/scripts/minikconf.py b/scripts/minikconf.py
20
index XXXXXXX..XXXXXXX 100755
21
--- a/scripts/minikconf.py
22
+++ b/scripts/minikconf.py
23
@@ -XXX,XX +XXX,XX @@ class KconfigParser:
24
if incl_abs_fname in self.data.previously_included:
25
return
26
try:
27
- fp = open(incl_abs_fname, 'r')
28
+ fp = open(incl_abs_fname, 'rt', encoding='utf-8')
29
except IOError as e:
30
raise KconfigParserError(self,
31
'%s: %s' % (e.strerror, include))
32
@@ -XXX,XX +XXX,XX @@ if __name__ == '__main__':
33
parser.do_assignment(name, value == 'y')
34
external_vars.add(name[7:])
35
else:
36
- fp = open(arg, 'r')
37
+ fp = open(arg, 'rt', encoding='utf-8')
38
parser.parse_file(fp)
39
fp.close()
40
41
@@ -XXX,XX +XXX,XX @@ if __name__ == '__main__':
42
if key not in external_vars and config[key]:
43
print ('CONFIG_%s=y' % key)
44
45
- deps = open(argv[2], 'w')
46
+ deps = open(argv[2], 'wt', encoding='utf-8')
47
for fname in data.previously_included:
48
print ('%s: %s' % (argv[1], fname), file=deps)
49
deps.close()
50
--
51
2.26.2
52
diff view generated by jsdifflib
1
From: Daniele Buono <dbuono@linux.vnet.ibm.com>
1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
2
2
3
LLVM's SafeStack instrumentation does not yet support programs that make
3
Introduce the 'event-loop-base' abstract class; it'll hold the
4
use of the APIs in ucontext.h.
4
properties common to all event loops and provide the necessary hooks for
5
With the current implementation of coroutine-ucontext, the resulting
5
their creation and maintenance. Then have iothread inherit from it.
6
binary is incorrect, with different coroutines sharing the same unsafe
6
7
stack and producing undefined behavior at runtime.
7
EventLoopBaseClass is defined as user creatable and provides a hook for
8
This fix allocates an additional unsafe stack area for each coroutine,
8
its children to attach themselves to the user creatable class 'complete'
9
and sets the new unsafe stack pointer before calling swapcontext() in
9
function. It also provides an update_params() callback to propagate
10
qemu_coroutine_new.
10
property changes onto its children.
11
This is the only place where the pointer needs to be manually updated,
11
12
since sigsetjmp/siglongjmp are already instrumented by LLVM to properly
12
The new 'event-loop-base' class will live in the root directory. It is
13
support SafeStack.
13
built on its own using the 'link_whole' option (there are no direct
14
The additional stack is then freed in qemu_coroutine_delete.
14
function dependencies between the class and its children, it all happens
15
15
through 'constructor' magic). It also imposes new compilation
16
Signed-off-by: Daniele Buono <dbuono@linux.vnet.ibm.com>
16
dependencies:
17
Message-id: 20200529205122.714-2-dbuono@linux.vnet.ibm.com
17
18
qom <- event-loop-base <- blockdev (iothread.c)
19
20
And in subsequent patches:
21
22
qom <- event-loop-base <- qemuutil (util/main-loop.c)
23
24
All this forced some amount of reordering in meson.build:
25
26
- Moved qom build definition before qemuutil. Doing it the other way
27
around (i.e. moving qemuutil after qom) isn't possible as a lot of
28
core libraries that live in between the two depend on it.
29
30
- Process the 'hw' subdir earlier, as it introduces files into the
31
'qom' source set.
32
33
No functional changes intended.
34
35
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
36
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
37
Acked-by: Markus Armbruster <armbru@redhat.com>
38
Message-id: 20220425075723.20019-2-nsaenzju@redhat.com
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
39
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
40
---
20
include/qemu/coroutine_int.h | 5 +++++
41
qapi/qom.json | 22 +++++--
21
util/coroutine-ucontext.c | 28 ++++++++++++++++++++++++++++
42
meson.build | 23 ++++---
22
2 files changed, 33 insertions(+)
43
include/sysemu/event-loop-base.h | 36 +++++++++++
23
44
include/sysemu/iothread.h | 6 +-
24
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
45
event-loop-base.c | 104 +++++++++++++++++++++++++++++++
46
iothread.c | 65 ++++++-------------
47
6 files changed, 192 insertions(+), 64 deletions(-)
48
create mode 100644 include/sysemu/event-loop-base.h
49
create mode 100644 event-loop-base.c
50
51
diff --git a/qapi/qom.json b/qapi/qom.json
25
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
26
--- a/include/qemu/coroutine_int.h
53
--- a/qapi/qom.json
27
+++ b/include/qemu/coroutine_int.h
54
+++ b/qapi/qom.json
28
@@ -XXX,XX +XXX,XX @@
55
@@ -XXX,XX +XXX,XX @@
29
#include "qemu/queue.h"
56
'*repeat': 'bool',
30
#include "qemu/coroutine.h"
57
'*grab-toggle': 'GrabToggleKeys' } }
31
58
32
+#ifdef CONFIG_SAFESTACK
59
+##
33
+/* Pointer to the unsafe stack, defined by the compiler */
60
+# @EventLoopBaseProperties:
34
+extern __thread void *__safestack_unsafe_stack_ptr;
61
+#
62
+# Common properties for event loops
63
+#
64
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
65
+# 0 means that the engine will use its default.
66
+# (default: 0)
67
+#
68
+# Since: 7.1
69
+##
70
+{ 'struct': 'EventLoopBaseProperties',
71
+ 'data': { '*aio-max-batch': 'int' } }
72
+
73
##
74
# @IothreadProperties:
75
#
76
@@ -XXX,XX +XXX,XX @@
77
# algorithm detects it is spending too long polling without
78
# encountering events. 0 selects a default behaviour (default: 0)
79
#
80
-# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
81
-# 0 means that the engine will use its default
82
-# (default:0, since 6.1)
83
+# The @aio-max-batch option is available since 6.1.
84
#
85
# Since: 2.0
86
##
87
{ 'struct': 'IothreadProperties',
88
+ 'base': 'EventLoopBaseProperties',
89
'data': { '*poll-max-ns': 'int',
90
'*poll-grow': 'int',
91
- '*poll-shrink': 'int',
92
- '*aio-max-batch': 'int' } }
93
+ '*poll-shrink': 'int' } }
94
95
##
96
# @MemoryBackendProperties:
97
diff --git a/meson.build b/meson.build
98
index XXXXXXX..XXXXXXX 100644
99
--- a/meson.build
100
+++ b/meson.build
101
@@ -XXX,XX +XXX,XX @@ subdir('qom')
102
subdir('authz')
103
subdir('crypto')
104
subdir('ui')
105
+subdir('hw')
106
107
108
if enable_modules
109
@@ -XXX,XX +XXX,XX @@ if enable_modules
110
modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
111
endif
112
113
+qom_ss = qom_ss.apply(config_host, strict: false)
114
+libqom = static_library('qom', qom_ss.sources() + genh,
115
+ dependencies: [qom_ss.dependencies()],
116
+ name_suffix: 'fa')
117
+qom = declare_dependency(link_whole: libqom)
118
+
119
+event_loop_base = files('event-loop-base.c')
120
+event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh,
121
+ build_by_default: true)
122
+event_loop_base = declare_dependency(link_whole: event_loop_base,
123
+ dependencies: [qom])
124
+
125
stub_ss = stub_ss.apply(config_all, strict: false)
126
127
util_ss.add_all(trace_ss)
128
@@ -XXX,XX +XXX,XX @@ subdir('monitor')
129
subdir('net')
130
subdir('replay')
131
subdir('semihosting')
132
-subdir('hw')
133
subdir('tcg')
134
subdir('fpu')
135
subdir('accel')
136
@@ -XXX,XX +XXX,XX @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
137
capture: true,
138
command: [undefsym, nm, '@INPUT@'])
139
140
-qom_ss = qom_ss.apply(config_host, strict: false)
141
-libqom = static_library('qom', qom_ss.sources() + genh,
142
- dependencies: [qom_ss.dependencies()],
143
- name_suffix: 'fa')
144
-
145
-qom = declare_dependency(link_whole: libqom)
146
-
147
authz_ss = authz_ss.apply(config_host, strict: false)
148
libauthz = static_library('authz', authz_ss.sources() + genh,
149
dependencies: [authz_ss.dependencies()],
150
@@ -XXX,XX +XXX,XX @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
151
build_by_default: false)
152
153
blockdev = declare_dependency(link_whole: [libblockdev],
154
- dependencies: [block])
155
+ dependencies: [block, event_loop_base])
156
157
qmp_ss = qmp_ss.apply(config_host, strict: false)
158
libqmp = static_library('qmp', qmp_ss.sources() + genh,
159
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
160
new file mode 100644
161
index XXXXXXX..XXXXXXX
162
--- /dev/null
163
+++ b/include/sysemu/event-loop-base.h
164
@@ -XXX,XX +XXX,XX @@
165
+/*
166
+ * QEMU event-loop backend
167
+ *
168
+ * Copyright (C) 2022 Red Hat Inc
169
+ *
170
+ * Authors:
171
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
172
+ *
173
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
174
+ * See the COPYING file in the top-level directory.
175
+ */
176
+#ifndef QEMU_EVENT_LOOP_BASE_H
177
+#define QEMU_EVENT_LOOP_BASE_H
178
+
179
+#include "qom/object.h"
180
+#include "block/aio.h"
181
+#include "qemu/typedefs.h"
182
+
183
+#define TYPE_EVENT_LOOP_BASE "event-loop-base"
184
+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass,
185
+ EVENT_LOOP_BASE)
186
+
187
+struct EventLoopBaseClass {
188
+ ObjectClass parent_class;
189
+
190
+ void (*init)(EventLoopBase *base, Error **errp);
191
+ void (*update_params)(EventLoopBase *base, Error **errp);
192
+};
193
+
194
+struct EventLoopBase {
195
+ Object parent;
196
+
197
+ /* AioContext AIO engine parameters */
198
+ int64_t aio_max_batch;
199
+};
35
+#endif
200
+#endif
36
+
201
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
37
#define COROUTINE_STACK_SIZE (1 << 20)
38
39
typedef enum {
40
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
41
index XXXXXXX..XXXXXXX 100644
202
index XXXXXXX..XXXXXXX 100644
42
--- a/util/coroutine-ucontext.c
203
--- a/include/sysemu/iothread.h
43
+++ b/util/coroutine-ucontext.c
204
+++ b/include/sysemu/iothread.h
44
@@ -XXX,XX +XXX,XX @@ typedef struct {
205
@@ -XXX,XX +XXX,XX @@
45
Coroutine base;
206
#include "block/aio.h"
46
void *stack;
207
#include "qemu/thread.h"
47
size_t stack_size;
208
#include "qom/object.h"
48
+#ifdef CONFIG_SAFESTACK
209
+#include "sysemu/event-loop-base.h"
49
+ /* Need an unsafe stack for each coroutine */
210
50
+ void *unsafe_stack;
211
#define TYPE_IOTHREAD "iothread"
51
+ size_t unsafe_stack_size;
212
52
+#endif
213
struct IOThread {
53
sigjmp_buf env;
214
- Object parent_obj;
54
215
+ EventLoopBase parent_obj;
55
void *tsan_co_fiber;
216
56
@@ -XXX,XX +XXX,XX @@ Coroutine *qemu_coroutine_new(void)
217
QemuThread thread;
57
co = g_malloc0(sizeof(*co));
218
AioContext *ctx;
58
co->stack_size = COROUTINE_STACK_SIZE;
219
@@ -XXX,XX +XXX,XX @@ struct IOThread {
59
co->stack = qemu_alloc_stack(&co->stack_size);
220
int64_t poll_max_ns;
60
+#ifdef CONFIG_SAFESTACK
221
int64_t poll_grow;
61
+ co->unsafe_stack_size = COROUTINE_STACK_SIZE;
222
int64_t poll_shrink;
62
+ co->unsafe_stack = qemu_alloc_stack(&co->unsafe_stack_size);
223
-
63
+#endif
224
- /* AioContext AIO engine parameters */
64
co->base.entry_arg = &old_env; /* stash away our jmp_buf */
225
- int64_t aio_max_batch;
65
226
};
66
uc.uc_link = &old_uc;
227
typedef struct IOThread IOThread;
67
@@ -XXX,XX +XXX,XX @@ Coroutine *qemu_coroutine_new(void)
228
68
COROUTINE_YIELD,
229
diff --git a/event-loop-base.c b/event-loop-base.c
69
&fake_stack_save,
230
new file mode 100644
70
co->stack, co->stack_size, co->tsan_co_fiber);
231
index XXXXXXX..XXXXXXX
71
+
232
--- /dev/null
72
+#ifdef CONFIG_SAFESTACK
233
+++ b/event-loop-base.c
73
+ /*
234
@@ -XXX,XX +XXX,XX @@
74
+ * Before we swap the context, set the new unsafe stack
235
+/*
75
+ * The unsafe stack grows just like the normal stack, so start from
236
+ * QEMU event-loop base
76
+ * the last usable location of the memory area.
237
+ *
77
+ * NOTE: we don't have to re-set the usp afterwards because we are
238
+ * Copyright (C) 2022 Red Hat Inc
78
+ * coming back to this context through a siglongjmp.
239
+ *
79
+ * The compiler already wrapped the corresponding sigsetjmp call with
240
+ * Authors:
80
+ * code that saves the usp on the (safe) stack before the call, and
241
+ * Stefan Hajnoczi <stefanha@redhat.com>
81
+ * restores it right after (which is where we return with siglongjmp).
242
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
82
+ */
243
+ *
83
+ void *usp = co->unsafe_stack + co->unsafe_stack_size;
244
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
84
+ __safestack_unsafe_stack_ptr = usp;
245
+ * See the COPYING file in the top-level directory.
85
+#endif
246
+ */
86
+
247
+
87
swapcontext(&old_uc, &uc);
248
+#include "qemu/osdep.h"
249
+#include "qom/object_interfaces.h"
250
+#include "qapi/error.h"
251
+#include "sysemu/event-loop-base.h"
252
+
253
+typedef struct {
254
+ const char *name;
255
+ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
256
+} EventLoopBaseParamInfo;
257
+
258
+static EventLoopBaseParamInfo aio_max_batch_info = {
259
+ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
260
+};
261
+
262
+static void event_loop_base_get_param(Object *obj, Visitor *v,
263
+ const char *name, void *opaque, Error **errp)
264
+{
265
+ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj);
266
+ EventLoopBaseParamInfo *info = opaque;
267
+ int64_t *field = (void *)event_loop_base + info->offset;
268
+
269
+ visit_type_int64(v, name, field, errp);
270
+}
271
+
272
+static void event_loop_base_set_param(Object *obj, Visitor *v,
273
+ const char *name, void *opaque, Error **errp)
274
+{
275
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj);
276
+ EventLoopBase *base = EVENT_LOOP_BASE(obj);
277
+ EventLoopBaseParamInfo *info = opaque;
278
+ int64_t *field = (void *)base + info->offset;
279
+ int64_t value;
280
+
281
+ if (!visit_type_int64(v, name, &value, errp)) {
282
+ return;
283
+ }
284
+
285
+ if (value < 0) {
286
+ error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
287
+ info->name, INT64_MAX);
288
+ return;
289
+ }
290
+
291
+ *field = value;
292
+
293
+ if (bc->update_params) {
294
+ bc->update_params(base, errp);
295
+ }
296
+
297
+ return;
298
+}
299
+
300
+static void event_loop_base_complete(UserCreatable *uc, Error **errp)
301
+{
302
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
303
+ EventLoopBase *base = EVENT_LOOP_BASE(uc);
304
+
305
+ if (bc->init) {
306
+ bc->init(base, errp);
307
+ }
308
+}
309
+
310
+static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
311
+{
312
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
313
+ ucc->complete = event_loop_base_complete;
314
+
315
+ object_class_property_add(klass, "aio-max-batch", "int",
316
+ event_loop_base_get_param,
317
+ event_loop_base_set_param,
318
+ NULL, &aio_max_batch_info);
319
+}
320
+
321
+static const TypeInfo event_loop_base_info = {
322
+ .name = TYPE_EVENT_LOOP_BASE,
323
+ .parent = TYPE_OBJECT,
324
+ .instance_size = sizeof(EventLoopBase),
325
+ .class_size = sizeof(EventLoopBaseClass),
326
+ .class_init = event_loop_base_class_init,
327
+ .abstract = true,
328
+ .interfaces = (InterfaceInfo[]) {
329
+ { TYPE_USER_CREATABLE },
330
+ { }
331
+ }
332
+};
333
+
334
+static void register_types(void)
335
+{
336
+ type_register_static(&event_loop_base_info);
337
+}
338
+type_init(register_types);
339
diff --git a/iothread.c b/iothread.c
340
index XXXXXXX..XXXXXXX 100644
341
--- a/iothread.c
342
+++ b/iothread.c
343
@@ -XXX,XX +XXX,XX @@
344
#include "qemu/module.h"
345
#include "block/aio.h"
346
#include "block/block.h"
347
+#include "sysemu/event-loop-base.h"
348
#include "sysemu/iothread.h"
349
#include "qapi/error.h"
350
#include "qapi/qapi-commands-misc.h"
351
@@ -XXX,XX +XXX,XX @@ static void iothread_init_gcontext(IOThread *iothread)
352
iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
353
}
354
355
-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
356
+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
357
{
358
+ IOThread *iothread = IOTHREAD(base);
359
ERRP_GUARD();
360
361
+ if (!iothread->ctx) {
362
+ return;
363
+ }
364
+
365
aio_context_set_poll_params(iothread->ctx,
366
iothread->poll_max_ns,
367
iothread->poll_grow,
368
@@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
88
}
369
}
89
370
90
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_delete(Coroutine *co_)
371
aio_context_set_aio_params(iothread->ctx,
91
#endif
372
- iothread->aio_max_batch,
92
373
+ iothread->parent_obj.aio_max_batch,
93
qemu_free_stack(co->stack, co->stack_size);
374
errp);
94
+#ifdef CONFIG_SAFESTACK
95
+ qemu_free_stack(co->unsafe_stack, co->unsafe_stack_size);
96
+#endif
97
g_free(co);
98
}
375
}
99
376
377
-static void iothread_complete(UserCreatable *obj, Error **errp)
378
+
379
+static void iothread_init(EventLoopBase *base, Error **errp)
380
{
381
Error *local_error = NULL;
382
- IOThread *iothread = IOTHREAD(obj);
383
+ IOThread *iothread = IOTHREAD(base);
384
char *thread_name;
385
386
iothread->stopping = false;
387
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
388
*/
389
iothread_init_gcontext(iothread);
390
391
- iothread_set_aio_context_params(iothread, &local_error);
392
+ iothread_set_aio_context_params(base, &local_error);
393
if (local_error) {
394
error_propagate(errp, local_error);
395
aio_context_unref(iothread->ctx);
396
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
397
* to inherit.
398
*/
399
thread_name = g_strdup_printf("IO %s",
400
- object_get_canonical_path_component(OBJECT(obj)));
401
+ object_get_canonical_path_component(OBJECT(base)));
402
qemu_thread_create(&iothread->thread, thread_name, iothread_run,
403
iothread, QEMU_THREAD_JOINABLE);
404
g_free(thread_name);
405
@@ -XXX,XX +XXX,XX @@ static IOThreadParamInfo poll_grow_info = {
406
static IOThreadParamInfo poll_shrink_info = {
407
"poll-shrink", offsetof(IOThread, poll_shrink),
408
};
409
-static IOThreadParamInfo aio_max_batch_info = {
410
- "aio-max-batch", offsetof(IOThread, aio_max_batch),
411
-};
412
413
static void iothread_get_param(Object *obj, Visitor *v,
414
const char *name, IOThreadParamInfo *info, Error **errp)
415
@@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
416
}
417
}
418
419
-static void iothread_get_aio_param(Object *obj, Visitor *v,
420
- const char *name, void *opaque, Error **errp)
421
-{
422
- IOThreadParamInfo *info = opaque;
423
-
424
- iothread_get_param(obj, v, name, info, errp);
425
-}
426
-
427
-static void iothread_set_aio_param(Object *obj, Visitor *v,
428
- const char *name, void *opaque, Error **errp)
429
-{
430
- IOThread *iothread = IOTHREAD(obj);
431
- IOThreadParamInfo *info = opaque;
432
-
433
- if (!iothread_set_param(obj, v, name, info, errp)) {
434
- return;
435
- }
436
-
437
- if (iothread->ctx) {
438
- aio_context_set_aio_params(iothread->ctx,
439
- iothread->aio_max_batch,
440
- errp);
441
- }
442
-}
443
-
444
static void iothread_class_init(ObjectClass *klass, void *class_data)
445
{
446
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
447
- ucc->complete = iothread_complete;
448
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass);
449
+
450
+ bc->init = iothread_init;
451
+ bc->update_params = iothread_set_aio_context_params;
452
453
object_class_property_add(klass, "poll-max-ns", "int",
454
iothread_get_poll_param,
455
@@ -XXX,XX +XXX,XX @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
456
iothread_get_poll_param,
457
iothread_set_poll_param,
458
NULL, &poll_shrink_info);
459
- object_class_property_add(klass, "aio-max-batch", "int",
460
- iothread_get_aio_param,
461
- iothread_set_aio_param,
462
- NULL, &aio_max_batch_info);
463
}
464
465
static const TypeInfo iothread_info = {
466
.name = TYPE_IOTHREAD,
467
- .parent = TYPE_OBJECT,
468
+ .parent = TYPE_EVENT_LOOP_BASE,
469
.class_init = iothread_class_init,
470
.instance_size = sizeof(IOThread),
471
.instance_init = iothread_instance_init,
472
.instance_finalize = iothread_instance_finalize,
473
- .interfaces = (InterfaceInfo[]) {
474
- {TYPE_USER_CREATABLE},
475
- {}
476
- },
477
};
478
479
static void iothread_register_types(void)
480
@@ -XXX,XX +XXX,XX @@ static int query_one_iothread(Object *object, void *opaque)
481
info->poll_max_ns = iothread->poll_max_ns;
482
info->poll_grow = iothread->poll_grow;
483
info->poll_shrink = iothread->poll_shrink;
484
- info->aio_max_batch = iothread->aio_max_batch;
485
+ info->aio_max_batch = iothread->parent_obj.aio_max_batch;
486
487
QAPI_LIST_APPEND(*tail, info);
488
return 0;
100
--
489
--
101
2.26.2
490
2.35.1
102
diff view generated by jsdifflib
Deleted patch
1
From: Daniele Buono <dbuono@linux.vnet.ibm.com>
2
1
3
Current implementation of LLVM's SafeStack is not compatible with
4
code that uses an alternate stack created with sigaltstack().
5
Since coroutine-sigaltstack relies on sigaltstack(), it is not
6
compatible with SafeStack. The resulting binary is incorrect, with
7
different coroutines sharing the same unsafe stack and producing
8
undefined behavior at runtime.
9
10
In the future LLVM may provide a SafeStack implementation compatible with
11
sigaltstack(). In the meantime, if SafeStack is desired, the coroutine
12
implementation from coroutine-ucontext should be used.
13
As a safety check, add a control in coroutine-sigaltstack to throw a
14
preprocessor #error if SafeStack is enabled and we are trying to
15
use coroutine-sigaltstack to implement coroutines.
16
17
Signed-off-by: Daniele Buono <dbuono@linux.vnet.ibm.com>
18
Message-id: 20200529205122.714-3-dbuono@linux.vnet.ibm.com
19
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
20
---
21
util/coroutine-sigaltstack.c | 4 ++++
22
1 file changed, 4 insertions(+)
23
24
diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/util/coroutine-sigaltstack.c
27
+++ b/util/coroutine-sigaltstack.c
28
@@ -XXX,XX +XXX,XX @@
29
#include "qemu-common.h"
30
#include "qemu/coroutine_int.h"
31
32
+#ifdef CONFIG_SAFESTACK
33
+#error "SafeStack is not compatible with code run in alternate signal stacks"
34
+#endif
35
+
36
typedef struct {
37
Coroutine base;
38
void *stack;
39
--
40
2.26.2
41
diff view generated by jsdifflib
Deleted patch
1
From: Daniele Buono <dbuono@linux.vnet.ibm.com>
2
1
3
This patch adds a flag to enable/disable the SafeStack instrumentation
4
provided by LLVM.
5
6
On enable, make sure that the compiler supports the flags, and that we
7
are using the proper coroutine implementation (coroutine-ucontext).
8
On disable, explicitly disable the option if it was enabled by default.
9
10
While SafeStack is supported only on Linux, NetBSD, FreeBSD and macOS,
11
we are not checking for the O.S. since this is already done by LLVM.
12
13
Signed-off-by: Daniele Buono <dbuono@linux.vnet.ibm.com>
14
Message-id: 20200529205122.714-4-dbuono@linux.vnet.ibm.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
configure | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
18
1 file changed, 73 insertions(+)
19
20
diff --git a/configure b/configure
21
index XXXXXXX..XXXXXXX 100755
22
--- a/configure
23
+++ b/configure
24
@@ -XXX,XX +XXX,XX @@ audio_win_int=""
25
libs_qga=""
26
debug_info="yes"
27
stack_protector=""
28
+safe_stack=""
29
use_containers="yes"
30
gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")
31
32
@@ -XXX,XX +XXX,XX @@ for opt do
33
;;
34
--disable-stack-protector) stack_protector="no"
35
;;
36
+ --enable-safe-stack) safe_stack="yes"
37
+ ;;
38
+ --disable-safe-stack) safe_stack="no"
39
+ ;;
40
--disable-curses) curses="no"
41
;;
42
--enable-curses) curses="yes"
43
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
44
debug-tcg TCG debugging (default is disabled)
45
debug-info debugging information
46
sparse sparse checker
47
+ safe-stack SafeStack Stack Smash Protection. Depends on
48
+ clang/llvm >= 3.7 and requires coroutine backend ucontext.
49
50
gnutls GNUTLS cryptography support
51
nettle nettle cryptography support
52
@@ -XXX,XX +XXX,XX @@ if test "$debug_stack_usage" = "yes"; then
53
fi
54
fi
55
56
+##################################################
57
+# SafeStack
58
+
59
+
60
+if test "$safe_stack" = "yes"; then
61
+cat > $TMPC << EOF
62
+int main(int argc, char *argv[])
63
+{
64
+#if ! __has_feature(safe_stack)
65
+#error SafeStack Disabled
66
+#endif
67
+ return 0;
68
+}
69
+EOF
70
+ flag="-fsanitize=safe-stack"
71
+ # Check that safe-stack is supported and enabled.
72
+ if compile_prog "-Werror $flag" "$flag"; then
73
+ # Flag needed both at compilation and at linking
74
+ QEMU_CFLAGS="$QEMU_CFLAGS $flag"
75
+ QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
76
+ else
77
+ error_exit "SafeStack not supported by your compiler"
78
+ fi
79
+ if test "$coroutine" != "ucontext"; then
80
+ error_exit "SafeStack is only supported by the coroutine backend ucontext"
81
+ fi
82
+else
83
+cat > $TMPC << EOF
84
+int main(int argc, char *argv[])
85
+{
86
+#if defined(__has_feature)
87
+#if __has_feature(safe_stack)
88
+#error SafeStack Enabled
89
+#endif
90
+#endif
91
+ return 0;
92
+}
93
+EOF
94
+if test "$safe_stack" = "no"; then
95
+ # Make sure that safe-stack is disabled
96
+ if ! compile_prog "-Werror" ""; then
97
+ # SafeStack was already enabled, try to explicitly remove the feature
98
+ flag="-fno-sanitize=safe-stack"
99
+ if ! compile_prog "-Werror $flag" "$flag"; then
100
+ error_exit "Configure cannot disable SafeStack"
101
+ fi
102
+ QEMU_CFLAGS="$QEMU_CFLAGS $flag"
103
+ QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
104
+ fi
105
+else # "$safe_stack" = ""
106
+ # Set safe_stack to yes or no based on pre-existing flags
107
+ if compile_prog "-Werror" ""; then
108
+ safe_stack="no"
109
+ else
110
+ safe_stack="yes"
111
+ if test "$coroutine" != "ucontext"; then
112
+ error_exit "SafeStack is only supported by the coroutine backend ucontext"
113
+ fi
114
+ fi
115
+fi
116
+fi
117
118
##########################################
119
# check if we have open_by_handle_at
120
@@ -XXX,XX +XXX,XX @@ echo "sparse enabled $sparse"
121
echo "strip binaries $strip_opt"
122
echo "profiler $profiler"
123
echo "static build $static"
124
+echo "safe stack $safe_stack"
125
if test "$darwin" = "yes" ; then
126
echo "Cocoa support $cocoa"
127
fi
128
@@ -XXX,XX +XXX,XX @@ if test "$ccache_cpp2" = "yes"; then
129
echo "export CCACHE_CPP2=y" >> $config_host_mak
130
fi
131
132
+if test "$safe_stack" = "yes"; then
133
+ echo "CONFIG_SAFESTACK=y" >> $config_host_mak
134
+fi
135
+
136
# If we're using a separate build tree, set it up now.
137
# DIRS are directories which we simply mkdir in the build tree;
138
# LINKS are things to symlink back into the source tree
139
--
140
2.26.2
141
diff view generated by jsdifflib
Deleted patch
1
From: Daniele Buono <dbuono@linux.vnet.ibm.com>
2
1
3
SafeStack is a stack protection technique implemented in llvm. It is
4
enabled with a -fsanitize flag.
5
iotests are currently disabled when any -fsanitize option is used,
6
because such options tend to produce additional warnings and false
7
positives.
8
9
While common -fsanitize options are used to verify the code and not
10
added in production, SafeStack's main use is in production environments
11
to protect against stack smashing.
12
13
Since SafeStack does not print any warning or false positive, enable
14
iotests when SafeStack is the only -fsanitize option used.
15
This is likely going to be a production binary and we want to make sure
16
it works correctly.
17
18
Signed-off-by: Daniele Buono <dbuono@linux.vnet.ibm.com>
19
Message-id: 20200529205122.714-5-dbuono@linux.vnet.ibm.com
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
---
22
tests/check-block.sh | 12 +++++++++++-
23
1 file changed, 11 insertions(+), 1 deletion(-)
24
25
diff --git a/tests/check-block.sh b/tests/check-block.sh
26
index XXXXXXX..XXXXXXX 100755
27
--- a/tests/check-block.sh
28
+++ b/tests/check-block.sh
29
@@ -XXX,XX +XXX,XX @@ if grep -q "CONFIG_GPROF=y" config-host.mak 2>/dev/null ; then
30
exit 0
31
fi
32
33
-if grep -q "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ; then
34
+# Disable tests with any sanitizer except for SafeStack
35
+CFLAGS=$( grep "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null )
36
+SANITIZE_FLAGS=""
37
+#Remove all occurrences of -fsanitize=safe-stack
38
+for i in ${CFLAGS}; do
39
+ if [ "${i}" != "-fsanitize=safe-stack" ]; then
40
+ SANITIZE_FLAGS="${SANITIZE_FLAGS} ${i}"
41
+ fi
42
+done
43
+if echo ${SANITIZE_FLAGS} | grep -q "\-fsanitize" 2>/dev/null; then
44
+ # Have a sanitize flag that is not allowed, stop
45
echo "Sanitizers are enabled ==> Not running the qemu-iotests."
46
exit 0
47
fi
48
--
49
2.26.2
50
diff view generated by jsdifflib
Deleted patch
1
A lot of CPU time is spent simply locking/unlocking q->lock during
2
polling. Check for completion outside the lock to make q->lock disappear
3
from the profile.
4
1
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Sergio Lopez <slp@redhat.com>
7
Message-id: 20200617132201.1832152-2-stefanha@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/nvme.c | 12 ++++++++++++
11
1 file changed, 12 insertions(+)
12
13
diff --git a/block/nvme.c b/block/nvme.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block/nvme.c
16
+++ b/block/nvme.c
17
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queues(BDRVNVMeState *s)
18
19
for (i = 0; i < s->nr_queues; i++) {
20
NVMeQueuePair *q = s->queues[i];
21
+ const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
22
+ NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
23
+
24
+ /*
25
+ * Do an early check for completions. q->lock isn't needed because
26
+ * nvme_process_completion() only runs in the event loop thread and
27
+ * cannot race with itself.
28
+ */
29
+ if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
30
+ continue;
31
+ }
32
+
33
qemu_mutex_lock(&q->lock);
34
while (nvme_process_completion(s, q)) {
35
/* Keep polling */
36
--
37
2.26.2
38
diff view generated by jsdifflib
Deleted patch
1
nvme_process_completion() explicitly checks cid so the assertion that
2
follows is always true:
3
1
4
if (cid == 0 || cid > NVME_QUEUE_SIZE) {
5
...
6
continue;
7
}
8
assert(cid <= NVME_QUEUE_SIZE);
9
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Sergio Lopez <slp@redhat.com>
12
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
13
Message-id: 20200617132201.1832152-3-stefanha@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
block/nvme.c | 1 -
17
1 file changed, 1 deletion(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
24
cid);
25
continue;
26
}
27
- assert(cid <= NVME_QUEUE_SIZE);
28
trace_nvme_complete_command(s, q->index, cid);
29
preq = &q->reqs[cid - 1];
30
req = *preq;
31
--
32
2.26.2
33
diff view generated by jsdifflib
Deleted patch
1
Do not access a CQE after incrementing q->cq.head and releasing q->lock.
2
It is unlikely that this causes problems in practice but it's a latent
3
bug.
4
1
5
The reason why it should be safe at the moment is that completion
6
processing is not re-entrant and the CQ doorbell isn't written until the
7
end of nvme_process_completion().
8
9
Make this change now because QEMU expects completion processing to be
10
re-entrant and later patches will do that.
11
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Reviewed-by: Sergio Lopez <slp@redhat.com>
14
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
15
Message-id: 20200617132201.1832152-4-stefanha@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
18
block/nvme.c | 5 ++++-
19
1 file changed, 4 insertions(+), 1 deletion(-)
20
21
diff --git a/block/nvme.c b/block/nvme.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/block/nvme.c
24
+++ b/block/nvme.c
25
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
26
q->busy = true;
27
assert(q->inflight >= 0);
28
while (q->inflight) {
29
+ int ret;
30
int16_t cid;
31
+
32
c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES];
33
if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) {
34
break;
35
}
36
+ ret = nvme_translate_error(c);
37
q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
38
if (!q->cq.head) {
39
q->cq_phase = !q->cq_phase;
40
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
41
preq->busy = false;
42
preq->cb = preq->opaque = NULL;
43
qemu_mutex_unlock(&q->lock);
44
- req.cb(req.opaque, nvme_translate_error(c));
45
+ req.cb(req.opaque, ret);
46
qemu_mutex_lock(&q->lock);
47
q->inflight--;
48
progress = true;
49
--
50
2.26.2
51
diff view generated by jsdifflib
1
Passing around both BDRVNVMeState and NVMeQueuePair is unwieldy. Reduce
1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
2
the number of function arguments by keeping the BDRVNVMeState pointer in
2
3
NVMeQueuePair. This will come in handy when a BH is introduced in a
3
'event-loop-base' provides basic property handling for all 'AioContext'
4
later patch and only one argument can be passed to it.
4
based event loops. So let's define a new 'MainLoopClass' that inherits
5
5
from it. This will permit tweaking the main loop's properties through
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
qapi as well as through the command line using the '-object' keyword[1].
7
Reviewed-by: Sergio Lopez <slp@redhat.com>
7
Only one instance of 'MainLoopClass' might be created at any time.
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
9
Message-id: 20200617132201.1832152-7-stefanha@redhat.com
9
'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to
10
mark 'MainLoop' as non-deletable.
11
12
[1] For example:
13
-object main-loop,id=main-loop,aio-max-batch=<value>
14
15
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
16
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Acked-by: Markus Armbruster <armbru@redhat.com>
18
Message-id: 20220425075723.20019-3-nsaenzju@redhat.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
20
---
12
block/nvme.c | 70 ++++++++++++++++++++++++++++------------------------
21
qapi/qom.json | 13 ++++++++
13
1 file changed, 38 insertions(+), 32 deletions(-)
22
meson.build | 3 +-
14
23
include/qemu/main-loop.h | 10 ++++++
15
diff --git a/block/nvme.c b/block/nvme.c
24
include/sysemu/event-loop-base.h | 1 +
16
index XXXXXXX..XXXXXXX 100644
25
event-loop-base.c | 13 ++++++++
17
--- a/block/nvme.c
26
util/main-loop.c | 56 ++++++++++++++++++++++++++++++++
18
+++ b/block/nvme.c
27
6 files changed, 95 insertions(+), 1 deletion(-)
19
@@ -XXX,XX +XXX,XX @@
28
20
*/
29
diff --git a/qapi/qom.json b/qapi/qom.json
21
#define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1)
30
index XXXXXXX..XXXXXXX 100644
22
31
--- a/qapi/qom.json
23
+typedef struct BDRVNVMeState BDRVNVMeState;
32
+++ b/qapi/qom.json
24
+
33
@@ -XXX,XX +XXX,XX @@
25
typedef struct {
34
'*poll-grow': 'int',
26
int32_t head, tail;
35
'*poll-shrink': 'int' } }
27
uint8_t *queue;
36
28
@@ -XXX,XX +XXX,XX @@ typedef struct {
37
+##
29
typedef struct {
38
+# @MainLoopProperties:
30
QemuMutex lock;
39
+#
31
40
+# Properties for the main-loop object.
32
+ /* Read from I/O code path, initialized under BQL */
41
+#
33
+ BDRVNVMeState *s;
42
+# Since: 7.1
34
+ int index;
43
+##
35
+
44
+{ 'struct': 'MainLoopProperties',
36
/* Fields protected by BQL */
45
+ 'base': 'EventLoopBaseProperties',
37
- int index;
46
+ 'data': {} }
38
uint8_t *prp_list_pages;
47
+
39
48
##
40
/* Fields protected by @lock */
49
# @MemoryBackendProperties:
41
@@ -XXX,XX +XXX,XX @@ typedef volatile struct {
50
#
42
51
@@ -XXX,XX +XXX,XX @@
43
QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);
52
{ 'name': 'input-linux',
44
53
'if': 'CONFIG_LINUX' },
45
-typedef struct {
54
'iothread',
46
+struct BDRVNVMeState {
55
+ 'main-loop',
47
AioContext *aio_context;
56
{ 'name': 'memory-backend-epc',
48
QEMUVFIOState *vfio;
57
'if': 'CONFIG_LINUX' },
49
NVMeRegs *regs;
58
'memory-backend-file',
50
@@ -XXX,XX +XXX,XX @@ typedef struct {
59
@@ -XXX,XX +XXX,XX @@
51
60
'input-linux': { 'type': 'InputLinuxProperties',
52
/* PCI address (required for nvme_refresh_filename()) */
61
'if': 'CONFIG_LINUX' },
53
char *device;
62
'iothread': 'IothreadProperties',
54
-} BDRVNVMeState;
63
+ 'main-loop': 'MainLoopProperties',
64
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
65
'if': 'CONFIG_LINUX' },
66
'memory-backend-file': 'MemoryBackendFileProperties',
67
diff --git a/meson.build b/meson.build
68
index XXXXXXX..XXXXXXX 100644
69
--- a/meson.build
70
+++ b/meson.build
71
@@ -XXX,XX +XXX,XX @@ libqemuutil = static_library('qemuutil',
72
sources: util_ss.sources() + stub_ss.sources() + genh,
73
dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman])
74
qemuutil = declare_dependency(link_with: libqemuutil,
75
- sources: genh + version_res)
76
+ sources: genh + version_res,
77
+ dependencies: [event_loop_base])
78
79
if have_system or have_user
80
decodetree = generator(find_program('scripts/decodetree.py'),
81
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/include/qemu/main-loop.h
84
+++ b/include/qemu/main-loop.h
85
@@ -XXX,XX +XXX,XX @@
86
#define QEMU_MAIN_LOOP_H
87
88
#include "block/aio.h"
89
+#include "qom/object.h"
90
+#include "sysemu/event-loop-base.h"
91
92
#define SIG_IPI SIGUSR1
93
94
+#define TYPE_MAIN_LOOP "main-loop"
95
+OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP)
96
+
97
+struct MainLoop {
98
+ EventLoopBase parent_obj;
55
+};
99
+};
56
100
+typedef struct MainLoop MainLoop;
57
#define NVME_BLOCK_OPT_DEVICE "device"
101
+
58
#define NVME_BLOCK_OPT_NAMESPACE "namespace"
102
/**
59
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q,
103
* qemu_init_main_loop: Set up the process so that it can run the main loop.
104
*
105
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/include/sysemu/event-loop-base.h
108
+++ b/include/sysemu/event-loop-base.h
109
@@ -XXX,XX +XXX,XX @@ struct EventLoopBaseClass {
110
111
void (*init)(EventLoopBase *base, Error **errp);
112
void (*update_params)(EventLoopBase *base, Error **errp);
113
+ bool (*can_be_deleted)(EventLoopBase *base);
114
};
115
116
struct EventLoopBase {
117
diff --git a/event-loop-base.c b/event-loop-base.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/event-loop-base.c
120
+++ b/event-loop-base.c
121
@@ -XXX,XX +XXX,XX @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp)
60
}
122
}
61
}
123
}
62
124
63
-static void nvme_free_queue_pair(BlockDriverState *bs, NVMeQueuePair *q)
125
+static bool event_loop_base_can_be_deleted(UserCreatable *uc)
64
+static void nvme_free_queue_pair(NVMeQueuePair *q)
126
+{
127
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
128
+ EventLoopBase *backend = EVENT_LOOP_BASE(uc);
129
+
130
+ if (bc->can_be_deleted) {
131
+ return bc->can_be_deleted(backend);
132
+ }
133
+
134
+ return true;
135
+}
136
+
137
static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
65
{
138
{
66
qemu_vfree(q->prp_list_pages);
139
UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
67
qemu_vfree(q->sq.queue);
140
ucc->complete = event_loop_base_complete;
68
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
141
+ ucc->can_be_deleted = event_loop_base_can_be_deleted;
69
uint64_t prp_list_iova;
142
70
143
object_class_property_add(klass, "aio-max-batch", "int",
71
qemu_mutex_init(&q->lock);
144
event_loop_base_get_param,
72
+ q->s = s;
145
diff --git a/util/main-loop.c b/util/main-loop.c
73
q->index = idx;
146
index XXXXXXX..XXXXXXX 100644
74
qemu_co_queue_init(&q->free_req_queue);
147
--- a/util/main-loop.c
75
q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
148
+++ b/util/main-loop.c
76
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
149
@@ -XXX,XX +XXX,XX @@
77
150
#include "qemu/error-report.h"
78
return q;
151
#include "qemu/queue.h"
79
fail:
152
#include "qemu/compiler.h"
80
- nvme_free_queue_pair(bs, q);
153
+#include "qom/object.h"
81
+ nvme_free_queue_pair(q);
154
82
return NULL;
155
#ifndef _WIN32
156
#include <sys/wait.h>
157
@@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp)
158
return 0;
83
}
159
}
84
160
85
/* With q->lock */
161
+static void main_loop_update_params(EventLoopBase *base, Error **errp)
86
-static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q)
162
+{
87
+static void nvme_kick(NVMeQueuePair *q)
163
+ if (!qemu_aio_context) {
88
{
164
+ error_setg(errp, "qemu aio context not ready");
89
+ BDRVNVMeState *s = q->s;
165
+ return;
90
+
166
+ }
91
if (s->plugged || !q->need_kick) {
167
+
92
return;
168
+ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
93
}
169
+}
94
@@ -XXX,XX +XXX,XX @@ static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req)
170
+
95
}
171
+MainLoop *mloop;
96
172
+
97
/* With q->lock */
173
+static void main_loop_init(EventLoopBase *base, Error **errp)
98
-static void nvme_wake_free_req_locked(BDRVNVMeState *s, NVMeQueuePair *q)
174
+{
99
+static void nvme_wake_free_req_locked(NVMeQueuePair *q)
175
+ MainLoop *m = MAIN_LOOP(base);
100
{
176
+
101
if (!qemu_co_queue_empty(&q->free_req_queue)) {
177
+ if (mloop) {
102
- replay_bh_schedule_oneshot_event(s->aio_context,
178
+ error_setg(errp, "only one main-loop instance allowed");
103
+ replay_bh_schedule_oneshot_event(q->s->aio_context,
179
+ return;
104
nvme_free_req_queue_cb, q);
180
+ }
105
}
181
+
106
}
182
+ main_loop_update_params(base, errp);
107
183
+
108
/* Insert a request in the freelist and wake waiters */
184
+ mloop = m;
109
-static void nvme_put_free_req_and_wake(BDRVNVMeState *s, NVMeQueuePair *q,
185
+ return;
110
- NVMeRequest *req)
186
+}
111
+static void nvme_put_free_req_and_wake(NVMeQueuePair *q, NVMeRequest *req)
187
+
112
{
188
+static bool main_loop_can_be_deleted(EventLoopBase *base)
113
qemu_mutex_lock(&q->lock);
189
+{
114
nvme_put_free_req_locked(q, req);
190
+ return false;
115
- nvme_wake_free_req_locked(s, q);
191
+}
116
+ nvme_wake_free_req_locked(q);
192
+
117
qemu_mutex_unlock(&q->lock);
193
+static void main_loop_class_init(ObjectClass *oc, void *class_data)
118
}
194
+{
119
195
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);
120
@@ -XXX,XX +XXX,XX @@ static inline int nvme_translate_error(const NvmeCqe *c)
196
+
121
}
197
+ bc->init = main_loop_init;
122
198
+ bc->update_params = main_loop_update_params;
123
/* With q->lock */
199
+ bc->can_be_deleted = main_loop_can_be_deleted;
124
-static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
200
+}
125
+static bool nvme_process_completion(NVMeQueuePair *q)
201
+
126
{
202
+static const TypeInfo main_loop_info = {
127
+ BDRVNVMeState *s = q->s;
203
+ .name = TYPE_MAIN_LOOP,
128
bool progress = false;
204
+ .parent = TYPE_EVENT_LOOP_BASE,
129
NVMeRequest *preq;
205
+ .class_init = main_loop_class_init,
130
NVMeRequest req;
206
+ .instance_size = sizeof(MainLoop),
131
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
207
+};
132
/* Notify the device so it can post more completions. */
208
+
133
smp_mb_release();
209
+static void main_loop_register_types(void)
134
*q->cq.doorbell = cpu_to_le32(q->cq.head);
210
+{
135
- nvme_wake_free_req_locked(s, q);
211
+ type_register_static(&main_loop_info);
136
+ nvme_wake_free_req_locked(q);
212
+}
137
}
213
+
138
q->busy = false;
214
+type_init(main_loop_register_types)
139
return progress;
215
+
140
@@ -XXX,XX +XXX,XX @@ static void nvme_trace_command(const NvmeCmd *cmd)
216
static int max_priority;
141
}
217
142
}
218
#ifndef _WIN32
143
144
-static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q,
145
- NVMeRequest *req,
146
+static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
147
NvmeCmd *cmd, BlockCompletionFunc cb,
148
void *opaque)
149
{
150
@@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q,
151
req->opaque = opaque;
152
cmd->cid = cpu_to_le32(req->cid);
153
154
- trace_nvme_submit_command(s, q->index, req->cid);
155
+ trace_nvme_submit_command(q->s, q->index, req->cid);
156
nvme_trace_command(cmd);
157
qemu_mutex_lock(&q->lock);
158
memcpy((uint8_t *)q->sq.queue +
159
q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
160
q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
161
q->need_kick++;
162
- nvme_kick(s, q);
163
- nvme_process_completion(s, q);
164
+ nvme_kick(q);
165
+ nvme_process_completion(q);
166
qemu_mutex_unlock(&q->lock);
167
}
168
169
@@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
170
NvmeCmd *cmd)
171
{
172
NVMeRequest *req;
173
- BDRVNVMeState *s = bs->opaque;
174
int ret = -EINPROGRESS;
175
req = nvme_get_free_req(q);
176
if (!req) {
177
return -EBUSY;
178
}
179
- nvme_submit_command(s, q, req, cmd, nvme_cmd_sync_cb, &ret);
180
+ nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);
181
182
BDRV_POLL_WHILE(bs, ret == -EINPROGRESS);
183
return ret;
184
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queues(BDRVNVMeState *s)
185
}
186
187
qemu_mutex_lock(&q->lock);
188
- while (nvme_process_completion(s, q)) {
189
+ while (nvme_process_completion(q)) {
190
/* Keep polling */
191
progress = true;
192
}
193
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
194
};
195
if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
196
error_setg(errp, "Failed to create io queue [%d]", n);
197
- nvme_free_queue_pair(bs, q);
198
+ nvme_free_queue_pair(q);
199
return false;
200
}
201
cmd = (NvmeCmd) {
202
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
203
};
204
if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
205
error_setg(errp, "Failed to create io queue [%d]", n);
206
- nvme_free_queue_pair(bs, q);
207
+ nvme_free_queue_pair(q);
208
return false;
209
}
210
s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
211
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
212
BDRVNVMeState *s = bs->opaque;
213
214
for (i = 0; i < s->nr_queues; ++i) {
215
- nvme_free_queue_pair(bs, s->queues[i]);
216
+ nvme_free_queue_pair(s->queues[i]);
217
}
218
g_free(s->queues);
219
aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
220
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
221
r = nvme_cmd_map_qiov(bs, &cmd, req, qiov);
222
qemu_co_mutex_unlock(&s->dma_map_lock);
223
if (r) {
224
- nvme_put_free_req_and_wake(s, ioq, req);
225
+ nvme_put_free_req_and_wake(ioq, req);
226
return r;
227
}
228
- nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
229
+ nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
230
231
data.co = qemu_coroutine_self();
232
while (data.ret == -EINPROGRESS) {
233
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
234
assert(s->nr_queues > 1);
235
req = nvme_get_free_req(ioq);
236
assert(req);
237
- nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
238
+ nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
239
240
data.co = qemu_coroutine_self();
241
if (data.ret == -EINPROGRESS) {
242
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
243
req = nvme_get_free_req(ioq);
244
assert(req);
245
246
- nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
247
+ nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
248
249
data.co = qemu_coroutine_self();
250
while (data.ret == -EINPROGRESS) {
251
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
252
qemu_co_mutex_unlock(&s->dma_map_lock);
253
254
if (ret) {
255
- nvme_put_free_req_and_wake(s, ioq, req);
256
+ nvme_put_free_req_and_wake(ioq, req);
257
goto out;
258
}
259
260
trace_nvme_dsm(s, offset, bytes);
261
262
- nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
263
+ nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
264
265
data.co = qemu_coroutine_self();
266
while (data.ret == -EINPROGRESS) {
267
@@ -XXX,XX +XXX,XX @@ static void nvme_aio_unplug(BlockDriverState *bs)
268
for (i = 1; i < s->nr_queues; i++) {
269
NVMeQueuePair *q = s->queues[i];
270
qemu_mutex_lock(&q->lock);
271
- nvme_kick(s, q);
272
- nvme_process_completion(s, q);
273
+ nvme_kick(q);
274
+ nvme_process_completion(q);
275
qemu_mutex_unlock(&q->lock);
276
}
277
}
278
--
219
--
279
2.26.2
220
2.35.1
280
diff view generated by jsdifflib
1
There are three issues with the current NVMeRequest->busy field:
1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
2
1. The busy field is accidentally accessed outside q->lock when request
3
submission fails.
4
2. Waiters on free_req_queue are not woken when a request is returned
5
early due to submission failure.
6
3. Finding a free request involves scanning all requests. This makes
7
request submission O(n^2).
8
2
9
Switch to an O(1) freelist that is always accessed under the lock.
3
The thread pool regulates itself: when idle, it kills threads until
4
empty, when in demand, it creates new threads until full. This behaviour
5
doesn't play well with latency sensitive workloads where the price of
6
creating a new thread is too high. For example, when paired with qemu's
7
'-mlock', or using safety features like SafeStack, creating a new thread
8
has been measured to take multiple milliseconds.
10
9
11
Also differentiate between NVME_QUEUE_SIZE, the actual SQ/CQ size, and
10
In order to mitigate this let's introduce a new 'EventLoopBase'
12
NVME_NUM_REQS, the number of usable requests. This makes the code
11
property to set the thread pool size. The threads will be created during
13
simpler than using NVME_QUEUE_SIZE everywhere and having to keep in mind
12
the pool's initialization or upon updating the property's value, remain
14
that one slot is reserved.
13
available during its lifetime regardless of demand, and destroyed upon
14
freeing it. A properly characterized workload will then be able to
15
configure the pool to avoid any latency spikes.
15
16
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
17
Reviewed-by: Sergio Lopez <slp@redhat.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
18
Message-id: 20200617132201.1832152-5-stefanha@redhat.com
19
Acked-by: Markus Armbruster <armbru@redhat.com>
20
Message-id: 20220425075723.20019-4-nsaenzju@redhat.com
19
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
20
---
22
---
21
block/nvme.c | 81 ++++++++++++++++++++++++++++++++++------------------
23
qapi/qom.json | 10 +++++-
22
1 file changed, 54 insertions(+), 27 deletions(-)
24
include/block/aio.h | 10 ++++++
25
include/block/thread-pool.h | 3 ++
26
include/sysemu/event-loop-base.h | 4 +++
27
event-loop-base.c | 23 +++++++++++++
28
iothread.c | 3 ++
29
util/aio-posix.c | 1 +
30
util/async.c | 20 ++++++++++++
31
util/main-loop.c | 9 ++++++
32
util/thread-pool.c | 55 +++++++++++++++++++++++++++++---
33
10 files changed, 133 insertions(+), 5 deletions(-)
23
34
24
diff --git a/block/nvme.c b/block/nvme.c
35
diff --git a/qapi/qom.json b/qapi/qom.json
25
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
26
--- a/block/nvme.c
37
--- a/qapi/qom.json
27
+++ b/block/nvme.c
38
+++ b/qapi/qom.json
28
@@ -XXX,XX +XXX,XX @@
39
@@ -XXX,XX +XXX,XX @@
29
#define NVME_QUEUE_SIZE 128
40
# 0 means that the engine will use its default.
30
#define NVME_BAR_SIZE 8192
41
# (default: 0)
31
42
#
32
+/*
43
+# @thread-pool-min: minimum number of threads reserved in the thread pool
33
+ * We have to leave one slot empty as that is the full queue case where
44
+# (default:0)
34
+ * head == tail + 1.
45
+#
46
+# @thread-pool-max: maximum number of threads the thread pool can contain
47
+# (default:64)
48
+#
49
# Since: 7.1
50
##
51
{ 'struct': 'EventLoopBaseProperties',
52
- 'data': { '*aio-max-batch': 'int' } }
53
+ 'data': { '*aio-max-batch': 'int',
54
+ '*thread-pool-min': 'int',
55
+ '*thread-pool-max': 'int' } }
56
57
##
58
# @IothreadProperties:
59
diff --git a/include/block/aio.h b/include/block/aio.h
60
index XXXXXXX..XXXXXXX 100644
61
--- a/include/block/aio.h
62
+++ b/include/block/aio.h
63
@@ -XXX,XX +XXX,XX @@ struct AioContext {
64
QSLIST_HEAD(, Coroutine) scheduled_coroutines;
65
QEMUBH *co_schedule_bh;
66
67
+ int thread_pool_min;
68
+ int thread_pool_max;
69
/* Thread pool for performing work and receiving completion callbacks.
70
* Has its own locking.
71
*/
72
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
73
void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
74
Error **errp);
75
76
+/**
77
+ * aio_context_set_thread_pool_params:
78
+ * @ctx: the aio context
79
+ * @min: min number of threads to have readily available in the thread pool
80
+ * @max: max number of threads the thread pool can contain
35
+ */
81
+ */
36
+#define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1)
82
+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
37
+
83
+ int64_t max, Error **errp);
38
typedef struct {
84
#endif
39
int32_t head, tail;
85
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
40
uint8_t *queue;
86
index XXXXXXX..XXXXXXX 100644
41
@@ -XXX,XX +XXX,XX @@ typedef struct {
87
--- a/include/block/thread-pool.h
42
int cid;
88
+++ b/include/block/thread-pool.h
43
void *prp_list_page;
89
@@ -XXX,XX +XXX,XX @@
44
uint64_t prp_list_iova;
90
45
- bool busy;
91
#include "block/block.h"
46
+ int free_req_next; /* q->reqs[] index of next free req */
92
47
} NVMeRequest;
93
+#define THREAD_POOL_MAX_THREADS_DEFAULT 64
94
+
95
typedef int ThreadPoolFunc(void *opaque);
96
97
typedef struct ThreadPool ThreadPool;
98
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
99
int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
100
ThreadPoolFunc *func, void *arg);
101
void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
102
+void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
103
104
#endif
105
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/include/sysemu/event-loop-base.h
108
+++ b/include/sysemu/event-loop-base.h
109
@@ -XXX,XX +XXX,XX @@ struct EventLoopBase {
110
111
/* AioContext AIO engine parameters */
112
int64_t aio_max_batch;
113
+
114
+ /* AioContext thread pool parameters */
115
+ int64_t thread_pool_min;
116
+ int64_t thread_pool_max;
117
};
118
#endif
119
diff --git a/event-loop-base.c b/event-loop-base.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/event-loop-base.c
122
+++ b/event-loop-base.c
123
@@ -XXX,XX +XXX,XX @@
124
#include "qemu/osdep.h"
125
#include "qom/object_interfaces.h"
126
#include "qapi/error.h"
127
+#include "block/thread-pool.h"
128
#include "sysemu/event-loop-base.h"
48
129
49
typedef struct {
130
typedef struct {
50
@@ -XXX,XX +XXX,XX @@ typedef struct {
131
@@ -XXX,XX +XXX,XX @@ typedef struct {
51
/* Fields protected by @lock */
132
ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
52
NVMeQueue sq, cq;
133
} EventLoopBaseParamInfo;
53
int cq_phase;
134
54
- NVMeRequest reqs[NVME_QUEUE_SIZE];
135
+static void event_loop_base_instance_init(Object *obj)
55
+ int free_req_head;
136
+{
56
+ NVMeRequest reqs[NVME_NUM_REQS];
137
+ EventLoopBase *base = EVENT_LOOP_BASE(obj);
57
bool busy;
138
+
58
int need_kick;
139
+ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
59
int inflight;
140
+}
60
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
141
+
61
qemu_mutex_init(&q->lock);
142
static EventLoopBaseParamInfo aio_max_batch_info = {
62
q->index = idx;
143
"aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
63
qemu_co_queue_init(&q->free_req_queue);
144
};
64
- q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_QUEUE_SIZE);
145
+static EventLoopBaseParamInfo thread_pool_min_info = {
65
+ q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
146
+ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min),
66
r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
147
+};
67
- s->page_size * NVME_QUEUE_SIZE,
148
+static EventLoopBaseParamInfo thread_pool_max_info = {
68
+ s->page_size * NVME_NUM_REQS,
149
+ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max),
69
false, &prp_list_iova);
150
+};
70
if (r) {
151
71
goto fail;
152
static void event_loop_base_get_param(Object *obj, Visitor *v,
153
const char *name, void *opaque, Error **errp)
154
@@ -XXX,XX +XXX,XX @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
155
event_loop_base_get_param,
156
event_loop_base_set_param,
157
NULL, &aio_max_batch_info);
158
+ object_class_property_add(klass, "thread-pool-min", "int",
159
+ event_loop_base_get_param,
160
+ event_loop_base_set_param,
161
+ NULL, &thread_pool_min_info);
162
+ object_class_property_add(klass, "thread-pool-max", "int",
163
+ event_loop_base_get_param,
164
+ event_loop_base_set_param,
165
+ NULL, &thread_pool_max_info);
166
}
167
168
static const TypeInfo event_loop_base_info = {
169
.name = TYPE_EVENT_LOOP_BASE,
170
.parent = TYPE_OBJECT,
171
.instance_size = sizeof(EventLoopBase),
172
+ .instance_init = event_loop_base_instance_init,
173
.class_size = sizeof(EventLoopBaseClass),
174
.class_init = event_loop_base_class_init,
175
.abstract = true,
176
diff --git a/iothread.c b/iothread.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/iothread.c
179
+++ b/iothread.c
180
@@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
181
aio_context_set_aio_params(iothread->ctx,
182
iothread->parent_obj.aio_max_batch,
183
errp);
184
+
185
+ aio_context_set_thread_pool_params(iothread->ctx, base->thread_pool_min,
186
+ base->thread_pool_max, errp);
187
}
188
189
190
diff --git a/util/aio-posix.c b/util/aio-posix.c
191
index XXXXXXX..XXXXXXX 100644
192
--- a/util/aio-posix.c
193
+++ b/util/aio-posix.c
194
@@ -XXX,XX +XXX,XX @@
195
196
#include "qemu/osdep.h"
197
#include "block/block.h"
198
+#include "block/thread-pool.h"
199
#include "qemu/main-loop.h"
200
#include "qemu/rcu.h"
201
#include "qemu/rcu_queue.h"
202
diff --git a/util/async.c b/util/async.c
203
index XXXXXXX..XXXXXXX 100644
204
--- a/util/async.c
205
+++ b/util/async.c
206
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
207
208
ctx->aio_max_batch = 0;
209
210
+ ctx->thread_pool_min = 0;
211
+ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
212
+
213
return ctx;
214
fail:
215
g_source_destroy(&ctx->source);
216
@@ -XXX,XX +XXX,XX @@ void qemu_set_current_aio_context(AioContext *ctx)
217
assert(!get_my_aiocontext());
218
set_my_aiocontext(ctx);
219
}
220
+
221
+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
222
+ int64_t max, Error **errp)
223
+{
224
+
225
+ if (min > max || !max || min > INT_MAX || max > INT_MAX) {
226
+ error_setg(errp, "bad thread-pool-min/thread-pool-max values");
227
+ return;
228
+ }
229
+
230
+ ctx->thread_pool_min = min;
231
+ ctx->thread_pool_max = max;
232
+
233
+ if (ctx->thread_pool) {
234
+ thread_pool_update_params(ctx->thread_pool, ctx);
235
+ }
236
+}
237
diff --git a/util/main-loop.c b/util/main-loop.c
238
index XXXXXXX..XXXXXXX 100644
239
--- a/util/main-loop.c
240
+++ b/util/main-loop.c
241
@@ -XXX,XX +XXX,XX @@
242
#include "sysemu/replay.h"
243
#include "qemu/main-loop.h"
244
#include "block/aio.h"
245
+#include "block/thread-pool.h"
246
#include "qemu/error-report.h"
247
#include "qemu/queue.h"
248
#include "qemu/compiler.h"
249
@@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp)
250
251
static void main_loop_update_params(EventLoopBase *base, Error **errp)
252
{
253
+ ERRP_GUARD();
254
+
255
if (!qemu_aio_context) {
256
error_setg(errp, "qemu aio context not ready");
257
return;
72
}
258
}
73
- for (i = 0; i < NVME_QUEUE_SIZE; i++) {
259
74
+ q->free_req_head = -1;
260
aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
75
+ for (i = 0; i < NVME_NUM_REQS; i++) {
261
+ if (*errp) {
76
NVMeRequest *req = &q->reqs[i];
262
+ return;
77
req->cid = i + 1;
263
+ }
78
+ req->free_req_next = q->free_req_head;
264
+
79
+ q->free_req_head = i;
265
+ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min,
80
req->prp_list_page = q->prp_list_pages + i * s->page_size;
266
+ base->thread_pool_max, errp);
81
req->prp_list_iova = prp_list_iova + i * s->page_size;
267
}
82
}
268
83
+
269
MainLoop *mloop;
84
nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
270
diff --git a/util/thread-pool.c b/util/thread-pool.c
85
if (local_err) {
271
index XXXXXXX..XXXXXXX 100644
86
error_propagate(errp, local_err);
272
--- a/util/thread-pool.c
87
@@ -XXX,XX +XXX,XX @@ static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q)
273
+++ b/util/thread-pool.c
88
*/
274
@@ -XXX,XX +XXX,XX @@ struct ThreadPool {
89
static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
275
QemuMutex lock;
276
QemuCond worker_stopped;
277
QemuSemaphore sem;
278
- int max_threads;
279
QEMUBH *new_thread_bh;
280
281
/* The following variables are only accessed from one AioContext. */
282
@@ -XXX,XX +XXX,XX @@ struct ThreadPool {
283
int new_threads; /* backlog of threads we need to create */
284
int pending_threads; /* threads created but not running yet */
285
bool stopping;
286
+ int min_threads;
287
+ int max_threads;
288
};
289
290
+static inline bool back_to_sleep(ThreadPool *pool, int ret)
291
+{
292
+ /*
293
+ * The semaphore timed out, we should exit the loop except when:
294
+ * - There is work to do, we raced with the signal.
295
+ * - The max threads threshold just changed, we raced with the signal.
296
+ * - The thread pool forces a minimum number of readily available threads.
297
+ */
298
+ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) ||
299
+ pool->cur_threads > pool->max_threads ||
300
+ pool->cur_threads <= pool->min_threads)) {
301
+ return true;
302
+ }
303
+
304
+ return false;
305
+}
306
+
307
static void *worker_thread(void *opaque)
90
{
308
{
91
- int i;
309
ThreadPool *pool = opaque;
92
- NVMeRequest *req = NULL;
310
@@ -XXX,XX +XXX,XX @@ static void *worker_thread(void *opaque)
93
+ NVMeRequest *req;
311
ret = qemu_sem_timedwait(&pool->sem, 10000);
94
312
qemu_mutex_lock(&pool->lock);
95
qemu_mutex_lock(&q->lock);
313
pool->idle_threads--;
96
- while (q->inflight + q->need_kick > NVME_QUEUE_SIZE - 2) {
314
- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list));
97
- /* We have to leave one slot empty as that is the full queue case (head
315
- if (ret == -1 || pool->stopping) {
98
- * == tail + 1). */
316
+ } while (back_to_sleep(pool, ret));
99
+
317
+ if (ret == -1 || pool->stopping ||
100
+ while (q->free_req_head == -1) {
318
+ pool->cur_threads > pool->max_threads) {
101
if (qemu_in_coroutine()) {
319
break;
102
trace_nvme_free_req_queue_wait(q);
103
qemu_co_queue_wait(&q->free_req_queue, &q->lock);
104
@@ -XXX,XX +XXX,XX @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
105
return NULL;
106
}
320
}
107
}
321
108
- for (i = 0; i < NVME_QUEUE_SIZE; i++) {
322
@@ -XXX,XX +XXX,XX @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
109
- if (!q->reqs[i].busy) {
323
thread_pool_submit_aio(pool, func, arg, NULL, NULL);
110
- q->reqs[i].busy = true;
324
}
111
- req = &q->reqs[i];
325
112
- break;
326
+void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
113
- }
114
- }
115
- /* We have checked inflight and need_kick while holding q->lock, so one
116
- * free req must be available. */
117
- assert(req);
118
+
119
+ req = &q->reqs[q->free_req_head];
120
+ q->free_req_head = req->free_req_next;
121
+ req->free_req_next = -1;
122
+
123
qemu_mutex_unlock(&q->lock);
124
return req;
125
}
126
127
+/* With q->lock */
128
+static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req)
129
+{
327
+{
130
+ req->free_req_next = q->free_req_head;
328
+ qemu_mutex_lock(&pool->lock);
131
+ q->free_req_head = req - q->reqs;
329
+
330
+ pool->min_threads = ctx->thread_pool_min;
331
+ pool->max_threads = ctx->thread_pool_max;
332
+
333
+ /*
334
+ * We either have to:
335
+ * - Increase the number of available threads until over the min_threads
336
+ * threshold.
337
+ * - Decrease the number of available threads until under the max_threads
338
+ * threshold.
339
+ * - Do nothing. The current number of threads falls in between the min and
340
+ * max thresholds. We'll let the pool manage itself.
341
+ */
342
+ for (int i = pool->cur_threads; i < pool->min_threads; i++) {
343
+ spawn_thread(pool);
344
+ }
345
+
346
+ for (int i = pool->cur_threads; i > pool->max_threads; i--) {
347
+ qemu_sem_post(&pool->sem);
348
+ }
349
+
350
+ qemu_mutex_unlock(&pool->lock);
132
+}
351
+}
133
+
352
+
134
+/* With q->lock */
353
static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
135
+static void nvme_wake_free_req_locked(BDRVNVMeState *s, NVMeQueuePair *q)
136
+{
137
+ if (!qemu_co_queue_empty(&q->free_req_queue)) {
138
+ replay_bh_schedule_oneshot_event(s->aio_context,
139
+ nvme_free_req_queue_cb, q);
140
+ }
141
+}
142
+
143
+/* Insert a request in the freelist and wake waiters */
144
+static void nvme_put_free_req_and_wake(BDRVNVMeState *s, NVMeQueuePair *q,
145
+ NVMeRequest *req)
146
+{
147
+ qemu_mutex_lock(&q->lock);
148
+ nvme_put_free_req_locked(q, req);
149
+ nvme_wake_free_req_locked(s, q);
150
+ qemu_mutex_unlock(&q->lock);
151
+}
152
+
153
static inline int nvme_translate_error(const NvmeCqe *c)
154
{
354
{
155
uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF;
355
if (!ctx) {
156
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
356
@@ -XXX,XX +XXX,XX @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
157
req = *preq;
357
qemu_mutex_init(&pool->lock);
158
assert(req.cid == cid);
358
qemu_cond_init(&pool->worker_stopped);
159
assert(req.cb);
359
qemu_sem_init(&pool->sem, 0);
160
- preq->busy = false;
360
- pool->max_threads = 64;
161
+ nvme_put_free_req_locked(q, preq);
361
pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
162
preq->cb = preq->opaque = NULL;
362
163
qemu_mutex_unlock(&q->lock);
363
QLIST_INIT(&pool->head);
164
req.cb(req.opaque, ret);
364
QTAILQ_INIT(&pool->request_list);
165
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
365
+
166
/* Notify the device so it can post more completions. */
366
+ thread_pool_update_params(pool, ctx);
167
smp_mb_release();
367
}
168
*q->cq.doorbell = cpu_to_le32(q->cq.head);
368
169
- if (!qemu_co_queue_empty(&q->free_req_queue)) {
369
ThreadPool *thread_pool_new(AioContext *ctx)
170
- replay_bh_schedule_oneshot_event(s->aio_context,
171
- nvme_free_req_queue_cb, q);
172
- }
173
+ nvme_wake_free_req_locked(s, q);
174
}
175
q->busy = false;
176
return progress;
177
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
178
r = nvme_cmd_map_qiov(bs, &cmd, req, qiov);
179
qemu_co_mutex_unlock(&s->dma_map_lock);
180
if (r) {
181
- req->busy = false;
182
+ nvme_put_free_req_and_wake(s, ioq, req);
183
return r;
184
}
185
nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
186
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
187
qemu_co_mutex_unlock(&s->dma_map_lock);
188
189
if (ret) {
190
- req->busy = false;
191
+ nvme_put_free_req_and_wake(s, ioq, req);
192
goto out;
193
}
194
195
--
370
--
196
2.26.2
371
2.35.1
197
diff view generated by jsdifflib
Deleted patch
1
Existing users access free_req_queue under q->lock. Document this.
2
1
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Reviewed-by: Sergio Lopez <slp@redhat.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Message-id: 20200617132201.1832152-6-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
block/nvme.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
11
12
diff --git a/block/nvme.c b/block/nvme.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/block/nvme.c
15
+++ b/block/nvme.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct {
17
} NVMeRequest;
18
19
typedef struct {
20
- CoQueue free_req_queue;
21
QemuMutex lock;
22
23
/* Fields protected by BQL */
24
@@ -XXX,XX +XXX,XX @@ typedef struct {
25
uint8_t *prp_list_pages;
26
27
/* Fields protected by @lock */
28
+ CoQueue free_req_queue;
29
NVMeQueue sq, cq;
30
int cq_phase;
31
int free_req_head;
32
--
33
2.26.2
34
diff view generated by jsdifflib
Deleted patch
1
QEMU block drivers are supposed to support aio_poll() from I/O
2
completion callback functions. This means completion processing must be
3
re-entrant.
4
1
5
The standard approach is to schedule a BH during completion processing
6
and cancel it at the end of processing. If aio_poll() is invoked by a
7
callback function then the BH will run. The BH continues the suspended
8
completion processing.
9
10
All of this means that request A's cb() can synchronously wait for
11
request B to complete. Previously the nvme block driver would hang
12
because it didn't process completions from nested aio_poll().
13
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Sergio Lopez <slp@redhat.com>
16
Message-id: 20200617132201.1832152-8-stefanha@redhat.com
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
19
block/nvme.c | 67 ++++++++++++++++++++++++++++++++++++++++------
20
block/trace-events | 2 +-
21
2 files changed, 60 insertions(+), 9 deletions(-)
22
23
diff --git a/block/nvme.c b/block/nvme.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/nvme.c
26
+++ b/block/nvme.c
27
@@ -XXX,XX +XXX,XX @@ typedef struct {
28
int cq_phase;
29
int free_req_head;
30
NVMeRequest reqs[NVME_NUM_REQS];
31
- bool busy;
32
int need_kick;
33
int inflight;
34
+
35
+ /* Thread-safe, no lock necessary */
36
+ QEMUBH *completion_bh;
37
} NVMeQueuePair;
38
39
/* Memory mapped registers */
40
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
41
#define NVME_BLOCK_OPT_DEVICE "device"
42
#define NVME_BLOCK_OPT_NAMESPACE "namespace"
43
44
+static void nvme_process_completion_bh(void *opaque);
45
+
46
static QemuOptsList runtime_opts = {
47
.name = "nvme",
48
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
49
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q,
50
51
static void nvme_free_queue_pair(NVMeQueuePair *q)
52
{
53
+ if (q->completion_bh) {
54
+ qemu_bh_delete(q->completion_bh);
55
+ }
56
qemu_vfree(q->prp_list_pages);
57
qemu_vfree(q->sq.queue);
58
qemu_vfree(q->cq.queue);
59
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
60
q->index = idx;
61
qemu_co_queue_init(&q->free_req_queue);
62
q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
63
+ q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
64
+ nvme_process_completion_bh, q);
65
r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
66
s->page_size * NVME_NUM_REQS,
67
false, &prp_list_iova);
68
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
69
NvmeCqe *c;
70
71
trace_nvme_process_completion(s, q->index, q->inflight);
72
- if (q->busy || s->plugged) {
73
- trace_nvme_process_completion_queue_busy(s, q->index);
74
+ if (s->plugged) {
75
+ trace_nvme_process_completion_queue_plugged(s, q->index);
76
return false;
77
}
78
- q->busy = true;
79
+
80
+ /*
81
+ * Support re-entrancy when a request cb() function invokes aio_poll().
82
+ * Pending completions must be visible to aio_poll() so that a cb()
83
+ * function can wait for the completion of another request.
84
+ *
85
+ * The aio_poll() loop will execute our BH and we'll resume completion
86
+ * processing there.
87
+ */
88
+ qemu_bh_schedule(q->completion_bh);
89
+
90
assert(q->inflight >= 0);
91
while (q->inflight) {
92
int ret;
93
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
94
assert(req.cb);
95
nvme_put_free_req_locked(q, preq);
96
preq->cb = preq->opaque = NULL;
97
- qemu_mutex_unlock(&q->lock);
98
- req.cb(req.opaque, ret);
99
- qemu_mutex_lock(&q->lock);
100
q->inflight--;
101
+ qemu_mutex_unlock(&q->lock);
102
+ req.cb(req.opaque, ret);
103
+ qemu_mutex_lock(&q->lock);
104
progress = true;
105
}
106
if (progress) {
107
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
108
*q->cq.doorbell = cpu_to_le32(q->cq.head);
109
nvme_wake_free_req_locked(q);
110
}
111
- q->busy = false;
112
+
113
+ qemu_bh_cancel(q->completion_bh);
114
+
115
return progress;
116
}
117
118
+static void nvme_process_completion_bh(void *opaque)
119
+{
120
+ NVMeQueuePair *q = opaque;
121
+
122
+ /*
123
+ * We're being invoked because a nvme_process_completion() cb() function
124
+ * called aio_poll(). The callback may be waiting for further completions
125
+ * so notify the device that it has space to fill in more completions now.
126
+ */
127
+ smp_mb_release();
128
+ *q->cq.doorbell = cpu_to_le32(q->cq.head);
129
+ nvme_wake_free_req_locked(q);
130
+
131
+ nvme_process_completion(q);
132
+}
133
+
134
static void nvme_trace_command(const NvmeCmd *cmd)
135
{
136
int i;
137
@@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs)
138
{
139
BDRVNVMeState *s = bs->opaque;
140
141
+ for (int i = 0; i < s->nr_queues; i++) {
142
+ NVMeQueuePair *q = s->queues[i];
143
+
144
+ qemu_bh_delete(q->completion_bh);
145
+ q->completion_bh = NULL;
146
+ }
147
+
148
aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
149
false, NULL, NULL);
150
}
151
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,
152
s->aio_context = new_context;
153
aio_set_event_notifier(new_context, &s->irq_notifier,
154
false, nvme_handle_event, nvme_poll_cb);
155
+
156
+ for (int i = 0; i < s->nr_queues; i++) {
157
+ NVMeQueuePair *q = s->queues[i];
158
+
159
+ q->completion_bh =
160
+ aio_bh_new(new_context, nvme_process_completion_bh, q);
161
+ }
162
}
163
164
static void nvme_aio_plug(BlockDriverState *bs)
165
diff --git a/block/trace-events b/block/trace-events
166
index XXXXXXX..XXXXXXX 100644
167
--- a/block/trace-events
168
+++ b/block/trace-events
169
@@ -XXX,XX +XXX,XX @@ nvme_kick(void *s, int queue) "s %p queue %d"
170
nvme_dma_flush_queue_wait(void *s) "s %p"
171
nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
172
nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d"
173
-nvme_process_completion_queue_busy(void *s, int index) "s %p queue %d"
174
+nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d"
175
nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
176
nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
177
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
178
--
179
2.26.2
180
diff view generated by jsdifflib