The following changes since commit 1d60bb4b14601e38ed17384277aa4c30c57925d3:

  Merge tag 'pull-request-2022-03-15v2' of https://gitlab.com/thuth/qemu into staging (2022-03-16 10:43:58 +0000)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to fc8796465c6cd4091efe6a2f8b353f07324f49c7:

  aio-posix: fix spurious ->poll_ready() callbacks in main loop (2022-03-17 11:23:18 +0000)

----------------------------------------------------------------
Pull request

Bug fixes for 7.0.

----------------------------------------------------------------

Haiyue Wang (1):
      aio-posix: fix build failure io_uring 2.2

Stefan Hajnoczi (1):
      aio-posix: fix spurious ->poll_ready() callbacks in main loop

 util/aio-posix.h      |  1 +
 util/aio-posix.c      | 32 ++++++++++++++++++--------------
 util/fdmon-io_uring.c |  4 ++++
 3 files changed, 23 insertions(+), 14 deletions(-)

--
2.35.1

The following changes since commit b0fbe46ad82982b289a44ee2495b59b0bad8a842:

  Update version for v2.11.0-rc0 release (2017-11-07 16:05:28 +0000)

are available in the git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to ef6dada8b44e1e7c4bec5c1115903af9af415b50:

  util/async: use atomic_mb_set in qemu_bh_cancel (2017-11-08 19:09:15 +0000)

----------------------------------------------------------------
Pull request

v2:
 * v1 emails 2/3 and 3/3 weren't sent due to an email failure
 * Included Sergio's updated wording in the commit description

----------------------------------------------------------------

Sergio Lopez (1):
      util/async: use atomic_mb_set in qemu_bh_cancel

Stefan Hajnoczi (1):
      tests-aio-multithread: fix /aio/multi/schedule race condition

 tests/test-aio-multithread.c | 5 ++---
 util/async.c                 | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

--
2.13.6

When ->poll() succeeds the AioHandler is placed on the ready list with
revents set to the magic value 0. This magic value causes
aio_dispatch_handler() to invoke ->poll_ready() instead of ->io_read()
for G_IO_IN or ->io_write() for G_IO_OUT.

This magic value 0 hack works for the IOThread where AioHandlers are
placed on ->ready_list and processed by aio_dispatch_ready_handlers().
It does not work for the main loop where all AioHandlers are processed
by aio_dispatch_handlers(), even those that are not ready and have a
revents value of 0.

As a result the main loop invokes ->poll_ready() on AioHandlers that are
not ready. These spurious ->poll_ready() calls waste CPU cycles and
could lead to crashes if the code assumes ->poll() must have succeeded
before ->poll_ready() is called (a reasonable assumption but I haven't
seen it in practice).

Stop using revents to track whether ->poll_ready() will be called on an
AioHandler. Introduce a separate AioHandler->poll_ready field instead.
This eliminates spurious ->poll_ready() calls in the main loop.

Fixes: 826cc32423db2a99d184dbf4f507c737d7e7a4ae ("aio-posix: split poll check from ready handler")
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reported-by: Jason Wang <jasowang@redhat.com>
Tested-by: Jason Wang <jasowang@redhat.com>
Message-id: 20220223155703.136833-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.h |  1 +
 util/aio-posix.c | 32 ++++++++++++++++++--------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/util/aio-posix.h b/util/aio-posix.h
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
     unsigned flags; /* see fdmon-io_uring.c */
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
+    bool poll_ready; /* has polling detected an event? */
     bool is_external;
 };
 
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@
 #include "trace.h"
 #include "aio-posix.h"
 
-/*
- * G_IO_IN and G_IO_OUT are not appropriate revents values for polling, since
- * the handler may not need to access the file descriptor. For example, the
- * handler doesn't need to read from an EventNotifier if it polled a memory
- * location and a read syscall would be slow. Define our own unique revents
- * value to indicate that polling determined this AioHandler is ready.
- */
-#define REVENTS_POLL_READY 0
-
 /* Stop userspace polling on a handler if it isn't active for some time */
 #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
 
@@ -XXX,XX +XXX,XX @@ void aio_add_ready_handler(AioHandlerList *ready_list,
     QLIST_INSERT_HEAD(ready_list, node, node_ready);
 }
 
+static void aio_add_poll_ready_handler(AioHandlerList *ready_list,
+                                       AioHandler *node)
+{
+    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+    node->poll_ready = true;
+    QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 {
     AioHandler *node;
@@ -XXX,XX +XXX,XX @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
     }
 
     node->pfd.revents = 0;
+    node->poll_ready = false;
 
     /* If the fd monitor has already marked it deleted, leave it alone */
     if (QLIST_IS_INSERTED(node, node_deleted)) {
@@ -XXX,XX +XXX,XX @@ static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list,
 
         /* Poll one last time in case ->io_poll_end() raced with the event */
         if (!started && node->io_poll(node->opaque)) {
-            aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY);
+            aio_add_poll_ready_handler(ready_list, node);
             progress = true;
         }
     }
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx)
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         int revents;
 
+        /* TODO should this check poll ready? */
         revents = node->pfd.revents & node->pfd.events;
         if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
             aio_node_check(ctx, node->is_external)) {
@@ -XXX,XX +XXX,XX @@ static void aio_free_deleted_handlers(AioContext *ctx)
 static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
 {
     bool progress = false;
+    bool poll_ready;
     int revents;
 
     revents = node->pfd.revents & node->pfd.events;
     node->pfd.revents = 0;
 
+    poll_ready = node->poll_ready;
+    node->poll_ready = false;
+
     /*
      * Start polling AioHandlers when they become ready because activity is
      * likely to continue. Note that starvation is theoretically possible when
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
         QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
     }
     if (!QLIST_IS_INSERTED(node, node_deleted) &&
-        revents == 0 &&
+        poll_ready && revents == 0 &&
         aio_node_check(ctx, node->is_external) &&
         node->io_poll_ready) {
         node->io_poll_ready(node->opaque);
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers_once(AioContext *ctx,
     QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
         if (aio_node_check(ctx, node->is_external) &&
             node->io_poll(node->opaque)) {
-            aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY);
+            aio_add_poll_ready_handler(ready_list, node);
 
             node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
 
@@ -XXX,XX +XXX,XX @@ static bool remove_idle_poll_handlers(AioContext *ctx,
              * this causes progress.
              */
             if (node->io_poll(node->opaque)) {
-                aio_add_ready_handler(ready_list, node,
-                                      REVENTS_POLL_READY);
+                aio_add_poll_ready_handler(ready_list, node);
                 progress = true;
             }
         }
--
2.35.1
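
[Illustration: a minimal standalone model of the dispatch decision this
patch changes. The field names loosely follow util/aio-posix.c, but this
is an editor's sketch, not QEMU code. With the magic value, an idle
handler scanned by the main loop is indistinguishable from one whose
->poll() succeeded; an explicit flag that is consumed on dispatch removes
that ambiguity.]

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        int revents;     /* events reported by the fd monitor (e.g. ppoll()) */
        bool poll_ready; /* set when userspace polling detected an event */
    } Handler;

    /* Before: revents == 0 doubled as "polling succeeded", so any handler
     * visited with no pending fd events also took the ->poll_ready() path. */
    static bool call_poll_ready_before(Handler *h)
    {
        return h->revents == 0;
    }

    /* After: the flag is tracked separately and cleared on dispatch. */
    static bool call_poll_ready_after(Handler *h)
    {
        bool poll_ready = h->poll_ready;

        h->poll_ready = false;
        return poll_ready && h->revents == 0;
    }

    int main(void)
    {
        Handler idle = { .revents = 0, .poll_ready = false };

        /* prints before=1 (a spurious callback) after=0 */
        printf("before=%d after=%d\n",
               call_poll_ready_before(&idle),
               call_poll_ready_after(&idle));
        return 0;
    }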

test_multi_co_schedule_entry() set to_schedule[id] in the final loop
iteration before terminating the coroutine. There is a race condition
where the main thread attempts to enter the terminating or terminated
coroutine when signalling coroutines to stop:

  atomic_mb_set(&now_stopping, true);
  for (i = 0; i < NUM_CONTEXTS; i++) {
      ctx_run(i, finish_cb, NULL);   <--- enters dead coroutine!
      to_schedule[i] = NULL;
  }

Make sure only to set to_schedule[id] if this coroutine really needs to
be scheduled!

Reported-by: "R.Nageswara Sastry" <nasastry@in.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20171106190233.1175-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/test-aio-multithread.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-aio-multithread.c
+++ b/tests/test-aio-multithread.c
@@ -XXX,XX +XXX,XX @@ static void finish_cb(void *opaque)
 static coroutine_fn void test_multi_co_schedule_entry(void *opaque)
 {
     g_assert(to_schedule[id] == NULL);
-    atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
 
     while (!atomic_mb_read(&now_stopping)) {
         int n;
 
         n = g_test_rand_int_range(0, NUM_CONTEXTS);
         schedule_next(n);
+
+        atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
         qemu_coroutine_yield();
-
         g_assert(to_schedule[id] == NULL);
-        atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
     }
 }
--
2.13.6
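
[Illustration: the ordering invariant behind this fix is "publish a
handle to yourself only when you are about to suspend, so the scheduler
can never enter a coroutine that has already finished". An editor's
sketch with the QEMU coroutine and glib primitives stubbed out; it
compiles as C11 but is not QEMU code.]

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    #define NUM_CONTEXTS 4

    static void *to_schedule[NUM_CONTEXTS];
    static atomic_bool now_stopping;

    static void *qemu_coroutine_self(void) { static int self; return &self; }
    static void schedule_next(int n) { (void)n; }   /* stub */
    static void qemu_coroutine_yield(void) { }      /* stub */

    static void entry(int id)
    {
        /* Pre-fix shape: the handle was also published here, before the
         * loop, and republished after the final yield, so it could stay
         * visible after the coroutine terminated. */
        while (!atomic_load(&now_stopping)) {
            schedule_next(0);
            /* Publish only when about to park in yield: the main thread
             * can then only enter a coroutine that is really suspended. */
            to_schedule[id] = qemu_coroutine_self();
            qemu_coroutine_yield();
            to_schedule[id] = NULL; /* in QEMU the resumer clears the slot
                                     * before entering us; modeled here
                                     * because yield is a stub */
        }
    }

    int main(void)
    {
        atomic_store(&now_stopping, true);  /* shut down immediately */
        entry(0);

        /* Invariant the fix restores: a finished coroutine leaves no
         * stale handle behind (the pre-fix shape would fail this). */
        printf("%s\n", to_schedule[0] == NULL ? "ok" : "stale handle!");
        return 0;
    }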

From: Haiyue Wang <haiyue.wang@intel.com>

liburing's fix "Don't truncate addr fields to 32-bit on 32-bit" changed
io_uring_prep_poll_remove() to take a 64-bit user_data value instead of
a pointer:
https://git.kernel.dk/cgit/liburing/commit/?id=d84c29b19ed0b130000619cff40141bb1fc3615b

This leads to a build failure:
../util/fdmon-io_uring.c: In function ‘add_poll_remove_sqe’:
../util/fdmon-io_uring.c:182:36: error: passing argument 2 of ‘io_uring_prep_poll_remove’ makes integer from pointer without a cast [-Werror=int-conversion]
  182 |     io_uring_prep_poll_remove(sqe, node);
      |                                    ^~~~
      |                                    |
      |                                    AioHandler *
In file included from /root/io/qemu/include/block/aio.h:18,
                 from ../util/aio-posix.h:20,
                 from ../util/fdmon-io_uring.c:49:
/usr/include/liburing.h:415:17: note: expected ‘__u64’ {aka ‘long long unsigned int’} but argument is of type ‘AioHandler *’
  415 |                                 __u64 user_data)
      |                                 ~~~~~~^~~~~~~~~
cc1: all warnings being treated as errors

Use LIBURING_HAVE_DATA64 to check whether liburing supports the 64-bit
variants of the get/set user_data helpers, and convert the parameter to
the right data type accordingly.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Message-Id: <20220221162401.45415-1-haiyue.wang@intel.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/fdmon-io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -XXX,XX +XXX,XX @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
 {
     struct io_uring_sqe *sqe = get_sqe(ctx);
 
+#ifdef LIBURING_HAVE_DATA64
+    io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)node);
+#else
     io_uring_prep_poll_remove(sqe, node);
+#endif
 }
 
 /* Add a timeout that self-cancels when another cqe becomes ready */
--
2.35.1
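
[Illustration: the same compatibility check packaged as a wrapper, so a
single call site carries the #ifdef. LIBURING_HAVE_DATA64 is defined by
liburing 2.2+, where the prep helpers take __u64 user_data rather than a
pointer. An editor's sketch assuming liburing headers are installed; the
wrapper name and the main() harness are hypothetical, not QEMU code.]

    #include <stdint.h>
    #include <liburing.h>

    static inline void prep_poll_remove_compat(struct io_uring_sqe *sqe,
                                               void *user_data)
    {
    #ifdef LIBURING_HAVE_DATA64
        /* liburing >= 2.2: user_data is a plain 64-bit integer */
        io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)user_data);
    #else
        /* older liburing: user_data is a pointer */
        io_uring_prep_poll_remove(sqe, user_data);
    #endif
    }

    int main(void)
    {
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        int token;

        if (io_uring_queue_init(1, &ring, 0) < 0)
            return 1;
        sqe = io_uring_get_sqe(&ring);
        if (sqe)
            prep_poll_remove_compat(sqe, &token); /* nothing matches: harmless */
        io_uring_queue_exit(&ring);
        return 0;
    }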

From: Sergio Lopez <slp@redhat.com>

Commit b7a745d added a qemu_bh_cancel call to the completion function
as an optimization to prevent it from unnecessarily rescheduling itself.

This completion function is scheduled from worker_thread, after setting
the state of a ThreadPoolElement to THREAD_DONE.

This was considered to be safe, as the completion function restarts the
loop just after the call to qemu_bh_cancel. But, as this loop lacks a HW
memory barrier, the read of req->state may actually happen _before_ the
call, seeing it still as THREAD_QUEUED, and ending the completion
function without having processed a pending TPE linked at pool->head:

             worker thread             |            I/O thread
    ------------------------------------------------------------------------
                                       | speculatively read req->state
    req->state = THREAD_DONE;          |
    qemu_bh_schedule(p->completion_bh) |
      bh->scheduled = 1;               |
                                       | qemu_bh_cancel(p->completion_bh)
                                       |   bh->scheduled = 0;
                                       | if (req->state == THREAD_DONE)
                                       |   // sees THREAD_QUEUED

The source of the misunderstanding was that qemu_bh_cancel is now being
used by the _consumer_ rather than the producer, and therefore now needs
to have acquire semantics just like e.g. aio_bh_poll.

In some situations, if there are no other independent requests in the
same aio context that could eventually trigger the scheduling of the
completion function, the omitted TPE and all operations pending on it
will get stuck forever.

[Added Sergio's updated wording about the HW memory barrier.
--Stefan]

Signed-off-by: Sergio Lopez <slp@redhat.com>
Message-id: 20171108063447.2842-1-slp@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/async.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ void qemu_bh_schedule(QEMUBH *bh)
  */
 void qemu_bh_cancel(QEMUBH *bh)
 {
-    bh->scheduled = 0;
+    atomic_mb_set(&bh->scheduled, 0);
 }
 
 /* This func is async.The bottom half will do the delete action at the finial
--
2.13.6
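
[Illustration: the publish/consume pairing behind this fix, modeled with
C11 atomics; an editor's sketch, not QEMU code. The release store stands
in for qemu_bh_schedule and the acquire exchange for the fixed
qemu_bh_cancel. QEMU's atomic_mb_set is a full barrier, which is
stronger than the acquire shown here.]

    #include <stdatomic.h>
    #include <stdio.h>

    enum { THREAD_QUEUED, THREAD_DONE };

    static atomic_int state = THREAD_QUEUED;
    static atomic_int scheduled;

    static void producer(void)               /* worker_thread */
    {
        atomic_store_explicit(&state, THREAD_DONE, memory_order_relaxed);
        /* qemu_bh_schedule: the release makes the state write visible
         * to anyone who observes scheduled == 1 */
        atomic_store_explicit(&scheduled, 1, memory_order_release);
    }

    static void consumer(void)               /* completion function */
    {
        /* qemu_bh_cancel after the fix: the acquire forbids the later
         * read of state from being satisfied before this point */
        if (atomic_exchange_explicit(&scheduled, 0, memory_order_acquire)) {
            int s = atomic_load_explicit(&state, memory_order_relaxed);

            printf("state = %s\n", s == THREAD_DONE ? "DONE" : "QUEUED");
        }
    }

    int main(void)
    {
        producer();
        consumer();   /* prints "state = DONE"; without the acquire, a
                       * concurrent consumer could still see QUEUED */
        return 0;
    }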