The following changes since commit 1d60bb4b14601e38ed17384277aa4c30c57925d3:

  Merge tag 'pull-request-2022-03-15v2' of https://gitlab.com/thuth/qemu into staging (2022-03-16 10:43:58 +0000)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to fc8796465c6cd4091efe6a2f8b353f07324f49c7:

  aio-posix: fix spurious ->poll_ready() callbacks in main loop (2022-03-17 11:23:18 +0000)

----------------------------------------------------------------
Pull request

Bug fixes for 7.0.

----------------------------------------------------------------

Haiyue Wang (1):
  aio-posix: fix build failure io_uring 2.2

Stefan Hajnoczi (1):
  aio-posix: fix spurious ->poll_ready() callbacks in main loop

 util/aio-posix.h      |  1 +
 util/aio-posix.c      | 32 ++++++++++++++++++--------------
 util/fdmon-io_uring.c |  4 ++++
 3 files changed, 23 insertions(+), 14 deletions(-)

-- 
2.35.1

When ->poll() succeeds the AioHandler is placed on the ready list with
revents set to the magic value 0. This magic value causes
aio_dispatch_handler() to invoke ->poll_ready() instead of ->io_read()
for G_IO_IN or ->io_write() for G_IO_OUT.

This magic value 0 hack works for the IOThread, where AioHandlers are
placed on ->ready_list and processed by aio_dispatch_ready_handlers().
It does not work for the main loop, where all AioHandlers are processed
by aio_dispatch_handlers(), even those that are not ready and have a
revents value of 0.

As a result the main loop invokes ->poll_ready() on AioHandlers that are
not ready. These spurious ->poll_ready() calls waste CPU cycles and
could lead to crashes if the code assumes ->poll() must have succeeded
before ->poll_ready() is called (a reasonable assumption, but I haven't
seen it in practice).

Stop using revents to track whether ->poll_ready() will be called on an
AioHandler. Introduce a separate AioHandler->poll_ready field instead.
This eliminates spurious ->poll_ready() calls in the main loop.
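
In outline, the dispatch decision then becomes the following (a
condensed sketch of aio_dispatch_handler() with this patch applied;
the deletion check, aio_node_check(), and the poll-start heuristics
are omitted):

    int revents = node->pfd.revents & node->pfd.events;
    bool poll_ready = node->poll_ready;

    node->pfd.revents = 0;
    node->poll_ready = false;      /* consume the flag exactly once */

    if (poll_ready && revents == 0 && node->io_poll_ready) {
        /* only reached when ->poll() actually succeeded earlier */
        node->io_poll_ready(node->opaque);
    }
    if ((revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) && node->io_read) {
        node->io_read(node->opaque);
    }
    if ((revents & (G_IO_OUT | G_IO_ERR)) && node->io_write) {
        node->io_write(node->opaque);
    }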

Fixes: 826cc32423db2a99d184dbf4f507c737d7e7a4ae ("aio-posix: split poll check from ready handler")
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reported-by: Jason Wang <jasowang@redhat.com>
Tested-by: Jason Wang <jasowang@redhat.com>
Message-id: 20220223155703.136833-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.h |  1 +
 util/aio-posix.c | 32 ++++++++++++++++++--------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/util/aio-posix.h b/util/aio-posix.h
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
     unsigned flags; /* see fdmon-io_uring.c */
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
+    bool poll_ready; /* has polling detected an event? */
     bool is_external;
 };
 
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@
 #include "trace.h"
 #include "aio-posix.h"
 
-/*
- * G_IO_IN and G_IO_OUT are not appropriate revents values for polling, since
- * the handler may not need to access the file descriptor. For example, the
- * handler doesn't need to read from an EventNotifier if it polled a memory
- * location and a read syscall would be slow. Define our own unique revents
- * value to indicate that polling determined this AioHandler is ready.
- */
-#define REVENTS_POLL_READY 0
-
 /* Stop userspace polling on a handler if it isn't active for some time */
 #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
 
@@ -XXX,XX +XXX,XX @@ void aio_add_ready_handler(AioHandlerList *ready_list,
     QLIST_INSERT_HEAD(ready_list, node, node_ready);
 }
 
+static void aio_add_poll_ready_handler(AioHandlerList *ready_list,
+                                       AioHandler *node)
+{
+    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+    node->poll_ready = true;
+    QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 {
     AioHandler *node;
@@ -XXX,XX +XXX,XX @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
     }
 
     node->pfd.revents = 0;
+    node->poll_ready = false;
 
     /* If the fd monitor has already marked it deleted, leave it alone */
     if (QLIST_IS_INSERTED(node, node_deleted)) {
@@ -XXX,XX +XXX,XX @@ static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list,
 
         /* Poll one last time in case ->io_poll_end() raced with the event */
         if (!started && node->io_poll(node->opaque)) {
-            aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY);
+            aio_add_poll_ready_handler(ready_list, node);
             progress = true;
         }
     }
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx)
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         int revents;
 
+        /* TODO should this check poll ready? */
         revents = node->pfd.revents & node->pfd.events;
         if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
             aio_node_check(ctx, node->is_external)) {
@@ -XXX,XX +XXX,XX @@ static void aio_free_deleted_handlers(AioContext *ctx)
 static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
 {
     bool progress = false;
+    bool poll_ready;
     int revents;
 
     revents = node->pfd.revents & node->pfd.events;
     node->pfd.revents = 0;
 
+    poll_ready = node->poll_ready;
+    node->poll_ready = false;
+
     /*
      * Start polling AioHandlers when they become ready because activity is
      * likely to continue. Note that starvation is theoretically possible when
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
         QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
     }
     if (!QLIST_IS_INSERTED(node, node_deleted) &&
-        revents == 0 &&
+        poll_ready && revents == 0 &&
         aio_node_check(ctx, node->is_external) &&
         node->io_poll_ready) {
         node->io_poll_ready(node->opaque);
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers_once(AioContext *ctx,
     QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
         if (aio_node_check(ctx, node->is_external) &&
             node->io_poll(node->opaque)) {
-            aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY);
+            aio_add_poll_ready_handler(ready_list, node);
 
             node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
 
@@ -XXX,XX +XXX,XX @@ static bool remove_idle_poll_handlers(AioContext *ctx,
              * this causes progress.
              */
             if (node->io_poll(node->opaque)) {
-                aio_add_ready_handler(ready_list, node,
-                                      REVENTS_POLL_READY);
+                aio_add_poll_ready_handler(ready_list, node);
                 progress = true;
             }
         }
-- 
2.35.1

From: Haiyue Wang <haiyue.wang@intel.com>

The liburing fix "Don't truncate addr fields to 32-bit on 32-bit":
https://git.kernel.dk/cgit/liburing/commit/?id=d84c29b19ed0b130000619cff40141bb1fc3615b

changed io_uring_prep_poll_remove() to take a __u64 user_data argument
instead of a pointer. This leads to a build failure:

../util/fdmon-io_uring.c: In function ‘add_poll_remove_sqe’:
../util/fdmon-io_uring.c:182:36: error: passing argument 2 of ‘io_uring_prep_poll_remove’ makes integer from pointer without a cast [-Werror=int-conversion]
  182 |     io_uring_prep_poll_remove(sqe, node);
      |                                    ^~~~
      |                                    |
      |                                    AioHandler *
In file included from /root/io/qemu/include/block/aio.h:18,
                 from ../util/aio-posix.h:20,
                 from ../util/fdmon-io_uring.c:49:
/usr/include/liburing.h:415:17: note: expected ‘__u64’ {aka ‘long long unsigned int’} but argument is of type ‘AioHandler *’
  415 |                 __u64 user_data)
      |                 ~~~~~~^~~~~~~~~
cc1: all warnings being treated as errors

Use LIBURING_HAVE_DATA64 to check whether liburing supports the 64-bit
variants of the get/set user_data helpers, and convert the parameter to
the right data type accordingly.
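
As an aside, the same guard works for the sqe/cqe user_data accessors
that this macro advertises. A minimal standalone sketch (an
illustration only, not part of this patch; io_uring_sqe_set_data64()
and io_uring_cqe_get_data64() are the liburing 2.2 additions gated by
LIBURING_HAVE_DATA64, while the pointer-based accessors exist in all
versions):

    #include <liburing.h>
    #include <stdint.h>

    /* Store an application pointer in an sqe and read it back from the
     * completed cqe, on both old (void *) and new (__u64) liburing. */
    static void set_user_ptr(struct io_uring_sqe *sqe, void *ptr)
    {
    #ifdef LIBURING_HAVE_DATA64
        io_uring_sqe_set_data64(sqe, (__u64)(uintptr_t)ptr);
    #else
        io_uring_sqe_set_data(sqe, ptr);
    #endif
    }

    static void *get_user_ptr(struct io_uring_cqe *cqe)
    {
    #ifdef LIBURING_HAVE_DATA64
        return (void *)(uintptr_t)io_uring_cqe_get_data64(cqe);
    #else
        return io_uring_cqe_get_data(cqe);
    #endif
    }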

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Message-Id: <20220221162401.45415-1-haiyue.wang@intel.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/fdmon-io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -XXX,XX +XXX,XX @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
 {
     struct io_uring_sqe *sqe = get_sqe(ctx);
 
+#ifdef LIBURING_HAVE_DATA64
+    io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)node);
+#else
     io_uring_prep_poll_remove(sqe, node);
+#endif
 }
 
 /* Add a timeout that self-cancels when another cqe becomes ready */
-- 
2.35.1