1 | The following changes since commit 22c5f446514a2a4bb0dbe1fea26713da92fc85fa: | 1 | The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190211' into staging (2019-02-11 17:04:57 +0000) | 3 | Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 9a6719d572e99a4e79f589d0b73f7475b86f982d: | 9 | for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a: |
10 | 10 | ||
11 | virtio-blk: cleanup using VirtIOBlock *s and VirtIODevice *vdev (2019-02-12 11:49:17 +0800) | 11 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | The main fix here is for io_uring. Spurious -EAGAIN errors can happen and the | ||
17 | request needs to be resubmitted. | ||
18 | |||
19 | The MAINTAINERS changes carry no risk and we might as well include them in QEMU | ||
20 | 6.1. | ||
21 | |||
16 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
17 | 23 | ||
18 | Peter Xu (1): | 24 | Fabian Ebner (1): |
19 | iothread: fix iothread hang when stop too soon | 25 | block/io_uring: resubmit when result is -EAGAIN |
26 | |||
27 | Philippe Mathieu-Daudé (1): | ||
28 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver | ||
20 | 29 | ||
21 | Stefano Garzarella (1): | 30 | Stefano Garzarella (1): |
22 | virtio-blk: cleanup using VirtIOBlock *s and VirtIODevice *vdev | 31 | MAINTAINERS: add Stefano Garzarella as io_uring reviewer |
23 | 32 | ||
24 | Vladimir Sementsov-Ogievskiy (1): | 33 | MAINTAINERS | 2 ++ |
25 | qemugdb/coroutine: fix arch_prctl has unknown return type | 34 | block/io_uring.c | 16 +++++++++++++++- |
26 | 35 | 2 files changed, 17 insertions(+), 1 deletion(-) | |
27 | hw/block/virtio-blk.c | 22 +++++++++------------- | ||
28 | iothread.c | 6 +++++- | ||
29 | scripts/qemugdb/coroutine.py | 2 +- | ||
30 | 3 files changed, 15 insertions(+), 15 deletions(-) | ||
31 | 36 | ||
32 | -- | 37 | -- |
33 | 2.20.1 | 38 | 2.31.1 |
34 | 39 | ||
35 | diff view generated by jsdifflib |
1 | From: Stefano Garzarella <sgarzare@redhat.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | In several part we still using req->dev or VIRTIO_DEVICE(req->dev) | 3 | I've been working with io_uring for a while so I'd like to help |
4 | when we have already defined s and vdev pointers: | 4 | with reviews. |
5 | VirtIOBlock *s = req->dev; | ||
6 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | ||
7 | 5 | ||
8 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> | 6 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> |
9 | Reviewed-by: Liam Merwick <liam.merwick@oracle.com> | 7 | Message-Id: <20210728131515.131045-1-sgarzare@redhat.com> |
10 | Message-id: 20190208142347.214815-1-sgarzare@redhat.com | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
12 | --- | 9 | --- |
13 | hw/block/virtio-blk.c | 22 +++++++++------------- | 10 | MAINTAINERS | 1 + |
14 | 1 file changed, 9 insertions(+), 13 deletions(-) | 11 | 1 file changed, 1 insertion(+) |
15 | 12 | ||
16 | diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c | 13 | diff --git a/MAINTAINERS b/MAINTAINERS |
17 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/hw/block/virtio-blk.c | 15 | --- a/MAINTAINERS |
19 | +++ b/hw/block/virtio-blk.c | 16 | +++ b/MAINTAINERS |
20 | @@ -XXX,XX +XXX,XX @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) | 17 | @@ -XXX,XX +XXX,XX @@ Linux io_uring |
21 | static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, | 18 | M: Aarushi Mehta <mehta.aaru20@gmail.com> |
22 | bool is_read) | 19 | M: Julia Suvorova <jusual@redhat.com> |
23 | { | 20 | M: Stefan Hajnoczi <stefanha@redhat.com> |
24 | - BlockErrorAction action = blk_get_error_action(req->dev->blk, | 21 | +R: Stefano Garzarella <sgarzare@redhat.com> |
25 | - is_read, error); | 22 | L: qemu-block@nongnu.org |
26 | VirtIOBlock *s = req->dev; | 23 | S: Maintained |
27 | + BlockErrorAction action = blk_get_error_action(s->blk, is_read, error); | 24 | F: block/io_uring.c |
28 | |||
29 | if (action == BLOCK_ERROR_ACTION_STOP) { | ||
30 | /* Break the link as the next request is going to be parsed from the | ||
31 | @@ -XXX,XX +XXX,XX @@ static void virtio_blk_flush_complete(void *opaque, int ret) | ||
32 | } | ||
33 | |||
34 | virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); | ||
35 | - block_acct_done(blk_get_stats(req->dev->blk), &req->acct); | ||
36 | + block_acct_done(blk_get_stats(s->blk), &req->acct); | ||
37 | virtio_blk_free_request(req); | ||
38 | |||
39 | out: | ||
40 | @@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) | ||
41 | - sizeof(struct virtio_blk_inhdr); | ||
42 | iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); | ||
43 | |||
44 | - type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); | ||
45 | + type = virtio_ldl_p(vdev, &req->out.type); | ||
46 | |||
47 | /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER | ||
48 | * is an optional flag. Although a guest should not send this flag if | ||
49 | @@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) | ||
50 | case VIRTIO_BLK_T_IN: | ||
51 | { | ||
52 | bool is_write = type & VIRTIO_BLK_T_OUT; | ||
53 | - req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev), | ||
54 | - &req->out.sector); | ||
55 | + req->sector_num = virtio_ldq_p(vdev, &req->out.sector); | ||
56 | |||
57 | if (is_write) { | ||
58 | qemu_iovec_init_external(&req->qiov, out_iov, out_num); | ||
59 | @@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) | ||
60 | req->qiov.size / BDRV_SECTOR_SIZE); | ||
61 | } | ||
62 | |||
63 | - if (!virtio_blk_sect_range_ok(req->dev, req->sector_num, | ||
64 | - req->qiov.size)) { | ||
65 | + if (!virtio_blk_sect_range_ok(s, req->sector_num, req->qiov.size)) { | ||
66 | virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); | ||
67 | - block_acct_invalid(blk_get_stats(req->dev->blk), | ||
68 | + block_acct_invalid(blk_get_stats(s->blk), | ||
69 | is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); | ||
70 | virtio_blk_free_request(req); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | - block_acct_start(blk_get_stats(req->dev->blk), | ||
75 | - &req->acct, req->qiov.size, | ||
76 | + block_acct_start(blk_get_stats(s->blk), &req->acct, req->qiov.size, | ||
77 | is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); | ||
78 | |||
79 | /* merge would exceed maximum number of requests or IO direction | ||
80 | * changes */ | ||
81 | if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS || | ||
82 | is_write != mrb->is_write || | ||
83 | - !req->dev->conf.request_merging)) { | ||
84 | - virtio_blk_submit_multireq(req->dev->blk, mrb); | ||
85 | + !s->conf.request_merging)) { | ||
86 | + virtio_blk_submit_multireq(s->blk, mrb); | ||
87 | } | ||
88 | |||
89 | assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS); | ||
90 | -- | 25 | -- |
91 | 2.20.1 | 26 | 2.31.1 |
92 | 27 | ||
93 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Fabian Ebner <f.ebner@proxmox.com> |
---|---|---|---|
2 | 2 | ||
3 | qemu coroutine command results in following error output: | 3 | Linux SCSI can throw spurious -EAGAIN in some corner cases in its |
4 | completion path, which will end up being the result in the completed | ||
5 | io_uring request. | ||
4 | 6 | ||
5 | Python Exception <class 'gdb.error'> 'arch_prctl' has unknown return | 7 | Resubmitting such requests should allow block jobs to complete, even |
6 | type; cast the call to its declared return type: Error occurred in | 8 | if such spurious errors are encountered. |
7 | Python command: 'arch_prctl' has unknown return type; cast the call to | ||
8 | its declared return type | ||
9 | 9 | ||
10 | Fix it by giving it what it wants: arch_prctl return type. | 10 | Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com> |
11 | 11 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | |
12 | Information on the topic: | 12 | Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> |
13 | https://sourceware.org/gdb/onlinedocs/gdb/Calling.html | 13 | Message-id: 20210729091029.65369-1-f.ebner@proxmox.com |
14 | |||
15 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
16 | Message-id: 20190206151425.105871-1-vsementsov@virtuozzo.com | ||
17 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
18 | --- | 15 | --- |
19 | scripts/qemugdb/coroutine.py | 2 +- | 16 | block/io_uring.c | 16 +++++++++++++++- |
20 | 1 file changed, 1 insertion(+), 1 deletion(-) | 17 | 1 file changed, 15 insertions(+), 1 deletion(-) |
21 | 18 | ||
22 | diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py | 19 | diff --git a/block/io_uring.c b/block/io_uring.c |
23 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/scripts/qemugdb/coroutine.py | 21 | --- a/block/io_uring.c |
25 | +++ b/scripts/qemugdb/coroutine.py | 22 | +++ b/block/io_uring.c |
26 | @@ -XXX,XX +XXX,XX @@ def get_fs_base(): | 23 | @@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s) |
27 | pthread_self().''' | 24 | total_bytes = ret + luringcb->total_read; |
28 | # %rsp - 120 is scratch space according to the SystemV ABI | 25 | |
29 | old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)') | 26 | if (ret < 0) { |
30 | - gdb.execute('call arch_prctl(0x1003, $rsp - 120)', False, True) | 27 | - if (ret == -EINTR) { |
31 | + gdb.execute('call (int)arch_prctl(0x1003, $rsp - 120)', False, True) | 28 | + /* |
32 | fs_base = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)') | 29 | + * Only writev/readv/fsync requests on regular files or host block |
33 | gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True) | 30 | + * devices are submitted. Therefore -EAGAIN is not expected but it's |
34 | return fs_base | 31 | + * known to happen sometimes with Linux SCSI. Submit again and hope |
32 | + * the request completes successfully. | ||
33 | + * | ||
34 | + * For more information, see: | ||
35 | + * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u | ||
36 | + * | ||
37 | + * If the code is changed to submit other types of requests in the | ||
38 | + * future, then this workaround may need to be extended to deal with | ||
39 | + * genuine -EAGAIN results that should not be resubmitted | ||
40 | + * immediately. | ||
41 | + */ | ||
42 | + if (ret == -EINTR || ret == -EAGAIN) { | ||
43 | luring_resubmit(s, luringcb); | ||
44 | continue; | ||
45 | } | ||
35 | -- | 46 | -- |
36 | 2.20.1 | 47 | 2.31.1 |
37 | 48 | ||
38 | diff view generated by jsdifflib |
1 | From: Peter Xu <peterx@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Lukas reported an hard to reproduce QMP iothread hang on s390 that | 3 | I'm interested in following the activity around the NVMe bdrv. |
4 | QEMU might hang at pthread_join() of the QMP monitor iothread before | ||
5 | quitting: | ||
6 | 4 | ||
7 | Thread 1 | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
8 | #0 0x000003ffad10932c in pthread_join | 6 | Message-id: 20210728183340.2018313-1-philmd@redhat.com |
9 | #1 0x0000000109e95750 in qemu_thread_join | ||
10 | at /home/thuth/devel/qemu/util/qemu-thread-posix.c:570 | ||
11 | #2 0x0000000109c95a1c in iothread_stop | ||
12 | #3 0x0000000109bb0874 in monitor_cleanup | ||
13 | #4 0x0000000109b55042 in main | ||
14 | |||
15 | While the iothread is still in the main loop: | ||
16 | |||
17 | Thread 4 | ||
18 | #0 0x000003ffad0010e4 in ?? | ||
19 | #1 0x000003ffad553958 in g_main_context_iterate.isra.19 | ||
20 | #2 0x000003ffad553d90 in g_main_loop_run | ||
21 | #3 0x0000000109c9585a in iothread_run | ||
22 | at /home/thuth/devel/qemu/iothread.c:74 | ||
23 | #4 0x0000000109e94752 in qemu_thread_start | ||
24 | at /home/thuth/devel/qemu/util/qemu-thread-posix.c:502 | ||
25 | #5 0x000003ffad10825a in start_thread | ||
26 | #6 0x000003ffad00dcf2 in thread_start | ||
27 | |||
28 | IMHO it's because there's a race between the main thread and iothread | ||
29 | when stopping the thread in following sequence: | ||
30 | |||
31 | main thread iothread | ||
32 | =========== ============== | ||
33 | aio_poll() | ||
34 | iothread_get_g_main_context | ||
35 | set iothread->worker_context | ||
36 | iothread_stop | ||
37 | schedule iothread_stop_bh | ||
38 | execute iothread_stop_bh [1] | ||
39 | set iothread->running=false | ||
40 | (since main_loop==NULL so | ||
41 | skip to quit main loop. | ||
42 | Note: although main_loop is | ||
43 | NULL but worker_context is | ||
44 | not!) | ||
45 | atomic_read(&iothread->worker_context) [2] | ||
46 | create main_loop object | ||
47 | g_main_loop_run() [3] | ||
48 | pthread_join() [4] | ||
49 | |||
50 | We can see that when execute iothread_stop_bh() at [1] it's possible | ||
51 | that main_loop is still NULL because it's only created until the first | ||
52 | check of the worker_context later at [2]. Then the iothread will hang | ||
53 | in the main loop [3] and it'll starve the main thread too [4]. | ||
54 | |||
55 | Here the simple solution should be that we check again the "running" | ||
56 | variable before check against worker_context. | ||
57 | |||
58 | CC: Thomas Huth <thuth@redhat.com> | ||
59 | CC: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||
60 | CC: Stefan Hajnoczi <stefanha@redhat.com> | ||
61 | CC: Lukáš Doktor <ldoktor@redhat.com> | ||
62 | CC: Markus Armbruster <armbru@redhat.com> | ||
63 | CC: Eric Blake <eblake@redhat.com> | ||
64 | CC: Paolo Bonzini <pbonzini@redhat.com> | ||
65 | Reported-by: Lukáš Doktor <ldoktor@redhat.com> | ||
66 | Signed-off-by: Peter Xu <peterx@redhat.com> | ||
67 | Tested-by: Thomas Huth <thuth@redhat.com> | ||
68 | Message-id: 20190129051432.22023-1-peterx@redhat.com | ||
69 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
70 | --- | 8 | --- |
71 | iothread.c | 6 +++++- | 9 | MAINTAINERS | 1 + |
72 | 1 file changed, 5 insertions(+), 1 deletion(-) | 10 | 1 file changed, 1 insertion(+) |
73 | 11 | ||
74 | diff --git a/iothread.c b/iothread.c | 12 | diff --git a/MAINTAINERS b/MAINTAINERS |
75 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
76 | --- a/iothread.c | 14 | --- a/MAINTAINERS |
77 | +++ b/iothread.c | 15 | +++ b/MAINTAINERS |
78 | @@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque) | 16 | @@ -XXX,XX +XXX,XX @@ F: block/null.c |
79 | while (iothread->running) { | 17 | NVMe Block Driver |
80 | aio_poll(iothread->ctx, true); | 18 | M: Stefan Hajnoczi <stefanha@redhat.com> |
81 | 19 | R: Fam Zheng <fam@euphon.net> | |
82 | - if (atomic_read(&iothread->worker_context)) { | 20 | +R: Philippe Mathieu-Daudé <philmd@redhat.com> |
83 | + /* | 21 | L: qemu-block@nongnu.org |
84 | + * We must check the running state again in case it was | 22 | S: Supported |
85 | + * changed in previous aio_poll() | 23 | F: block/nvme* |
86 | + */ | ||
87 | + if (iothread->running && atomic_read(&iothread->worker_context)) { | ||
88 | GMainLoop *loop; | ||
89 | |||
90 | g_main_context_push_thread_default(iothread->worker_context); | ||
91 | -- | 24 | -- |
92 | 2.20.1 | 25 | 2.31.1 |
93 | 26 | ||
94 | diff view generated by jsdifflib |