1 | The following changes since commit 55a19ad8b2d0797e3a8fe90ab99a9bb713824059: | 1 | The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935: |
---|---|---|---|
2 | 2 | ||
3 | Update version for v2.9.0-rc1 release (2017-03-21 17:13:29 +0000) | 3 | Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 600ac6a0ef5c06418446ef2f37407bddcc51b21c: | 9 | for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a: |
10 | 10 | ||
11 | blockjob: add devops to blockjob backends (2017-03-22 13:26:27 -0400) | 11 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block patches for 2.9 | 14 | Pull request |
15 | |||
16 | The main fix here is for io_uring. Spurious -EAGAIN errors can happen and the | ||
17 | request needs to be resubmitted. | ||
18 | |||
19 | The MAINTAINERS changes carry no risk and we might as well include them in QEMU | ||
20 | 6.1. | ||
21 | |||
15 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
16 | 23 | ||
17 | John Snow (3): | 24 | Fabian Ebner (1): |
18 | blockjob: add block_job_start_shim | 25 | block/io_uring: resubmit when result is -EAGAIN |
19 | block-backend: add drained_begin / drained_end ops | ||
20 | blockjob: add devops to blockjob backends | ||
21 | 26 | ||
22 | Paolo Bonzini (1): | 27 | Philippe Mathieu-Daudé (1): |
23 | blockjob: avoid recursive AioContext locking | 28 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver |
24 | 29 | ||
25 | block/block-backend.c | 24 ++++++++++++++-- | 30 | Stefano Garzarella (1): |
26 | blockjob.c | 63 ++++++++++++++++++++++++++++++++---------- | 31 | MAINTAINERS: add Stefano Garzarella as io_uring reviewer |
27 | include/sysemu/block-backend.h | 8 ++++++ | 32 | |
28 | 3 files changed, 79 insertions(+), 16 deletions(-) | 33 | MAINTAINERS | 2 ++ |
34 | block/io_uring.c | 16 +++++++++++++++- | ||
35 | 2 files changed, 17 insertions(+), 1 deletion(-) | ||
29 | 36 | ||
30 | -- | 37 | -- |
31 | 2.9.3 | 38 | 2.31.1 |
32 | 39 | ||
33 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | Streaming or any other block job hangs when performed on a block device | ||
4 | that has a non-default iothread. This happens because the AioContext | ||
5 | is acquired twice by block_job_defer_to_main_loop_bh and then released | ||
6 | only once by BDRV_POLL_WHILE. (Insert rants on recursive mutexes, which | ||
7 | unfortunately are a temporary but necessary evil for iothreads at the | ||
8 | moment). | ||
9 | |||
10 | Luckily, the reason for the double acquisition is simple; the function | ||
11 | acquires the AioContext for both the job iothread and the BDS iothread, | ||
12 | in case the BDS iothread was changed while the job was running. It | ||
13 | is therefore enough to skip the second acquisition when the two | ||
14 | AioContexts are one and the same. | ||
15 | |||
16 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
17 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
18 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
19 | Message-id: 1490118490-5597-1-git-send-email-pbonzini@redhat.com | ||
20 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
21 | --- | ||
22 | blockjob.c | 8 ++++++-- | ||
23 | 1 file changed, 6 insertions(+), 2 deletions(-) | ||
24 | |||
25 | diff --git a/blockjob.c b/blockjob.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/blockjob.c | ||
28 | +++ b/blockjob.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static void block_job_defer_to_main_loop_bh(void *opaque) | ||
30 | |||
31 | /* Fetch BDS AioContext again, in case it has changed */ | ||
32 | aio_context = blk_get_aio_context(data->job->blk); | ||
33 | - aio_context_acquire(aio_context); | ||
34 | + if (aio_context != data->aio_context) { | ||
35 | + aio_context_acquire(aio_context); | ||
36 | + } | ||
37 | |||
38 | data->job->deferred_to_main_loop = false; | ||
39 | data->fn(data->job, data->opaque); | ||
40 | |||
41 | - aio_context_release(aio_context); | ||
42 | + if (aio_context != data->aio_context) { | ||
43 | + aio_context_release(aio_context); | ||
44 | + } | ||
45 | |||
46 | aio_context_release(data->aio_context); | ||
47 | |||
48 | -- | ||
49 | 2.9.3 | ||
50 | |||
51 | diff view generated by jsdifflib |
1 | From: John Snow <jsnow@redhat.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This lets us hook into drained_begin and drained_end requests from the | 3 | I've been working with io_uring for a while so I'd like to help |
4 | backend level, which is particularly useful for making sure that all | 4 | with reviews. |
5 | jobs associated with a particular node (whether the source or the target) | ||
6 | receive a drain request. | ||
7 | 5 | ||
8 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | 6 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> |
9 | Signed-off-by: John Snow <jsnow@redhat.com> | 7 | Message-Id: <20210728131515.131045-1-sgarzare@redhat.com> |
10 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
11 | Message-id: 20170316212351.13797-4-jsnow@redhat.com | ||
12 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
13 | --- | 9 | --- |
14 | blockjob.c | 29 ++++++++++++++++++++++++----- | 10 | MAINTAINERS | 1 + |
15 | 1 file changed, 24 insertions(+), 5 deletions(-) | 11 | 1 file changed, 1 insertion(+) |
16 | 12 | ||
17 | diff --git a/blockjob.c b/blockjob.c | 13 | diff --git a/MAINTAINERS b/MAINTAINERS |
18 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/blockjob.c | 15 | --- a/MAINTAINERS |
20 | +++ b/blockjob.c | 16 | +++ b/MAINTAINERS |
21 | @@ -XXX,XX +XXX,XX @@ static const BdrvChildRole child_job = { | 17 | @@ -XXX,XX +XXX,XX @@ Linux io_uring |
22 | .stay_at_node = true, | 18 | M: Aarushi Mehta <mehta.aaru20@gmail.com> |
23 | }; | 19 | M: Julia Suvorova <jusual@redhat.com> |
24 | 20 | M: Stefan Hajnoczi <stefanha@redhat.com> | |
25 | +static void block_job_drained_begin(void *opaque) | 21 | +R: Stefano Garzarella <sgarzare@redhat.com> |
26 | +{ | 22 | L: qemu-block@nongnu.org |
27 | + BlockJob *job = opaque; | 23 | S: Maintained |
28 | + block_job_pause(job); | 24 | F: block/io_uring.c |
29 | +} | ||
30 | + | ||
31 | +static void block_job_drained_end(void *opaque) | ||
32 | +{ | ||
33 | + BlockJob *job = opaque; | ||
34 | + block_job_resume(job); | ||
35 | +} | ||
36 | + | ||
37 | +static const BlockDevOps block_job_dev_ops = { | ||
38 | + .drained_begin = block_job_drained_begin, | ||
39 | + .drained_end = block_job_drained_end, | ||
40 | +}; | ||
41 | + | ||
42 | BlockJob *block_job_next(BlockJob *job) | ||
43 | { | ||
44 | if (!job) { | ||
45 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, | ||
46 | } | ||
47 | |||
48 | job = g_malloc0(driver->instance_size); | ||
49 | - error_setg(&job->blocker, "block device is in use by block job: %s", | ||
50 | - BlockJobType_lookup[driver->job_type]); | ||
51 | - block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); | ||
52 | - bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); | ||
53 | - | ||
54 | job->driver = driver; | ||
55 | job->id = g_strdup(job_id); | ||
56 | job->blk = blk; | ||
57 | @@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, | ||
58 | job->paused = true; | ||
59 | job->pause_count = 1; | ||
60 | job->refcnt = 1; | ||
61 | + | ||
62 | + error_setg(&job->blocker, "block device is in use by block job: %s", | ||
63 | + BlockJobType_lookup[driver->job_type]); | ||
64 | + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); | ||
65 | bs->job = job; | ||
66 | |||
67 | + blk_set_dev_ops(blk, &block_job_dev_ops, job); | ||
68 | + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); | ||
69 | + | ||
70 | QLIST_INSERT_HEAD(&block_jobs, job, job_list); | ||
71 | |||
72 | blk_add_aio_context_notifier(blk, block_job_attached_aio_context, | ||
73 | -- | 25 | -- |
74 | 2.9.3 | 26 | 2.31.1 |
75 | 27 | ||
76 | diff view generated by jsdifflib |
1 | From: John Snow <jsnow@redhat.com> | 1 | From: Fabian Ebner <f.ebner@proxmox.com> |
---|---|---|---|
2 | 2 | ||
3 | Allow block backends to forward drain requests to their devices/users. | 3 | Linux SCSI can throw spurious -EAGAIN in some corner cases in its |
4 | The initial intended purpose for this patch is to allow BBs to forward | 4 | completion path, which will end up being the result in the completed |
5 | requests along to BlockJobs, which will want to pause if their associated | 5 | io_uring request. |
6 | BB has entered a drained region. | ||
7 | 6 | ||
8 | Signed-off-by: John Snow <jsnow@redhat.com> | 7 | Resubmitting such requests should allow block jobs to complete, even |
9 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 8 | if such spurious errors are encountered. |
10 | Message-id: 20170316212351.13797-3-jsnow@redhat.com | 9 | |
11 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 10 | Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com> |
11 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
12 | Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> | ||
13 | Message-id: 20210729091029.65369-1-f.ebner@proxmox.com | ||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | --- | 15 | --- |
13 | block/block-backend.c | 24 ++++++++++++++++++++++-- | 16 | block/io_uring.c | 16 +++++++++++++++- |
14 | include/sysemu/block-backend.h | 8 ++++++++ | 17 | 1 file changed, 15 insertions(+), 1 deletion(-) |
15 | 2 files changed, 30 insertions(+), 2 deletions(-) | ||
16 | 18 | ||
17 | diff --git a/block/block-backend.c b/block/block-backend.c | 19 | diff --git a/block/io_uring.c b/block/io_uring.c |
18 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/block-backend.c | 21 | --- a/block/io_uring.c |
20 | +++ b/block/block-backend.c | 22 | +++ b/block/io_uring.c |
21 | @@ -XXX,XX +XXX,XX @@ struct BlockBackend { | 23 | @@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s) |
22 | bool allow_write_beyond_eof; | 24 | total_bytes = ret + luringcb->total_read; |
23 | 25 | ||
24 | NotifierList remove_bs_notifiers, insert_bs_notifiers; | 26 | if (ret < 0) { |
25 | + | 27 | - if (ret == -EINTR) { |
26 | + int quiesce_counter; | 28 | + /* |
27 | }; | 29 | + * Only writev/readv/fsync requests on regular files or host block |
28 | 30 | + * devices are submitted. Therefore -EAGAIN is not expected but it's | |
29 | typedef struct BlockBackendAIOCB { | 31 | + * known to happen sometimes with Linux SCSI. Submit again and hope |
30 | @@ -XXX,XX +XXX,XX @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, | 32 | + * the request completes successfully. |
31 | void *opaque) | 33 | + * |
32 | { | 34 | + * For more information, see: |
33 | /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep | 35 | + * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u |
34 | - * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops | 36 | + * |
35 | - * is set. */ | 37 | + * If the code is changed to submit other types of requests in the |
36 | + * it that way, so we can assume blk->dev, if present, is a DeviceState if | 38 | + * future, then this workaround may need to be extended to deal with |
37 | + * blk->dev_ops is set. Non-device users may use dev_ops without device. */ | 39 | + * genuine -EAGAIN results that should not be resubmitted |
38 | assert(!blk->legacy_dev); | 40 | + * immediately. |
39 | 41 | + */ | |
40 | blk->dev_ops = ops; | 42 | + if (ret == -EINTR || ret == -EAGAIN) { |
41 | blk->dev_opaque = opaque; | 43 | luring_resubmit(s, luringcb); |
42 | + | 44 | continue; |
43 | + /* Are we currently quiesced? Should we enforce this right now? */ | 45 | } |
44 | + if (blk->quiesce_counter && ops->drained_begin) { | ||
45 | + ops->drained_begin(opaque); | ||
46 | + } | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child) | ||
51 | { | ||
52 | BlockBackend *blk = child->opaque; | ||
53 | |||
54 | + if (++blk->quiesce_counter == 1) { | ||
55 | + if (blk->dev_ops && blk->dev_ops->drained_begin) { | ||
56 | + blk->dev_ops->drained_begin(blk->dev_opaque); | ||
57 | + } | ||
58 | + } | ||
59 | + | ||
60 | /* Note that blk->root may not be accessible here yet if we are just | ||
61 | * attaching to a BlockDriverState that is drained. Use child instead. */ | ||
62 | |||
63 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child) | ||
64 | static void blk_root_drained_end(BdrvChild *child) | ||
65 | { | ||
66 | BlockBackend *blk = child->opaque; | ||
67 | + assert(blk->quiesce_counter); | ||
68 | |||
69 | assert(blk->public.io_limits_disabled); | ||
70 | --blk->public.io_limits_disabled; | ||
71 | + | ||
72 | + if (--blk->quiesce_counter == 0) { | ||
73 | + if (blk->dev_ops && blk->dev_ops->drained_end) { | ||
74 | + blk->dev_ops->drained_end(blk->dev_opaque); | ||
75 | + } | ||
76 | + } | ||
77 | } | ||
78 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/include/sysemu/block-backend.h | ||
81 | +++ b/include/sysemu/block-backend.h | ||
82 | @@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps { | ||
83 | * Runs when the size changed (e.g. monitor command block_resize) | ||
84 | */ | ||
85 | void (*resize_cb)(void *opaque); | ||
86 | + /* | ||
87 | + * Runs when the backend receives a drain request. | ||
88 | + */ | ||
89 | + void (*drained_begin)(void *opaque); | ||
90 | + /* | ||
91 | + * Runs when the backend's last drain request ends. | ||
92 | + */ | ||
93 | + void (*drained_end)(void *opaque); | ||
94 | } BlockDevOps; | ||
95 | |||
96 | /* This struct is embedded in (the private) BlockBackend struct and contains | ||
97 | -- | 46 | -- |
98 | 2.9.3 | 47 | 2.31.1 |
99 | 48 | ||
100 | diff view generated by jsdifflib |
1 | From: John Snow <jsnow@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | The purpose of this shim is to allow us to pause pre-started jobs. | 3 | I'm interested in following the activity around the NVMe bdrv. |
4 | The purpose of *that* is to allow us to buffer a pause request that | ||
5 | will be able to take effect before the job ever does any work, allowing | ||
6 | us to create jobs during a quiescent state (under which they will be | ||
7 | automatically paused), then resuming the jobs after the critical section | ||
8 | in any order, either: | ||
9 | 4 | ||
10 | (1) -block_job_start | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
11 | -block_job_resume (via e.g. drained_end) | 6 | Message-id: 20210728183340.2018313-1-philmd@redhat.com |
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | --- | ||
9 | MAINTAINERS | 1 + | ||
10 | 1 file changed, 1 insertion(+) | ||
12 | 11 | ||
13 | (2) -block_job_resume (via e.g. drained_end) | 12 | diff --git a/MAINTAINERS b/MAINTAINERS |
14 | -block_job_start | 13 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/MAINTAINERS | ||
15 | +++ b/MAINTAINERS | ||
16 | @@ -XXX,XX +XXX,XX @@ F: block/null.c | ||
17 | NVMe Block Driver | ||
18 | M: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | R: Fam Zheng <fam@euphon.net> | ||
20 | +R: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
21 | L: qemu-block@nongnu.org | ||
22 | S: Supported | ||
23 | F: block/nvme* | ||
24 | -- | ||
25 | 2.31.1 | ||
15 | 26 | ||
16 | The problem that requires a startup wrapper is the idea that a job must | ||
17 | start in the busy=true state only the first time -- all subsequent entries | ||
18 | require busy to be false, and the toggling of this state is otherwise | ||
19 | handled during existing pause and yield points. | ||
20 | |||
21 | The wrapper simply allows us to mandate that a job can "start," set busy | ||
22 | to true, then immediately pause only if necessary. We could avoid | ||
23 | requiring a wrapper, but all jobs would need to do it, so it's been | ||
24 | factored out here. | ||
25 | |||
26 | Signed-off-by: John Snow <jsnow@redhat.com> | ||
27 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
28 | Message-id: 20170316212351.13797-2-jsnow@redhat.com | ||
29 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
30 | --- | ||
31 | blockjob.c | 26 +++++++++++++++++++------- | ||
32 | 1 file changed, 19 insertions(+), 7 deletions(-) | ||
33 | |||
34 | diff --git a/blockjob.c b/blockjob.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/blockjob.c | ||
37 | +++ b/blockjob.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static bool block_job_started(BlockJob *job) | ||
39 | return job->co; | ||
40 | } | ||
41 | |||
42 | +/** | ||
43 | + * All jobs must allow a pause point before entering their job proper. This | ||
44 | + * ensures that jobs can be paused prior to being started, then resumed later. | ||
45 | + */ | ||
46 | +static void coroutine_fn block_job_co_entry(void *opaque) | ||
47 | +{ | ||
48 | + BlockJob *job = opaque; | ||
49 | + | ||
50 | + assert(job && job->driver && job->driver->start); | ||
51 | + block_job_pause_point(job); | ||
52 | + job->driver->start(job); | ||
53 | +} | ||
54 | + | ||
55 | void block_job_start(BlockJob *job) | ||
56 | { | ||
57 | assert(job && !block_job_started(job) && job->paused && | ||
58 | - !job->busy && job->driver->start); | ||
59 | - job->co = qemu_coroutine_create(job->driver->start, job); | ||
60 | - if (--job->pause_count == 0) { | ||
61 | - job->paused = false; | ||
62 | - job->busy = true; | ||
63 | - qemu_coroutine_enter(job->co); | ||
64 | - } | ||
65 | + job->driver && job->driver->start); | ||
66 | + job->co = qemu_coroutine_create(block_job_co_entry, job); | ||
67 | + job->pause_count--; | ||
68 | + job->busy = true; | ||
69 | + job->paused = false; | ||
70 | + qemu_coroutine_enter(job->co); | ||
71 | } | ||
72 | |||
73 | void block_job_ref(BlockJob *job) | ||
74 | -- | ||
75 | 2.9.3 | ||
76 | |||
77 | diff view generated by jsdifflib |