1 | The following changes since commit 560009f2c8b57b7cdd31a5693ea86ab369382f49: | 1 | The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2019-10-07 15:40:53 +0100) | 3 | Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 4d804b5305ffb4d5fa414c38d4f1bdfb987c8d0b: | 9 | for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a: |
10 | 10 | ||
11 | iotests/262: Switch source/dest VM launch order (2019-10-08 14:28:25 +0100) | 11 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | This pull request also contains the two commits from the previous pull request | 16 | The main fix here is for io_uring. Spurious -EAGAIN errors can happen and the |
17 | that was dropped due to a mingw compilation error. The compilation should now | 17 | request needs to be resubmitted. |
18 | be fixed. | 18 | |
19 | The MAINTAINERS changes carry no risk and we might as well include them in QEMU | ||
20 | 6.1. | ||
19 | 21 | ||
20 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
21 | 23 | ||
22 | Max Reitz (2): | 24 | Fabian Ebner (1): |
23 | block: Skip COR for inactive nodes | 25 | block/io_uring: resubmit when result is -EAGAIN |
24 | iotests/262: Switch source/dest VM launch order | ||
25 | 26 | ||
26 | Sergio Lopez (1): | 27 | Philippe Mathieu-Daudé (1): |
27 | virtio-blk: schedule virtio_notify_config to run on main context | 28 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver |
28 | 29 | ||
29 | Vladimir Sementsov-Ogievskiy (1): | 30 | Stefano Garzarella (1): |
30 | util/ioc.c: try to reassure Coverity about qemu_iovec_init_extended | 31 | MAINTAINERS: add Stefano Garzarella as io_uring reviewer |
31 | 32 | ||
32 | block/io.c | 41 +++++++++++++++++++++++++------------- | 33 | MAINTAINERS | 2 ++ |
33 | hw/block/virtio-blk.c | 16 ++++++++++++++- | 34 | block/io_uring.c | 16 +++++++++++++++- |
34 | util/iov.c | 5 +++-- | 35 | 2 files changed, 17 insertions(+), 1 deletion(-) |
35 | tests/qemu-iotests/262 | 12 +++++------ | ||
36 | tests/qemu-iotests/262.out | 6 +++--- | ||
37 | 5 files changed, 54 insertions(+), 26 deletions(-) | ||
38 | 36 | ||
39 | -- | 37 | -- |
40 | 2.21.0 | 38 | 2.31.1 |
41 | 39 | ||
42 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Make it more obvious that filling qiov corresponds to qiov allocation, | ||
4 | which in turn corresponds to total_niov calculation, based on mid_niov | ||
5 | (not mid_len). Still add an assertion to show that there should be no | ||
6 | difference. | ||
7 | |||
8 | [Added mingw "error: 'mid_iov' may be used uninitialized in this | ||
9 | function" compiler error fix suggested by Vladimir. | ||
10 | --Stefan] | ||
11 | |||
12 | Reported-by: Coverity (CID 1405302) | ||
13 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
14 | Message-id: 20190910090310.14032-1-vsementsov@virtuozzo.com | ||
15 | Suggested-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
17 | Message-Id: <20190910090310.14032-1-vsementsov@virtuozzo.com> | ||
18 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | |||
20 | fixup! util/ioc.c: try to reassure Coverity about qemu_iovec_init_extended | ||
21 | --- | ||
22 | util/iov.c | 5 +++-- | ||
23 | 1 file changed, 3 insertions(+), 2 deletions(-) | ||
24 | |||
25 | diff --git a/util/iov.c b/util/iov.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/util/iov.c | ||
28 | +++ b/util/iov.c | ||
29 | @@ -XXX,XX +XXX,XX @@ void qemu_iovec_init_extended( | ||
30 | { | ||
31 | size_t mid_head, mid_tail; | ||
32 | int total_niov, mid_niov = 0; | ||
33 | - struct iovec *p, *mid_iov; | ||
34 | + struct iovec *p, *mid_iov = NULL; | ||
35 | |||
36 | if (mid_len) { | ||
37 | mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, | ||
38 | @@ -XXX,XX +XXX,XX @@ void qemu_iovec_init_extended( | ||
39 | p++; | ||
40 | } | ||
41 | |||
42 | - if (mid_len) { | ||
43 | + assert(!mid_niov == !mid_len); | ||
44 | + if (mid_niov) { | ||
45 | memcpy(p, mid_iov, mid_niov * sizeof(*p)); | ||
46 | p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; | ||
47 | p[0].iov_len -= mid_head; | ||
48 | -- | ||
49 | 2.21.0 | ||
50 | |||
51 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Stefano Garzarella <sgarzare@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Launching the destination VM before the source VM gives us a regression | 3 | I've been working with io_uring for a while so I'd like to help |
4 | test for HEAD^: | 4 | with reviews. |
5 | 5 | ||
6 | The guest device causes a read from the disk image through | 6 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> |
7 | guess_disk_lchs(). This will not work if the first sector (containing | 7 | Message-Id: <20210728131515.131045-1-sgarzare@redhat.com> |
8 | the partition table) is yet unallocated, we use COR, and the node is | ||
9 | inactive. | ||
10 | |||
11 | By launching the source VM before the destination, however, the COR | ||
12 | filter on the source will allocate that area in the image shared between | ||
13 | both VMs, thus the problem will not become apparent. | ||
14 | |||
15 | Switching the launch order causes the sector to still be unallocated | ||
16 | when guess_disk_lchs() runs on the inactive node in the destination VM, | ||
17 | and thus we get our test case. | ||
18 | |||
19 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
20 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
21 | Message-id: 20191001174827.11081-3-mreitz@redhat.com | ||
22 | Message-Id: <20191001174827.11081-3-mreitz@redhat.com> | ||
23 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
24 | --- | 9 | --- |
25 | tests/qemu-iotests/262 | 12 ++++++------ | 10 | MAINTAINERS | 1 + |
26 | tests/qemu-iotests/262.out | 6 +++--- | 11 | 1 file changed, 1 insertion(+) |
27 | 2 files changed, 9 insertions(+), 9 deletions(-) | ||
28 | 12 | ||
29 | diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 | 13 | diff --git a/MAINTAINERS b/MAINTAINERS |
30 | index XXXXXXX..XXXXXXX 100755 | ||
31 | --- a/tests/qemu-iotests/262 | ||
32 | +++ b/tests/qemu-iotests/262 | ||
33 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
34 | |||
35 | os.mkfifo(fifo) | ||
36 | |||
37 | - iotests.log('Launching source VM...') | ||
38 | - add_opts(vm_a) | ||
39 | - vm_a.launch() | ||
40 | - | ||
41 | - vm_a.enable_migration_events('A') | ||
42 | - | ||
43 | iotests.log('Launching destination VM...') | ||
44 | add_opts(vm_b) | ||
45 | vm_b.add_incoming("exec: cat '%s'" % (fifo)) | ||
46 | @@ -XXX,XX +XXX,XX @@ with iotests.FilePath('img') as img_path, \ | ||
47 | |||
48 | vm_b.enable_migration_events('B') | ||
49 | |||
50 | + iotests.log('Launching source VM...') | ||
51 | + add_opts(vm_a) | ||
52 | + vm_a.launch() | ||
53 | + | ||
54 | + vm_a.enable_migration_events('A') | ||
55 | + | ||
56 | iotests.log('Starting migration to B...') | ||
57 | iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) | ||
58 | with iotests.Timeout(3, 'Migration does not complete'): | ||
59 | diff --git a/tests/qemu-iotests/262.out b/tests/qemu-iotests/262.out | ||
60 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
61 | --- a/tests/qemu-iotests/262.out | 15 | --- a/MAINTAINERS |
62 | +++ b/tests/qemu-iotests/262.out | 16 | +++ b/MAINTAINERS |
63 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ Linux io_uring |
64 | -Launching source VM... | 18 | M: Aarushi Mehta <mehta.aaru20@gmail.com> |
65 | -Enabling migration QMP events on A... | 19 | M: Julia Suvorova <jusual@redhat.com> |
66 | -{"return": {}} | 20 | M: Stefan Hajnoczi <stefanha@redhat.com> |
67 | Launching destination VM... | 21 | +R: Stefano Garzarella <sgarzare@redhat.com> |
68 | Enabling migration QMP events on B... | 22 | L: qemu-block@nongnu.org |
69 | {"return": {}} | 23 | S: Maintained |
70 | +Launching source VM... | 24 | F: block/io_uring.c |
71 | +Enabling migration QMP events on A... | ||
72 | +{"return": {}} | ||
73 | Starting migration to B... | ||
74 | {"return": {}} | ||
75 | {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} | ||
76 | -- | 25 | -- |
77 | 2.21.0 | 26 | 2.31.1 |
78 | 27 | ||
79 | diff view generated by jsdifflib |
1 | From: Sergio Lopez <slp@redhat.com> | 1 | From: Fabian Ebner <f.ebner@proxmox.com> |
---|---|---|---|
2 | 2 | ||
3 | virtio_notify_config() needs to acquire the global mutex, which isn't | 3 | Linux SCSI can throw spurious -EAGAIN in some corner cases in its |
4 | allowed from an iothread, and may lead to a deadlock like this: | 4 | completion path, which will end up being the result in the completed |
5 | io_uring request. | ||
5 | 6 | ||
6 | - main thread | 7 | Resubmitting such requests should allow block jobs to complete, even |
7 | * Has acquired: qemu_global_mutex. | 8 | if such spurious errors are encountered. |
8 | * Is trying to acquire: iothread AioContext lock via | ||
9 | AIO_WAIT_WHILE (after aio_poll). | ||
10 | 9 | ||
11 | - iothread | 10 | Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com> |
12 | * Has acquired: AioContext lock. | 11 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> |
13 | * Is trying to acquire: qemu_global_mutex (via | 12 | Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> |
14 | virtio_notify_config->prepare_mmio_access). | 13 | Message-id: 20210729091029.65369-1-f.ebner@proxmox.com |
15 | |||
16 | If virtio_blk_resize() is called from an iothread, schedule | ||
17 | virtio_notify_config() to be run in the main context BH. | ||
18 | |||
19 | [Removed unnecessary newline as suggested by Kevin Wolf | ||
20 | <kwolf@redhat.com>. | ||
21 | --Stefan] | ||
22 | |||
23 | Signed-off-by: Sergio Lopez <slp@redhat.com> | ||
24 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
25 | Message-id: 20190916112411.21636-1-slp@redhat.com | ||
26 | Message-Id: <20190916112411.21636-1-slp@redhat.com> | ||
27 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
28 | --- | 15 | --- |
29 | hw/block/virtio-blk.c | 16 +++++++++++++++- | 16 | block/io_uring.c | 16 +++++++++++++++- |
30 | 1 file changed, 15 insertions(+), 1 deletion(-) | 17 | 1 file changed, 15 insertions(+), 1 deletion(-) |
31 | 18 | ||
32 | diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c | 19 | diff --git a/block/io_uring.c b/block/io_uring.c |
33 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/hw/block/virtio-blk.c | 21 | --- a/block/io_uring.c |
35 | +++ b/hw/block/virtio-blk.c | 22 | +++ b/block/io_uring.c |
36 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s) |
37 | #include "qemu/iov.h" | 24 | total_bytes = ret + luringcb->total_read; |
38 | #include "qemu/module.h" | 25 | |
39 | #include "qemu/error-report.h" | 26 | if (ret < 0) { |
40 | +#include "qemu/main-loop.h" | 27 | - if (ret == -EINTR) { |
41 | #include "trace.h" | 28 | + /* |
42 | #include "hw/block/block.h" | 29 | + * Only writev/readv/fsync requests on regular files or host block |
43 | #include "hw/qdev-properties.h" | 30 | + * devices are submitted. Therefore -EAGAIN is not expected but it's |
44 | @@ -XXX,XX +XXX,XX @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, | 31 | + * known to happen sometimes with Linux SCSI. Submit again and hope |
45 | return 0; | 32 | + * the request completes successfully. |
46 | } | 33 | + * |
47 | 34 | + * For more information, see: | |
48 | +static void virtio_resize_cb(void *opaque) | 35 | + * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u |
49 | +{ | 36 | + * |
50 | + VirtIODevice *vdev = opaque; | 37 | + * If the code is changed to submit other types of requests in the |
51 | + | 38 | + * future, then this workaround may need to be extended to deal with |
52 | + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); | 39 | + * genuine -EAGAIN results that should not be resubmitted |
53 | + virtio_notify_config(vdev); | 40 | + * immediately. |
54 | +} | 41 | + */ |
55 | + | 42 | + if (ret == -EINTR || ret == -EAGAIN) { |
56 | static void virtio_blk_resize(void *opaque) | 43 | luring_resubmit(s, luringcb); |
57 | { | 44 | continue; |
58 | VirtIODevice *vdev = VIRTIO_DEVICE(opaque); | 45 | } |
59 | |||
60 | - virtio_notify_config(vdev); | ||
61 | + /* | ||
62 | + * virtio_notify_config() needs to acquire the global mutex, | ||
63 | + * so it can't be called from an iothread. Instead, schedule | ||
64 | + * it to be run in the main context BH. | ||
65 | + */ | ||
66 | + aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); | ||
67 | } | ||
68 | |||
69 | static const BlockDevOps virtio_block_ops = { | ||
70 | -- | 46 | -- |
71 | 2.21.0 | 47 | 2.31.1 |
72 | 48 | ||
73 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We must not write data to inactive nodes, and a COR is certainly | 3 | I'm interested in following the activity around the NVMe bdrv. |
4 | something we can simply not do without upsetting anyone. So skip COR | ||
5 | operations on inactive nodes. | ||
6 | 4 | ||
7 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
8 | Reviewed-by: Eric Blake <eblake@redhat.com> | 6 | Message-id: 20210728183340.2018313-1-philmd@redhat.com |
9 | Message-id: 20191001174827.11081-2-mreitz@redhat.com | ||
10 | Message-Id: <20191001174827.11081-2-mreitz@redhat.com> | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
12 | --- | 8 | --- |
13 | block/io.c | 41 +++++++++++++++++++++++++++-------------- | 9 | MAINTAINERS | 1 + |
14 | 1 file changed, 27 insertions(+), 14 deletions(-) | 10 | 1 file changed, 1 insertion(+) |
15 | 11 | ||
16 | diff --git a/block/io.c b/block/io.c | 12 | diff --git a/MAINTAINERS b/MAINTAINERS |
17 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/io.c | 14 | --- a/MAINTAINERS |
19 | +++ b/block/io.c | 15 | +++ b/MAINTAINERS |
20 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | 16 | @@ -XXX,XX +XXX,XX @@ F: block/null.c |
21 | int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, | 17 | NVMe Block Driver |
22 | BDRV_REQUEST_MAX_BYTES); | 18 | M: Stefan Hajnoczi <stefanha@redhat.com> |
23 | unsigned int progress = 0; | 19 | R: Fam Zheng <fam@euphon.net> |
24 | + bool skip_write; | 20 | +R: Philippe Mathieu-Daudé <philmd@redhat.com> |
25 | 21 | L: qemu-block@nongnu.org | |
26 | if (!drv) { | 22 | S: Supported |
27 | return -ENOMEDIUM; | 23 | F: block/nvme* |
28 | } | ||
29 | |||
30 | + /* | ||
31 | + * Do not write anything when the BDS is inactive. That is not | ||
32 | + * allowed, and it would not help. | ||
33 | + */ | ||
34 | + skip_write = (bs->open_flags & BDRV_O_INACTIVE); | ||
35 | + | ||
36 | /* FIXME We cannot require callers to have write permissions when all they | ||
37 | * are doing is a read request. If we did things right, write permissions | ||
38 | * would be obtained anyway, but internally by the copy-on-read code. As | ||
39 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
40 | while (cluster_bytes) { | ||
41 | int64_t pnum; | ||
42 | |||
43 | - ret = bdrv_is_allocated(bs, cluster_offset, | ||
44 | - MIN(cluster_bytes, max_transfer), &pnum); | ||
45 | - if (ret < 0) { | ||
46 | - /* Safe to treat errors in querying allocation as if | ||
47 | - * unallocated; we'll probably fail again soon on the | ||
48 | - * read, but at least that will set a decent errno. | ||
49 | - */ | ||
50 | + if (skip_write) { | ||
51 | + ret = 1; /* "already allocated", so nothing will be copied */ | ||
52 | pnum = MIN(cluster_bytes, max_transfer); | ||
53 | - } | ||
54 | + } else { | ||
55 | + ret = bdrv_is_allocated(bs, cluster_offset, | ||
56 | + MIN(cluster_bytes, max_transfer), &pnum); | ||
57 | + if (ret < 0) { | ||
58 | + /* | ||
59 | + * Safe to treat errors in querying allocation as if | ||
60 | + * unallocated; we'll probably fail again soon on the | ||
61 | + * read, but at least that will set a decent errno. | ||
62 | + */ | ||
63 | + pnum = MIN(cluster_bytes, max_transfer); | ||
64 | + } | ||
65 | |||
66 | - /* Stop at EOF if the image ends in the middle of the cluster */ | ||
67 | - if (ret == 0 && pnum == 0) { | ||
68 | - assert(progress >= bytes); | ||
69 | - break; | ||
70 | - } | ||
71 | + /* Stop at EOF if the image ends in the middle of the cluster */ | ||
72 | + if (ret == 0 && pnum == 0) { | ||
73 | + assert(progress >= bytes); | ||
74 | + break; | ||
75 | + } | ||
76 | |||
77 | - assert(skip_bytes < pnum); | ||
78 | + assert(skip_bytes < pnum); | ||
79 | + } | ||
80 | |||
81 | if (ret <= 0) { | ||
82 | QEMUIOVector local_qiov; | ||
83 | -- | 24 | -- |
84 | 2.21.0 | 25 | 2.31.1 |
85 | 26 | ||
86 | diff view generated by jsdifflib |