1 | The following changes since commit 3f3bbfc7cef4490c5ed5550766a81e7d18f08db1: | 1 | The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2019-03-12' into staging (2019-03-12 21:06:26 +0000) | 3 | Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to f357fcd890a8d6ced6d261338b859a41414561e9: | 9 | for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a: |
10 | 10 | ||
11 | file-posix: add drop-cache=on|off option (2019-03-13 10:54:55 +0000) | 11 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | * Add 'drop-cache=on|off' option to file-posix.c. The default is on. | 16 | The main fix here is for io_uring. Spurious -EAGAIN errors can happen, in which |
17 | Disabling the option fixes a QEMU 3.0.0 performance regression when live | 17 | case the request needs to be resubmitted. |
18 | migrating on the same host with cache.direct=off. | 18 | |
19 | The MAINTAINERS changes carry no risk and we might as well include them in QEMU | ||
20 | 6.1. | ||
19 | 21 | ||
20 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
21 | 23 | ||
22 | Stefan Hajnoczi (1): | 24 | Fabian Ebner (1): |
23 | file-posix: add drop-cache=on|off option | 25 | block/io_uring: resubmit when result is -EAGAIN |
24 | 26 | ||
25 | qapi/block-core.json | 6 ++++++ | 27 | Philippe Mathieu-Daudé (1): |
26 | block/file-posix.c | 16 ++++++++++++++++ | 28 | MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver |
27 | 2 files changed, 22 insertions(+) | 29 | |
30 | Stefano Garzarella (1): | ||
31 | MAINTAINERS: add Stefano Garzarella as io_uring reviewer | ||
32 | |||
33 | MAINTAINERS | 2 ++ | ||
34 | block/io_uring.c | 16 +++++++++++++++- | ||
35 | 2 files changed, 17 insertions(+), 1 deletion(-) | ||
28 | 36 | ||
29 | -- | 37 | -- |
30 | 2.20.1 | 38 | 2.31.1 |
31 | 39 | ||
32 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Stefano Garzarella <sgarzare@redhat.com> | ||
1 | 2 | ||
3 | I've been working with io_uring for a while so I'd like to help | ||
4 | with reviews. | ||
5 | |||
6 | Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> | ||
7 | Message-Id: <20210728131515.131045-1-sgarzare@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | MAINTAINERS | 1 + | ||
11 | 1 file changed, 1 insertion(+) | ||
12 | |||
13 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/MAINTAINERS | ||
16 | +++ b/MAINTAINERS | ||
17 | @@ -XXX,XX +XXX,XX @@ Linux io_uring | ||
18 | M: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
19 | M: Julia Suvorova <jusual@redhat.com> | ||
20 | M: Stefan Hajnoczi <stefanha@redhat.com> | ||
21 | +R: Stefano Garzarella <sgarzare@redhat.com> | ||
22 | L: qemu-block@nongnu.org | ||
23 | S: Maintained | ||
24 | F: block/io_uring.c | ||
25 | -- | ||
26 | 2.31.1 | ||
27 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Fabian Ebner <f.ebner@proxmox.com> | ||
1 | 2 | ||
3 | Linux SCSI can throw spurious -EAGAIN in some corner cases in its | ||
4 | completion path, which will end up being the result in the completed | ||
5 | io_uring request. | ||
6 | |||
7 | Resubmitting such requests should allow block jobs to complete, even | ||
8 | if such spurious errors are encountered. | ||
9 | |||
10 | Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com> | ||
11 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
12 | Signed-off-by: Fabian Ebner <f.ebner@proxmox.com> | ||
13 | Message-id: 20210729091029.65369-1-f.ebner@proxmox.com | ||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | --- | ||
16 | block/io_uring.c | 16 +++++++++++++++- | ||
17 | 1 file changed, 15 insertions(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/io_uring.c | ||
22 | +++ b/block/io_uring.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s) | ||
24 | total_bytes = ret + luringcb->total_read; | ||
25 | |||
26 | if (ret < 0) { | ||
27 | - if (ret == -EINTR) { | ||
28 | + /* | ||
29 | + * Only writev/readv/fsync requests on regular files or host block | ||
30 | + * devices are submitted. Therefore -EAGAIN is not expected but it's | ||
31 | + * known to happen sometimes with Linux SCSI. Submit again and hope | ||
32 | + * the request completes successfully. | ||
33 | + * | ||
34 | + * For more information, see: | ||
35 | + * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u | ||
36 | + * | ||
37 | + * If the code is changed to submit other types of requests in the | ||
38 | + * future, then this workaround may need to be extended to deal with | ||
39 | + * genuine -EAGAIN results that should not be resubmitted | ||
40 | + * immediately. | ||
41 | + */ | ||
42 | + if (ret == -EINTR || ret == -EAGAIN) { | ||
43 | luring_resubmit(s, luringcb); | ||
44 | continue; | ||
45 | } | ||
46 | -- | ||
47 | 2.31.1 | ||
48 | diff view generated by jsdifflib |
1 | Commit dd577a26ff03b6829721b1ffbbf9e7c411b72378 ("block/file-posix: | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | implement bdrv_co_invalidate_cache() on Linux") introduced page cache | ||
3 | invalidation so that cache.direct=off live migration is safe on Linux. | ||
4 | 2 | ||
5 | The invalidation takes a significant amount of time when the file is | 3 | I'm interested in following the activity around the NVMe block driver. |
6 | large and present in the page cache. Normally this is not the case for | ||
7 | cross-host live migration but it can happen when migrating between QEMU | ||
8 | processes on the same host. | ||
9 | 4 | ||
10 | On same-host migration we don't need to invalidate pages for correctness | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
11 | anyway, so an option to skip page cache invalidation is useful. I | 6 | Message-id: 20210728183340.2018313-1-philmd@redhat.com |
12 | investigated optimizing invalidation and detecting same-host migration, | ||
13 | but both are hard to achieve, so a user-visible option will suffice. | ||
14 | |||
15 | As a bonus this option means that the cache invalidation feature will | ||
16 | now be detectable by libvirt via QMP schema introspection. | ||
17 | |||
18 | Suggested-by: Neil Skrypuch <neil@tembosocial.com> | ||
19 | Tested-by: Neil Skrypuch <neil@tembosocial.com> | ||
20 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
21 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
22 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
23 | Message-id: 20190307164941.3322-1-stefanha@redhat.com | ||
24 | Message-Id: <20190307164941.3322-1-stefanha@redhat.com> | ||
25 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
26 | --- | 8 | --- |
27 | qapi/block-core.json | 6 ++++++ | 9 | MAINTAINERS | 1 + |
28 | block/file-posix.c | 16 ++++++++++++++++ | 10 | 1 file changed, 1 insertion(+) |
29 | 2 files changed, 22 insertions(+) | ||
30 | 11 | ||
31 | diff --git a/qapi/block-core.json b/qapi/block-core.json | 12 | diff --git a/MAINTAINERS b/MAINTAINERS |
32 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/qapi/block-core.json | 14 | --- a/MAINTAINERS |
34 | +++ b/qapi/block-core.json | 15 | +++ b/MAINTAINERS |
35 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ F: block/null.c |
36 | # @locking: whether to enable file locking. If set to 'auto', only enable | 17 | NVMe Block Driver |
37 | # when Open File Descriptor (OFD) locking API is available | 18 | M: Stefan Hajnoczi <stefanha@redhat.com> |
38 | # (default: auto, since 2.10) | 19 | R: Fam Zheng <fam@euphon.net> |
39 | +# @drop-cache: invalidate page cache during live migration. This prevents | 20 | +R: Philippe Mathieu-Daudé <philmd@redhat.com> |
40 | +# stale data on the migration destination with cache.direct=off. | 21 | L: qemu-block@nongnu.org |
41 | +# Currently only supported on Linux hosts. | 22 | S: Supported |
42 | +# (default: on, since: 4.0) | 23 | F: block/nvme* |
43 | # @x-check-cache-dropped: whether to check that page cache was dropped on live | ||
44 | # migration. May cause noticeable delays if the image | ||
45 | # file is large, do not use in production. | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | '*pr-manager': 'str', | ||
48 | '*locking': 'OnOffAuto', | ||
49 | '*aio': 'BlockdevAioOptions', | ||
50 | + '*drop-cache': {'type': 'bool', | ||
51 | + 'if': 'defined(CONFIG_LINUX)'}, | ||
52 | '*x-check-cache-dropped': 'bool' } } | ||
53 | |||
54 | ## | ||
55 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/block/file-posix.c | ||
58 | +++ b/block/file-posix.c | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState { | ||
60 | bool page_cache_inconsistent:1; | ||
61 | bool has_fallocate; | ||
62 | bool needs_alignment; | ||
63 | + bool drop_cache; | ||
64 | bool check_cache_dropped; | ||
65 | |||
66 | PRManager *pr_mgr; | ||
67 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState { | ||
68 | typedef struct BDRVRawReopenState { | ||
69 | int fd; | ||
70 | int open_flags; | ||
71 | + bool drop_cache; | ||
72 | bool check_cache_dropped; | ||
73 | } BDRVRawReopenState; | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = { | ||
76 | .type = QEMU_OPT_STRING, | ||
77 | .help = "id of persistent reservation manager object (default: none)", | ||
78 | }, | ||
79 | +#if defined(__linux__) | ||
80 | + { | ||
81 | + .name = "drop-cache", | ||
82 | + .type = QEMU_OPT_BOOL, | ||
83 | + .help = "invalidate page cache during live migration (default: on)", | ||
84 | + }, | ||
85 | +#endif | ||
86 | { | ||
87 | .name = "x-check-cache-dropped", | ||
88 | .type = QEMU_OPT_BOOL, | ||
89 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
90 | } | ||
91 | } | ||
92 | |||
93 | + s->drop_cache = qemu_opt_get_bool(opts, "drop-cache", true); | ||
94 | s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped", | ||
95 | false); | ||
96 | |||
97 | @@ -XXX,XX +XXX,XX @@ static int raw_reopen_prepare(BDRVReopenState *state, | ||
98 | goto out; | ||
99 | } | ||
100 | |||
101 | + rs->drop_cache = qemu_opt_get_bool_del(opts, "drop-cache", true); | ||
102 | rs->check_cache_dropped = | ||
103 | qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false); | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static void raw_reopen_commit(BDRVReopenState *state) | ||
106 | BDRVRawState *s = state->bs->opaque; | ||
107 | Error *local_err = NULL; | ||
108 | |||
109 | + s->drop_cache = rs->drop_cache; | ||
110 | s->check_cache_dropped = rs->check_cache_dropped; | ||
111 | s->open_flags = rs->open_flags; | ||
112 | |||
113 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, | ||
114 | return; | ||
115 | } | ||
116 | |||
117 | + if (!s->drop_cache) { | ||
118 | + return; | ||
119 | + } | ||
120 | + | ||
121 | if (s->open_flags & O_DIRECT) { | ||
122 | return; /* No host kernel page cache */ | ||
123 | } | ||
124 | -- | 24 | -- |
125 | 2.20.1 | 25 | 2.31.1 |
126 | 26 | ||
127 | diff view generated by jsdifflib |