1 | The following changes since commit 9cf289af47bcfae5c75de37d8e5d6fd23705322c: | 1 | The following changes since commit 848a6caa88b9f082c89c9b41afa975761262981d: |
---|---|---|---|
2 | 2 | ||
3 | Merge tag 'qga-pull-request' of gitlab.com:marcandre.lureau/qemu into staging (2022-05-04 03:42:49 -0700) | 3 | Merge tag 'migration-20230602-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-06-02 17:33:29 -0700) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/hreitz/qemu.git tags/pull-block-2023-06-05 |
8 | 8 | ||
9 | for you to fetch changes up to bef2e050d6a7feb865854c65570c496ac5a8cf53: | 9 | for you to fetch changes up to 42a2890a76f4783cd1c212f27856edcf2b5e8a75: |
10 | 10 | ||
11 | util/event-loop-base: Introduce options to set the thread pool size (2022-05-04 17:02:19 +0100) | 11 | qcow2: add discard-no-unref option (2023-06-05 13:15:42 +0200) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Block patches |
15 | 15 | ||
16 | Add new thread-pool-min/thread-pool-max parameters to control the thread pool | 16 | - Fix padding of unaligned vectored requests to match the host alignment |
17 | used for async I/O. | 17 | for vectors with 1023 or 1024 buffers |
18 | - Refactor and fix bugs in parallels's image check functionality | ||
19 | - Add an option to the qcow2 driver to retain (qcow2-level) allocations | ||
20 | on discard requests from the guest (while still forwarding the discard | ||
21 | to the lower level and marking the range as zero) | ||
18 | 22 | ||
19 | ---------------------------------------------------------------- | 23 | ---------------------------------------------------------------- |
24 | Alexander Ivanov (12): | ||
25 | parallels: Out of image offset in BAT leads to image inflation | ||
26 | parallels: Fix high_off calculation in parallels_co_check() | ||
27 | parallels: Fix image_end_offset and data_end after out-of-image check | ||
28 | parallels: create parallels_set_bat_entry_helper() to assign BAT value | ||
29 | parallels: Use generic infrastructure for BAT writing in | ||
30 | parallels_co_check() | ||
31 | parallels: Move check of unclean image to a separate function | ||
32 | parallels: Move check of cluster outside image to a separate function | ||
33 | parallels: Fix statistics calculation | ||
34 | parallels: Move check of leaks to a separate function | ||
35 | parallels: Move statistic collection to a separate function | ||
36 | parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD | ||
37 | parallels: Incorrect condition in out-of-image check | ||
20 | 38 | ||
21 | Nicolas Saenz Julienne (3): | 39 | Hanna Czenczek (4): |
22 | Introduce event-loop-base abstract class | 40 | util/iov: Make qiov_slice() public |
23 | util/main-loop: Introduce the main loop into QOM | 41 | block: Collapse padded I/O vecs exceeding IOV_MAX |
24 | util/event-loop-base: Introduce options to set the thread pool size | 42 | util/iov: Remove qemu_iovec_init_extended() |
43 | iotests/iov-padding: New test | ||
25 | 44 | ||
26 | qapi/qom.json | 43 ++++++++-- | 45 | Jean-Louis Dupond (1): |
27 | meson.build | 26 +++--- | 46 | qcow2: add discard-no-unref option |
28 | include/block/aio.h | 10 +++ | 47 | |
29 | include/block/thread-pool.h | 3 + | 48 | qapi/block-core.json | 12 ++ |
30 | include/qemu/main-loop.h | 10 +++ | 49 | block/qcow2.h | 3 + |
31 | include/sysemu/event-loop-base.h | 41 +++++++++ | 50 | include/qemu/iov.h | 8 +- |
32 | include/sysemu/iothread.h | 6 +- | 51 | block/io.c | 166 ++++++++++++++++++-- |
33 | event-loop-base.c | 140 +++++++++++++++++++++++++++++++ | 52 | block/parallels.c | 190 ++++++++++++++++------- |
34 | iothread.c | 68 +++++---------- | 53 | block/qcow2-cluster.c | 32 +++- |
35 | util/aio-posix.c | 1 + | 54 | block/qcow2.c | 18 +++ |
36 | util/async.c | 20 +++++ | 55 | util/iov.c | 89 ++--------- |
37 | util/main-loop.c | 65 ++++++++++++++ | 56 | qemu-options.hx | 12 ++ |
38 | util/thread-pool.c | 55 +++++++++++- | 57 | tests/qemu-iotests/tests/iov-padding | 85 ++++++++++ |
39 | 13 files changed, 419 insertions(+), 69 deletions(-) | 58 | tests/qemu-iotests/tests/iov-padding.out | 59 +++++++ |
40 | create mode 100644 include/sysemu/event-loop-base.h | 59 | 11 files changed, 523 insertions(+), 151 deletions(-) |
41 | create mode 100644 event-loop-base.c | 60 | create mode 100755 tests/qemu-iotests/tests/iov-padding |
61 | create mode 100644 tests/qemu-iotests/tests/iov-padding.out | ||
42 | 62 | ||
43 | -- | 63 | -- |
44 | 2.35.1 | 64 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | We want to inline qemu_iovec_init_extended() in block/io.c for padding | ||
2 | requests, and having access to qiov_slice() is useful for this. As a | ||
3 | public function, it is renamed to qemu_iovec_slice(). | ||
1 | 4 | ||
5 | (We will need to count the number of I/O vector elements of a slice | ||
6 | there, and then later process this slice. Without qiov_slice(), we | ||
7 | would need to call qemu_iovec_subvec_niov(), and all further | ||
8 | IOV-processing functions may need to skip prefixing elements to | ||
9 | accomodate for a qiov_offset. Because qemu_iovec_subvec_niov() | ||
10 | internally calls qiov_slice(), we can just have the block/io.c code call | ||
11 | qiov_slice() itself, thus get the number of elements, and also create an | ||
12 | iovec array with the superfluous prefixing elements stripped, so the | ||
13 | following processing functions no longer need to skip them.) | ||
14 | |||
15 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
16 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
17 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
18 | Message-Id: <20230411173418.19549-2-hreitz@redhat.com> | ||
19 | --- | ||
20 | include/qemu/iov.h | 3 +++ | ||
21 | util/iov.c | 14 +++++++------- | ||
22 | 2 files changed, 10 insertions(+), 7 deletions(-) | ||
23 | |||
24 | diff --git a/include/qemu/iov.h b/include/qemu/iov.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/include/qemu/iov.h | ||
27 | +++ b/include/qemu/iov.h | ||
28 | @@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended( | ||
29 | void *tail_buf, size_t tail_len); | ||
30 | void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, | ||
31 | size_t offset, size_t len); | ||
32 | +struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, | ||
33 | + size_t offset, size_t len, | ||
34 | + size_t *head, size_t *tail, int *niov); | ||
35 | int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); | ||
36 | void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); | ||
37 | void qemu_iovec_concat(QEMUIOVector *dst, | ||
38 | diff --git a/util/iov.c b/util/iov.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/util/iov.c | ||
41 | +++ b/util/iov.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, | ||
43 | } | ||
44 | |||
45 | /* | ||
46 | - * qiov_slice | ||
47 | + * qemu_iovec_slice | ||
48 | * | ||
49 | * Find subarray of iovec's, containing requested range. @head would | ||
50 | * be offset in first iov (returned by the function), @tail would be | ||
51 | * count of extra bytes in last iovec (returned iov + @niov - 1). | ||
52 | */ | ||
53 | -static struct iovec *qiov_slice(QEMUIOVector *qiov, | ||
54 | - size_t offset, size_t len, | ||
55 | - size_t *head, size_t *tail, int *niov) | ||
56 | +struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, | ||
57 | + size_t offset, size_t len, | ||
58 | + size_t *head, size_t *tail, int *niov) | ||
59 | { | ||
60 | struct iovec *iov, *end_iov; | ||
61 | |||
62 | @@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) | ||
63 | size_t head, tail; | ||
64 | int niov; | ||
65 | |||
66 | - qiov_slice(qiov, offset, len, &head, &tail, &niov); | ||
67 | + qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); | ||
68 | |||
69 | return niov; | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended( | ||
72 | } | ||
73 | |||
74 | if (mid_len) { | ||
75 | - mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, | ||
76 | - &mid_head, &mid_tail, &mid_niov); | ||
77 | + mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, | ||
78 | + &mid_head, &mid_tail, &mid_niov); | ||
79 | } | ||
80 | |||
81 | total_niov = !!head_len + mid_niov + !!tail_len; | ||
82 | -- | ||
83 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | When processing vectored guest requests that are not aligned to the | |
2 | storage request alignment, we pad them by adding head and/or tail | ||
3 | buffers for a read-modify-write cycle. | ||
4 | |||
5 | The guest can submit I/O vectors up to IOV_MAX (1024) in length, but | ||
6 | with this padding, the vector can exceed that limit. As of | ||
7 | 4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make | ||
8 | qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the | ||
9 | limit, instead returning an error to the guest. | ||
10 | |||
11 | To the guest, this appears as a random I/O error. We should not return | ||
12 | an I/O error to the guest when it issued a perfectly valid request. | ||
13 | |||
14 | Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector | ||
15 | longer than IOV_MAX, which generally seems to work (because the guest | ||
16 | assumes a smaller alignment than we really have, file-posix's | ||
17 | raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and | ||
18 | so emulate the request, so that the IOV_MAX does not matter). However, | ||
19 | that does not seem exactly great. | ||
20 | |||
21 | I see two ways to fix this problem: | ||
22 | 1. We split such long requests into two requests. | ||
23 | 2. We join some elements of the vector into new buffers to make it | ||
24 | shorter. | ||
25 | |||
26 | I am wary of (1), because it seems like it may have unintended side | ||
27 | effects. | ||
28 | |||
29 | (2) on the other hand seems relatively simple to implement, with | ||
30 | hopefully few side effects, so this patch does that. | ||
31 | |||
32 | To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() | ||
33 | is effectively replaced by the new function bdrv_create_padded_qiov(), | ||
34 | which not only wraps the request IOV with padding head/tail, but also | ||
35 | ensures that the resulting vector will not have more than IOV_MAX | ||
36 | elements. Putting that functionality into qemu_iovec_init_extended() is | ||
37 | infeasible because it requires allocating a bounce buffer; doing so | ||
38 | would require many more parameters (buffer alignment, how to initialize | ||
39 | the buffer, and out parameters like the buffer, its length, and the | ||
40 | original elements), which is not reasonable. | ||
41 | |||
42 | Conversely, it is not difficult to move qemu_iovec_init_extended()'s | ||
43 | functionality into bdrv_create_padded_qiov() by using public | ||
44 | qemu_iovec_* functions, so that is what this patch does. | ||
45 | |||
46 | Because bdrv_pad_request() was the only "serious" user of | ||
47 | qemu_iovec_init_extended(), the next patch will remove the latter | ||
48 | function, so the functionality is not implemented twice. | ||
49 | |||
50 | Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 | ||
51 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
52 | Message-Id: <20230411173418.19549-3-hreitz@redhat.com> | ||
53 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
54 | --- | ||
55 | block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- | ||
56 | 1 file changed, 151 insertions(+), 15 deletions(-) | ||
57 | |||
58 | diff --git a/block/io.c b/block/io.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/block/io.c | ||
61 | +++ b/block/io.c | ||
62 | @@ -XXX,XX +XXX,XX @@ out: | ||
63 | * @merge_reads is true for small requests, | ||
64 | * if @buf_len == @head + bytes + @tail. In this case it is possible that both | ||
65 | * head and tail exist but @buf_len == align and @tail_buf == @buf. | ||
66 | + * | ||
67 | + * @write is true for write requests, false for read requests. | ||
68 | + * | ||
69 | + * If padding makes the vector too long (exceeding IOV_MAX), then we need to | ||
70 | + * merge existing vector elements into a single one. @collapse_bounce_buf acts | ||
71 | + * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse | ||
72 | + * I/O vector elements so for read requests, the data can be copied back after | ||
73 | + * the read is done. | ||
74 | */ | ||
75 | typedef struct BdrvRequestPadding { | ||
76 | uint8_t *buf; | ||
77 | @@ -XXX,XX +XXX,XX @@ typedef struct BdrvRequestPadding { | ||
78 | size_t head; | ||
79 | size_t tail; | ||
80 | bool merge_reads; | ||
81 | + bool write; | ||
82 | QEMUIOVector local_qiov; | ||
83 | + | ||
84 | + uint8_t *collapse_bounce_buf; | ||
85 | + size_t collapse_len; | ||
86 | + QEMUIOVector pre_collapse_qiov; | ||
87 | } BdrvRequestPadding; | ||
88 | |||
89 | static bool bdrv_init_padding(BlockDriverState *bs, | ||
90 | int64_t offset, int64_t bytes, | ||
91 | + bool write, | ||
92 | BdrvRequestPadding *pad) | ||
93 | { | ||
94 | int64_t align = bs->bl.request_alignment; | ||
95 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_init_padding(BlockDriverState *bs, | ||
96 | pad->tail_buf = pad->buf + pad->buf_len - align; | ||
97 | } | ||
98 | |||
99 | + pad->write = write; | ||
100 | + | ||
101 | return true; | ||
102 | } | ||
103 | |||
104 | @@ -XXX,XX +XXX,XX @@ zero_mem: | ||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | -static void bdrv_padding_destroy(BdrvRequestPadding *pad) | ||
109 | +/** | ||
110 | + * Free *pad's associated buffers, and perform any necessary finalization steps. | ||
111 | + */ | ||
112 | +static void bdrv_padding_finalize(BdrvRequestPadding *pad) | ||
113 | { | ||
114 | + if (pad->collapse_bounce_buf) { | ||
115 | + if (!pad->write) { | ||
116 | + /* | ||
117 | + * If padding required elements in the vector to be collapsed into a | ||
118 | + * bounce buffer, copy the bounce buffer content back | ||
119 | + */ | ||
120 | + qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, | ||
121 | + pad->collapse_bounce_buf, pad->collapse_len); | ||
122 | + } | ||
123 | + qemu_vfree(pad->collapse_bounce_buf); | ||
124 | + qemu_iovec_destroy(&pad->pre_collapse_qiov); | ||
125 | + } | ||
126 | if (pad->buf) { | ||
127 | qemu_vfree(pad->buf); | ||
128 | qemu_iovec_destroy(&pad->local_qiov); | ||
129 | @@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) | ||
130 | memset(pad, 0, sizeof(*pad)); | ||
131 | } | ||
132 | |||
133 | +/* | ||
134 | + * Create pad->local_qiov by wrapping @iov in the padding head and tail, while | ||
135 | + * ensuring that the resulting vector will not exceed IOV_MAX elements. | ||
136 | + * | ||
137 | + * To ensure this, when necessary, the first two or three elements of @iov are | ||
138 | + * merged into pad->collapse_bounce_buf and replaced by a reference to that | ||
139 | + * bounce buffer in pad->local_qiov. | ||
140 | + * | ||
141 | + * After performing a read request, the data from the bounce buffer must be | ||
142 | + * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). | ||
143 | + */ | ||
144 | +static int bdrv_create_padded_qiov(BlockDriverState *bs, | ||
145 | + BdrvRequestPadding *pad, | ||
146 | + struct iovec *iov, int niov, | ||
147 | + size_t iov_offset, size_t bytes) | ||
148 | +{ | ||
149 | + int padded_niov, surplus_count, collapse_count; | ||
150 | + | ||
151 | + /* Assert this invariant */ | ||
152 | + assert(niov <= IOV_MAX); | ||
153 | + | ||
154 | + /* | ||
155 | + * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error | ||
156 | + * to the guest is not ideal, but there is little else we can do. At least | ||
157 | + * this will practically never happen on 64-bit systems. | ||
158 | + */ | ||
159 | + if (SIZE_MAX - pad->head < bytes || | ||
160 | + SIZE_MAX - pad->head - bytes < pad->tail) | ||
161 | + { | ||
162 | + return -EINVAL; | ||
163 | + } | ||
164 | + | ||
165 | + /* Length of the resulting IOV if we just concatenated everything */ | ||
166 | + padded_niov = !!pad->head + niov + !!pad->tail; | ||
167 | + | ||
168 | + qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); | ||
169 | + | ||
170 | + if (pad->head) { | ||
171 | + qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); | ||
172 | + } | ||
173 | + | ||
174 | + /* | ||
175 | + * If padded_niov > IOV_MAX, we cannot just concatenate everything. | ||
176 | + * Instead, merge the first two or three elements of @iov to reduce the | ||
177 | + * number of vector elements as necessary. | ||
178 | + */ | ||
179 | + if (padded_niov > IOV_MAX) { | ||
180 | + /* | ||
181 | + * Only head and tail can have lead to the number of entries exceeding | ||
182 | + * IOV_MAX, so we can exceed it by the head and tail at most. We need | ||
183 | + * to reduce the number of elements by `surplus_count`, so we merge that | ||
184 | + * many elements plus one into one element. | ||
185 | + */ | ||
186 | + surplus_count = padded_niov - IOV_MAX; | ||
187 | + assert(surplus_count <= !!pad->head + !!pad->tail); | ||
188 | + collapse_count = surplus_count + 1; | ||
189 | + | ||
190 | + /* | ||
191 | + * Move the elements to collapse into `pad->pre_collapse_qiov`, then | ||
192 | + * advance `iov` (and associated variables) by those elements. | ||
193 | + */ | ||
194 | + qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); | ||
195 | + qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, | ||
196 | + collapse_count, iov_offset, SIZE_MAX); | ||
197 | + iov += collapse_count; | ||
198 | + iov_offset = 0; | ||
199 | + niov -= collapse_count; | ||
200 | + bytes -= pad->pre_collapse_qiov.size; | ||
201 | + | ||
202 | + /* | ||
203 | + * Construct the bounce buffer to match the length of the to-collapse | ||
204 | + * vector elements, and for write requests, initialize it with the data | ||
205 | + * from those elements. Then add it to `pad->local_qiov`. | ||
206 | + */ | ||
207 | + pad->collapse_len = pad->pre_collapse_qiov.size; | ||
208 | + pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); | ||
209 | + if (pad->write) { | ||
210 | + qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, | ||
211 | + pad->collapse_bounce_buf, pad->collapse_len); | ||
212 | + } | ||
213 | + qemu_iovec_add(&pad->local_qiov, | ||
214 | + pad->collapse_bounce_buf, pad->collapse_len); | ||
215 | + } | ||
216 | + | ||
217 | + qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); | ||
218 | + | ||
219 | + if (pad->tail) { | ||
220 | + qemu_iovec_add(&pad->local_qiov, | ||
221 | + pad->buf + pad->buf_len - pad->tail, pad->tail); | ||
222 | + } | ||
223 | + | ||
224 | + assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); | ||
225 | + return 0; | ||
226 | +} | ||
227 | + | ||
228 | /* | ||
229 | * bdrv_pad_request | ||
230 | * | ||
231 | @@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) | ||
232 | * read of padding, bdrv_padding_rmw_read() should be called separately if | ||
233 | * needed. | ||
234 | * | ||
235 | + * @write is true for write requests, false for read requests. | ||
236 | + * | ||
237 | * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: | ||
238 | * - on function start they represent original request | ||
239 | * - on failure or when padding is not needed they are unchanged | ||
240 | @@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) | ||
241 | static int bdrv_pad_request(BlockDriverState *bs, | ||
242 | QEMUIOVector **qiov, size_t *qiov_offset, | ||
243 | int64_t *offset, int64_t *bytes, | ||
244 | + bool write, | ||
245 | BdrvRequestPadding *pad, bool *padded, | ||
246 | BdrvRequestFlags *flags) | ||
247 | { | ||
248 | int ret; | ||
249 | + struct iovec *sliced_iov; | ||
250 | + int sliced_niov; | ||
251 | + size_t sliced_head, sliced_tail; | ||
252 | |||
253 | bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); | ||
254 | |||
255 | - if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { | ||
256 | + if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { | ||
257 | if (padded) { | ||
258 | *padded = false; | ||
259 | } | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | - ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, | ||
264 | - *qiov, *qiov_offset, *bytes, | ||
265 | - pad->buf + pad->buf_len - pad->tail, | ||
266 | - pad->tail); | ||
267 | + sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, | ||
268 | + &sliced_head, &sliced_tail, | ||
269 | + &sliced_niov); | ||
270 | + | ||
271 | + /* Guaranteed by bdrv_check_qiov_request() */ | ||
272 | + assert(*bytes <= SIZE_MAX); | ||
273 | + ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, | ||
274 | + sliced_head, *bytes); | ||
275 | if (ret < 0) { | ||
276 | - bdrv_padding_destroy(pad); | ||
277 | + bdrv_padding_finalize(pad); | ||
278 | return ret; | ||
279 | } | ||
280 | *bytes += pad->head + pad->tail; | ||
281 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, | ||
282 | flags |= BDRV_REQ_COPY_ON_READ; | ||
283 | } | ||
284 | |||
285 | - ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, | ||
286 | - NULL, &flags); | ||
287 | + ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, | ||
288 | + &pad, NULL, &flags); | ||
289 | if (ret < 0) { | ||
290 | goto fail; | ||
291 | } | ||
292 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, | ||
293 | bs->bl.request_alignment, | ||
294 | qiov, qiov_offset, flags); | ||
295 | tracked_request_end(&req); | ||
296 | - bdrv_padding_destroy(&pad); | ||
297 | + bdrv_padding_finalize(&pad); | ||
298 | |||
299 | fail: | ||
300 | bdrv_dec_in_flight(bs); | ||
301 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, | ||
302 | /* This flag doesn't make sense for padding or zero writes */ | ||
303 | flags &= ~BDRV_REQ_REGISTERED_BUF; | ||
304 | |||
305 | - padding = bdrv_init_padding(bs, offset, bytes, &pad); | ||
306 | + padding = bdrv_init_padding(bs, offset, bytes, true, &pad); | ||
307 | if (padding) { | ||
308 | assert(!(flags & BDRV_REQ_NO_WAIT)); | ||
309 | bdrv_make_request_serialising(req, align); | ||
310 | @@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, | ||
311 | } | ||
312 | |||
313 | out: | ||
314 | - bdrv_padding_destroy(&pad); | ||
315 | + bdrv_padding_finalize(&pad); | ||
316 | |||
317 | return ret; | ||
318 | } | ||
319 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, | ||
320 | * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do | ||
321 | * alignment only if there is no ZERO flag. | ||
322 | */ | ||
323 | - ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, | ||
324 | - &padded, &flags); | ||
325 | + ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, | ||
326 | + &pad, &padded, &flags); | ||
327 | if (ret < 0) { | ||
328 | return ret; | ||
329 | } | ||
330 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, | ||
331 | ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, | ||
332 | qiov, qiov_offset, flags); | ||
333 | |||
334 | - bdrv_padding_destroy(&pad); | ||
335 | + bdrv_padding_finalize(&pad); | ||
336 | |||
337 | out: | ||
338 | tracked_request_end(&req); | ||
339 | -- | ||
340 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | bdrv_pad_request() was the main user of qemu_iovec_init_extended(). | ||
2 | HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() | ||
3 | now. | ||
1 | 4 | ||
5 | The only remaining user is qemu_iovec_init_slice(), which can easily | ||
6 | inline the small part it really needs. | ||
7 | |||
8 | Note that qemu_iovec_init_extended() offered a memcpy() optimization to | ||
9 | initialize the new I/O vector. qemu_iovec_concat_iov(), which is used | ||
10 | to replace its functionality, does not, but calls qemu_iovec_add() for | ||
11 | every single element. If we decide this optimization was important, we | ||
12 | will need to re-implement it in qemu_iovec_concat_iov(), which might | ||
13 | also benefit its pre-existing users. | ||
14 | |||
15 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
16 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
17 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
18 | Message-Id: <20230411173418.19549-4-hreitz@redhat.com> | ||
19 | --- | ||
20 | include/qemu/iov.h | 5 --- | ||
21 | util/iov.c | 79 +++++++--------------------------------------- | ||
22 | 2 files changed, 11 insertions(+), 73 deletions(-) | ||
23 | |||
24 | diff --git a/include/qemu/iov.h b/include/qemu/iov.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/include/qemu/iov.h | ||
27 | +++ b/include/qemu/iov.h | ||
28 | @@ -XXX,XX +XXX,XX @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) | ||
29 | |||
30 | void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); | ||
31 | void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); | ||
32 | -int qemu_iovec_init_extended( | ||
33 | - QEMUIOVector *qiov, | ||
34 | - void *head_buf, size_t head_len, | ||
35 | - QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, | ||
36 | - void *tail_buf, size_t tail_len); | ||
37 | void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, | ||
38 | size_t offset, size_t len); | ||
39 | struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, | ||
40 | diff --git a/util/iov.c b/util/iov.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/util/iov.c | ||
43 | +++ b/util/iov.c | ||
44 | @@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) | ||
45 | return niov; | ||
46 | } | ||
47 | |||
48 | -/* | ||
49 | - * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, | ||
50 | - * and @tail_buf buffer into new qiov. | ||
51 | - */ | ||
52 | -int qemu_iovec_init_extended( | ||
53 | - QEMUIOVector *qiov, | ||
54 | - void *head_buf, size_t head_len, | ||
55 | - QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, | ||
56 | - void *tail_buf, size_t tail_len) | ||
57 | -{ | ||
58 | - size_t mid_head, mid_tail; | ||
59 | - int total_niov, mid_niov = 0; | ||
60 | - struct iovec *p, *mid_iov = NULL; | ||
61 | - | ||
62 | - assert(mid_qiov->niov <= IOV_MAX); | ||
63 | - | ||
64 | - if (SIZE_MAX - head_len < mid_len || | ||
65 | - SIZE_MAX - head_len - mid_len < tail_len) | ||
66 | - { | ||
67 | - return -EINVAL; | ||
68 | - } | ||
69 | - | ||
70 | - if (mid_len) { | ||
71 | - mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, | ||
72 | - &mid_head, &mid_tail, &mid_niov); | ||
73 | - } | ||
74 | - | ||
75 | - total_niov = !!head_len + mid_niov + !!tail_len; | ||
76 | - if (total_niov > IOV_MAX) { | ||
77 | - return -EINVAL; | ||
78 | - } | ||
79 | - | ||
80 | - if (total_niov == 1) { | ||
81 | - qemu_iovec_init_buf(qiov, NULL, 0); | ||
82 | - p = &qiov->local_iov; | ||
83 | - } else { | ||
84 | - qiov->niov = qiov->nalloc = total_niov; | ||
85 | - qiov->size = head_len + mid_len + tail_len; | ||
86 | - p = qiov->iov = g_new(struct iovec, qiov->niov); | ||
87 | - } | ||
88 | - | ||
89 | - if (head_len) { | ||
90 | - p->iov_base = head_buf; | ||
91 | - p->iov_len = head_len; | ||
92 | - p++; | ||
93 | - } | ||
94 | - | ||
95 | - assert(!mid_niov == !mid_len); | ||
96 | - if (mid_niov) { | ||
97 | - memcpy(p, mid_iov, mid_niov * sizeof(*p)); | ||
98 | - p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; | ||
99 | - p[0].iov_len -= mid_head; | ||
100 | - p[mid_niov - 1].iov_len -= mid_tail; | ||
101 | - p += mid_niov; | ||
102 | - } | ||
103 | - | ||
104 | - if (tail_len) { | ||
105 | - p->iov_base = tail_buf; | ||
106 | - p->iov_len = tail_len; | ||
107 | - } | ||
108 | - | ||
109 | - return 0; | ||
110 | -} | ||
111 | - | ||
112 | /* | ||
113 | * Check if the contents of subrange of qiov data is all zeroes. | ||
114 | */ | ||
115 | @@ -XXX,XX +XXX,XX @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) | ||
116 | void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, | ||
117 | size_t offset, size_t len) | ||
118 | { | ||
119 | - int ret; | ||
120 | + struct iovec *slice_iov; | ||
121 | + int slice_niov; | ||
122 | + size_t slice_head, slice_tail; | ||
123 | |||
124 | assert(source->size >= len); | ||
125 | assert(source->size - len >= offset); | ||
126 | |||
127 | - /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ | ||
128 | - ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); | ||
129 | - assert(ret == 0); | ||
130 | + slice_iov = qemu_iovec_slice(source, offset, len, | ||
131 | + &slice_head, &slice_tail, &slice_niov); | ||
132 | + if (slice_niov == 1) { | ||
133 | + qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); | ||
134 | + } else { | ||
135 | + qemu_iovec_init(qiov, slice_niov); | ||
136 | + qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); | ||
137 | + } | ||
138 | } | ||
139 | |||
140 | void qemu_iovec_destroy(QEMUIOVector *qiov) | ||
141 | -- | ||
142 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Test that even vectored IO requests with 1024 vector elements that are | ||
2 | not aligned to the device's request alignment will succeed. | ||
1 | 3 | ||
4 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
5 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
6 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
7 | Message-Id: <20230411173418.19549-5-hreitz@redhat.com> | ||
8 | --- | ||
9 | tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ | ||
10 | tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ | ||
11 | 2 files changed, 144 insertions(+) | ||
12 | create mode 100755 tests/qemu-iotests/tests/iov-padding | ||
13 | create mode 100644 tests/qemu-iotests/tests/iov-padding.out | ||
14 | |||
15 | diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding | ||
16 | new file mode 100755 | ||
17 | index XXXXXXX..XXXXXXX | ||
18 | --- /dev/null | ||
19 | +++ b/tests/qemu-iotests/tests/iov-padding | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | +#!/usr/bin/env bash | ||
22 | +# group: rw quick | ||
23 | +# | ||
24 | +# Check the interaction of request padding (to fit alignment restrictions) with | ||
25 | +# vectored I/O from the guest | ||
26 | +# | ||
27 | +# Copyright Red Hat | ||
28 | +# | ||
29 | +# This program is free software; you can redistribute it and/or modify | ||
30 | +# it under the terms of the GNU General Public License as published by | ||
31 | +# the Free Software Foundation; either version 2 of the License, or | ||
32 | +# (at your option) any later version. | ||
33 | +# | ||
34 | +# This program is distributed in the hope that it will be useful, | ||
35 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
36 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
37 | +# GNU General Public License for more details. | ||
38 | +# | ||
39 | +# You should have received a copy of the GNU General Public License | ||
40 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
41 | +# | ||
42 | + | ||
43 | +seq=$(basename $0) | ||
44 | +echo "QA output created by $seq" | ||
45 | + | ||
46 | +status=1 # failure is the default! | ||
47 | + | ||
48 | +_cleanup() | ||
49 | +{ | ||
50 | + _cleanup_test_img | ||
51 | +} | ||
52 | +trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
53 | + | ||
54 | +# get standard environment, filters and checks | ||
55 | +cd .. | ||
56 | +. ./common.rc | ||
57 | +. ./common.filter | ||
58 | + | ||
59 | +_supported_fmt raw | ||
60 | +_supported_proto file | ||
61 | + | ||
62 | +_make_test_img 1M | ||
63 | + | ||
64 | +IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" | ||
65 | + | ||
66 | +# Four combinations: | ||
67 | +# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k | ||
68 | +# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not | ||
69 | +# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned | ||
70 | +# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not | ||
71 | +for start_offset in 4096 512; do | ||
72 | + for last_element_length in 512 4096; do | ||
73 | + length=$((1023 * 512 + $last_element_length)) | ||
74 | + | ||
75 | + echo | ||
76 | + echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" | ||
77 | + | ||
78 | + # Fill with data for testing | ||
79 | + $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io | ||
80 | + | ||
81 | + # 1023 512-byte buffers, and then one with length $last_element_length | ||
82 | + cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" | ||
83 | + QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ | ||
84 | + -c "writev $cmd_params" \ | ||
85 | + --image-opts \ | ||
86 | + "$IMGSPEC" \ | ||
87 | + | _filter_qemu_io | ||
88 | + | ||
89 | + # Read all patterns -- read the part we just wrote with writev twice, | ||
90 | + # once "normally", and once with a readv, so we see that that works, too | ||
91 | + QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ | ||
92 | + -c "read -P 1 0 $start_offset" \ | ||
93 | + -c "read -P 2 $start_offset $length" \ | ||
94 | + -c "readv $cmd_params" \ | ||
95 | + -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ | ||
96 | + --image-opts \ | ||
97 | + "$IMGSPEC" \ | ||
98 | + | _filter_qemu_io | ||
99 | + done | ||
100 | +done | ||
101 | + | ||
102 | +# success, all done | ||
103 | +echo "*** done" | ||
104 | +rm -f $seq.full | ||
105 | +status=0 | ||
106 | diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out | ||
107 | new file mode 100644 | ||
108 | index XXXXXXX..XXXXXXX | ||
109 | --- /dev/null | ||
110 | +++ b/tests/qemu-iotests/tests/iov-padding.out | ||
111 | @@ -XXX,XX +XXX,XX @@ | ||
112 | +QA output created by iov-padding | ||
113 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 | ||
114 | + | ||
115 | +== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == | ||
116 | +wrote 1048576/1048576 bytes at offset 0 | ||
117 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
118 | +wrote 524288/524288 bytes at offset 4096 | ||
119 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
120 | +read 4096/4096 bytes at offset 0 | ||
121 | +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
122 | +read 524288/524288 bytes at offset 4096 | ||
123 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
124 | +read 524288/524288 bytes at offset 4096 | ||
125 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
126 | +read 520192/520192 bytes at offset 528384 | ||
127 | +508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
128 | + | ||
129 | +== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == | ||
130 | +wrote 1048576/1048576 bytes at offset 0 | ||
131 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
132 | +wrote 527872/527872 bytes at offset 4096 | ||
133 | +515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
134 | +read 4096/4096 bytes at offset 0 | ||
135 | +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
136 | +read 527872/527872 bytes at offset 4096 | ||
137 | +515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
138 | +read 527872/527872 bytes at offset 4096 | ||
139 | +515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
140 | +read 516608/516608 bytes at offset 531968 | ||
141 | +504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
142 | + | ||
143 | +== performing 1024-element vectored requests to image (offset: 512; length: 524288) == | ||
144 | +wrote 1048576/1048576 bytes at offset 0 | ||
145 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
146 | +wrote 524288/524288 bytes at offset 512 | ||
147 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
148 | +read 512/512 bytes at offset 0 | ||
149 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
150 | +read 524288/524288 bytes at offset 512 | ||
151 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
152 | +read 524288/524288 bytes at offset 512 | ||
153 | +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
154 | +read 523776/523776 bytes at offset 524800 | ||
155 | +511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
156 | + | ||
157 | +== performing 1024-element vectored requests to image (offset: 512; length: 527872) == | ||
158 | +wrote 1048576/1048576 bytes at offset 0 | ||
159 | +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
160 | +wrote 527872/527872 bytes at offset 512 | ||
161 | +515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
162 | +read 512/512 bytes at offset 0 | ||
163 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
164 | +read 527872/527872 bytes at offset 512 | ||
165 | +515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
166 | +read 527872/527872 bytes at offset 512 | ||
167 | +515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
168 | +read 520192/520192 bytes at offset 528384 | ||
169 | +508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
170 | +*** done | ||
171 | -- | ||
172 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | data_end field in BDRVParallelsState is set to the biggest offset present | ||
4 | in BAT. If this offset is outside of the image, any further write will | ||
5 | create the cluster at this offset and/or the image will be truncated to | ||
6 | this offset on close. This is definitely not correct. | ||
7 | |||
8 | Raise an error in parallels_open() if data_end points outside the image | ||
9 | and it is not a check (let the check repair the image). Set data_end | ||
10 | to the end of the cluster with the last correct offset. | ||
11 | |||
12 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
13 | Message-Id: <20230424093147.197643-2-alexander.ivanov@virtuozzo.com> | ||
14 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
15 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
16 | --- | ||
17 | block/parallels.c | 17 +++++++++++++++++ | ||
18 | 1 file changed, 17 insertions(+) | ||
19 | |||
20 | diff --git a/block/parallels.c b/block/parallels.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/block/parallels.c | ||
23 | +++ b/block/parallels.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, | ||
25 | BDRVParallelsState *s = bs->opaque; | ||
26 | ParallelsHeader ph; | ||
27 | int ret, size, i; | ||
28 | + int64_t file_nb_sectors; | ||
29 | QemuOpts *opts = NULL; | ||
30 | Error *local_err = NULL; | ||
31 | char *buf; | ||
32 | @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, | ||
33 | return ret; | ||
34 | } | ||
35 | |||
36 | + file_nb_sectors = bdrv_nb_sectors(bs->file->bs); | ||
37 | + if (file_nb_sectors < 0) { | ||
38 | + return -EINVAL; | ||
39 | + } | ||
40 | + | ||
41 | ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0); | ||
42 | if (ret < 0) { | ||
43 | goto fail; | ||
44 | @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, | ||
45 | |||
46 | for (i = 0; i < s->bat_size; i++) { | ||
47 | int64_t off = bat2sect(s, i); | ||
48 | + if (off >= file_nb_sectors) { | ||
49 | + if (flags & BDRV_O_CHECK) { | ||
50 | + continue; | ||
51 | + } | ||
52 | + error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry " | ||
53 | + "is larger than file size (%" PRIi64 ")", | ||
54 | + off << BDRV_SECTOR_BITS, i, | ||
55 | + file_nb_sectors << BDRV_SECTOR_BITS); | ||
56 | + ret = -EINVAL; | ||
57 | + goto fail; | ||
58 | + } | ||
59 | if (off >= s->data_end) { | ||
60 | s->data_end = off + s->tracks; | ||
61 | } | ||
62 | -- | ||
63 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | Don't let high_off be more than the file size even if we don't fix the | ||
4 | image. | ||
5 | |||
6 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
7 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
9 | Message-Id: <20230424093147.197643-3-alexander.ivanov@virtuozzo.com> | ||
10 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
11 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
12 | --- | ||
13 | block/parallels.c | 4 ++-- | ||
14 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/block/parallels.c b/block/parallels.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/block/parallels.c | ||
19 | +++ b/block/parallels.c | ||
20 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
21 | fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); | ||
22 | res->corruptions++; | ||
23 | if (fix & BDRV_FIX_ERRORS) { | ||
24 | - prev_off = 0; | ||
25 | s->bat_bitmap[i] = 0; | ||
26 | res->corruptions_fixed++; | ||
27 | flush_bat = true; | ||
28 | - continue; | ||
29 | } | ||
30 | + prev_off = 0; | ||
31 | + continue; | ||
32 | } | ||
33 | |||
34 | res->bfi.allocated_clusters++; | ||
35 | -- | ||
36 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | Set data_end to the end of the last cluster inside the image. In such a | ||
4 | way we can be sure that corrupted offsets in the BAT can't affect the | ||
5 | image size. If there are no allocated clusters, set image_end_offset to | ||
6 | data_end. | ||
7 | |||
8 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
9 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
10 | Message-Id: <20230424093147.197643-4-alexander.ivanov@virtuozzo.com> | ||
11 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
12 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
13 | --- | ||
14 | block/parallels.c | 8 +++++++- | ||
15 | 1 file changed, 7 insertions(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/block/parallels.c b/block/parallels.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/parallels.c | ||
20 | +++ b/block/parallels.c | ||
21 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
22 | } | ||
23 | } | ||
24 | |||
25 | - res->image_end_offset = high_off + s->cluster_size; | ||
26 | + if (high_off == 0) { | ||
27 | + res->image_end_offset = s->data_end << BDRV_SECTOR_BITS; | ||
28 | + } else { | ||
29 | + res->image_end_offset = high_off + s->cluster_size; | ||
30 | + s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS; | ||
31 | + } | ||
32 | + | ||
33 | if (size > res->image_end_offset) { | ||
34 | int64_t count; | ||
35 | count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); | ||
36 | -- | ||
37 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | This helper will be reused in next patches during parallels_co_check | ||
4 | rework to simplify its code. | ||
5 | |||
6 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
7 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
8 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
9 | Message-Id: <20230424093147.197643-5-alexander.ivanov@virtuozzo.com> | ||
10 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
11 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
12 | --- | ||
13 | block/parallels.c | 11 ++++++++--- | ||
14 | 1 file changed, 8 insertions(+), 3 deletions(-) | ||
15 | |||
16 | diff --git a/block/parallels.c b/block/parallels.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/block/parallels.c | ||
19 | +++ b/block/parallels.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, | ||
21 | return start_off; | ||
22 | } | ||
23 | |||
24 | +static void parallels_set_bat_entry(BDRVParallelsState *s, | ||
25 | + uint32_t index, uint32_t offset) | ||
26 | +{ | ||
27 | + s->bat_bitmap[index] = cpu_to_le32(offset); | ||
28 | + bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); | ||
29 | +} | ||
30 | + | ||
31 | static int64_t coroutine_fn GRAPH_RDLOCK | ||
32 | allocate_clusters(BlockDriverState *bs, int64_t sector_num, | ||
33 | int nb_sectors, int *pnum) | ||
34 | @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, | ||
35 | } | ||
36 | |||
37 | for (i = 0; i < to_allocate; i++) { | ||
38 | - s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); | ||
39 | + parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); | ||
40 | s->data_end += s->tracks; | ||
41 | - bitmap_set(s->bat_dirty_bmap, | ||
42 | - bat_entry_off(idx + i) / s->bat_dirty_block, 1); | ||
43 | } | ||
44 | |||
45 | return bat2sect(s, idx) + sector_num % s->tracks; | ||
46 | -- | ||
47 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | BAT is written in the context of conventional operations over the image | ||
4 | inside bdrv_co_flush() when it calls parallels_co_flush_to_os() callback. | ||
5 | Thus we should not modify BAT array directly, but call | ||
6 | parallels_set_bat_entry() helper and bdrv_co_flush() further on. After | ||
7 | that there is no need to manually write BAT and track its modification. | ||
8 | |||
9 | This makes code more generic and allows splitting parallels_set_bat_entry() | ||
10 | into independent pieces. | ||
11 | |||
12 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
13 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
14 | Message-Id: <20230424093147.197643-6-alexander.ivanov@virtuozzo.com> | ||
15 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
16 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
17 | --- | ||
18 | block/parallels.c | 23 ++++++++++------------- | ||
19 | 1 file changed, 10 insertions(+), 13 deletions(-) | ||
20 | |||
21 | diff --git a/block/parallels.c b/block/parallels.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/block/parallels.c | ||
24 | +++ b/block/parallels.c | ||
25 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
26 | { | ||
27 | BDRVParallelsState *s = bs->opaque; | ||
28 | int64_t size, prev_off, high_off; | ||
29 | - int ret; | ||
30 | + int ret = 0; | ||
31 | uint32_t i; | ||
32 | - bool flush_bat = false; | ||
33 | |||
34 | size = bdrv_getlength(bs->file->bs); | ||
35 | if (size < 0) { | ||
36 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
37 | fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); | ||
38 | res->corruptions++; | ||
39 | if (fix & BDRV_FIX_ERRORS) { | ||
40 | - s->bat_bitmap[i] = 0; | ||
41 | + parallels_set_bat_entry(s, i, 0); | ||
42 | res->corruptions_fixed++; | ||
43 | - flush_bat = true; | ||
44 | } | ||
45 | prev_off = 0; | ||
46 | continue; | ||
47 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
48 | prev_off = off; | ||
49 | } | ||
50 | |||
51 | - ret = 0; | ||
52 | - if (flush_bat) { | ||
53 | - ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0); | ||
54 | - if (ret < 0) { | ||
55 | - res->check_errors++; | ||
56 | - goto out; | ||
57 | - } | ||
58 | - } | ||
59 | - | ||
60 | if (high_off == 0) { | ||
61 | res->image_end_offset = s->data_end << BDRV_SECTOR_BITS; | ||
62 | } else { | ||
63 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
64 | |||
65 | out: | ||
66 | qemu_co_mutex_unlock(&s->lock); | ||
67 | + | ||
68 | + if (ret == 0) { | ||
69 | + ret = bdrv_co_flush(bs); | ||
70 | + if (ret < 0) { | ||
71 | + res->check_errors++; | ||
72 | + } | ||
73 | + } | ||
74 | + | ||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | -- | ||
79 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | We will add more and more checks so we need a better code structure | ||
4 | in parallels_co_check. Let each check performs in a separate loop | ||
5 | in a separate helper. | ||
6 | |||
7 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
8 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | ||
10 | Message-Id: <20230424093147.197643-7-alexander.ivanov@virtuozzo.com> | ||
11 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
12 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
13 | --- | ||
14 | block/parallels.c | 31 +++++++++++++++++++++---------- | ||
15 | 1 file changed, 21 insertions(+), 10 deletions(-) | ||
16 | |||
17 | diff --git a/block/parallels.c b/block/parallels.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/parallels.c | ||
20 | +++ b/block/parallels.c | ||
21 | @@ -XXX,XX +XXX,XX @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, | ||
22 | return ret; | ||
23 | } | ||
24 | |||
25 | +static void parallels_check_unclean(BlockDriverState *bs, | ||
26 | + BdrvCheckResult *res, | ||
27 | + BdrvCheckMode fix) | ||
28 | +{ | ||
29 | + BDRVParallelsState *s = bs->opaque; | ||
30 | + | ||
31 | + if (!s->header_unclean) { | ||
32 | + return; | ||
33 | + } | ||
34 | + | ||
35 | + fprintf(stderr, "%s image was not closed correctly\n", | ||
36 | + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); | ||
37 | + res->corruptions++; | ||
38 | + if (fix & BDRV_FIX_ERRORS) { | ||
39 | + /* parallels_close will do the job right */ | ||
40 | + res->corruptions_fixed++; | ||
41 | + s->header_unclean = false; | ||
42 | + } | ||
43 | +} | ||
44 | |||
45 | static int coroutine_fn GRAPH_RDLOCK | ||
46 | parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
47 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
48 | } | ||
49 | |||
50 | qemu_co_mutex_lock(&s->lock); | ||
51 | - if (s->header_unclean) { | ||
52 | - fprintf(stderr, "%s image was not closed correctly\n", | ||
53 | - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); | ||
54 | - res->corruptions++; | ||
55 | - if (fix & BDRV_FIX_ERRORS) { | ||
56 | - /* parallels_close will do the job right */ | ||
57 | - res->corruptions_fixed++; | ||
58 | - s->header_unclean = false; | ||
59 | - } | ||
60 | - } | ||
61 | + | ||
62 | + parallels_check_unclean(bs, res, fix); | ||
63 | |||
64 | res->bfi.total_clusters = s->bat_size; | ||
65 | res->bfi.compressed_clusters = 0; /* compression is not supported */ | ||
66 | -- | ||
67 | 2.40.1 | diff view generated by jsdifflib |
1 | From: Nicolas Saenz Julienne <nsaenzju@redhat.com> | 1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | 'event-loop-base' provides basic property handling for all 'AioContext' | 3 | We will add more and more checks so we need a better code structure in |
4 | based event loops. So let's define a new 'MainLoopClass' that inherits | 4 | parallels_co_check. Let each check performs in a separate loop in a |
5 | from it. This will permit tweaking the main loop's properties through | 5 | separate helper. |
6 | qapi as well as through the command line using the '-object' keyword[1]. | ||
7 | Only one instance of 'MainLoopClass' might be created at any time. | ||
8 | 6 | ||
9 | 'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to | 7 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> |
10 | mark 'MainLoop' as non-deletable. | 8 | Reviewed-by: Denis V. Lunev <den@openvz.org> |
9 | Message-Id: <20230424093147.197643-8-alexander.ivanov@virtuozzo.com> | ||
10 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
11 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
12 | --- | ||
13 | block/parallels.c | 75 +++++++++++++++++++++++++++++++---------------- | ||
14 | 1 file changed, 49 insertions(+), 26 deletions(-) | ||
11 | 15 | ||
12 | [1] For example: | 16 | diff --git a/block/parallels.c b/block/parallels.c |
13 | -object main-loop,id=main-loop,aio-max-batch=<value> | ||
14 | |||
15 | Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> | ||
16 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Acked-by: Markus Armbruster <armbru@redhat.com> | ||
18 | Message-id: 20220425075723.20019-3-nsaenzju@redhat.com | ||
19 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
20 | --- | ||
21 | qapi/qom.json | 13 ++++++++ | ||
22 | meson.build | 3 +- | ||
23 | include/qemu/main-loop.h | 10 ++++++ | ||
24 | include/sysemu/event-loop-base.h | 1 + | ||
25 | event-loop-base.c | 13 ++++++++ | ||
26 | util/main-loop.c | 56 ++++++++++++++++++++++++++++++++ | ||
27 | 6 files changed, 95 insertions(+), 1 deletion(-) | ||
28 | |||
29 | diff --git a/qapi/qom.json b/qapi/qom.json | ||
30 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/qapi/qom.json | 18 | --- a/block/parallels.c |
32 | +++ b/qapi/qom.json | 19 | +++ b/block/parallels.c |
33 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs, |
34 | '*poll-grow': 'int', | ||
35 | '*poll-shrink': 'int' } } | ||
36 | |||
37 | +## | ||
38 | +# @MainLoopProperties: | ||
39 | +# | ||
40 | +# Properties for the main-loop object. | ||
41 | +# | ||
42 | +# Since: 7.1 | ||
43 | +## | ||
44 | +{ 'struct': 'MainLoopProperties', | ||
45 | + 'base': 'EventLoopBaseProperties', | ||
46 | + 'data': {} } | ||
47 | + | ||
48 | ## | ||
49 | # @MemoryBackendProperties: | ||
50 | # | ||
51 | @@ -XXX,XX +XXX,XX @@ | ||
52 | { 'name': 'input-linux', | ||
53 | 'if': 'CONFIG_LINUX' }, | ||
54 | 'iothread', | ||
55 | + 'main-loop', | ||
56 | { 'name': 'memory-backend-epc', | ||
57 | 'if': 'CONFIG_LINUX' }, | ||
58 | 'memory-backend-file', | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | 'input-linux': { 'type': 'InputLinuxProperties', | ||
61 | 'if': 'CONFIG_LINUX' }, | ||
62 | 'iothread': 'IothreadProperties', | ||
63 | + 'main-loop': 'MainLoopProperties', | ||
64 | 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', | ||
65 | 'if': 'CONFIG_LINUX' }, | ||
66 | 'memory-backend-file': 'MemoryBackendFileProperties', | ||
67 | diff --git a/meson.build b/meson.build | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/meson.build | ||
70 | +++ b/meson.build | ||
71 | @@ -XXX,XX +XXX,XX @@ libqemuutil = static_library('qemuutil', | ||
72 | sources: util_ss.sources() + stub_ss.sources() + genh, | ||
73 | dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) | ||
74 | qemuutil = declare_dependency(link_with: libqemuutil, | ||
75 | - sources: genh + version_res) | ||
76 | + sources: genh + version_res, | ||
77 | + dependencies: [event_loop_base]) | ||
78 | |||
79 | if have_system or have_user | ||
80 | decodetree = generator(find_program('scripts/decodetree.py'), | ||
81 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/include/qemu/main-loop.h | ||
84 | +++ b/include/qemu/main-loop.h | ||
85 | @@ -XXX,XX +XXX,XX @@ | ||
86 | #define QEMU_MAIN_LOOP_H | ||
87 | |||
88 | #include "block/aio.h" | ||
89 | +#include "qom/object.h" | ||
90 | +#include "sysemu/event-loop-base.h" | ||
91 | |||
92 | #define SIG_IPI SIGUSR1 | ||
93 | |||
94 | +#define TYPE_MAIN_LOOP "main-loop" | ||
95 | +OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) | ||
96 | + | ||
97 | +struct MainLoop { | ||
98 | + EventLoopBase parent_obj; | ||
99 | +}; | ||
100 | +typedef struct MainLoop MainLoop; | ||
101 | + | ||
102 | /** | ||
103 | * qemu_init_main_loop: Set up the process so that it can run the main loop. | ||
104 | * | ||
105 | diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/include/sysemu/event-loop-base.h | ||
108 | +++ b/include/sysemu/event-loop-base.h | ||
109 | @@ -XXX,XX +XXX,XX @@ struct EventLoopBaseClass { | ||
110 | |||
111 | void (*init)(EventLoopBase *base, Error **errp); | ||
112 | void (*update_params)(EventLoopBase *base, Error **errp); | ||
113 | + bool (*can_be_deleted)(EventLoopBase *base); | ||
114 | }; | ||
115 | |||
116 | struct EventLoopBase { | ||
117 | diff --git a/event-loop-base.c b/event-loop-base.c | ||
118 | index XXXXXXX..XXXXXXX 100644 | ||
119 | --- a/event-loop-base.c | ||
120 | +++ b/event-loop-base.c | ||
121 | @@ -XXX,XX +XXX,XX @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) | ||
122 | } | 21 | } |
123 | } | 22 | } |
124 | 23 | ||
125 | +static bool event_loop_base_can_be_deleted(UserCreatable *uc) | 24 | +static int coroutine_fn GRAPH_RDLOCK |
25 | +parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, | ||
26 | + BdrvCheckMode fix) | ||
126 | +{ | 27 | +{ |
127 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); | 28 | + BDRVParallelsState *s = bs->opaque; |
128 | + EventLoopBase *backend = EVENT_LOOP_BASE(uc); | 29 | + uint32_t i; |
30 | + int64_t off, high_off, size; | ||
129 | + | 31 | + |
130 | + if (bc->can_be_deleted) { | 32 | + size = bdrv_getlength(bs->file->bs); |
131 | + return bc->can_be_deleted(backend); | 33 | + if (size < 0) { |
34 | + res->check_errors++; | ||
35 | + return size; | ||
132 | + } | 36 | + } |
133 | + | 37 | + |
134 | + return true; | 38 | + high_off = 0; |
39 | + for (i = 0; i < s->bat_size; i++) { | ||
40 | + off = bat2sect(s, i) << BDRV_SECTOR_BITS; | ||
41 | + if (off > size) { | ||
42 | + fprintf(stderr, "%s cluster %u is outside image\n", | ||
43 | + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); | ||
44 | + res->corruptions++; | ||
45 | + if (fix & BDRV_FIX_ERRORS) { | ||
46 | + parallels_set_bat_entry(s, i, 0); | ||
47 | + res->corruptions_fixed++; | ||
48 | + } | ||
49 | + continue; | ||
50 | + } | ||
51 | + if (high_off < off) { | ||
52 | + high_off = off; | ||
53 | + } | ||
54 | + } | ||
55 | + | ||
56 | + if (high_off == 0) { | ||
57 | + res->image_end_offset = s->data_end << BDRV_SECTOR_BITS; | ||
58 | + } else { | ||
59 | + res->image_end_offset = high_off + s->cluster_size; | ||
60 | + s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS; | ||
61 | + } | ||
62 | + | ||
63 | + return 0; | ||
135 | +} | 64 | +} |
136 | + | 65 | + |
137 | static void event_loop_base_class_init(ObjectClass *klass, void *class_data) | 66 | static int coroutine_fn GRAPH_RDLOCK |
67 | parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
68 | BdrvCheckMode fix) | ||
138 | { | 69 | { |
139 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | 70 | BDRVParallelsState *s = bs->opaque; |
140 | ucc->complete = event_loop_base_complete; | 71 | - int64_t size, prev_off, high_off; |
141 | + ucc->can_be_deleted = event_loop_base_can_be_deleted; | 72 | - int ret = 0; |
142 | 73 | + int64_t size, prev_off; | |
143 | object_class_property_add(klass, "aio-max-batch", "int", | 74 | + int ret; |
144 | event_loop_base_get_param, | 75 | uint32_t i; |
145 | diff --git a/util/main-loop.c b/util/main-loop.c | 76 | |
146 | index XXXXXXX..XXXXXXX 100644 | 77 | size = bdrv_getlength(bs->file->bs); |
147 | --- a/util/main-loop.c | 78 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
148 | +++ b/util/main-loop.c | 79 | |
149 | @@ -XXX,XX +XXX,XX @@ | 80 | parallels_check_unclean(bs, res, fix); |
150 | #include "qemu/error-report.h" | 81 | |
151 | #include "qemu/queue.h" | 82 | + ret = parallels_check_outside_image(bs, res, fix); |
152 | #include "qemu/compiler.h" | 83 | + if (ret < 0) { |
153 | +#include "qom/object.h" | 84 | + goto out; |
154 | |||
155 | #ifndef _WIN32 | ||
156 | #include <sys/wait.h> | ||
157 | @@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp) | ||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | +static void main_loop_update_params(EventLoopBase *base, Error **errp) | ||
162 | +{ | ||
163 | + if (!qemu_aio_context) { | ||
164 | + error_setg(errp, "qemu aio context not ready"); | ||
165 | + return; | ||
166 | + } | 85 | + } |
167 | + | 86 | + |
168 | + aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); | 87 | res->bfi.total_clusters = s->bat_size; |
169 | +} | 88 | res->bfi.compressed_clusters = 0; /* compression is not supported */ |
170 | + | 89 | |
171 | +MainLoop *mloop; | 90 | - high_off = 0; |
172 | + | 91 | prev_off = 0; |
173 | +static void main_loop_init(EventLoopBase *base, Error **errp) | 92 | for (i = 0; i < s->bat_size; i++) { |
174 | +{ | 93 | int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS; |
175 | + MainLoop *m = MAIN_LOOP(base); | 94 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
176 | + | 95 | continue; |
177 | + if (mloop) { | 96 | } |
178 | + error_setg(errp, "only one main-loop instance allowed"); | 97 | |
179 | + return; | 98 | - /* cluster outside the image */ |
180 | + } | 99 | - if (off > size) { |
181 | + | 100 | - fprintf(stderr, "%s cluster %u is outside image\n", |
182 | + main_loop_update_params(base, errp); | 101 | - fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); |
183 | + | 102 | - res->corruptions++; |
184 | + mloop = m; | 103 | - if (fix & BDRV_FIX_ERRORS) { |
185 | + return; | 104 | - parallels_set_bat_entry(s, i, 0); |
186 | +} | 105 | - res->corruptions_fixed++; |
187 | + | 106 | - } |
188 | +static bool main_loop_can_be_deleted(EventLoopBase *base) | 107 | - prev_off = 0; |
189 | +{ | 108 | - continue; |
190 | + return false; | 109 | - } |
191 | +} | 110 | - |
192 | + | 111 | res->bfi.allocated_clusters++; |
193 | +static void main_loop_class_init(ObjectClass *oc, void *class_data) | 112 | - if (off > high_off) { |
194 | +{ | 113 | - high_off = off; |
195 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); | 114 | - } |
196 | + | 115 | |
197 | + bc->init = main_loop_init; | 116 | if (prev_off != 0 && (prev_off + s->cluster_size) != off) { |
198 | + bc->update_params = main_loop_update_params; | 117 | res->bfi.fragmented_clusters++; |
199 | + bc->can_be_deleted = main_loop_can_be_deleted; | 118 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
200 | +} | 119 | prev_off = off; |
201 | + | 120 | } |
202 | +static const TypeInfo main_loop_info = { | 121 | |
203 | + .name = TYPE_MAIN_LOOP, | 122 | - if (high_off == 0) { |
204 | + .parent = TYPE_EVENT_LOOP_BASE, | 123 | - res->image_end_offset = s->data_end << BDRV_SECTOR_BITS; |
205 | + .class_init = main_loop_class_init, | 124 | - } else { |
206 | + .instance_size = sizeof(MainLoop), | 125 | - res->image_end_offset = high_off + s->cluster_size; |
207 | +}; | 126 | - s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS; |
208 | + | 127 | - } |
209 | +static void main_loop_register_types(void) | 128 | - |
210 | +{ | 129 | if (size > res->image_end_offset) { |
211 | + type_register_static(&main_loop_info); | 130 | int64_t count; |
212 | +} | 131 | count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); |
213 | + | ||
214 | +type_init(main_loop_register_types) | ||
215 | + | ||
216 | static int max_priority; | ||
217 | |||
218 | #ifndef _WIN32 | ||
219 | -- | 132 | -- |
220 | 2.35.1 | 133 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | Exclude out-of-image clusters from allocated and fragmented clusters | ||
4 | calculation. | ||
5 | |||
6 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
7 | Message-Id: <20230424093147.197643-9-alexander.ivanov@virtuozzo.com> | ||
8 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
9 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
10 | --- | ||
11 | block/parallels.c | 6 +++++- | ||
12 | 1 file changed, 5 insertions(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/block/parallels.c b/block/parallels.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/parallels.c | ||
17 | +++ b/block/parallels.c | ||
18 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
19 | prev_off = 0; | ||
20 | for (i = 0; i < s->bat_size; i++) { | ||
21 | int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS; | ||
22 | - if (off == 0) { | ||
23 | + /* | ||
24 | + * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not | ||
25 | + * fixed. Skip not allocated and out-of-image BAT entries. | ||
26 | + */ | ||
27 | + if (off == 0 || off + s->cluster_size > res->image_end_offset) { | ||
28 | prev_off = 0; | ||
29 | continue; | ||
30 | } | ||
31 | -- | ||
32 | 2.40.1 | diff view generated by jsdifflib |
1 | From: Nicolas Saenz Julienne <nsaenzju@redhat.com> | 1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | Introduce the 'event-loop-base' abstract class, it'll hold the | 3 | We will add more and more checks so we need a better code structure |
4 | properties common to all event loops and provide the necessary hooks for | 4 | in parallels_co_check. Let each check performs in a separate loop |
5 | their creation and maintenance. Then have iothread inherit from it. | 5 | in a separate helper. |
6 | 6 | ||
7 | EventLoopBaseClass is defined as user creatable and provides a hook for | 7 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> |
8 | its children to attach themselves to the user creatable class 'complete' | 8 | Message-Id: <20230424093147.197643-10-alexander.ivanov@virtuozzo.com> |
9 | function. It also provides an update_params() callback to propagate | 9 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> |
10 | property changes onto its children. | 10 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> |
11 | --- | ||
12 | block/parallels.c | 74 ++++++++++++++++++++++++++++------------------- | ||
13 | 1 file changed, 45 insertions(+), 29 deletions(-) | ||
11 | 14 | ||
12 | The new 'event-loop-base' class will live in the root directory. It is | 15 | diff --git a/block/parallels.c b/block/parallels.c |
13 | built on its own using the 'link_whole' option (there are no direct | ||
14 | function dependencies between the class and its children, it all happens | ||
15 | trough 'constructor' magic). And also imposes new compilation | ||
16 | dependencies: | ||
17 | |||
18 | qom <- event-loop-base <- blockdev (iothread.c) | ||
19 | |||
20 | And in subsequent patches: | ||
21 | |||
22 | qom <- event-loop-base <- qemuutil (util/main-loop.c) | ||
23 | |||
24 | All this forced some amount of reordering in meson.build: | ||
25 | |||
26 | - Moved qom build definition before qemuutil. Doing it the other way | ||
27 | around (i.e. moving qemuutil after qom) isn't possible as a lot of | ||
28 | core libraries that live in between the two depend on it. | ||
29 | |||
30 | - Process the 'hw' subdir earlier, as it introduces files into the | ||
31 | 'qom' source set. | ||
32 | |||
33 | No functional changes intended. | ||
34 | |||
35 | Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> | ||
36 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
37 | Acked-by: Markus Armbruster <armbru@redhat.com> | ||
38 | Message-id: 20220425075723.20019-2-nsaenzju@redhat.com | ||
39 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
40 | --- | ||
41 | qapi/qom.json | 22 +++++-- | ||
42 | meson.build | 23 ++++--- | ||
43 | include/sysemu/event-loop-base.h | 36 +++++++++++ | ||
44 | include/sysemu/iothread.h | 6 +- | ||
45 | event-loop-base.c | 104 +++++++++++++++++++++++++++++++ | ||
46 | iothread.c | 65 ++++++------------- | ||
47 | 6 files changed, 192 insertions(+), 64 deletions(-) | ||
48 | create mode 100644 include/sysemu/event-loop-base.h | ||
49 | create mode 100644 event-loop-base.c | ||
50 | |||
51 | diff --git a/qapi/qom.json b/qapi/qom.json | ||
52 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
53 | --- a/qapi/qom.json | 17 | --- a/block/parallels.c |
54 | +++ b/qapi/qom.json | 18 | +++ b/block/parallels.c |
55 | @@ -XXX,XX +XXX,XX @@ | 19 | @@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, |
56 | '*repeat': 'bool', | 20 | } |
57 | '*grab-toggle': 'GrabToggleKeys' } } | 21 | |
58 | 22 | static int coroutine_fn GRAPH_RDLOCK | |
59 | +## | 23 | -parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
60 | +# @EventLoopBaseProperties: | 24 | - BdrvCheckMode fix) |
61 | +# | 25 | +parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, |
62 | +# Common properties for event loops | 26 | + BdrvCheckMode fix) |
63 | +# | 27 | { |
64 | +# @aio-max-batch: maximum number of requests in a batch for the AIO engine, | 28 | BDRVParallelsState *s = bs->opaque; |
65 | +# 0 means that the engine will use its default. | 29 | - int64_t size, prev_off; |
66 | +# (default: 0) | 30 | + int64_t size; |
67 | +# | 31 | int ret; |
68 | +# Since: 7.1 | 32 | - uint32_t i; |
69 | +## | 33 | |
70 | +{ 'struct': 'EventLoopBaseProperties', | 34 | size = bdrv_getlength(bs->file->bs); |
71 | + 'data': { '*aio-max-batch': 'int' } } | 35 | if (size < 0) { |
36 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
37 | return size; | ||
38 | } | ||
39 | |||
40 | + if (size > res->image_end_offset) { | ||
41 | + int64_t count; | ||
42 | + count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); | ||
43 | + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", | ||
44 | + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", | ||
45 | + size - res->image_end_offset); | ||
46 | + res->leaks += count; | ||
47 | + if (fix & BDRV_FIX_LEAKS) { | ||
48 | + Error *local_err = NULL; | ||
72 | + | 49 | + |
73 | ## | 50 | + /* |
74 | # @IothreadProperties: | 51 | + * In order to really repair the image, we must shrink it. |
75 | # | 52 | + * That means we have to pass exact=true. |
76 | @@ -XXX,XX +XXX,XX @@ | 53 | + */ |
77 | # algorithm detects it is spending too long polling without | 54 | + ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, |
78 | # encountering events. 0 selects a default behaviour (default: 0) | 55 | + PREALLOC_MODE_OFF, 0, &local_err); |
79 | # | 56 | + if (ret < 0) { |
80 | -# @aio-max-batch: maximum number of requests in a batch for the AIO engine, | 57 | + error_report_err(local_err); |
81 | -# 0 means that the engine will use its default | 58 | + res->check_errors++; |
82 | -# (default:0, since 6.1) | 59 | + return ret; |
83 | +# The @aio-max-batch option is available since 6.1. | 60 | + } |
84 | # | 61 | + res->leaks_fixed += count; |
85 | # Since: 2.0 | 62 | + } |
86 | ## | 63 | + } |
87 | { 'struct': 'IothreadProperties', | ||
88 | + 'base': 'EventLoopBaseProperties', | ||
89 | 'data': { '*poll-max-ns': 'int', | ||
90 | '*poll-grow': 'int', | ||
91 | - '*poll-shrink': 'int', | ||
92 | - '*aio-max-batch': 'int' } } | ||
93 | + '*poll-shrink': 'int' } } | ||
94 | |||
95 | ## | ||
96 | # @MemoryBackendProperties: | ||
97 | diff --git a/meson.build b/meson.build | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/meson.build | ||
100 | +++ b/meson.build | ||
101 | @@ -XXX,XX +XXX,XX @@ subdir('qom') | ||
102 | subdir('authz') | ||
103 | subdir('crypto') | ||
104 | subdir('ui') | ||
105 | +subdir('hw') | ||
106 | |||
107 | |||
108 | if enable_modules | ||
109 | @@ -XXX,XX +XXX,XX @@ if enable_modules | ||
110 | modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') | ||
111 | endif | ||
112 | |||
113 | +qom_ss = qom_ss.apply(config_host, strict: false) | ||
114 | +libqom = static_library('qom', qom_ss.sources() + genh, | ||
115 | + dependencies: [qom_ss.dependencies()], | ||
116 | + name_suffix: 'fa') | ||
117 | +qom = declare_dependency(link_whole: libqom) | ||
118 | + | 64 | + |
119 | +event_loop_base = files('event-loop-base.c') | 65 | + return 0; |
120 | +event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, | ||
121 | + build_by_default: true) | ||
122 | +event_loop_base = declare_dependency(link_whole: event_loop_base, | ||
123 | + dependencies: [qom]) | ||
124 | + | ||
125 | stub_ss = stub_ss.apply(config_all, strict: false) | ||
126 | |||
127 | util_ss.add_all(trace_ss) | ||
128 | @@ -XXX,XX +XXX,XX @@ subdir('monitor') | ||
129 | subdir('net') | ||
130 | subdir('replay') | ||
131 | subdir('semihosting') | ||
132 | -subdir('hw') | ||
133 | subdir('tcg') | ||
134 | subdir('fpu') | ||
135 | subdir('accel') | ||
136 | @@ -XXX,XX +XXX,XX @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', | ||
137 | capture: true, | ||
138 | command: [undefsym, nm, '@INPUT@']) | ||
139 | |||
140 | -qom_ss = qom_ss.apply(config_host, strict: false) | ||
141 | -libqom = static_library('qom', qom_ss.sources() + genh, | ||
142 | - dependencies: [qom_ss.dependencies()], | ||
143 | - name_suffix: 'fa') | ||
144 | - | ||
145 | -qom = declare_dependency(link_whole: libqom) | ||
146 | - | ||
147 | authz_ss = authz_ss.apply(config_host, strict: false) | ||
148 | libauthz = static_library('authz', authz_ss.sources() + genh, | ||
149 | dependencies: [authz_ss.dependencies()], | ||
150 | @@ -XXX,XX +XXX,XX @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, | ||
151 | build_by_default: false) | ||
152 | |||
153 | blockdev = declare_dependency(link_whole: [libblockdev], | ||
154 | - dependencies: [block]) | ||
155 | + dependencies: [block, event_loop_base]) | ||
156 | |||
157 | qmp_ss = qmp_ss.apply(config_host, strict: false) | ||
158 | libqmp = static_library('qmp', qmp_ss.sources() + genh, | ||
159 | diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h | ||
160 | new file mode 100644 | ||
161 | index XXXXXXX..XXXXXXX | ||
162 | --- /dev/null | ||
163 | +++ b/include/sysemu/event-loop-base.h | ||
164 | @@ -XXX,XX +XXX,XX @@ | ||
165 | +/* | ||
166 | + * QEMU event-loop backend | ||
167 | + * | ||
168 | + * Copyright (C) 2022 Red Hat Inc | ||
169 | + * | ||
170 | + * Authors: | ||
171 | + * Nicolas Saenz Julienne <nsaenzju@redhat.com> | ||
172 | + * | ||
173 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
174 | + * See the COPYING file in the top-level directory. | ||
175 | + */ | ||
176 | +#ifndef QEMU_EVENT_LOOP_BASE_H | ||
177 | +#define QEMU_EVENT_LOOP_BASE_H | ||
178 | + | ||
179 | +#include "qom/object.h" | ||
180 | +#include "block/aio.h" | ||
181 | +#include "qemu/typedefs.h" | ||
182 | + | ||
183 | +#define TYPE_EVENT_LOOP_BASE "event-loop-base" | ||
184 | +OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, | ||
185 | + EVENT_LOOP_BASE) | ||
186 | + | ||
187 | +struct EventLoopBaseClass { | ||
188 | + ObjectClass parent_class; | ||
189 | + | ||
190 | + void (*init)(EventLoopBase *base, Error **errp); | ||
191 | + void (*update_params)(EventLoopBase *base, Error **errp); | ||
192 | +}; | ||
193 | + | ||
194 | +struct EventLoopBase { | ||
195 | + Object parent; | ||
196 | + | ||
197 | + /* AioContext AIO engine parameters */ | ||
198 | + int64_t aio_max_batch; | ||
199 | +}; | ||
200 | +#endif | ||
201 | diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h | ||
202 | index XXXXXXX..XXXXXXX 100644 | ||
203 | --- a/include/sysemu/iothread.h | ||
204 | +++ b/include/sysemu/iothread.h | ||
205 | @@ -XXX,XX +XXX,XX @@ | ||
206 | #include "block/aio.h" | ||
207 | #include "qemu/thread.h" | ||
208 | #include "qom/object.h" | ||
209 | +#include "sysemu/event-loop-base.h" | ||
210 | |||
211 | #define TYPE_IOTHREAD "iothread" | ||
212 | |||
213 | struct IOThread { | ||
214 | - Object parent_obj; | ||
215 | + EventLoopBase parent_obj; | ||
216 | |||
217 | QemuThread thread; | ||
218 | AioContext *ctx; | ||
219 | @@ -XXX,XX +XXX,XX @@ struct IOThread { | ||
220 | int64_t poll_max_ns; | ||
221 | int64_t poll_grow; | ||
222 | int64_t poll_shrink; | ||
223 | - | ||
224 | - /* AioContext AIO engine parameters */ | ||
225 | - int64_t aio_max_batch; | ||
226 | }; | ||
227 | typedef struct IOThread IOThread; | ||
228 | |||
229 | diff --git a/event-loop-base.c b/event-loop-base.c | ||
230 | new file mode 100644 | ||
231 | index XXXXXXX..XXXXXXX | ||
232 | --- /dev/null | ||
233 | +++ b/event-loop-base.c | ||
234 | @@ -XXX,XX +XXX,XX @@ | ||
235 | +/* | ||
236 | + * QEMU event-loop base | ||
237 | + * | ||
238 | + * Copyright (C) 2022 Red Hat Inc | ||
239 | + * | ||
240 | + * Authors: | ||
241 | + * Stefan Hajnoczi <stefanha@redhat.com> | ||
242 | + * Nicolas Saenz Julienne <nsaenzju@redhat.com> | ||
243 | + * | ||
244 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
245 | + * See the COPYING file in the top-level directory. | ||
246 | + */ | ||
247 | + | ||
248 | +#include "qemu/osdep.h" | ||
249 | +#include "qom/object_interfaces.h" | ||
250 | +#include "qapi/error.h" | ||
251 | +#include "sysemu/event-loop-base.h" | ||
252 | + | ||
253 | +typedef struct { | ||
254 | + const char *name; | ||
255 | + ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ | ||
256 | +} EventLoopBaseParamInfo; | ||
257 | + | ||
258 | +static EventLoopBaseParamInfo aio_max_batch_info = { | ||
259 | + "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), | ||
260 | +}; | ||
261 | + | ||
262 | +static void event_loop_base_get_param(Object *obj, Visitor *v, | ||
263 | + const char *name, void *opaque, Error **errp) | ||
264 | +{ | ||
265 | + EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); | ||
266 | + EventLoopBaseParamInfo *info = opaque; | ||
267 | + int64_t *field = (void *)event_loop_base + info->offset; | ||
268 | + | ||
269 | + visit_type_int64(v, name, field, errp); | ||
270 | +} | 66 | +} |
271 | + | 67 | + |
272 | +static void event_loop_base_set_param(Object *obj, Visitor *v, | 68 | +static int coroutine_fn GRAPH_RDLOCK |
273 | + const char *name, void *opaque, Error **errp) | 69 | +parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
70 | + BdrvCheckMode fix) | ||
274 | +{ | 71 | +{ |
275 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); | 72 | + BDRVParallelsState *s = bs->opaque; |
276 | + EventLoopBase *base = EVENT_LOOP_BASE(obj); | 73 | + int64_t prev_off; |
277 | + EventLoopBaseParamInfo *info = opaque; | 74 | + int ret; |
278 | + int64_t *field = (void *)base + info->offset; | 75 | + uint32_t i; |
279 | + int64_t value; | ||
280 | + | 76 | + |
281 | + if (!visit_type_int64(v, name, &value, errp)) { | 77 | qemu_co_mutex_lock(&s->lock); |
282 | + return; | 78 | |
79 | parallels_check_unclean(bs, res, fix); | ||
80 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
81 | goto out; | ||
82 | } | ||
83 | |||
84 | + ret = parallels_check_leak(bs, res, fix); | ||
85 | + if (ret < 0) { | ||
86 | + goto out; | ||
283 | + } | 87 | + } |
284 | + | 88 | + |
285 | + if (value < 0) { | 89 | res->bfi.total_clusters = s->bat_size; |
286 | + error_setg(errp, "%s value must be in range [0, %" PRId64 "]", | 90 | res->bfi.compressed_clusters = 0; /* compression is not supported */ |
287 | + info->name, INT64_MAX); | 91 | |
288 | + return; | 92 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
289 | + } | 93 | prev_off = off; |
290 | + | ||
291 | + *field = value; | ||
292 | + | ||
293 | + if (bc->update_params) { | ||
294 | + bc->update_params(base, errp); | ||
295 | + } | ||
296 | + | ||
297 | + return; | ||
298 | +} | ||
299 | + | ||
300 | +static void event_loop_base_complete(UserCreatable *uc, Error **errp) | ||
301 | +{ | ||
302 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); | ||
303 | + EventLoopBase *base = EVENT_LOOP_BASE(uc); | ||
304 | + | ||
305 | + if (bc->init) { | ||
306 | + bc->init(base, errp); | ||
307 | + } | ||
308 | +} | ||
309 | + | ||
310 | +static void event_loop_base_class_init(ObjectClass *klass, void *class_data) | ||
311 | +{ | ||
312 | + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | ||
313 | + ucc->complete = event_loop_base_complete; | ||
314 | + | ||
315 | + object_class_property_add(klass, "aio-max-batch", "int", | ||
316 | + event_loop_base_get_param, | ||
317 | + event_loop_base_set_param, | ||
318 | + NULL, &aio_max_batch_info); | ||
319 | +} | ||
320 | + | ||
321 | +static const TypeInfo event_loop_base_info = { | ||
322 | + .name = TYPE_EVENT_LOOP_BASE, | ||
323 | + .parent = TYPE_OBJECT, | ||
324 | + .instance_size = sizeof(EventLoopBase), | ||
325 | + .class_size = sizeof(EventLoopBaseClass), | ||
326 | + .class_init = event_loop_base_class_init, | ||
327 | + .abstract = true, | ||
328 | + .interfaces = (InterfaceInfo[]) { | ||
329 | + { TYPE_USER_CREATABLE }, | ||
330 | + { } | ||
331 | + } | ||
332 | +}; | ||
333 | + | ||
334 | +static void register_types(void) | ||
335 | +{ | ||
336 | + type_register_static(&event_loop_base_info); | ||
337 | +} | ||
338 | +type_init(register_types); | ||
339 | diff --git a/iothread.c b/iothread.c | ||
340 | index XXXXXXX..XXXXXXX 100644 | ||
341 | --- a/iothread.c | ||
342 | +++ b/iothread.c | ||
343 | @@ -XXX,XX +XXX,XX @@ | ||
344 | #include "qemu/module.h" | ||
345 | #include "block/aio.h" | ||
346 | #include "block/block.h" | ||
347 | +#include "sysemu/event-loop-base.h" | ||
348 | #include "sysemu/iothread.h" | ||
349 | #include "qapi/error.h" | ||
350 | #include "qapi/qapi-commands-misc.h" | ||
351 | @@ -XXX,XX +XXX,XX @@ static void iothread_init_gcontext(IOThread *iothread) | ||
352 | iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); | ||
353 | } | ||
354 | |||
355 | -static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) | ||
356 | +static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) | ||
357 | { | ||
358 | + IOThread *iothread = IOTHREAD(base); | ||
359 | ERRP_GUARD(); | ||
360 | |||
361 | + if (!iothread->ctx) { | ||
362 | + return; | ||
363 | + } | ||
364 | + | ||
365 | aio_context_set_poll_params(iothread->ctx, | ||
366 | iothread->poll_max_ns, | ||
367 | iothread->poll_grow, | ||
368 | @@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) | ||
369 | } | 94 | } |
370 | 95 | ||
371 | aio_context_set_aio_params(iothread->ctx, | 96 | - if (size > res->image_end_offset) { |
372 | - iothread->aio_max_batch, | 97 | - int64_t count; |
373 | + iothread->parent_obj.aio_max_batch, | 98 | - count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); |
374 | errp); | 99 | - fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", |
375 | } | 100 | - fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", |
376 | 101 | - size - res->image_end_offset); | |
377 | -static void iothread_complete(UserCreatable *obj, Error **errp) | 102 | - res->leaks += count; |
378 | + | 103 | - if (fix & BDRV_FIX_LEAKS) { |
379 | +static void iothread_init(EventLoopBase *base, Error **errp) | 104 | - Error *local_err = NULL; |
380 | { | ||
381 | Error *local_error = NULL; | ||
382 | - IOThread *iothread = IOTHREAD(obj); | ||
383 | + IOThread *iothread = IOTHREAD(base); | ||
384 | char *thread_name; | ||
385 | |||
386 | iothread->stopping = false; | ||
387 | @@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp) | ||
388 | */ | ||
389 | iothread_init_gcontext(iothread); | ||
390 | |||
391 | - iothread_set_aio_context_params(iothread, &local_error); | ||
392 | + iothread_set_aio_context_params(base, &local_error); | ||
393 | if (local_error) { | ||
394 | error_propagate(errp, local_error); | ||
395 | aio_context_unref(iothread->ctx); | ||
396 | @@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp) | ||
397 | * to inherit. | ||
398 | */ | ||
399 | thread_name = g_strdup_printf("IO %s", | ||
400 | - object_get_canonical_path_component(OBJECT(obj))); | ||
401 | + object_get_canonical_path_component(OBJECT(base))); | ||
402 | qemu_thread_create(&iothread->thread, thread_name, iothread_run, | ||
403 | iothread, QEMU_THREAD_JOINABLE); | ||
404 | g_free(thread_name); | ||
405 | @@ -XXX,XX +XXX,XX @@ static IOThreadParamInfo poll_grow_info = { | ||
406 | static IOThreadParamInfo poll_shrink_info = { | ||
407 | "poll-shrink", offsetof(IOThread, poll_shrink), | ||
408 | }; | ||
409 | -static IOThreadParamInfo aio_max_batch_info = { | ||
410 | - "aio-max-batch", offsetof(IOThread, aio_max_batch), | ||
411 | -}; | ||
412 | |||
413 | static void iothread_get_param(Object *obj, Visitor *v, | ||
414 | const char *name, IOThreadParamInfo *info, Error **errp) | ||
415 | @@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v, | ||
416 | } | ||
417 | } | ||
418 | |||
419 | -static void iothread_get_aio_param(Object *obj, Visitor *v, | ||
420 | - const char *name, void *opaque, Error **errp) | ||
421 | -{ | ||
422 | - IOThreadParamInfo *info = opaque; | ||
423 | - | 105 | - |
424 | - iothread_get_param(obj, v, name, info, errp); | 106 | - /* |
425 | -} | 107 | - * In order to really repair the image, we must shrink it. |
426 | - | 108 | - * That means we have to pass exact=true. |
427 | -static void iothread_set_aio_param(Object *obj, Visitor *v, | 109 | - */ |
428 | - const char *name, void *opaque, Error **errp) | 110 | - ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, |
429 | -{ | 111 | - PREALLOC_MODE_OFF, 0, &local_err); |
430 | - IOThread *iothread = IOTHREAD(obj); | 112 | - if (ret < 0) { |
431 | - IOThreadParamInfo *info = opaque; | 113 | - error_report_err(local_err); |
432 | - | 114 | - res->check_errors++; |
433 | - if (!iothread_set_param(obj, v, name, info, errp)) { | 115 | - goto out; |
434 | - return; | 116 | - } |
117 | - res->leaks_fixed += count; | ||
118 | - } | ||
435 | - } | 119 | - } |
436 | - | 120 | - |
437 | - if (iothread->ctx) { | 121 | out: |
438 | - aio_context_set_aio_params(iothread->ctx, | 122 | qemu_co_mutex_unlock(&s->lock); |
439 | - iothread->aio_max_batch, | 123 | |
440 | - errp); | ||
441 | - } | ||
442 | -} | ||
443 | - | ||
444 | static void iothread_class_init(ObjectClass *klass, void *class_data) | ||
445 | { | ||
446 | - UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | ||
447 | - ucc->complete = iothread_complete; | ||
448 | + EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); | ||
449 | + | ||
450 | + bc->init = iothread_init; | ||
451 | + bc->update_params = iothread_set_aio_context_params; | ||
452 | |||
453 | object_class_property_add(klass, "poll-max-ns", "int", | ||
454 | iothread_get_poll_param, | ||
455 | @@ -XXX,XX +XXX,XX @@ static void iothread_class_init(ObjectClass *klass, void *class_data) | ||
456 | iothread_get_poll_param, | ||
457 | iothread_set_poll_param, | ||
458 | NULL, &poll_shrink_info); | ||
459 | - object_class_property_add(klass, "aio-max-batch", "int", | ||
460 | - iothread_get_aio_param, | ||
461 | - iothread_set_aio_param, | ||
462 | - NULL, &aio_max_batch_info); | ||
463 | } | ||
464 | |||
465 | static const TypeInfo iothread_info = { | ||
466 | .name = TYPE_IOTHREAD, | ||
467 | - .parent = TYPE_OBJECT, | ||
468 | + .parent = TYPE_EVENT_LOOP_BASE, | ||
469 | .class_init = iothread_class_init, | ||
470 | .instance_size = sizeof(IOThread), | ||
471 | .instance_init = iothread_instance_init, | ||
472 | .instance_finalize = iothread_instance_finalize, | ||
473 | - .interfaces = (InterfaceInfo[]) { | ||
474 | - {TYPE_USER_CREATABLE}, | ||
475 | - {} | ||
476 | - }, | ||
477 | }; | ||
478 | |||
479 | static void iothread_register_types(void) | ||
480 | @@ -XXX,XX +XXX,XX @@ static int query_one_iothread(Object *object, void *opaque) | ||
481 | info->poll_max_ns = iothread->poll_max_ns; | ||
482 | info->poll_grow = iothread->poll_grow; | ||
483 | info->poll_shrink = iothread->poll_shrink; | ||
484 | - info->aio_max_batch = iothread->aio_max_batch; | ||
485 | + info->aio_max_batch = iothread->parent_obj.aio_max_batch; | ||
486 | |||
487 | QAPI_LIST_APPEND(*tail, info); | ||
488 | return 0; | ||
489 | -- | 124 | -- |
490 | 2.35.1 | 125 | 2.40.1 | diff view generated by jsdifflib |
1 | From: Nicolas Saenz Julienne <nsaenzju@redhat.com> | 1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | The thread pool regulates itself: when idle, it kills threads until | 3 | We will add more and more checks so we need a better code structure |
4 | empty, when in demand, it creates new threads until full. This behaviour | 4 | in parallels_co_check. Let each check performs in a separate loop |
5 | doesn't play well with latency sensitive workloads where the price of | 5 | in a separate helper. |
6 | creating a new thread is too high. For example, when paired with qemu's | ||
7 | '-mlock', or using safety features like SafeStack, creating a new thread | ||
8 | has been measured take multiple milliseconds. | ||
9 | 6 | ||
10 | In order to mitigate this let's introduce a new 'EventLoopBase' | 7 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> |
11 | property to set the thread pool size. The threads will be created during | 8 | Reviewed-by: Denis V. Lunev <den@openvz.org> |
12 | the pool's initialization or upon updating the property's value, remain | 9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
13 | available during its lifetime regardless of demand, and destroyed upon | 10 | Message-Id: <20230424093147.197643-11-alexander.ivanov@virtuozzo.com> |
14 | freeing it. A properly characterized workload will then be able to | 11 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> |
15 | configure the pool to avoid any latency spikes. | 12 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> |
13 | --- | ||
14 | block/parallels.c | 52 +++++++++++++++++++++++++++-------------------- | ||
15 | 1 file changed, 30 insertions(+), 22 deletions(-) | ||
16 | 16 | ||
17 | Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> | 17 | diff --git a/block/parallels.c b/block/parallels.c |
18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | Acked-by: Markus Armbruster <armbru@redhat.com> | ||
20 | Message-id: 20220425075723.20019-4-nsaenzju@redhat.com | ||
21 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
22 | --- | ||
23 | qapi/qom.json | 10 +++++- | ||
24 | include/block/aio.h | 10 ++++++ | ||
25 | include/block/thread-pool.h | 3 ++ | ||
26 | include/sysemu/event-loop-base.h | 4 +++ | ||
27 | event-loop-base.c | 23 +++++++++++++ | ||
28 | iothread.c | 3 ++ | ||
29 | util/aio-posix.c | 1 + | ||
30 | util/async.c | 20 ++++++++++++ | ||
31 | util/main-loop.c | 9 ++++++ | ||
32 | util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- | ||
33 | 10 files changed, 133 insertions(+), 5 deletions(-) | ||
34 | |||
35 | diff --git a/qapi/qom.json b/qapi/qom.json | ||
36 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
37 | --- a/qapi/qom.json | 19 | --- a/block/parallels.c |
38 | +++ b/qapi/qom.json | 20 | +++ b/block/parallels.c |
39 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, |
40 | # 0 means that the engine will use its default. | 22 | return 0; |
41 | # (default: 0) | 23 | } |
42 | # | 24 | |
43 | +# @thread-pool-min: minimum number of threads reserved in the thread pool | 25 | -static int coroutine_fn GRAPH_RDLOCK |
44 | +# (default:0) | 26 | -parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
45 | +# | 27 | - BdrvCheckMode fix) |
46 | +# @thread-pool-max: maximum number of threads the thread pool can contain | 28 | +static void parallels_collect_statistics(BlockDriverState *bs, |
47 | +# (default:64) | 29 | + BdrvCheckResult *res, |
48 | +# | 30 | + BdrvCheckMode fix) |
49 | # Since: 7.1 | 31 | { |
50 | ## | 32 | BDRVParallelsState *s = bs->opaque; |
51 | { 'struct': 'EventLoopBaseProperties', | 33 | - int64_t prev_off; |
52 | - 'data': { '*aio-max-batch': 'int' } } | 34 | - int ret; |
53 | + 'data': { '*aio-max-batch': 'int', | 35 | + int64_t off, prev_off; |
54 | + '*thread-pool-min': 'int', | 36 | uint32_t i; |
55 | + '*thread-pool-max': 'int' } } | 37 | |
56 | 38 | - qemu_co_mutex_lock(&s->lock); | |
57 | ## | 39 | - |
58 | # @IothreadProperties: | 40 | - parallels_check_unclean(bs, res, fix); |
59 | diff --git a/include/block/aio.h b/include/block/aio.h | 41 | - |
60 | index XXXXXXX..XXXXXXX 100644 | 42 | - ret = parallels_check_outside_image(bs, res, fix); |
61 | --- a/include/block/aio.h | 43 | - if (ret < 0) { |
62 | +++ b/include/block/aio.h | 44 | - goto out; |
63 | @@ -XXX,XX +XXX,XX @@ struct AioContext { | 45 | - } |
64 | QSLIST_HEAD(, Coroutine) scheduled_coroutines; | 46 | - |
65 | QEMUBH *co_schedule_bh; | 47 | - ret = parallels_check_leak(bs, res, fix); |
66 | 48 | - if (ret < 0) { | |
67 | + int thread_pool_min; | 49 | - goto out; |
68 | + int thread_pool_max; | 50 | - } |
69 | /* Thread pool for performing work and receiving completion callbacks. | 51 | - |
70 | * Has its own locking. | 52 | res->bfi.total_clusters = s->bat_size; |
71 | */ | 53 | res->bfi.compressed_clusters = 0; /* compression is not supported */ |
72 | @@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, | 54 | |
73 | void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, | 55 | prev_off = 0; |
74 | Error **errp); | 56 | for (i = 0; i < s->bat_size; i++) { |
75 | 57 | - int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS; | |
76 | +/** | 58 | + off = bat2sect(s, i) << BDRV_SECTOR_BITS; |
77 | + * aio_context_set_thread_pool_params: | 59 | /* |
78 | + * @ctx: the aio context | 60 | * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not |
79 | + * @min: min number of threads to have readily available in the thread pool | 61 | * fixed. Skip not allocated and out-of-image BAT entries. |
80 | + * @min: max number of threads the thread pool can contain | 62 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
81 | + */ | 63 | continue; |
82 | +void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, | 64 | } |
83 | + int64_t max, Error **errp); | 65 | |
84 | #endif | 66 | - res->bfi.allocated_clusters++; |
85 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h | 67 | - |
86 | index XXXXXXX..XXXXXXX 100644 | 68 | if (prev_off != 0 && (prev_off + s->cluster_size) != off) { |
87 | --- a/include/block/thread-pool.h | 69 | res->bfi.fragmented_clusters++; |
88 | +++ b/include/block/thread-pool.h | 70 | } |
89 | @@ -XXX,XX +XXX,XX @@ | 71 | prev_off = off; |
90 | 72 | + res->bfi.allocated_clusters++; | |
91 | #include "block/block.h" | 73 | } |
92 | |||
93 | +#define THREAD_POOL_MAX_THREADS_DEFAULT 64 | ||
94 | + | ||
95 | typedef int ThreadPoolFunc(void *opaque); | ||
96 | |||
97 | typedef struct ThreadPool ThreadPool; | ||
98 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, | ||
99 | int coroutine_fn thread_pool_submit_co(ThreadPool *pool, | ||
100 | ThreadPoolFunc *func, void *arg); | ||
101 | void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); | ||
102 | +void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); | ||
103 | |||
104 | #endif | ||
105 | diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/include/sysemu/event-loop-base.h | ||
108 | +++ b/include/sysemu/event-loop-base.h | ||
109 | @@ -XXX,XX +XXX,XX @@ struct EventLoopBase { | ||
110 | |||
111 | /* AioContext AIO engine parameters */ | ||
112 | int64_t aio_max_batch; | ||
113 | + | ||
114 | + /* AioContext thread pool parameters */ | ||
115 | + int64_t thread_pool_min; | ||
116 | + int64_t thread_pool_max; | ||
117 | }; | ||
118 | #endif | ||
119 | diff --git a/event-loop-base.c b/event-loop-base.c | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/event-loop-base.c | ||
122 | +++ b/event-loop-base.c | ||
123 | @@ -XXX,XX +XXX,XX @@ | ||
124 | #include "qemu/osdep.h" | ||
125 | #include "qom/object_interfaces.h" | ||
126 | #include "qapi/error.h" | ||
127 | +#include "block/thread-pool.h" | ||
128 | #include "sysemu/event-loop-base.h" | ||
129 | |||
130 | typedef struct { | ||
131 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
132 | ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ | ||
133 | } EventLoopBaseParamInfo; | ||
134 | |||
135 | +static void event_loop_base_instance_init(Object *obj) | ||
136 | +{ | ||
137 | + EventLoopBase *base = EVENT_LOOP_BASE(obj); | ||
138 | + | ||
139 | + base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; | ||
140 | +} | 74 | +} |
141 | + | 75 | + |
142 | static EventLoopBaseParamInfo aio_max_batch_info = { | 76 | +static int coroutine_fn GRAPH_RDLOCK |
143 | "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), | 77 | +parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, |
144 | }; | 78 | + BdrvCheckMode fix) |
145 | +static EventLoopBaseParamInfo thread_pool_min_info = { | 79 | +{ |
146 | + "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), | 80 | + BDRVParallelsState *s = bs->opaque; |
147 | +}; | 81 | + int ret; |
148 | +static EventLoopBaseParamInfo thread_pool_max_info = { | ||
149 | + "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), | ||
150 | +}; | ||
151 | |||
152 | static void event_loop_base_get_param(Object *obj, Visitor *v, | ||
153 | const char *name, void *opaque, Error **errp) | ||
154 | @@ -XXX,XX +XXX,XX @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) | ||
155 | event_loop_base_get_param, | ||
156 | event_loop_base_set_param, | ||
157 | NULL, &aio_max_batch_info); | ||
158 | + object_class_property_add(klass, "thread-pool-min", "int", | ||
159 | + event_loop_base_get_param, | ||
160 | + event_loop_base_set_param, | ||
161 | + NULL, &thread_pool_min_info); | ||
162 | + object_class_property_add(klass, "thread-pool-max", "int", | ||
163 | + event_loop_base_get_param, | ||
164 | + event_loop_base_set_param, | ||
165 | + NULL, &thread_pool_max_info); | ||
166 | } | ||
167 | |||
168 | static const TypeInfo event_loop_base_info = { | ||
169 | .name = TYPE_EVENT_LOOP_BASE, | ||
170 | .parent = TYPE_OBJECT, | ||
171 | .instance_size = sizeof(EventLoopBase), | ||
172 | + .instance_init = event_loop_base_instance_init, | ||
173 | .class_size = sizeof(EventLoopBaseClass), | ||
174 | .class_init = event_loop_base_class_init, | ||
175 | .abstract = true, | ||
176 | diff --git a/iothread.c b/iothread.c | ||
177 | index XXXXXXX..XXXXXXX 100644 | ||
178 | --- a/iothread.c | ||
179 | +++ b/iothread.c | ||
180 | @@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) | ||
181 | aio_context_set_aio_params(iothread->ctx, | ||
182 | iothread->parent_obj.aio_max_batch, | ||
183 | errp); | ||
184 | + | 82 | + |
185 | + aio_context_set_thread_pool_params(iothread->ctx, base->thread_pool_min, | 83 | + qemu_co_mutex_lock(&s->lock); |
186 | + base->thread_pool_max, errp); | ||
187 | } | ||
188 | |||
189 | |||
190 | diff --git a/util/aio-posix.c b/util/aio-posix.c | ||
191 | index XXXXXXX..XXXXXXX 100644 | ||
192 | --- a/util/aio-posix.c | ||
193 | +++ b/util/aio-posix.c | ||
194 | @@ -XXX,XX +XXX,XX @@ | ||
195 | |||
196 | #include "qemu/osdep.h" | ||
197 | #include "block/block.h" | ||
198 | +#include "block/thread-pool.h" | ||
199 | #include "qemu/main-loop.h" | ||
200 | #include "qemu/rcu.h" | ||
201 | #include "qemu/rcu_queue.h" | ||
202 | diff --git a/util/async.c b/util/async.c | ||
203 | index XXXXXXX..XXXXXXX 100644 | ||
204 | --- a/util/async.c | ||
205 | +++ b/util/async.c | ||
206 | @@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp) | ||
207 | |||
208 | ctx->aio_max_batch = 0; | ||
209 | |||
210 | + ctx->thread_pool_min = 0; | ||
211 | + ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; | ||
212 | + | 84 | + |
213 | return ctx; | 85 | + parallels_check_unclean(bs, res, fix); |
214 | fail: | ||
215 | g_source_destroy(&ctx->source); | ||
216 | @@ -XXX,XX +XXX,XX @@ void qemu_set_current_aio_context(AioContext *ctx) | ||
217 | assert(!get_my_aiocontext()); | ||
218 | set_my_aiocontext(ctx); | ||
219 | } | ||
220 | + | 86 | + |
221 | +void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, | 87 | + ret = parallels_check_outside_image(bs, res, fix); |
222 | + int64_t max, Error **errp) | 88 | + if (ret < 0) { |
223 | +{ | 89 | + goto out; |
224 | + | ||
225 | + if (min > max || !max || min > INT_MAX || max > INT_MAX) { | ||
226 | + error_setg(errp, "bad thread-pool-min/thread-pool-max values"); | ||
227 | + return; | ||
228 | + } | 90 | + } |
229 | + | 91 | + |
230 | + ctx->thread_pool_min = min; | 92 | + ret = parallels_check_leak(bs, res, fix); |
231 | + ctx->thread_pool_max = max; | 93 | + if (ret < 0) { |
232 | + | 94 | + goto out; |
233 | + if (ctx->thread_pool) { | ||
234 | + thread_pool_update_params(ctx->thread_pool, ctx); | ||
235 | + } | ||
236 | +} | ||
237 | diff --git a/util/main-loop.c b/util/main-loop.c | ||
238 | index XXXXXXX..XXXXXXX 100644 | ||
239 | --- a/util/main-loop.c | ||
240 | +++ b/util/main-loop.c | ||
241 | @@ -XXX,XX +XXX,XX @@ | ||
242 | #include "sysemu/replay.h" | ||
243 | #include "qemu/main-loop.h" | ||
244 | #include "block/aio.h" | ||
245 | +#include "block/thread-pool.h" | ||
246 | #include "qemu/error-report.h" | ||
247 | #include "qemu/queue.h" | ||
248 | #include "qemu/compiler.h" | ||
249 | @@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp) | ||
250 | |||
251 | static void main_loop_update_params(EventLoopBase *base, Error **errp) | ||
252 | { | ||
253 | + ERRP_GUARD(); | ||
254 | + | ||
255 | if (!qemu_aio_context) { | ||
256 | error_setg(errp, "qemu aio context not ready"); | ||
257 | return; | ||
258 | } | ||
259 | |||
260 | aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); | ||
261 | + if (*errp) { | ||
262 | + return; | ||
263 | + } | 95 | + } |
264 | + | 96 | + |
265 | + aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, | 97 | + parallels_collect_statistics(bs, res, fix); |
266 | + base->thread_pool_max, errp); | 98 | |
267 | } | 99 | out: |
268 | 100 | qemu_co_mutex_unlock(&s->lock); | |
269 | MainLoop *mloop; | ||
270 | diff --git a/util/thread-pool.c b/util/thread-pool.c | ||
271 | index XXXXXXX..XXXXXXX 100644 | ||
272 | --- a/util/thread-pool.c | ||
273 | +++ b/util/thread-pool.c | ||
274 | @@ -XXX,XX +XXX,XX @@ struct ThreadPool { | ||
275 | QemuMutex lock; | ||
276 | QemuCond worker_stopped; | ||
277 | QemuSemaphore sem; | ||
278 | - int max_threads; | ||
279 | QEMUBH *new_thread_bh; | ||
280 | |||
281 | /* The following variables are only accessed from one AioContext. */ | ||
282 | @@ -XXX,XX +XXX,XX @@ struct ThreadPool { | ||
283 | int new_threads; /* backlog of threads we need to create */ | ||
284 | int pending_threads; /* threads created but not running yet */ | ||
285 | bool stopping; | ||
286 | + int min_threads; | ||
287 | + int max_threads; | ||
288 | }; | ||
289 | |||
290 | +static inline bool back_to_sleep(ThreadPool *pool, int ret) | ||
291 | +{ | ||
292 | + /* | ||
293 | + * The semaphore timed out, we should exit the loop except when: | ||
294 | + * - There is work to do, we raced with the signal. | ||
295 | + * - The max threads threshold just changed, we raced with the signal. | ||
296 | + * - The thread pool forces a minimum number of readily available threads. | ||
297 | + */ | ||
298 | + if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || | ||
299 | + pool->cur_threads > pool->max_threads || | ||
300 | + pool->cur_threads <= pool->min_threads)) { | ||
301 | + return true; | ||
302 | + } | ||
303 | + | ||
304 | + return false; | ||
305 | +} | ||
306 | + | ||
307 | static void *worker_thread(void *opaque) | ||
308 | { | ||
309 | ThreadPool *pool = opaque; | ||
310 | @@ -XXX,XX +XXX,XX @@ static void *worker_thread(void *opaque) | ||
311 | ret = qemu_sem_timedwait(&pool->sem, 10000); | ||
312 | qemu_mutex_lock(&pool->lock); | ||
313 | pool->idle_threads--; | ||
314 | - } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); | ||
315 | - if (ret == -1 || pool->stopping) { | ||
316 | + } while (back_to_sleep(pool, ret)); | ||
317 | + if (ret == -1 || pool->stopping || | ||
318 | + pool->cur_threads > pool->max_threads) { | ||
319 | break; | ||
320 | } | ||
321 | |||
322 | @@ -XXX,XX +XXX,XX @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) | ||
323 | thread_pool_submit_aio(pool, func, arg, NULL, NULL); | ||
324 | } | ||
325 | |||
326 | +void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) | ||
327 | +{ | ||
328 | + qemu_mutex_lock(&pool->lock); | ||
329 | + | ||
330 | + pool->min_threads = ctx->thread_pool_min; | ||
331 | + pool->max_threads = ctx->thread_pool_max; | ||
332 | + | ||
333 | + /* | ||
334 | + * We either have to: | ||
335 | + * - Increase the number available of threads until over the min_threads | ||
336 | + * threshold. | ||
337 | + * - Decrease the number of available threads until under the max_threads | ||
338 | + * threshold. | ||
339 | + * - Do nothing. The current number of threads fall in between the min and | ||
340 | + * max thresholds. We'll let the pool manage itself. | ||
341 | + */ | ||
342 | + for (int i = pool->cur_threads; i < pool->min_threads; i++) { | ||
343 | + spawn_thread(pool); | ||
344 | + } | ||
345 | + | ||
346 | + for (int i = pool->cur_threads; i > pool->max_threads; i--) { | ||
347 | + qemu_sem_post(&pool->sem); | ||
348 | + } | ||
349 | + | ||
350 | + qemu_mutex_unlock(&pool->lock); | ||
351 | +} | ||
352 | + | ||
353 | static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) | ||
354 | { | ||
355 | if (!ctx) { | ||
356 | @@ -XXX,XX +XXX,XX @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) | ||
357 | qemu_mutex_init(&pool->lock); | ||
358 | qemu_cond_init(&pool->worker_stopped); | ||
359 | qemu_sem_init(&pool->sem, 0); | ||
360 | - pool->max_threads = 64; | ||
361 | pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); | ||
362 | |||
363 | QLIST_INIT(&pool->head); | ||
364 | QTAILQ_INIT(&pool->request_list); | ||
365 | + | ||
366 | + thread_pool_update_params(pool, ctx); | ||
367 | } | ||
368 | |||
369 | ThreadPool *thread_pool_new(AioContext *ctx) | ||
370 | -- | 101 | -- |
371 | 2.35.1 | 102 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | Replace the way we use mutex in parallels_co_check() for simplier | ||
4 | and less error prone code. | ||
5 | |||
6 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
7 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
8 | Message-Id: <20230424093147.197643-12-alexander.ivanov@virtuozzo.com> | ||
9 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
10 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
11 | --- | ||
12 | block/parallels.c | 33 ++++++++++++++------------------- | ||
13 | 1 file changed, 14 insertions(+), 19 deletions(-) | ||
14 | |||
15 | diff --git a/block/parallels.c b/block/parallels.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block/parallels.c | ||
18 | +++ b/block/parallels.c | ||
19 | @@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, | ||
20 | BDRVParallelsState *s = bs->opaque; | ||
21 | int ret; | ||
22 | |||
23 | - qemu_co_mutex_lock(&s->lock); | ||
24 | + WITH_QEMU_LOCK_GUARD(&s->lock) { | ||
25 | + parallels_check_unclean(bs, res, fix); | ||
26 | |||
27 | - parallels_check_unclean(bs, res, fix); | ||
28 | + ret = parallels_check_outside_image(bs, res, fix); | ||
29 | + if (ret < 0) { | ||
30 | + return ret; | ||
31 | + } | ||
32 | |||
33 | - ret = parallels_check_outside_image(bs, res, fix); | ||
34 | - if (ret < 0) { | ||
35 | - goto out; | ||
36 | - } | ||
37 | + ret = parallels_check_leak(bs, res, fix); | ||
38 | + if (ret < 0) { | ||
39 | + return ret; | ||
40 | + } | ||
41 | |||
42 | - ret = parallels_check_leak(bs, res, fix); | ||
43 | - if (ret < 0) { | ||
44 | - goto out; | ||
45 | + parallels_collect_statistics(bs, res, fix); | ||
46 | } | ||
47 | |||
48 | - parallels_collect_statistics(bs, res, fix); | ||
49 | - | ||
50 | -out: | ||
51 | - qemu_co_mutex_unlock(&s->lock); | ||
52 | - | ||
53 | - if (ret == 0) { | ||
54 | - ret = bdrv_co_flush(bs); | ||
55 | - if (ret < 0) { | ||
56 | - res->check_errors++; | ||
57 | - } | ||
58 | + ret = bdrv_co_flush(bs); | ||
59 | + if (ret < 0) { | ||
60 | + res->check_errors++; | ||
61 | } | ||
62 | |||
63 | return ret; | ||
64 | -- | ||
65 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
1 | 2 | ||
3 | All the offsets in the BAT must be lower than the file size. | ||
4 | Fix the check condition for correct check. | ||
5 | |||
6 | Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> | ||
7 | Reviewed-by: Denis V. Lunev <den@openvz.org> | ||
8 | Message-Id: <20230424093147.197643-13-alexander.ivanov@virtuozzo.com> | ||
9 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
10 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
11 | --- | ||
12 | block/parallels.c | 2 +- | ||
13 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/block/parallels.c b/block/parallels.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block/parallels.c | ||
18 | +++ b/block/parallels.c | ||
19 | @@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, | ||
20 | high_off = 0; | ||
21 | for (i = 0; i < s->bat_size; i++) { | ||
22 | off = bat2sect(s, i) << BDRV_SECTOR_BITS; | ||
23 | - if (off > size) { | ||
24 | + if (off + s->cluster_size > size) { | ||
25 | fprintf(stderr, "%s cluster %u is outside image\n", | ||
26 | fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); | ||
27 | res->corruptions++; | ||
28 | -- | ||
29 | 2.40.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Jean-Louis Dupond <jean-louis@dupond.be> | |
2 | |||
3 | When we for example have a sparse qcow2 image and discard: unmap is enabled, | ||
4 | there can be a lot of fragmentation in the image after some time. Especially on VM's | ||
5 | that do a lot of writes/deletes. | ||
6 | This causes the qcow2 image to grow even over 110% of its virtual size, | ||
7 | because the free gaps in the image get too small to allocate new | ||
8 | continuous clusters. So it allocates new space at the end of the image. | ||
9 | |||
10 | Disabling discard is not an option, as discard is needed to keep the | ||
11 | incremental backup size as low as possible. Without discard, the | ||
12 | incremental backups would become large, as qemu thinks it's just dirty | ||
13 | blocks but it doesn't know the blocks are unneeded. | ||
14 | So we need to avoid fragmentation but also 'empty' the unneeded blocks in | ||
15 | the image to have a small incremental backup. | ||
16 | |||
17 | In addition, we also want to send the discards further down the stack, so | ||
18 | the underlying blocks are still discarded. | ||
19 | |||
20 | Therefor we introduce a new qcow2 option "discard-no-unref". | ||
21 | When setting this option to true, discards will no longer have the qcow2 | ||
22 | driver relinquish cluster allocations. Other than that, the request is | ||
23 | handled as normal: All clusters in range are marked as zero, and, if | ||
24 | pass-discard-request is true, it is passed further down the stack. | ||
25 | The only difference is that the now-zero clusters are preallocated | ||
26 | instead of being unallocated. | ||
27 | This will avoid fragmentation on the qcow2 image. | ||
28 | |||
29 | Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1621 | ||
30 | Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be> | ||
31 | Message-Id: <20230605084523.34134-2-jean-louis@dupond.be> | ||
32 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
33 | Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||
34 | --- | ||
35 | qapi/block-core.json | 12 ++++++++++++ | ||
36 | block/qcow2.h | 3 +++ | ||
37 | block/qcow2-cluster.c | 32 ++++++++++++++++++++++++++++---- | ||
38 | block/qcow2.c | 18 ++++++++++++++++++ | ||
39 | qemu-options.hx | 12 ++++++++++++ | ||
40 | 5 files changed, 73 insertions(+), 4 deletions(-) | ||
41 | |||
42 | diff --git a/qapi/block-core.json b/qapi/block-core.json | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/qapi/block-core.json | ||
45 | +++ b/qapi/block-core.json | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | # @pass-discard-other: whether discard requests for the data source | ||
48 | # should be issued on other occasions where a cluster gets freed | ||
49 | # | ||
50 | +# @discard-no-unref: when enabled, discards from the guest will not cause | ||
51 | +# cluster allocations to be relinquished. This prevents qcow2 fragmentation | ||
52 | +# that would be caused by such discards. Besides potential | ||
53 | +# performance degradation, such fragmentation can lead to increased | ||
54 | +# allocation of clusters past the end of the image file, | ||
55 | +# resulting in image files whose file length can grow much larger | ||
56 | +# than their guest disk size would suggest. | ||
57 | +# If image file length is of concern (e.g. when storing qcow2 | ||
58 | +# images directly on block devices), you should consider enabling | ||
59 | +# this option. (since 8.1) | ||
60 | +# | ||
61 | # @overlap-check: which overlap checks to perform for writes to the | ||
62 | # image, defaults to 'cached' (since 2.2) | ||
63 | # | ||
64 | @@ -XXX,XX +XXX,XX @@ | ||
65 | '*pass-discard-request': 'bool', | ||
66 | '*pass-discard-snapshot': 'bool', | ||
67 | '*pass-discard-other': 'bool', | ||
68 | + '*discard-no-unref': 'bool', | ||
69 | '*overlap-check': 'Qcow2OverlapChecks', | ||
70 | '*cache-size': 'int', | ||
71 | '*l2-cache-size': 'int', | ||
72 | diff --git a/block/qcow2.h b/block/qcow2.h | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/block/qcow2.h | ||
75 | +++ b/block/qcow2.h | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request" | ||
78 | #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" | ||
79 | #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other" | ||
80 | +#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref" | ||
81 | #define QCOW2_OPT_OVERLAP "overlap-check" | ||
82 | #define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template" | ||
83 | #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header" | ||
84 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State { | ||
85 | |||
86 | bool discard_passthrough[QCOW2_DISCARD_MAX]; | ||
87 | |||
88 | + bool discard_no_unref; | ||
89 | + | ||
90 | int overlap_check; /* bitmask of Qcow2MetadataOverlap values */ | ||
91 | bool signaled_corruption; | ||
92 | |||
93 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/block/qcow2-cluster.c | ||
96 | +++ b/block/qcow2-cluster.c | ||
97 | @@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, | ||
98 | uint64_t new_l2_bitmap = old_l2_bitmap; | ||
99 | QCow2ClusterType cluster_type = | ||
100 | qcow2_get_cluster_type(bs, old_l2_entry); | ||
101 | + bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) && | ||
102 | + !full_discard && | ||
103 | + (s->discard_no_unref && | ||
104 | + type == QCOW2_DISCARD_REQUEST); | ||
105 | |||
106 | /* | ||
107 | * If full_discard is true, the cluster should not read back as zeroes, | ||
108 | @@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, | ||
109 | new_l2_entry = new_l2_bitmap = 0; | ||
110 | } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { | ||
111 | if (has_subclusters(s)) { | ||
112 | - new_l2_entry = 0; | ||
113 | + if (keep_reference) { | ||
114 | + new_l2_entry = old_l2_entry; | ||
115 | + } else { | ||
116 | + new_l2_entry = 0; | ||
117 | + } | ||
118 | new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; | ||
119 | } else { | ||
120 | - new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0; | ||
121 | + if (s->qcow_version >= 3) { | ||
122 | + if (keep_reference) { | ||
123 | + new_l2_entry |= QCOW_OFLAG_ZERO; | ||
124 | + } else { | ||
125 | + new_l2_entry = QCOW_OFLAG_ZERO; | ||
126 | + } | ||
127 | + } else { | ||
128 | + new_l2_entry = 0; | ||
129 | + } | ||
130 | } | ||
131 | } | ||
132 | |||
133 | @@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, | ||
134 | if (has_subclusters(s)) { | ||
135 | set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); | ||
136 | } | ||
137 | - /* Then decrease the refcount */ | ||
138 | - qcow2_free_any_cluster(bs, old_l2_entry, type); | ||
139 | + if (!keep_reference) { | ||
140 | + /* Then decrease the refcount */ | ||
141 | + qcow2_free_any_cluster(bs, old_l2_entry, type); | ||
142 | + } else if (s->discard_passthrough[type] && | ||
143 | + (cluster_type == QCOW2_CLUSTER_NORMAL || | ||
144 | + cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) { | ||
145 | + /* If we keep the reference, pass on the discard still */ | ||
146 | + bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK, | ||
147 | + s->cluster_size); | ||
148 | + } | ||
149 | } | ||
150 | |||
151 | qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); | ||
152 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
153 | index XXXXXXX..XXXXXXX 100644 | ||
154 | --- a/block/qcow2.c | ||
155 | +++ b/block/qcow2.c | ||
156 | @@ -XXX,XX +XXX,XX @@ static const char *const mutable_opts[] = { | ||
157 | QCOW2_OPT_DISCARD_REQUEST, | ||
158 | QCOW2_OPT_DISCARD_SNAPSHOT, | ||
159 | QCOW2_OPT_DISCARD_OTHER, | ||
160 | + QCOW2_OPT_DISCARD_NO_UNREF, | ||
161 | QCOW2_OPT_OVERLAP, | ||
162 | QCOW2_OPT_OVERLAP_TEMPLATE, | ||
163 | QCOW2_OPT_OVERLAP_MAIN_HEADER, | ||
164 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList qcow2_runtime_opts = { | ||
165 | .type = QEMU_OPT_BOOL, | ||
166 | .help = "Generate discard requests when other clusters are freed", | ||
167 | }, | ||
168 | + { | ||
169 | + .name = QCOW2_OPT_DISCARD_NO_UNREF, | ||
170 | + .type = QEMU_OPT_BOOL, | ||
171 | + .help = "Do not unreference discarded clusters", | ||
172 | + }, | ||
173 | { | ||
174 | .name = QCOW2_OPT_OVERLAP, | ||
175 | .type = QEMU_OPT_STRING, | ||
176 | @@ -XXX,XX +XXX,XX @@ typedef struct Qcow2ReopenState { | ||
177 | bool use_lazy_refcounts; | ||
178 | int overlap_check; | ||
179 | bool discard_passthrough[QCOW2_DISCARD_MAX]; | ||
180 | + bool discard_no_unref; | ||
181 | uint64_t cache_clean_interval; | ||
182 | QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ | ||
183 | } Qcow2ReopenState; | ||
184 | @@ -XXX,XX +XXX,XX @@ static int qcow2_update_options_prepare(BlockDriverState *bs, | ||
185 | r->discard_passthrough[QCOW2_DISCARD_OTHER] = | ||
186 | qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); | ||
187 | |||
188 | + r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF, | ||
189 | + false); | ||
190 | + if (r->discard_no_unref && s->qcow_version < 3) { | ||
191 | + error_setg(errp, | ||
192 | + "discard-no-unref is only supported since qcow2 version 3"); | ||
193 | + ret = -EINVAL; | ||
194 | + goto fail; | ||
195 | + } | ||
196 | + | ||
197 | switch (s->crypt_method_header) { | ||
198 | case QCOW_CRYPT_NONE: | ||
199 | if (encryptfmt) { | ||
200 | @@ -XXX,XX +XXX,XX @@ static void qcow2_update_options_commit(BlockDriverState *bs, | ||
201 | s->discard_passthrough[i] = r->discard_passthrough[i]; | ||
202 | } | ||
203 | |||
204 | + s->discard_no_unref = r->discard_no_unref; | ||
205 | + | ||
206 | if (s->cache_clean_interval != r->cache_clean_interval) { | ||
207 | cache_clean_timer_del(bs); | ||
208 | s->cache_clean_interval = r->cache_clean_interval; | ||
209 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
210 | index XXXXXXX..XXXXXXX 100644 | ||
211 | --- a/qemu-options.hx | ||
212 | +++ b/qemu-options.hx | ||
213 | @@ -XXX,XX +XXX,XX @@ SRST | ||
214 | issued on other occasions where a cluster gets freed | ||
215 | (on/off; default: off) | ||
216 | |||
217 | + ``discard-no-unref`` | ||
218 | + When enabled, discards from the guest will not cause cluster | ||
219 | + allocations to be relinquished. This prevents qcow2 fragmentation | ||
220 | + that would be caused by such discards. Besides potential | ||
221 | + performance degradation, such fragmentation can lead to increased | ||
222 | + allocation of clusters past the end of the image file, | ||
223 | + resulting in image files whose file length can grow much larger | ||
224 | + than their guest disk size would suggest. | ||
225 | + If image file length is of concern (e.g. when storing qcow2 | ||
226 | + images directly on block devices), you should consider enabling | ||
227 | + this option. | ||
228 | + | ||
229 | ``overlap-check`` | ||
230 | Which overlap checks to perform for writes to the image | ||
231 | (none/constant/cached/all; default: cached). For details or | ||
232 | -- | ||
233 | 2.40.1 | diff view generated by jsdifflib |