1
The following changes since commit 9cf289af47bcfae5c75de37d8e5d6fd23705322c:
1
The following changes since commit 848a6caa88b9f082c89c9b41afa975761262981d:
2
2
3
Merge tag 'qga-pull-request' of gitlab.com:marcandre.lureau/qemu into staging (2022-05-04 03:42:49 -0700)
3
Merge tag 'migration-20230602-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-06-02 17:33:29 -0700)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/hreitz/qemu.git tags/pull-block-2023-06-05
8
8
9
for you to fetch changes up to bef2e050d6a7feb865854c65570c496ac5a8cf53:
9
for you to fetch changes up to 42a2890a76f4783cd1c212f27856edcf2b5e8a75:
10
10
11
util/event-loop-base: Introduce options to set the thread pool size (2022-05-04 17:02:19 +0100)
11
qcow2: add discard-no-unref option (2023-06-05 13:15:42 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Block patches
15
15
16
Add new thread-pool-min/thread-pool-max parameters to control the thread pool
16
- Fix padding of unaligned vectored requests to match the host alignment
17
used for async I/O.
17
for vectors with 1023 or 1024 buffers
18
- Refactor and fix bugs in parallels's image check functionality
19
- Add an option to the qcow2 driver to retain (qcow2-level) allocations
20
on discard requests from the guest (while still forwarding the discard
21
to the lower level and marking the range as zero)
18
22
19
----------------------------------------------------------------
23
----------------------------------------------------------------
24
Alexander Ivanov (12):
25
parallels: Out of image offset in BAT leads to image inflation
26
parallels: Fix high_off calculation in parallels_co_check()
27
parallels: Fix image_end_offset and data_end after out-of-image check
28
parallels: create parallels_set_bat_entry_helper() to assign BAT value
29
parallels: Use generic infrastructure for BAT writing in
30
parallels_co_check()
31
parallels: Move check of unclean image to a separate function
32
parallels: Move check of cluster outside image to a separate function
33
parallels: Fix statistics calculation
34
parallels: Move check of leaks to a separate function
35
parallels: Move statistic collection to a separate function
36
parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD
37
parallels: Incorrect condition in out-of-image check
20
38
21
Nicolas Saenz Julienne (3):
39
Hanna Czenczek (4):
22
Introduce event-loop-base abstract class
40
util/iov: Make qiov_slice() public
23
util/main-loop: Introduce the main loop into QOM
41
block: Collapse padded I/O vecs exceeding IOV_MAX
24
util/event-loop-base: Introduce options to set the thread pool size
42
util/iov: Remove qemu_iovec_init_extended()
43
iotests/iov-padding: New test
25
44
26
qapi/qom.json | 43 ++++++++--
45
Jean-Louis Dupond (1):
27
meson.build | 26 +++---
46
qcow2: add discard-no-unref option
28
include/block/aio.h | 10 +++
47
29
include/block/thread-pool.h | 3 +
48
qapi/block-core.json | 12 ++
30
include/qemu/main-loop.h | 10 +++
49
block/qcow2.h | 3 +
31
include/sysemu/event-loop-base.h | 41 +++++++++
50
include/qemu/iov.h | 8 +-
32
include/sysemu/iothread.h | 6 +-
51
block/io.c | 166 ++++++++++++++++++--
33
event-loop-base.c | 140 +++++++++++++++++++++++++++++++
52
block/parallels.c | 190 ++++++++++++++++-------
34
iothread.c | 68 +++++----------
53
block/qcow2-cluster.c | 32 +++-
35
util/aio-posix.c | 1 +
54
block/qcow2.c | 18 +++
36
util/async.c | 20 +++++
55
util/iov.c | 89 ++---------
37
util/main-loop.c | 65 ++++++++++++++
56
qemu-options.hx | 12 ++
38
util/thread-pool.c | 55 +++++++++++-
57
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++
39
13 files changed, 419 insertions(+), 69 deletions(-)
58
tests/qemu-iotests/tests/iov-padding.out | 59 +++++++
40
create mode 100644 include/sysemu/event-loop-base.h
59
11 files changed, 523 insertions(+), 151 deletions(-)
41
create mode 100644 event-loop-base.c
60
create mode 100755 tests/qemu-iotests/tests/iov-padding
61
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
42
62
43
--
63
--
44
2.35.1
64
2.40.1
diff view generated by jsdifflib
New patch
1
We want to inline qemu_iovec_init_extended() in block/io.c for padding
2
requests, and having access to qiov_slice() is useful for this. As a
3
public function, it is renamed to qemu_iovec_slice().
1
4
5
(We will need to count the number of I/O vector elements of a slice
6
there, and then later process this slice. Without qiov_slice(), we
7
would need to call qemu_iovec_subvec_niov(), and all further
8
IOV-processing functions may need to skip prefixing elements to
9
accommodate a qiov_offset. Because qemu_iovec_subvec_niov()
10
internally calls qiov_slice(), we can just have the block/io.c code call
11
qiov_slice() itself, thus get the number of elements, and also create an
12
iovec array with the superfluous prefixing elements stripped, so the
13
following processing functions no longer need to skip them.)
14
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
17
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
18
Message-Id: <20230411173418.19549-2-hreitz@redhat.com>
19
---
20
include/qemu/iov.h | 3 +++
21
util/iov.c | 14 +++++++-------
22
2 files changed, 10 insertions(+), 7 deletions(-)
23
24
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/qemu/iov.h
27
+++ b/include/qemu/iov.h
28
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended(
29
void *tail_buf, size_t tail_len);
30
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
31
size_t offset, size_t len);
32
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
33
+ size_t offset, size_t len,
34
+ size_t *head, size_t *tail, int *niov);
35
int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len);
36
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
37
void qemu_iovec_concat(QEMUIOVector *dst,
38
diff --git a/util/iov.c b/util/iov.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/util/iov.c
41
+++ b/util/iov.c
42
@@ -XXX,XX +XXX,XX @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
43
}
44
45
/*
46
- * qiov_slice
47
+ * qemu_iovec_slice
48
*
49
* Find subarray of iovec's, containing requested range. @head would
50
* be offset in first iov (returned by the function), @tail would be
51
* count of extra bytes in last iovec (returned iov + @niov - 1).
52
*/
53
-static struct iovec *qiov_slice(QEMUIOVector *qiov,
54
- size_t offset, size_t len,
55
- size_t *head, size_t *tail, int *niov)
56
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
57
+ size_t offset, size_t len,
58
+ size_t *head, size_t *tail, int *niov)
59
{
60
struct iovec *iov, *end_iov;
61
62
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
63
size_t head, tail;
64
int niov;
65
66
- qiov_slice(qiov, offset, len, &head, &tail, &niov);
67
+ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);
68
69
return niov;
70
}
71
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended(
72
}
73
74
if (mid_len) {
75
- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
76
- &mid_head, &mid_tail, &mid_niov);
77
+ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len,
78
+ &mid_head, &mid_tail, &mid_niov);
79
}
80
81
total_niov = !!head_len + mid_niov + !!tail_len;
82
--
83
2.40.1
diff view generated by jsdifflib
New patch
1
1
When processing vectored guest requests that are not aligned to the
2
storage request alignment, we pad them by adding head and/or tail
3
buffers for a read-modify-write cycle.
4
5
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
6
with this padding, the vector can exceed that limit. As of
7
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
8
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
9
limit, instead returning an error to the guest.
10
11
To the guest, this appears as a random I/O error. We should not return
12
an I/O error to the guest when it issued a perfectly valid request.
13
14
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
15
longer than IOV_MAX, which generally seems to work (because the guest
16
assumes a smaller alignment than we really have, file-posix's
17
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
18
so emulate the request, so that the IOV_MAX does not matter). However,
19
that does not seem exactly great.
20
21
I see two ways to fix this problem:
22
1. We split such long requests into two requests.
23
2. We join some elements of the vector into new buffers to make it
24
shorter.
25
26
I am wary of (1), because it seems like it may have unintended side
27
effects.
28
29
(2) on the other hand seems relatively simple to implement, with
30
hopefully few side effects, so this patch does that.
31
32
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
33
is effectively replaced by the new function bdrv_create_padded_qiov(),
34
which not only wraps the request IOV with padding head/tail, but also
35
ensures that the resulting vector will not have more than IOV_MAX
36
elements. Putting that functionality into qemu_iovec_init_extended() is
37
infeasible because it requires allocating a bounce buffer; doing so
38
would require many more parameters (buffer alignment, how to initialize
39
the buffer, and out parameters like the buffer, its length, and the
40
original elements), which is not reasonable.
41
42
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
43
functionality into bdrv_create_padded_qiov() by using public
44
qemu_iovec_* functions, so that is what this patch does.
45
46
Because bdrv_pad_request() was the only "serious" user of
47
qemu_iovec_init_extended(), the next patch will remove the latter
48
function, so the functionality is not implemented twice.
49
50
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
51
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
52
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
53
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
54
---
55
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
56
1 file changed, 151 insertions(+), 15 deletions(-)
57
58
diff --git a/block/io.c b/block/io.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/block/io.c
61
+++ b/block/io.c
62
@@ -XXX,XX +XXX,XX @@ out:
63
* @merge_reads is true for small requests,
64
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
65
* head and tail exist but @buf_len == align and @tail_buf == @buf.
66
+ *
67
+ * @write is true for write requests, false for read requests.
68
+ *
69
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
70
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
71
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
72
+ * I/O vector elements so for read requests, the data can be copied back after
73
+ * the read is done.
74
*/
75
typedef struct BdrvRequestPadding {
76
uint8_t *buf;
77
@@ -XXX,XX +XXX,XX @@ typedef struct BdrvRequestPadding {
78
size_t head;
79
size_t tail;
80
bool merge_reads;
81
+ bool write;
82
QEMUIOVector local_qiov;
83
+
84
+ uint8_t *collapse_bounce_buf;
85
+ size_t collapse_len;
86
+ QEMUIOVector pre_collapse_qiov;
87
} BdrvRequestPadding;
88
89
static bool bdrv_init_padding(BlockDriverState *bs,
90
int64_t offset, int64_t bytes,
91
+ bool write,
92
BdrvRequestPadding *pad)
93
{
94
int64_t align = bs->bl.request_alignment;
95
@@ -XXX,XX +XXX,XX @@ static bool bdrv_init_padding(BlockDriverState *bs,
96
pad->tail_buf = pad->buf + pad->buf_len - align;
97
}
98
99
+ pad->write = write;
100
+
101
return true;
102
}
103
104
@@ -XXX,XX +XXX,XX @@ zero_mem:
105
return 0;
106
}
107
108
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
109
+/**
110
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
111
+ */
112
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
113
{
114
+ if (pad->collapse_bounce_buf) {
115
+ if (!pad->write) {
116
+ /*
117
+ * If padding required elements in the vector to be collapsed into a
118
+ * bounce buffer, copy the bounce buffer content back
119
+ */
120
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
121
+ pad->collapse_bounce_buf, pad->collapse_len);
122
+ }
123
+ qemu_vfree(pad->collapse_bounce_buf);
124
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
125
+ }
126
if (pad->buf) {
127
qemu_vfree(pad->buf);
128
qemu_iovec_destroy(&pad->local_qiov);
129
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
130
memset(pad, 0, sizeof(*pad));
131
}
132
133
+/*
134
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
135
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
136
+ *
137
+ * To ensure this, when necessary, the first two or three elements of @iov are
138
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
139
+ * bounce buffer in pad->local_qiov.
140
+ *
141
+ * After performing a read request, the data from the bounce buffer must be
142
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
143
+ */
144
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
145
+ BdrvRequestPadding *pad,
146
+ struct iovec *iov, int niov,
147
+ size_t iov_offset, size_t bytes)
148
+{
149
+ int padded_niov, surplus_count, collapse_count;
150
+
151
+ /* Assert this invariant */
152
+ assert(niov <= IOV_MAX);
153
+
154
+ /*
155
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
156
+ * to the guest is not ideal, but there is little else we can do. At least
157
+ * this will practically never happen on 64-bit systems.
158
+ */
159
+ if (SIZE_MAX - pad->head < bytes ||
160
+ SIZE_MAX - pad->head - bytes < pad->tail)
161
+ {
162
+ return -EINVAL;
163
+ }
164
+
165
+ /* Length of the resulting IOV if we just concatenated everything */
166
+ padded_niov = !!pad->head + niov + !!pad->tail;
167
+
168
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
169
+
170
+ if (pad->head) {
171
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
172
+ }
173
+
174
+ /*
175
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
176
+ * Instead, merge the first two or three elements of @iov to reduce the
177
+ * number of vector elements as necessary.
178
+ */
179
+ if (padded_niov > IOV_MAX) {
180
+ /*
181
+ * Only head and tail can have led to the number of entries exceeding
182
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
183
+ * to reduce the number of elements by `surplus_count`, so we merge that
184
+ * many elements plus one into one element.
185
+ */
186
+ surplus_count = padded_niov - IOV_MAX;
187
+ assert(surplus_count <= !!pad->head + !!pad->tail);
188
+ collapse_count = surplus_count + 1;
189
+
190
+ /*
191
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
192
+ * advance `iov` (and associated variables) by those elements.
193
+ */
194
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
195
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
196
+ collapse_count, iov_offset, SIZE_MAX);
197
+ iov += collapse_count;
198
+ iov_offset = 0;
199
+ niov -= collapse_count;
200
+ bytes -= pad->pre_collapse_qiov.size;
201
+
202
+ /*
203
+ * Construct the bounce buffer to match the length of the to-collapse
204
+ * vector elements, and for write requests, initialize it with the data
205
+ * from those elements. Then add it to `pad->local_qiov`.
206
+ */
207
+ pad->collapse_len = pad->pre_collapse_qiov.size;
208
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
209
+ if (pad->write) {
210
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
211
+ pad->collapse_bounce_buf, pad->collapse_len);
212
+ }
213
+ qemu_iovec_add(&pad->local_qiov,
214
+ pad->collapse_bounce_buf, pad->collapse_len);
215
+ }
216
+
217
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
218
+
219
+ if (pad->tail) {
220
+ qemu_iovec_add(&pad->local_qiov,
221
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
222
+ }
223
+
224
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
225
+ return 0;
226
+}
227
+
228
/*
229
* bdrv_pad_request
230
*
231
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
232
* read of padding, bdrv_padding_rmw_read() should be called separately if
233
* needed.
234
*
235
+ * @write is true for write requests, false for read requests.
236
+ *
237
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
238
* - on function start they represent original request
239
* - on failure or when padding is not needed they are unchanged
240
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
241
static int bdrv_pad_request(BlockDriverState *bs,
242
QEMUIOVector **qiov, size_t *qiov_offset,
243
int64_t *offset, int64_t *bytes,
244
+ bool write,
245
BdrvRequestPadding *pad, bool *padded,
246
BdrvRequestFlags *flags)
247
{
248
int ret;
249
+ struct iovec *sliced_iov;
250
+ int sliced_niov;
251
+ size_t sliced_head, sliced_tail;
252
253
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
254
255
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
256
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
257
if (padded) {
258
*padded = false;
259
}
260
return 0;
261
}
262
263
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
264
- *qiov, *qiov_offset, *bytes,
265
- pad->buf + pad->buf_len - pad->tail,
266
- pad->tail);
267
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
268
+ &sliced_head, &sliced_tail,
269
+ &sliced_niov);
270
+
271
+ /* Guaranteed by bdrv_check_qiov_request() */
272
+ assert(*bytes <= SIZE_MAX);
273
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
274
+ sliced_head, *bytes);
275
if (ret < 0) {
276
- bdrv_padding_destroy(pad);
277
+ bdrv_padding_finalize(pad);
278
return ret;
279
}
280
*bytes += pad->head + pad->tail;
281
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
282
flags |= BDRV_REQ_COPY_ON_READ;
283
}
284
285
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
286
- NULL, &flags);
287
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
288
+ &pad, NULL, &flags);
289
if (ret < 0) {
290
goto fail;
291
}
292
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
293
bs->bl.request_alignment,
294
qiov, qiov_offset, flags);
295
tracked_request_end(&req);
296
- bdrv_padding_destroy(&pad);
297
+ bdrv_padding_finalize(&pad);
298
299
fail:
300
bdrv_dec_in_flight(bs);
301
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
302
/* This flag doesn't make sense for padding or zero writes */
303
flags &= ~BDRV_REQ_REGISTERED_BUF;
304
305
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
306
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
307
if (padding) {
308
assert(!(flags & BDRV_REQ_NO_WAIT));
309
bdrv_make_request_serialising(req, align);
310
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
311
}
312
313
out:
314
- bdrv_padding_destroy(&pad);
315
+ bdrv_padding_finalize(&pad);
316
317
return ret;
318
}
319
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
320
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
321
* alignment only if there is no ZERO flag.
322
*/
323
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
324
- &padded, &flags);
325
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
326
+ &pad, &padded, &flags);
327
if (ret < 0) {
328
return ret;
329
}
330
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
331
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
332
qiov, qiov_offset, flags);
333
334
- bdrv_padding_destroy(&pad);
335
+ bdrv_padding_finalize(&pad);
336
337
out:
338
tracked_request_end(&req);
339
--
340
2.40.1
diff view generated by jsdifflib
New patch
1
bdrv_pad_request() was the main user of qemu_iovec_init_extended().
2
HEAD^ has removed that use, so we can remove qemu_iovec_init_extended()
3
now.
1
4
5
The only remaining user is qemu_iovec_init_slice(), which can easily
6
inline the small part it really needs.
7
8
Note that qemu_iovec_init_extended() offered a memcpy() optimization to
9
initialize the new I/O vector. qemu_iovec_concat_iov(), which is used
10
to replace its functionality, does not, but calls qemu_iovec_add() for
11
every single element. If we decide this optimization was important, we
12
will need to re-implement it in qemu_iovec_concat_iov(), which might
13
also benefit its pre-existing users.
14
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
17
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
18
Message-Id: <20230411173418.19549-4-hreitz@redhat.com>
19
---
20
include/qemu/iov.h | 5 ---
21
util/iov.c | 79 +++++++---------------------------------------
22
2 files changed, 11 insertions(+), 73 deletions(-)
23
24
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/qemu/iov.h
27
+++ b/include/qemu/iov.h
28
@@ -XXX,XX +XXX,XX @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
29
30
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
31
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
32
-int qemu_iovec_init_extended(
33
- QEMUIOVector *qiov,
34
- void *head_buf, size_t head_len,
35
- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
36
- void *tail_buf, size_t tail_len);
37
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
38
size_t offset, size_t len);
39
struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
40
diff --git a/util/iov.c b/util/iov.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/util/iov.c
43
+++ b/util/iov.c
44
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
45
return niov;
46
}
47
48
-/*
49
- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
50
- * and @tail_buf buffer into new qiov.
51
- */
52
-int qemu_iovec_init_extended(
53
- QEMUIOVector *qiov,
54
- void *head_buf, size_t head_len,
55
- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
56
- void *tail_buf, size_t tail_len)
57
-{
58
- size_t mid_head, mid_tail;
59
- int total_niov, mid_niov = 0;
60
- struct iovec *p, *mid_iov = NULL;
61
-
62
- assert(mid_qiov->niov <= IOV_MAX);
63
-
64
- if (SIZE_MAX - head_len < mid_len ||
65
- SIZE_MAX - head_len - mid_len < tail_len)
66
- {
67
- return -EINVAL;
68
- }
69
-
70
- if (mid_len) {
71
- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len,
72
- &mid_head, &mid_tail, &mid_niov);
73
- }
74
-
75
- total_niov = !!head_len + mid_niov + !!tail_len;
76
- if (total_niov > IOV_MAX) {
77
- return -EINVAL;
78
- }
79
-
80
- if (total_niov == 1) {
81
- qemu_iovec_init_buf(qiov, NULL, 0);
82
- p = &qiov->local_iov;
83
- } else {
84
- qiov->niov = qiov->nalloc = total_niov;
85
- qiov->size = head_len + mid_len + tail_len;
86
- p = qiov->iov = g_new(struct iovec, qiov->niov);
87
- }
88
-
89
- if (head_len) {
90
- p->iov_base = head_buf;
91
- p->iov_len = head_len;
92
- p++;
93
- }
94
-
95
- assert(!mid_niov == !mid_len);
96
- if (mid_niov) {
97
- memcpy(p, mid_iov, mid_niov * sizeof(*p));
98
- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
99
- p[0].iov_len -= mid_head;
100
- p[mid_niov - 1].iov_len -= mid_tail;
101
- p += mid_niov;
102
- }
103
-
104
- if (tail_len) {
105
- p->iov_base = tail_buf;
106
- p->iov_len = tail_len;
107
- }
108
-
109
- return 0;
110
-}
111
-
112
/*
113
* Check if the contents of subrange of qiov data is all zeroes.
114
*/
115
@@ -XXX,XX +XXX,XX @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
116
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
117
size_t offset, size_t len)
118
{
119
- int ret;
120
+ struct iovec *slice_iov;
121
+ int slice_niov;
122
+ size_t slice_head, slice_tail;
123
124
assert(source->size >= len);
125
assert(source->size - len >= offset);
126
127
- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */
128
- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
129
- assert(ret == 0);
130
+ slice_iov = qemu_iovec_slice(source, offset, len,
131
+ &slice_head, &slice_tail, &slice_niov);
132
+ if (slice_niov == 1) {
133
+ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len);
134
+ } else {
135
+ qemu_iovec_init(qiov, slice_niov);
136
+ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len);
137
+ }
138
}
139
140
void qemu_iovec_destroy(QEMUIOVector *qiov)
141
--
142
2.40.1
diff view generated by jsdifflib
New patch
1
Test that even vectored IO requests with 1024 vector elements that are
2
not aligned to the device's request alignment will succeed.
1
3
4
Reviewed-by: Eric Blake <eblake@redhat.com>
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
6
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
7
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
8
---
9
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++
10
tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
11
2 files changed, 144 insertions(+)
12
create mode 100755 tests/qemu-iotests/tests/iov-padding
13
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
14
15
diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
16
new file mode 100755
17
index XXXXXXX..XXXXXXX
18
--- /dev/null
19
+++ b/tests/qemu-iotests/tests/iov-padding
20
@@ -XXX,XX +XXX,XX @@
21
+#!/usr/bin/env bash
22
+# group: rw quick
23
+#
24
+# Check the interaction of request padding (to fit alignment restrictions) with
25
+# vectored I/O from the guest
26
+#
27
+# Copyright Red Hat
28
+#
29
+# This program is free software; you can redistribute it and/or modify
30
+# it under the terms of the GNU General Public License as published by
31
+# the Free Software Foundation; either version 2 of the License, or
32
+# (at your option) any later version.
33
+#
34
+# This program is distributed in the hope that it will be useful,
35
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
36
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37
+# GNU General Public License for more details.
38
+#
39
+# You should have received a copy of the GNU General Public License
40
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
41
+#
42
+
43
+seq=$(basename $0)
44
+echo "QA output created by $seq"
45
+
46
+status=1    # failure is the default!
47
+
48
+_cleanup()
49
+{
50
+ _cleanup_test_img
51
+}
52
+trap "_cleanup; exit \$status" 0 1 2 3 15
53
+
54
+# get standard environment, filters and checks
55
+cd ..
56
+. ./common.rc
57
+. ./common.filter
58
+
59
+_supported_fmt raw
60
+_supported_proto file
61
+
62
+_make_test_img 1M
63
+
64
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
65
+
66
+# Four combinations:
67
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
68
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
69
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
70
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
71
+for start_offset in 4096 512; do
72
+ for last_element_length in 512 4096; do
73
+ length=$((1023 * 512 + $last_element_length))
74
+
75
+ echo
76
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
77
+
78
+ # Fill with data for testing
79
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
80
+
81
+ # 1023 512-byte buffers, and then one with length $last_element_length
82
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
83
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
84
+ -c "writev $cmd_params" \
85
+ --image-opts \
86
+ "$IMGSPEC" \
87
+ | _filter_qemu_io
88
+
89
+ # Read all patterns -- read the part we just wrote with writev twice,
90
+ # once "normally", and once with a readv, so we see that that works, too
91
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
92
+ -c "read -P 1 0 $start_offset" \
93
+ -c "read -P 2 $start_offset $length" \
94
+ -c "readv $cmd_params" \
95
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
96
+ --image-opts \
97
+ "$IMGSPEC" \
98
+ | _filter_qemu_io
99
+ done
100
+done
101
+
102
+# success, all done
103
+echo "*** done"
104
+rm -f $seq.full
105
+status=0
106
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
107
new file mode 100644
108
index XXXXXXX..XXXXXXX
109
--- /dev/null
110
+++ b/tests/qemu-iotests/tests/iov-padding.out
111
@@ -XXX,XX +XXX,XX @@
112
+QA output created by iov-padding
113
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
114
+
115
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
116
+wrote 1048576/1048576 bytes at offset 0
117
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
118
+wrote 524288/524288 bytes at offset 4096
119
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
120
+read 4096/4096 bytes at offset 0
121
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+read 524288/524288 bytes at offset 4096
123
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
124
+read 524288/524288 bytes at offset 4096
125
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
126
+read 520192/520192 bytes at offset 528384
127
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
128
+
129
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
130
+wrote 1048576/1048576 bytes at offset 0
131
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
132
+wrote 527872/527872 bytes at offset 4096
133
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
134
+read 4096/4096 bytes at offset 0
135
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
136
+read 527872/527872 bytes at offset 4096
137
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
138
+read 527872/527872 bytes at offset 4096
139
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
140
+read 516608/516608 bytes at offset 531968
141
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
142
+
143
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
144
+wrote 1048576/1048576 bytes at offset 0
145
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
146
+wrote 524288/524288 bytes at offset 512
147
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
148
+read 512/512 bytes at offset 0
149
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
150
+read 524288/524288 bytes at offset 512
151
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
152
+read 524288/524288 bytes at offset 512
153
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
154
+read 523776/523776 bytes at offset 524800
155
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
156
+
157
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
158
+wrote 1048576/1048576 bytes at offset 0
159
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
160
+wrote 527872/527872 bytes at offset 512
161
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
162
+read 512/512 bytes at offset 0
163
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
164
+read 527872/527872 bytes at offset 512
165
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
166
+read 527872/527872 bytes at offset 512
167
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
168
+read 520192/520192 bytes at offset 528384
169
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
170
+*** done
171
--
172
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
data_end field in BDRVParallelsState is set to the biggest offset present
4
in BAT. If this offset is outside of the image, any further write will
5
create the cluster at this offset and/or the image will be truncated to
6
this offset on close. This is definitely not correct.
7
8
Raise an error in parallels_open() if data_end points outside the image
9
and it is not a check (let the check to repaire the image). Set data_end
10
to the end of the cluster with the last correct offset.
11
12
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
13
Message-Id: <20230424093147.197643-2-alexander.ivanov@virtuozzo.com>
14
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
15
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
16
---
17
block/parallels.c | 17 +++++++++++++++++
18
1 file changed, 17 insertions(+)
19
20
diff --git a/block/parallels.c b/block/parallels.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/parallels.c
23
+++ b/block/parallels.c
24
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
25
BDRVParallelsState *s = bs->opaque;
26
ParallelsHeader ph;
27
int ret, size, i;
28
+ int64_t file_nb_sectors;
29
QemuOpts *opts = NULL;
30
Error *local_err = NULL;
31
char *buf;
32
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
33
return ret;
34
}
35
36
+ file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
37
+ if (file_nb_sectors < 0) {
38
+ return -EINVAL;
39
+ }
40
+
41
ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
42
if (ret < 0) {
43
goto fail;
44
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
45
46
for (i = 0; i < s->bat_size; i++) {
47
int64_t off = bat2sect(s, i);
48
+ if (off >= file_nb_sectors) {
49
+ if (flags & BDRV_O_CHECK) {
50
+ continue;
51
+ }
52
+ error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
53
+ "is larger than file size (%" PRIi64 ")",
54
+ off << BDRV_SECTOR_BITS, i,
55
+ file_nb_sectors << BDRV_SECTOR_BITS);
56
+ ret = -EINVAL;
57
+ goto fail;
58
+ }
59
if (off >= s->data_end) {
60
s->data_end = off + s->tracks;
61
}
62
--
63
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Don't let high_off be more than the file size even if we don't fix the
4
image.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
9
Message-Id: <20230424093147.197643-3-alexander.ivanov@virtuozzo.com>
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
13
block/parallels.c | 4 ++--
14
1 file changed, 2 insertions(+), 2 deletions(-)
15
16
diff --git a/block/parallels.c b/block/parallels.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/parallels.c
19
+++ b/block/parallels.c
20
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
21
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
22
res->corruptions++;
23
if (fix & BDRV_FIX_ERRORS) {
24
- prev_off = 0;
25
s->bat_bitmap[i] = 0;
26
res->corruptions_fixed++;
27
flush_bat = true;
28
- continue;
29
}
30
+ prev_off = 0;
31
+ continue;
32
}
33
34
res->bfi.allocated_clusters++;
35
--
36
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Set data_end to the end of the last cluster inside the image. In such a
4
way we can be sure that corrupted offsets in the BAT can't affect the
5
image size. If there are no allocated clusters set image_end_offset by
6
data_end.
7
8
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Denis V. Lunev <den@openvz.org>
10
Message-Id: <20230424093147.197643-4-alexander.ivanov@virtuozzo.com>
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
---
14
block/parallels.c | 8 +++++++-
15
1 file changed, 7 insertions(+), 1 deletion(-)
16
17
diff --git a/block/parallels.c b/block/parallels.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/parallels.c
20
+++ b/block/parallels.c
21
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
22
}
23
}
24
25
- res->image_end_offset = high_off + s->cluster_size;
26
+ if (high_off == 0) {
27
+ res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
28
+ } else {
29
+ res->image_end_offset = high_off + s->cluster_size;
30
+ s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
31
+ }
32
+
33
if (size > res->image_end_offset) {
34
int64_t count;
35
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
36
--
37
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
This helper will be reused in next patches during parallels_co_check
4
rework to simplify its code.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
9
Message-Id: <20230424093147.197643-5-alexander.ivanov@virtuozzo.com>
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
13
block/parallels.c | 11 ++++++++---
14
1 file changed, 8 insertions(+), 3 deletions(-)
15
16
diff --git a/block/parallels.c b/block/parallels.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/parallels.c
19
+++ b/block/parallels.c
20
@@ -XXX,XX +XXX,XX @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
21
return start_off;
22
}
23
24
+static void parallels_set_bat_entry(BDRVParallelsState *s,
25
+ uint32_t index, uint32_t offset)
26
+{
27
+ s->bat_bitmap[index] = cpu_to_le32(offset);
28
+ bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
29
+}
30
+
31
static int64_t coroutine_fn GRAPH_RDLOCK
32
allocate_clusters(BlockDriverState *bs, int64_t sector_num,
33
int nb_sectors, int *pnum)
34
@@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
35
}
36
37
for (i = 0; i < to_allocate; i++) {
38
- s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
39
+ parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
40
s->data_end += s->tracks;
41
- bitmap_set(s->bat_dirty_bmap,
42
- bat_entry_off(idx + i) / s->bat_dirty_block, 1);
43
}
44
45
return bat2sect(s, idx) + sector_num % s->tracks;
46
--
47
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
BAT is written in the context of conventional operations over the image
4
inside bdrv_co_flush() when it calls parallels_co_flush_to_os() callback.
5
Thus we should not modify BAT array directly, but call
6
parallels_set_bat_entry() helper and bdrv_co_flush() further on. After
7
that there is no need to manually write BAT and track its modification.
8
9
This makes the code more generic and allows splitting parallels_set_bat_entry()
10
into independent pieces.
11
12
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
13
Reviewed-by: Denis V. Lunev <den@openvz.org>
14
Message-Id: <20230424093147.197643-6-alexander.ivanov@virtuozzo.com>
15
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
16
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
17
---
18
block/parallels.c | 23 ++++++++++-------------
19
1 file changed, 10 insertions(+), 13 deletions(-)
20
21
diff --git a/block/parallels.c b/block/parallels.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/block/parallels.c
24
+++ b/block/parallels.c
25
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
26
{
27
BDRVParallelsState *s = bs->opaque;
28
int64_t size, prev_off, high_off;
29
- int ret;
30
+ int ret = 0;
31
uint32_t i;
32
- bool flush_bat = false;
33
34
size = bdrv_getlength(bs->file->bs);
35
if (size < 0) {
36
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
37
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
38
res->corruptions++;
39
if (fix & BDRV_FIX_ERRORS) {
40
- s->bat_bitmap[i] = 0;
41
+ parallels_set_bat_entry(s, i, 0);
42
res->corruptions_fixed++;
43
- flush_bat = true;
44
}
45
prev_off = 0;
46
continue;
47
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
48
prev_off = off;
49
}
50
51
- ret = 0;
52
- if (flush_bat) {
53
- ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
54
- if (ret < 0) {
55
- res->check_errors++;
56
- goto out;
57
- }
58
- }
59
-
60
if (high_off == 0) {
61
res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
62
} else {
63
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
64
65
out:
66
qemu_co_mutex_unlock(&s->lock);
67
+
68
+ if (ret == 0) {
69
+ ret = bdrv_co_flush(bs);
70
+ if (ret < 0) {
71
+ res->check_errors++;
72
+ }
73
+ }
74
+
75
return ret;
76
}
77
78
--
79
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
We will add more and more checks so we need a better code structure
4
in parallels_co_check. Let each check be performed in a separate loop
5
in a separate helper.
6
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
10
Message-Id: <20230424093147.197643-7-alexander.ivanov@virtuozzo.com>
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
---
14
block/parallels.c | 31 +++++++++++++++++++++----------
15
1 file changed, 21 insertions(+), 10 deletions(-)
16
17
diff --git a/block/parallels.c b/block/parallels.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/parallels.c
20
+++ b/block/parallels.c
21
@@ -XXX,XX +XXX,XX @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
22
return ret;
23
}
24
25
+static void parallels_check_unclean(BlockDriverState *bs,
26
+ BdrvCheckResult *res,
27
+ BdrvCheckMode fix)
28
+{
29
+ BDRVParallelsState *s = bs->opaque;
30
+
31
+ if (!s->header_unclean) {
32
+ return;
33
+ }
34
+
35
+ fprintf(stderr, "%s image was not closed correctly\n",
36
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
37
+ res->corruptions++;
38
+ if (fix & BDRV_FIX_ERRORS) {
39
+ /* parallels_close will do the job right */
40
+ res->corruptions_fixed++;
41
+ s->header_unclean = false;
42
+ }
43
+}
44
45
static int coroutine_fn GRAPH_RDLOCK
46
parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
47
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
48
}
49
50
qemu_co_mutex_lock(&s->lock);
51
- if (s->header_unclean) {
52
- fprintf(stderr, "%s image was not closed correctly\n",
53
- fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
54
- res->corruptions++;
55
- if (fix & BDRV_FIX_ERRORS) {
56
- /* parallels_close will do the job right */
57
- res->corruptions_fixed++;
58
- s->header_unclean = false;
59
- }
60
- }
61
+
62
+ parallels_check_unclean(bs, res, fix);
63
64
res->bfi.total_clusters = s->bat_size;
65
res->bfi.compressed_clusters = 0; /* compression is not supported */
66
--
67
2.40.1
diff view generated by jsdifflib
1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
'event-loop-base' provides basic property handling for all 'AioContext'
3
We will add more and more checks so we need a better code structure in
4
based event loops. So let's define a new 'MainLoopClass' that inherits
4
parallels_co_check. Let each check be performed in a separate loop in a
5
from it. This will permit tweaking the main loop's properties through
5
separate helper.
6
qapi as well as through the command line using the '-object' keyword[1].
7
Only one instance of 'MainLoopClass' might be created at any time.
8
6
9
'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
10
mark 'MainLoop' as non-deletable.
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
9
Message-Id: <20230424093147.197643-8-alexander.ivanov@virtuozzo.com>
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
13
block/parallels.c | 75 +++++++++++++++++++++++++++++++----------------
14
1 file changed, 49 insertions(+), 26 deletions(-)
11
15
12
[1] For example:
16
diff --git a/block/parallels.c b/block/parallels.c
13
-object main-loop,id=main-loop,aio-max-batch=<value>
14
15
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
16
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Acked-by: Markus Armbruster <armbru@redhat.com>
18
Message-id: 20220425075723.20019-3-nsaenzju@redhat.com
19
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
20
---
21
qapi/qom.json | 13 ++++++++
22
meson.build | 3 +-
23
include/qemu/main-loop.h | 10 ++++++
24
include/sysemu/event-loop-base.h | 1 +
25
event-loop-base.c | 13 ++++++++
26
util/main-loop.c | 56 ++++++++++++++++++++++++++++++++
27
6 files changed, 95 insertions(+), 1 deletion(-)
28
29
diff --git a/qapi/qom.json b/qapi/qom.json
30
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
31
--- a/qapi/qom.json
18
--- a/block/parallels.c
32
+++ b/qapi/qom.json
19
+++ b/block/parallels.c
33
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs,
34
'*poll-grow': 'int',
35
'*poll-shrink': 'int' } }
36
37
+##
38
+# @MainLoopProperties:
39
+#
40
+# Properties for the main-loop object.
41
+#
42
+# Since: 7.1
43
+##
44
+{ 'struct': 'MainLoopProperties',
45
+ 'base': 'EventLoopBaseProperties',
46
+ 'data': {} }
47
+
48
##
49
# @MemoryBackendProperties:
50
#
51
@@ -XXX,XX +XXX,XX @@
52
{ 'name': 'input-linux',
53
'if': 'CONFIG_LINUX' },
54
'iothread',
55
+ 'main-loop',
56
{ 'name': 'memory-backend-epc',
57
'if': 'CONFIG_LINUX' },
58
'memory-backend-file',
59
@@ -XXX,XX +XXX,XX @@
60
'input-linux': { 'type': 'InputLinuxProperties',
61
'if': 'CONFIG_LINUX' },
62
'iothread': 'IothreadProperties',
63
+ 'main-loop': 'MainLoopProperties',
64
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
65
'if': 'CONFIG_LINUX' },
66
'memory-backend-file': 'MemoryBackendFileProperties',
67
diff --git a/meson.build b/meson.build
68
index XXXXXXX..XXXXXXX 100644
69
--- a/meson.build
70
+++ b/meson.build
71
@@ -XXX,XX +XXX,XX @@ libqemuutil = static_library('qemuutil',
72
sources: util_ss.sources() + stub_ss.sources() + genh,
73
dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman])
74
qemuutil = declare_dependency(link_with: libqemuutil,
75
- sources: genh + version_res)
76
+ sources: genh + version_res,
77
+ dependencies: [event_loop_base])
78
79
if have_system or have_user
80
decodetree = generator(find_program('scripts/decodetree.py'),
81
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/include/qemu/main-loop.h
84
+++ b/include/qemu/main-loop.h
85
@@ -XXX,XX +XXX,XX @@
86
#define QEMU_MAIN_LOOP_H
87
88
#include "block/aio.h"
89
+#include "qom/object.h"
90
+#include "sysemu/event-loop-base.h"
91
92
#define SIG_IPI SIGUSR1
93
94
+#define TYPE_MAIN_LOOP "main-loop"
95
+OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP)
96
+
97
+struct MainLoop {
98
+ EventLoopBase parent_obj;
99
+};
100
+typedef struct MainLoop MainLoop;
101
+
102
/**
103
* qemu_init_main_loop: Set up the process so that it can run the main loop.
104
*
105
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/include/sysemu/event-loop-base.h
108
+++ b/include/sysemu/event-loop-base.h
109
@@ -XXX,XX +XXX,XX @@ struct EventLoopBaseClass {
110
111
void (*init)(EventLoopBase *base, Error **errp);
112
void (*update_params)(EventLoopBase *base, Error **errp);
113
+ bool (*can_be_deleted)(EventLoopBase *base);
114
};
115
116
struct EventLoopBase {
117
diff --git a/event-loop-base.c b/event-loop-base.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/event-loop-base.c
120
+++ b/event-loop-base.c
121
@@ -XXX,XX +XXX,XX @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp)
122
}
21
}
123
}
22
}
124
23
125
+static bool event_loop_base_can_be_deleted(UserCreatable *uc)
24
+static int coroutine_fn GRAPH_RDLOCK
25
+parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
26
+ BdrvCheckMode fix)
126
+{
27
+{
127
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
28
+ BDRVParallelsState *s = bs->opaque;
128
+ EventLoopBase *backend = EVENT_LOOP_BASE(uc);
29
+ uint32_t i;
30
+ int64_t off, high_off, size;
129
+
31
+
130
+ if (bc->can_be_deleted) {
32
+ size = bdrv_getlength(bs->file->bs);
131
+ return bc->can_be_deleted(backend);
33
+ if (size < 0) {
34
+ res->check_errors++;
35
+ return size;
132
+ }
36
+ }
133
+
37
+
134
+ return true;
38
+ high_off = 0;
39
+ for (i = 0; i < s->bat_size; i++) {
40
+ off = bat2sect(s, i) << BDRV_SECTOR_BITS;
41
+ if (off > size) {
42
+ fprintf(stderr, "%s cluster %u is outside image\n",
43
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
44
+ res->corruptions++;
45
+ if (fix & BDRV_FIX_ERRORS) {
46
+ parallels_set_bat_entry(s, i, 0);
47
+ res->corruptions_fixed++;
48
+ }
49
+ continue;
50
+ }
51
+ if (high_off < off) {
52
+ high_off = off;
53
+ }
54
+ }
55
+
56
+ if (high_off == 0) {
57
+ res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
58
+ } else {
59
+ res->image_end_offset = high_off + s->cluster_size;
60
+ s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
61
+ }
62
+
63
+ return 0;
135
+}
64
+}
136
+
65
+
137
static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
66
static int coroutine_fn GRAPH_RDLOCK
67
parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
68
BdrvCheckMode fix)
138
{
69
{
139
UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
70
BDRVParallelsState *s = bs->opaque;
140
ucc->complete = event_loop_base_complete;
71
- int64_t size, prev_off, high_off;
141
+ ucc->can_be_deleted = event_loop_base_can_be_deleted;
72
- int ret = 0;
142
73
+ int64_t size, prev_off;
143
object_class_property_add(klass, "aio-max-batch", "int",
74
+ int ret;
144
event_loop_base_get_param,
75
uint32_t i;
145
diff --git a/util/main-loop.c b/util/main-loop.c
76
146
index XXXXXXX..XXXXXXX 100644
77
size = bdrv_getlength(bs->file->bs);
147
--- a/util/main-loop.c
78
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
148
+++ b/util/main-loop.c
79
149
@@ -XXX,XX +XXX,XX @@
80
parallels_check_unclean(bs, res, fix);
150
#include "qemu/error-report.h"
81
151
#include "qemu/queue.h"
82
+ ret = parallels_check_outside_image(bs, res, fix);
152
#include "qemu/compiler.h"
83
+ if (ret < 0) {
153
+#include "qom/object.h"
84
+ goto out;
154
155
#ifndef _WIN32
156
#include <sys/wait.h>
157
@@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp)
158
return 0;
159
}
160
161
+static void main_loop_update_params(EventLoopBase *base, Error **errp)
162
+{
163
+ if (!qemu_aio_context) {
164
+ error_setg(errp, "qemu aio context not ready");
165
+ return;
166
+ }
85
+ }
167
+
86
+
168
+ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
87
res->bfi.total_clusters = s->bat_size;
169
+}
88
res->bfi.compressed_clusters = 0; /* compression is not supported */
170
+
89
171
+MainLoop *mloop;
90
- high_off = 0;
172
+
91
prev_off = 0;
173
+static void main_loop_init(EventLoopBase *base, Error **errp)
92
for (i = 0; i < s->bat_size; i++) {
174
+{
93
int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
175
+ MainLoop *m = MAIN_LOOP(base);
94
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
176
+
95
continue;
177
+ if (mloop) {
96
}
178
+ error_setg(errp, "only one main-loop instance allowed");
97
179
+ return;
98
- /* cluster outside the image */
180
+ }
99
- if (off > size) {
181
+
100
- fprintf(stderr, "%s cluster %u is outside image\n",
182
+ main_loop_update_params(base, errp);
101
- fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
183
+
102
- res->corruptions++;
184
+ mloop = m;
103
- if (fix & BDRV_FIX_ERRORS) {
185
+ return;
104
- parallels_set_bat_entry(s, i, 0);
186
+}
105
- res->corruptions_fixed++;
187
+
106
- }
188
+static bool main_loop_can_be_deleted(EventLoopBase *base)
107
- prev_off = 0;
189
+{
108
- continue;
190
+ return false;
109
- }
191
+}
110
-
192
+
111
res->bfi.allocated_clusters++;
193
+static void main_loop_class_init(ObjectClass *oc, void *class_data)
112
- if (off > high_off) {
194
+{
113
- high_off = off;
195
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);
114
- }
196
+
115
197
+ bc->init = main_loop_init;
116
if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
198
+ bc->update_params = main_loop_update_params;
117
res->bfi.fragmented_clusters++;
199
+ bc->can_be_deleted = main_loop_can_be_deleted;
118
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
200
+}
119
prev_off = off;
201
+
120
}
202
+static const TypeInfo main_loop_info = {
121
203
+ .name = TYPE_MAIN_LOOP,
122
- if (high_off == 0) {
204
+ .parent = TYPE_EVENT_LOOP_BASE,
123
- res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
205
+ .class_init = main_loop_class_init,
124
- } else {
206
+ .instance_size = sizeof(MainLoop),
125
- res->image_end_offset = high_off + s->cluster_size;
207
+};
126
- s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
208
+
127
- }
209
+static void main_loop_register_types(void)
128
-
210
+{
129
if (size > res->image_end_offset) {
211
+ type_register_static(&main_loop_info);
130
int64_t count;
212
+}
131
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
213
+
214
+type_init(main_loop_register_types)
215
+
216
static int max_priority;
217
218
#ifndef _WIN32
219
--
132
--
220
2.35.1
133
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Exclude out-of-image clusters from allocated and fragmented clusters
4
calculation.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Message-Id: <20230424093147.197643-9-alexander.ivanov@virtuozzo.com>
8
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
9
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
10
---
11
block/parallels.c | 6 +++++-
12
1 file changed, 5 insertions(+), 1 deletion(-)
13
14
diff --git a/block/parallels.c b/block/parallels.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/parallels.c
17
+++ b/block/parallels.c
18
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
19
prev_off = 0;
20
for (i = 0; i < s->bat_size; i++) {
21
int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
22
- if (off == 0) {
23
+ /*
24
+ * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
25
+ * fixed. Skip not allocated and out-of-image BAT entries.
26
+ */
27
+ if (off == 0 || off + s->cluster_size > res->image_end_offset) {
28
prev_off = 0;
29
continue;
30
}
31
--
32
2.40.1
diff view generated by jsdifflib
1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
Introduce the 'event-loop-base' abstract class, it'll hold the
3
We will add more and more checks so we need a better code structure
4
properties common to all event loops and provide the necessary hooks for
4
in parallels_co_check. Let each check be performed in a separate loop
5
their creation and maintenance. Then have iothread inherit from it.
5
in a separate helper.
6
6
7
EventLoopBaseClass is defined as user creatable and provides a hook for
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
8
its children to attach themselves to the user creatable class 'complete'
8
Message-Id: <20230424093147.197643-10-alexander.ivanov@virtuozzo.com>
9
function. It also provides an update_params() callback to propagate
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
property changes onto its children.
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
12
block/parallels.c | 74 ++++++++++++++++++++++++++++-------------------
13
1 file changed, 45 insertions(+), 29 deletions(-)
11
14
12
The new 'event-loop-base' class will live in the root directory. It is
15
diff --git a/block/parallels.c b/block/parallels.c
13
built on its own using the 'link_whole' option (there are no direct
14
function dependencies between the class and its children, it all happens
15
through 'constructor' magic). It also imposes new compilation
16
dependencies:
17
18
qom <- event-loop-base <- blockdev (iothread.c)
19
20
And in subsequent patches:
21
22
qom <- event-loop-base <- qemuutil (util/main-loop.c)
23
24
All this forced some amount of reordering in meson.build:
25
26
- Moved qom build definition before qemuutil. Doing it the other way
27
around (i.e. moving qemuutil after qom) isn't possible as a lot of
28
core libraries that live in between the two depend on it.
29
30
- Process the 'hw' subdir earlier, as it introduces files into the
31
'qom' source set.
32
33
No functional changes intended.
34
35
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
36
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
37
Acked-by: Markus Armbruster <armbru@redhat.com>
38
Message-id: 20220425075723.20019-2-nsaenzju@redhat.com
39
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
40
---
41
qapi/qom.json | 22 +++++--
42
meson.build | 23 ++++---
43
include/sysemu/event-loop-base.h | 36 +++++++++++
44
include/sysemu/iothread.h | 6 +-
45
event-loop-base.c | 104 +++++++++++++++++++++++++++++++
46
iothread.c | 65 ++++++-------------
47
6 files changed, 192 insertions(+), 64 deletions(-)
48
create mode 100644 include/sysemu/event-loop-base.h
49
create mode 100644 event-loop-base.c
50
51
diff --git a/qapi/qom.json b/qapi/qom.json
52
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
53
--- a/qapi/qom.json
17
--- a/block/parallels.c
54
+++ b/qapi/qom.json
18
+++ b/block/parallels.c
55
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
56
'*repeat': 'bool',
20
}
57
'*grab-toggle': 'GrabToggleKeys' } }
21
58
22
static int coroutine_fn GRAPH_RDLOCK
59
+##
23
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
60
+# @EventLoopBaseProperties:
24
- BdrvCheckMode fix)
61
+#
25
+parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
62
+# Common properties for event loops
26
+ BdrvCheckMode fix)
63
+#
27
{
64
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
28
BDRVParallelsState *s = bs->opaque;
65
+# 0 means that the engine will use its default.
29
- int64_t size, prev_off;
66
+# (default: 0)
30
+ int64_t size;
67
+#
31
int ret;
68
+# Since: 7.1
32
- uint32_t i;
69
+##
33
70
+{ 'struct': 'EventLoopBaseProperties',
34
size = bdrv_getlength(bs->file->bs);
71
+ 'data': { '*aio-max-batch': 'int' } }
35
if (size < 0) {
36
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
37
return size;
38
}
39
40
+ if (size > res->image_end_offset) {
41
+ int64_t count;
42
+ count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
43
+ fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
44
+ fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
45
+ size - res->image_end_offset);
46
+ res->leaks += count;
47
+ if (fix & BDRV_FIX_LEAKS) {
48
+ Error *local_err = NULL;
72
+
49
+
73
##
50
+ /*
74
# @IothreadProperties:
51
+ * In order to really repair the image, we must shrink it.
75
#
52
+ * That means we have to pass exact=true.
76
@@ -XXX,XX +XXX,XX @@
53
+ */
77
# algorithm detects it is spending too long polling without
54
+ ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
78
# encountering events. 0 selects a default behaviour (default: 0)
55
+ PREALLOC_MODE_OFF, 0, &local_err);
79
#
56
+ if (ret < 0) {
80
-# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
57
+ error_report_err(local_err);
81
-# 0 means that the engine will use its default
58
+ res->check_errors++;
82
-# (default:0, since 6.1)
59
+ return ret;
83
+# The @aio-max-batch option is available since 6.1.
60
+ }
84
#
61
+ res->leaks_fixed += count;
85
# Since: 2.0
62
+ }
86
##
63
+ }
87
{ 'struct': 'IothreadProperties',
88
+ 'base': 'EventLoopBaseProperties',
89
'data': { '*poll-max-ns': 'int',
90
'*poll-grow': 'int',
91
- '*poll-shrink': 'int',
92
- '*aio-max-batch': 'int' } }
93
+ '*poll-shrink': 'int' } }
94
95
##
96
# @MemoryBackendProperties:
97
diff --git a/meson.build b/meson.build
98
index XXXXXXX..XXXXXXX 100644
99
--- a/meson.build
100
+++ b/meson.build
101
@@ -XXX,XX +XXX,XX @@ subdir('qom')
102
subdir('authz')
103
subdir('crypto')
104
subdir('ui')
105
+subdir('hw')
106
107
108
if enable_modules
109
@@ -XXX,XX +XXX,XX @@ if enable_modules
110
modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
111
endif
112
113
+qom_ss = qom_ss.apply(config_host, strict: false)
114
+libqom = static_library('qom', qom_ss.sources() + genh,
115
+ dependencies: [qom_ss.dependencies()],
116
+ name_suffix: 'fa')
117
+qom = declare_dependency(link_whole: libqom)
118
+
64
+
119
+event_loop_base = files('event-loop-base.c')
65
+ return 0;
120
+event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh,
121
+ build_by_default: true)
122
+event_loop_base = declare_dependency(link_whole: event_loop_base,
123
+ dependencies: [qom])
124
+
125
stub_ss = stub_ss.apply(config_all, strict: false)
126
127
util_ss.add_all(trace_ss)
128
@@ -XXX,XX +XXX,XX @@ subdir('monitor')
129
subdir('net')
130
subdir('replay')
131
subdir('semihosting')
132
-subdir('hw')
133
subdir('tcg')
134
subdir('fpu')
135
subdir('accel')
136
@@ -XXX,XX +XXX,XX @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
137
capture: true,
138
command: [undefsym, nm, '@INPUT@'])
139
140
-qom_ss = qom_ss.apply(config_host, strict: false)
141
-libqom = static_library('qom', qom_ss.sources() + genh,
142
- dependencies: [qom_ss.dependencies()],
143
- name_suffix: 'fa')
144
-
145
-qom = declare_dependency(link_whole: libqom)
146
-
147
authz_ss = authz_ss.apply(config_host, strict: false)
148
libauthz = static_library('authz', authz_ss.sources() + genh,
149
dependencies: [authz_ss.dependencies()],
150
@@ -XXX,XX +XXX,XX @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
151
build_by_default: false)
152
153
blockdev = declare_dependency(link_whole: [libblockdev],
154
- dependencies: [block])
155
+ dependencies: [block, event_loop_base])
156
157
qmp_ss = qmp_ss.apply(config_host, strict: false)
158
libqmp = static_library('qmp', qmp_ss.sources() + genh,
159
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
160
new file mode 100644
161
index XXXXXXX..XXXXXXX
162
--- /dev/null
163
+++ b/include/sysemu/event-loop-base.h
164
@@ -XXX,XX +XXX,XX @@
165
+/*
166
+ * QEMU event-loop backend
167
+ *
168
+ * Copyright (C) 2022 Red Hat Inc
169
+ *
170
+ * Authors:
171
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
172
+ *
173
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
174
+ * See the COPYING file in the top-level directory.
175
+ */
176
+#ifndef QEMU_EVENT_LOOP_BASE_H
177
+#define QEMU_EVENT_LOOP_BASE_H
178
+
179
+#include "qom/object.h"
180
+#include "block/aio.h"
181
+#include "qemu/typedefs.h"
182
+
183
+#define TYPE_EVENT_LOOP_BASE "event-loop-base"
184
+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass,
185
+ EVENT_LOOP_BASE)
186
+
187
+struct EventLoopBaseClass {
188
+ ObjectClass parent_class;
189
+
190
+ void (*init)(EventLoopBase *base, Error **errp);
191
+ void (*update_params)(EventLoopBase *base, Error **errp);
192
+};
193
+
194
+struct EventLoopBase {
195
+ Object parent;
196
+
197
+ /* AioContext AIO engine parameters */
198
+ int64_t aio_max_batch;
199
+};
200
+#endif
201
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
202
index XXXXXXX..XXXXXXX 100644
203
--- a/include/sysemu/iothread.h
204
+++ b/include/sysemu/iothread.h
205
@@ -XXX,XX +XXX,XX @@
206
#include "block/aio.h"
207
#include "qemu/thread.h"
208
#include "qom/object.h"
209
+#include "sysemu/event-loop-base.h"
210
211
#define TYPE_IOTHREAD "iothread"
212
213
struct IOThread {
214
- Object parent_obj;
215
+ EventLoopBase parent_obj;
216
217
QemuThread thread;
218
AioContext *ctx;
219
@@ -XXX,XX +XXX,XX @@ struct IOThread {
220
int64_t poll_max_ns;
221
int64_t poll_grow;
222
int64_t poll_shrink;
223
-
224
- /* AioContext AIO engine parameters */
225
- int64_t aio_max_batch;
226
};
227
typedef struct IOThread IOThread;
228
229
diff --git a/event-loop-base.c b/event-loop-base.c
230
new file mode 100644
231
index XXXXXXX..XXXXXXX
232
--- /dev/null
233
+++ b/event-loop-base.c
234
@@ -XXX,XX +XXX,XX @@
235
+/*
236
+ * QEMU event-loop base
237
+ *
238
+ * Copyright (C) 2022 Red Hat Inc
239
+ *
240
+ * Authors:
241
+ * Stefan Hajnoczi <stefanha@redhat.com>
242
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
243
+ *
244
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
245
+ * See the COPYING file in the top-level directory.
246
+ */
247
+
248
+#include "qemu/osdep.h"
249
+#include "qom/object_interfaces.h"
250
+#include "qapi/error.h"
251
+#include "sysemu/event-loop-base.h"
252
+
253
+typedef struct {
254
+ const char *name;
255
+ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
256
+} EventLoopBaseParamInfo;
257
+
258
+static EventLoopBaseParamInfo aio_max_batch_info = {
259
+ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
260
+};
261
+
262
+static void event_loop_base_get_param(Object *obj, Visitor *v,
263
+ const char *name, void *opaque, Error **errp)
264
+{
265
+ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj);
266
+ EventLoopBaseParamInfo *info = opaque;
267
+ int64_t *field = (void *)event_loop_base + info->offset;
268
+
269
+ visit_type_int64(v, name, field, errp);
270
+}
66
+}
271
+
67
+
272
+static void event_loop_base_set_param(Object *obj, Visitor *v,
68
+static int coroutine_fn GRAPH_RDLOCK
273
+ const char *name, void *opaque, Error **errp)
69
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
70
+ BdrvCheckMode fix)
274
+{
71
+{
275
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj);
72
+ BDRVParallelsState *s = bs->opaque;
276
+ EventLoopBase *base = EVENT_LOOP_BASE(obj);
73
+ int64_t prev_off;
277
+ EventLoopBaseParamInfo *info = opaque;
74
+ int ret;
278
+ int64_t *field = (void *)base + info->offset;
75
+ uint32_t i;
279
+ int64_t value;
280
+
76
+
281
+ if (!visit_type_int64(v, name, &value, errp)) {
77
qemu_co_mutex_lock(&s->lock);
282
+ return;
78
79
parallels_check_unclean(bs, res, fix);
80
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
81
goto out;
82
}
83
84
+ ret = parallels_check_leak(bs, res, fix);
85
+ if (ret < 0) {
86
+ goto out;
283
+ }
87
+ }
284
+
88
+
285
+ if (value < 0) {
89
res->bfi.total_clusters = s->bat_size;
286
+ error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
90
res->bfi.compressed_clusters = 0; /* compression is not supported */
287
+ info->name, INT64_MAX);
91
288
+ return;
92
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
289
+ }
93
prev_off = off;
290
+
291
+ *field = value;
292
+
293
+ if (bc->update_params) {
294
+ bc->update_params(base, errp);
295
+ }
296
+
297
+ return;
298
+}
299
+
300
+static void event_loop_base_complete(UserCreatable *uc, Error **errp)
301
+{
302
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
303
+ EventLoopBase *base = EVENT_LOOP_BASE(uc);
304
+
305
+ if (bc->init) {
306
+ bc->init(base, errp);
307
+ }
308
+}
309
+
310
+static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
311
+{
312
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
313
+ ucc->complete = event_loop_base_complete;
314
+
315
+ object_class_property_add(klass, "aio-max-batch", "int",
316
+ event_loop_base_get_param,
317
+ event_loop_base_set_param,
318
+ NULL, &aio_max_batch_info);
319
+}
320
+
321
+static const TypeInfo event_loop_base_info = {
322
+ .name = TYPE_EVENT_LOOP_BASE,
323
+ .parent = TYPE_OBJECT,
324
+ .instance_size = sizeof(EventLoopBase),
325
+ .class_size = sizeof(EventLoopBaseClass),
326
+ .class_init = event_loop_base_class_init,
327
+ .abstract = true,
328
+ .interfaces = (InterfaceInfo[]) {
329
+ { TYPE_USER_CREATABLE },
330
+ { }
331
+ }
332
+};
333
+
334
+static void register_types(void)
335
+{
336
+ type_register_static(&event_loop_base_info);
337
+}
338
+type_init(register_types);
339
diff --git a/iothread.c b/iothread.c
340
index XXXXXXX..XXXXXXX 100644
341
--- a/iothread.c
342
+++ b/iothread.c
343
@@ -XXX,XX +XXX,XX @@
344
#include "qemu/module.h"
345
#include "block/aio.h"
346
#include "block/block.h"
347
+#include "sysemu/event-loop-base.h"
348
#include "sysemu/iothread.h"
349
#include "qapi/error.h"
350
#include "qapi/qapi-commands-misc.h"
351
@@ -XXX,XX +XXX,XX @@ static void iothread_init_gcontext(IOThread *iothread)
352
iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
353
}
354
355
-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
356
+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
357
{
358
+ IOThread *iothread = IOTHREAD(base);
359
ERRP_GUARD();
360
361
+ if (!iothread->ctx) {
362
+ return;
363
+ }
364
+
365
aio_context_set_poll_params(iothread->ctx,
366
iothread->poll_max_ns,
367
iothread->poll_grow,
368
@@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
369
}
94
}
370
95
371
aio_context_set_aio_params(iothread->ctx,
96
- if (size > res->image_end_offset) {
372
- iothread->aio_max_batch,
97
- int64_t count;
373
+ iothread->parent_obj.aio_max_batch,
98
- count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
374
errp);
99
- fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
375
}
100
- fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
376
101
- size - res->image_end_offset);
377
-static void iothread_complete(UserCreatable *obj, Error **errp)
102
- res->leaks += count;
378
+
103
- if (fix & BDRV_FIX_LEAKS) {
379
+static void iothread_init(EventLoopBase *base, Error **errp)
104
- Error *local_err = NULL;
380
{
381
Error *local_error = NULL;
382
- IOThread *iothread = IOTHREAD(obj);
383
+ IOThread *iothread = IOTHREAD(base);
384
char *thread_name;
385
386
iothread->stopping = false;
387
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
388
*/
389
iothread_init_gcontext(iothread);
390
391
- iothread_set_aio_context_params(iothread, &local_error);
392
+ iothread_set_aio_context_params(base, &local_error);
393
if (local_error) {
394
error_propagate(errp, local_error);
395
aio_context_unref(iothread->ctx);
396
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
397
* to inherit.
398
*/
399
thread_name = g_strdup_printf("IO %s",
400
- object_get_canonical_path_component(OBJECT(obj)));
401
+ object_get_canonical_path_component(OBJECT(base)));
402
qemu_thread_create(&iothread->thread, thread_name, iothread_run,
403
iothread, QEMU_THREAD_JOINABLE);
404
g_free(thread_name);
405
@@ -XXX,XX +XXX,XX @@ static IOThreadParamInfo poll_grow_info = {
406
static IOThreadParamInfo poll_shrink_info = {
407
"poll-shrink", offsetof(IOThread, poll_shrink),
408
};
409
-static IOThreadParamInfo aio_max_batch_info = {
410
- "aio-max-batch", offsetof(IOThread, aio_max_batch),
411
-};
412
413
static void iothread_get_param(Object *obj, Visitor *v,
414
const char *name, IOThreadParamInfo *info, Error **errp)
415
@@ -XXX,XX +XXX,XX @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
416
}
417
}
418
419
-static void iothread_get_aio_param(Object *obj, Visitor *v,
420
- const char *name, void *opaque, Error **errp)
421
-{
422
- IOThreadParamInfo *info = opaque;
423
-
105
-
424
- iothread_get_param(obj, v, name, info, errp);
106
- /*
425
-}
107
- * In order to really repair the image, we must shrink it.
426
-
108
- * That means we have to pass exact=true.
427
-static void iothread_set_aio_param(Object *obj, Visitor *v,
109
- */
428
- const char *name, void *opaque, Error **errp)
110
- ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
429
-{
111
- PREALLOC_MODE_OFF, 0, &local_err);
430
- IOThread *iothread = IOTHREAD(obj);
112
- if (ret < 0) {
431
- IOThreadParamInfo *info = opaque;
113
- error_report_err(local_err);
432
-
114
- res->check_errors++;
433
- if (!iothread_set_param(obj, v, name, info, errp)) {
115
- goto out;
434
- return;
116
- }
117
- res->leaks_fixed += count;
118
- }
435
- }
119
- }
436
-
120
-
437
- if (iothread->ctx) {
121
out:
438
- aio_context_set_aio_params(iothread->ctx,
122
qemu_co_mutex_unlock(&s->lock);
439
- iothread->aio_max_batch,
123
440
- errp);
441
- }
442
-}
443
-
444
static void iothread_class_init(ObjectClass *klass, void *class_data)
445
{
446
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
447
- ucc->complete = iothread_complete;
448
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass);
449
+
450
+ bc->init = iothread_init;
451
+ bc->update_params = iothread_set_aio_context_params;
452
453
object_class_property_add(klass, "poll-max-ns", "int",
454
iothread_get_poll_param,
455
@@ -XXX,XX +XXX,XX @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
456
iothread_get_poll_param,
457
iothread_set_poll_param,
458
NULL, &poll_shrink_info);
459
- object_class_property_add(klass, "aio-max-batch", "int",
460
- iothread_get_aio_param,
461
- iothread_set_aio_param,
462
- NULL, &aio_max_batch_info);
463
}
464
465
static const TypeInfo iothread_info = {
466
.name = TYPE_IOTHREAD,
467
- .parent = TYPE_OBJECT,
468
+ .parent = TYPE_EVENT_LOOP_BASE,
469
.class_init = iothread_class_init,
470
.instance_size = sizeof(IOThread),
471
.instance_init = iothread_instance_init,
472
.instance_finalize = iothread_instance_finalize,
473
- .interfaces = (InterfaceInfo[]) {
474
- {TYPE_USER_CREATABLE},
475
- {}
476
- },
477
};
478
479
static void iothread_register_types(void)
480
@@ -XXX,XX +XXX,XX @@ static int query_one_iothread(Object *object, void *opaque)
481
info->poll_max_ns = iothread->poll_max_ns;
482
info->poll_grow = iothread->poll_grow;
483
info->poll_shrink = iothread->poll_shrink;
484
- info->aio_max_batch = iothread->aio_max_batch;
485
+ info->aio_max_batch = iothread->parent_obj.aio_max_batch;
486
487
QAPI_LIST_APPEND(*tail, info);
488
return 0;
489
--
124
--
490
2.35.1
125
2.40.1
diff view generated by jsdifflib
1
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
The thread pool regulates itself: when idle, it kills threads until
3
We will add more and more checks so we need a better code structure
4
empty, when in demand, it creates new threads until full. This behaviour
4
in parallels_co_check. Let each check be performed in a separate loop
5
doesn't play well with latency sensitive workloads where the price of
5
in a separate helper.
6
creating a new thread is too high. For example, when paired with qemu's
7
'-mlock', or using safety features like SafeStack, creating a new thread
8
has been measured to take multiple milliseconds.
9
6
10
In order to mitigate this let's introduce a new 'EventLoopBase'
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
11
property to set the thread pool size. The threads will be created during
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
12
the pool's initialization or upon updating the property's value, remain
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
13
available during its lifetime regardless of demand, and destroyed upon
10
Message-Id: <20230424093147.197643-11-alexander.ivanov@virtuozzo.com>
14
freeing it. A properly characterized workload will then be able to
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
15
configure the pool to avoid any latency spikes.
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
---
14
block/parallels.c | 52 +++++++++++++++++++++++++++--------------------
15
1 file changed, 30 insertions(+), 22 deletions(-)
16
16
17
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
17
diff --git a/block/parallels.c b/block/parallels.c
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Acked-by: Markus Armbruster <armbru@redhat.com>
20
Message-id: 20220425075723.20019-4-nsaenzju@redhat.com
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
---
23
qapi/qom.json | 10 +++++-
24
include/block/aio.h | 10 ++++++
25
include/block/thread-pool.h | 3 ++
26
include/sysemu/event-loop-base.h | 4 +++
27
event-loop-base.c | 23 +++++++++++++
28
iothread.c | 3 ++
29
util/aio-posix.c | 1 +
30
util/async.c | 20 ++++++++++++
31
util/main-loop.c | 9 ++++++
32
util/thread-pool.c | 55 +++++++++++++++++++++++++++++---
33
10 files changed, 133 insertions(+), 5 deletions(-)
34
35
diff --git a/qapi/qom.json b/qapi/qom.json
36
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
37
--- a/qapi/qom.json
19
--- a/block/parallels.c
38
+++ b/qapi/qom.json
20
+++ b/block/parallels.c
39
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
40
# 0 means that the engine will use its default.
22
return 0;
41
# (default: 0)
23
}
42
#
24
43
+# @thread-pool-min: minimum number of threads reserved in the thread pool
25
-static int coroutine_fn GRAPH_RDLOCK
44
+# (default:0)
26
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
45
+#
27
- BdrvCheckMode fix)
46
+# @thread-pool-max: maximum number of threads the thread pool can contain
28
+static void parallels_collect_statistics(BlockDriverState *bs,
47
+# (default:64)
29
+ BdrvCheckResult *res,
48
+#
30
+ BdrvCheckMode fix)
49
# Since: 7.1
31
{
50
##
32
BDRVParallelsState *s = bs->opaque;
51
{ 'struct': 'EventLoopBaseProperties',
33
- int64_t prev_off;
52
- 'data': { '*aio-max-batch': 'int' } }
34
- int ret;
53
+ 'data': { '*aio-max-batch': 'int',
35
+ int64_t off, prev_off;
54
+ '*thread-pool-min': 'int',
36
uint32_t i;
55
+ '*thread-pool-max': 'int' } }
37
56
38
- qemu_co_mutex_lock(&s->lock);
57
##
39
-
58
# @IothreadProperties:
40
- parallels_check_unclean(bs, res, fix);
59
diff --git a/include/block/aio.h b/include/block/aio.h
41
-
60
index XXXXXXX..XXXXXXX 100644
42
- ret = parallels_check_outside_image(bs, res, fix);
61
--- a/include/block/aio.h
43
- if (ret < 0) {
62
+++ b/include/block/aio.h
44
- goto out;
63
@@ -XXX,XX +XXX,XX @@ struct AioContext {
45
- }
64
QSLIST_HEAD(, Coroutine) scheduled_coroutines;
46
-
65
QEMUBH *co_schedule_bh;
47
- ret = parallels_check_leak(bs, res, fix);
66
48
- if (ret < 0) {
67
+ int thread_pool_min;
49
- goto out;
68
+ int thread_pool_max;
50
- }
69
/* Thread pool for performing work and receiving completion callbacks.
51
-
70
* Has its own locking.
52
res->bfi.total_clusters = s->bat_size;
71
*/
53
res->bfi.compressed_clusters = 0; /* compression is not supported */
72
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
54
73
void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
55
prev_off = 0;
74
Error **errp);
56
for (i = 0; i < s->bat_size; i++) {
75
57
- int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
76
+/**
58
+ off = bat2sect(s, i) << BDRV_SECTOR_BITS;
77
+ * aio_context_set_thread_pool_params:
59
/*
78
+ * @ctx: the aio context
60
* If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
79
+ * @min: min number of threads to have readily available in the thread pool
61
* fixed. Skip not allocated and out-of-image BAT entries.
80
+ * @max: max number of threads the thread pool can contain
62
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
81
+ */
63
continue;
82
+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
64
}
83
+ int64_t max, Error **errp);
65
84
#endif
66
- res->bfi.allocated_clusters++;
85
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
67
-
86
index XXXXXXX..XXXXXXX 100644
68
if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
87
--- a/include/block/thread-pool.h
69
res->bfi.fragmented_clusters++;
88
+++ b/include/block/thread-pool.h
70
}
89
@@ -XXX,XX +XXX,XX @@
71
prev_off = off;
90
72
+ res->bfi.allocated_clusters++;
91
#include "block/block.h"
73
}
92
93
+#define THREAD_POOL_MAX_THREADS_DEFAULT 64
94
+
95
typedef int ThreadPoolFunc(void *opaque);
96
97
typedef struct ThreadPool ThreadPool;
98
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
99
int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
100
ThreadPoolFunc *func, void *arg);
101
void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
102
+void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
103
104
#endif
105
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/include/sysemu/event-loop-base.h
108
+++ b/include/sysemu/event-loop-base.h
109
@@ -XXX,XX +XXX,XX @@ struct EventLoopBase {
110
111
/* AioContext AIO engine parameters */
112
int64_t aio_max_batch;
113
+
114
+ /* AioContext thread pool parameters */
115
+ int64_t thread_pool_min;
116
+ int64_t thread_pool_max;
117
};
118
#endif
119
diff --git a/event-loop-base.c b/event-loop-base.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/event-loop-base.c
122
+++ b/event-loop-base.c
123
@@ -XXX,XX +XXX,XX @@
124
#include "qemu/osdep.h"
125
#include "qom/object_interfaces.h"
126
#include "qapi/error.h"
127
+#include "block/thread-pool.h"
128
#include "sysemu/event-loop-base.h"
129
130
typedef struct {
131
@@ -XXX,XX +XXX,XX @@ typedef struct {
132
ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
133
} EventLoopBaseParamInfo;
134
135
+static void event_loop_base_instance_init(Object *obj)
136
+{
137
+ EventLoopBase *base = EVENT_LOOP_BASE(obj);
138
+
139
+ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
140
+}
74
+}
141
+
75
+
142
static EventLoopBaseParamInfo aio_max_batch_info = {
76
+static int coroutine_fn GRAPH_RDLOCK
143
"aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
77
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
144
};
78
+ BdrvCheckMode fix)
145
+static EventLoopBaseParamInfo thread_pool_min_info = {
79
+{
146
+ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min),
80
+ BDRVParallelsState *s = bs->opaque;
147
+};
81
+ int ret;
148
+static EventLoopBaseParamInfo thread_pool_max_info = {
149
+ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max),
150
+};
151
152
static void event_loop_base_get_param(Object *obj, Visitor *v,
153
const char *name, void *opaque, Error **errp)
154
@@ -XXX,XX +XXX,XX @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
155
event_loop_base_get_param,
156
event_loop_base_set_param,
157
NULL, &aio_max_batch_info);
158
+ object_class_property_add(klass, "thread-pool-min", "int",
159
+ event_loop_base_get_param,
160
+ event_loop_base_set_param,
161
+ NULL, &thread_pool_min_info);
162
+ object_class_property_add(klass, "thread-pool-max", "int",
163
+ event_loop_base_get_param,
164
+ event_loop_base_set_param,
165
+ NULL, &thread_pool_max_info);
166
}
167
168
static const TypeInfo event_loop_base_info = {
169
.name = TYPE_EVENT_LOOP_BASE,
170
.parent = TYPE_OBJECT,
171
.instance_size = sizeof(EventLoopBase),
172
+ .instance_init = event_loop_base_instance_init,
173
.class_size = sizeof(EventLoopBaseClass),
174
.class_init = event_loop_base_class_init,
175
.abstract = true,
176
diff --git a/iothread.c b/iothread.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/iothread.c
179
+++ b/iothread.c
180
@@ -XXX,XX +XXX,XX @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
181
aio_context_set_aio_params(iothread->ctx,
182
iothread->parent_obj.aio_max_batch,
183
errp);
184
+
82
+
185
+ aio_context_set_thread_pool_params(iothread->ctx, base->thread_pool_min,
83
+ qemu_co_mutex_lock(&s->lock);
186
+ base->thread_pool_max, errp);
187
}
188
189
190
diff --git a/util/aio-posix.c b/util/aio-posix.c
191
index XXXXXXX..XXXXXXX 100644
192
--- a/util/aio-posix.c
193
+++ b/util/aio-posix.c
194
@@ -XXX,XX +XXX,XX @@
195
196
#include "qemu/osdep.h"
197
#include "block/block.h"
198
+#include "block/thread-pool.h"
199
#include "qemu/main-loop.h"
200
#include "qemu/rcu.h"
201
#include "qemu/rcu_queue.h"
202
diff --git a/util/async.c b/util/async.c
203
index XXXXXXX..XXXXXXX 100644
204
--- a/util/async.c
205
+++ b/util/async.c
206
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
207
208
ctx->aio_max_batch = 0;
209
210
+ ctx->thread_pool_min = 0;
211
+ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
212
+
84
+
213
return ctx;
85
+ parallels_check_unclean(bs, res, fix);
214
fail:
215
g_source_destroy(&ctx->source);
216
@@ -XXX,XX +XXX,XX @@ void qemu_set_current_aio_context(AioContext *ctx)
217
assert(!get_my_aiocontext());
218
set_my_aiocontext(ctx);
219
}
220
+
86
+
221
+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
87
+ ret = parallels_check_outside_image(bs, res, fix);
222
+ int64_t max, Error **errp)
88
+ if (ret < 0) {
223
+{
89
+ goto out;
224
+
225
+ if (min > max || !max || min > INT_MAX || max > INT_MAX) {
226
+ error_setg(errp, "bad thread-pool-min/thread-pool-max values");
227
+ return;
228
+ }
90
+ }
229
+
91
+
230
+ ctx->thread_pool_min = min;
92
+ ret = parallels_check_leak(bs, res, fix);
231
+ ctx->thread_pool_max = max;
93
+ if (ret < 0) {
232
+
94
+ goto out;
233
+ if (ctx->thread_pool) {
234
+ thread_pool_update_params(ctx->thread_pool, ctx);
235
+ }
236
+}
237
diff --git a/util/main-loop.c b/util/main-loop.c
238
index XXXXXXX..XXXXXXX 100644
239
--- a/util/main-loop.c
240
+++ b/util/main-loop.c
241
@@ -XXX,XX +XXX,XX @@
242
#include "sysemu/replay.h"
243
#include "qemu/main-loop.h"
244
#include "block/aio.h"
245
+#include "block/thread-pool.h"
246
#include "qemu/error-report.h"
247
#include "qemu/queue.h"
248
#include "qemu/compiler.h"
249
@@ -XXX,XX +XXX,XX @@ int qemu_init_main_loop(Error **errp)
250
251
static void main_loop_update_params(EventLoopBase *base, Error **errp)
252
{
253
+ ERRP_GUARD();
254
+
255
if (!qemu_aio_context) {
256
error_setg(errp, "qemu aio context not ready");
257
return;
258
}
259
260
aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
261
+ if (*errp) {
262
+ return;
263
+ }
95
+ }
264
+
96
+
265
+ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min,
97
+ parallels_collect_statistics(bs, res, fix);
266
+ base->thread_pool_max, errp);
98
267
}
99
out:
268
100
qemu_co_mutex_unlock(&s->lock);
269
MainLoop *mloop;
270
diff --git a/util/thread-pool.c b/util/thread-pool.c
271
index XXXXXXX..XXXXXXX 100644
272
--- a/util/thread-pool.c
273
+++ b/util/thread-pool.c
274
@@ -XXX,XX +XXX,XX @@ struct ThreadPool {
275
QemuMutex lock;
276
QemuCond worker_stopped;
277
QemuSemaphore sem;
278
- int max_threads;
279
QEMUBH *new_thread_bh;
280
281
/* The following variables are only accessed from one AioContext. */
282
@@ -XXX,XX +XXX,XX @@ struct ThreadPool {
283
int new_threads; /* backlog of threads we need to create */
284
int pending_threads; /* threads created but not running yet */
285
bool stopping;
286
+ int min_threads;
287
+ int max_threads;
288
};
289
290
+static inline bool back_to_sleep(ThreadPool *pool, int ret)
291
+{
292
+ /*
293
+ * The semaphore timed out, we should exit the loop except when:
294
+ * - There is work to do, we raced with the signal.
295
+ * - The max threads threshold just changed, we raced with the signal.
296
+ * - The thread pool forces a minimum number of readily available threads.
297
+ */
298
+ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) ||
299
+ pool->cur_threads > pool->max_threads ||
300
+ pool->cur_threads <= pool->min_threads)) {
301
+ return true;
302
+ }
303
+
304
+ return false;
305
+}
306
+
307
static void *worker_thread(void *opaque)
308
{
309
ThreadPool *pool = opaque;
310
@@ -XXX,XX +XXX,XX @@ static void *worker_thread(void *opaque)
311
ret = qemu_sem_timedwait(&pool->sem, 10000);
312
qemu_mutex_lock(&pool->lock);
313
pool->idle_threads--;
314
- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list));
315
- if (ret == -1 || pool->stopping) {
316
+ } while (back_to_sleep(pool, ret));
317
+ if (ret == -1 || pool->stopping ||
318
+ pool->cur_threads > pool->max_threads) {
319
break;
320
}
321
322
@@ -XXX,XX +XXX,XX @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
323
thread_pool_submit_aio(pool, func, arg, NULL, NULL);
324
}
325
326
+void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
327
+{
328
+ qemu_mutex_lock(&pool->lock);
329
+
330
+ pool->min_threads = ctx->thread_pool_min;
331
+ pool->max_threads = ctx->thread_pool_max;
332
+
333
+ /*
334
+ * We either have to:
335
+ * - Increase the number of available threads until over the min_threads
336
+ * threshold.
337
+ * - Decrease the number of available threads until under the max_threads
338
+ * threshold.
339
+ * - Do nothing. The current number of threads falls in between the min and
340
+ * max thresholds. We'll let the pool manage itself.
341
+ */
342
+ for (int i = pool->cur_threads; i < pool->min_threads; i++) {
343
+ spawn_thread(pool);
344
+ }
345
+
346
+ for (int i = pool->cur_threads; i > pool->max_threads; i--) {
347
+ qemu_sem_post(&pool->sem);
348
+ }
349
+
350
+ qemu_mutex_unlock(&pool->lock);
351
+}
352
+
353
static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
354
{
355
if (!ctx) {
356
@@ -XXX,XX +XXX,XX @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
357
qemu_mutex_init(&pool->lock);
358
qemu_cond_init(&pool->worker_stopped);
359
qemu_sem_init(&pool->sem, 0);
360
- pool->max_threads = 64;
361
pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
362
363
QLIST_INIT(&pool->head);
364
QTAILQ_INIT(&pool->request_list);
365
+
366
+ thread_pool_update_params(pool, ctx);
367
}
368
369
ThreadPool *thread_pool_new(AioContext *ctx)
370
--
101
--
371
2.35.1
102
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Replace the way we use mutex in parallels_co_check() for simpler
4
and less error prone code.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Message-Id: <20230424093147.197643-12-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
12
block/parallels.c | 33 ++++++++++++++-------------------
13
1 file changed, 14 insertions(+), 19 deletions(-)
14
15
diff --git a/block/parallels.c b/block/parallels.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/parallels.c
18
+++ b/block/parallels.c
19
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
20
BDRVParallelsState *s = bs->opaque;
21
int ret;
22
23
- qemu_co_mutex_lock(&s->lock);
24
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
25
+ parallels_check_unclean(bs, res, fix);
26
27
- parallels_check_unclean(bs, res, fix);
28
+ ret = parallels_check_outside_image(bs, res, fix);
29
+ if (ret < 0) {
30
+ return ret;
31
+ }
32
33
- ret = parallels_check_outside_image(bs, res, fix);
34
- if (ret < 0) {
35
- goto out;
36
- }
37
+ ret = parallels_check_leak(bs, res, fix);
38
+ if (ret < 0) {
39
+ return ret;
40
+ }
41
42
- ret = parallels_check_leak(bs, res, fix);
43
- if (ret < 0) {
44
- goto out;
45
+ parallels_collect_statistics(bs, res, fix);
46
}
47
48
- parallels_collect_statistics(bs, res, fix);
49
-
50
-out:
51
- qemu_co_mutex_unlock(&s->lock);
52
-
53
- if (ret == 0) {
54
- ret = bdrv_co_flush(bs);
55
- if (ret < 0) {
56
- res->check_errors++;
57
- }
58
+ ret = bdrv_co_flush(bs);
59
+ if (ret < 0) {
60
+ res->check_errors++;
61
}
62
63
return ret;
64
--
65
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
All the offsets in the BAT must be lower than the file size.
4
Fix the check condition for correct check.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Message-Id: <20230424093147.197643-13-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
12
block/parallels.c | 2 +-
13
1 file changed, 1 insertion(+), 1 deletion(-)
14
15
diff --git a/block/parallels.c b/block/parallels.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/parallels.c
18
+++ b/block/parallels.c
19
@@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
20
high_off = 0;
21
for (i = 0; i < s->bat_size; i++) {
22
off = bat2sect(s, i) << BDRV_SECTOR_BITS;
23
- if (off > size) {
24
+ if (off + s->cluster_size > size) {
25
fprintf(stderr, "%s cluster %u is outside image\n",
26
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
27
res->corruptions++;
28
--
29
2.40.1
diff view generated by jsdifflib
New patch
1
1
From: Jean-Louis Dupond <jean-louis@dupond.be>
2
3
When we for example have a sparse qcow2 image and discard: unmap is enabled,
4
there can be a lot of fragmentation in the image after some time. Especially on VMs
5
that do a lot of writes/deletes.
6
This causes the qcow2 image to grow even over 110% of its virtual size,
7
because the free gaps in the image get too small to allocate new
8
continuous clusters. So it allocates new space at the end of the image.
9
10
Disabling discard is not an option, as discard is needed to keep the
11
incremental backup size as low as possible. Without discard, the
12
incremental backups would become large, as qemu thinks it's just dirty
13
blocks but it doesn't know the blocks are unneeded.
14
So we need to avoid fragmentation but also 'empty' the unneeded blocks in
15
the image to have a small incremental backup.
16
17
In addition, we also want to send the discards further down the stack, so
18
the underlying blocks are still discarded.
19
20
Therefore we introduce a new qcow2 option "discard-no-unref".
21
When setting this option to true, discards will no longer have the qcow2
22
driver relinquish cluster allocations. Other than that, the request is
23
handled as normal: All clusters in range are marked as zero, and, if
24
pass-discard-request is true, it is passed further down the stack.
25
The only difference is that the now-zero clusters are preallocated
26
instead of being unallocated.
27
This will avoid fragmentation on the qcow2 image.
28
29
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1621
30
Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
31
Message-Id: <20230605084523.34134-2-jean-louis@dupond.be>
32
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
33
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
34
---
35
qapi/block-core.json | 12 ++++++++++++
36
block/qcow2.h | 3 +++
37
block/qcow2-cluster.c | 32 ++++++++++++++++++++++++++++----
38
block/qcow2.c | 18 ++++++++++++++++++
39
qemu-options.hx | 12 ++++++++++++
40
5 files changed, 73 insertions(+), 4 deletions(-)
41
42
diff --git a/qapi/block-core.json b/qapi/block-core.json
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qapi/block-core.json
45
+++ b/qapi/block-core.json
46
@@ -XXX,XX +XXX,XX @@
47
# @pass-discard-other: whether discard requests for the data source
48
# should be issued on other occasions where a cluster gets freed
49
#
50
+# @discard-no-unref: when enabled, discards from the guest will not cause
51
+# cluster allocations to be relinquished. This prevents qcow2 fragmentation
52
+# that would be caused by such discards. Besides potential
53
+# performance degradation, such fragmentation can lead to increased
54
+# allocation of clusters past the end of the image file,
55
+# resulting in image files whose file length can grow much larger
56
+# than their guest disk size would suggest.
57
+# If image file length is of concern (e.g. when storing qcow2
58
+# images directly on block devices), you should consider enabling
59
+# this option. (since 8.1)
60
+#
61
# @overlap-check: which overlap checks to perform for writes to the
62
# image, defaults to 'cached' (since 2.2)
63
#
64
@@ -XXX,XX +XXX,XX @@
65
'*pass-discard-request': 'bool',
66
'*pass-discard-snapshot': 'bool',
67
'*pass-discard-other': 'bool',
68
+ '*discard-no-unref': 'bool',
69
'*overlap-check': 'Qcow2OverlapChecks',
70
'*cache-size': 'int',
71
'*l2-cache-size': 'int',
72
diff --git a/block/qcow2.h b/block/qcow2.h
73
index XXXXXXX..XXXXXXX 100644
74
--- a/block/qcow2.h
75
+++ b/block/qcow2.h
76
@@ -XXX,XX +XXX,XX @@
77
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
78
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
79
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
80
+#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref"
81
#define QCOW2_OPT_OVERLAP "overlap-check"
82
#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
83
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
84
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State {
85
86
bool discard_passthrough[QCOW2_DISCARD_MAX];
87
88
+ bool discard_no_unref;
89
+
90
int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
91
bool signaled_corruption;
92
93
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/block/qcow2-cluster.c
96
+++ b/block/qcow2-cluster.c
97
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
98
uint64_t new_l2_bitmap = old_l2_bitmap;
99
QCow2ClusterType cluster_type =
100
qcow2_get_cluster_type(bs, old_l2_entry);
101
+ bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
102
+ !full_discard &&
103
+ (s->discard_no_unref &&
104
+ type == QCOW2_DISCARD_REQUEST);
105
106
/*
107
* If full_discard is true, the cluster should not read back as zeroes,
108
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
109
new_l2_entry = new_l2_bitmap = 0;
110
} else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
111
if (has_subclusters(s)) {
112
- new_l2_entry = 0;
113
+ if (keep_reference) {
114
+ new_l2_entry = old_l2_entry;
115
+ } else {
116
+ new_l2_entry = 0;
117
+ }
118
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
119
} else {
120
- new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
121
+ if (s->qcow_version >= 3) {
122
+ if (keep_reference) {
123
+ new_l2_entry |= QCOW_OFLAG_ZERO;
124
+ } else {
125
+ new_l2_entry = QCOW_OFLAG_ZERO;
126
+ }
127
+ } else {
128
+ new_l2_entry = 0;
129
+ }
130
}
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
134
if (has_subclusters(s)) {
135
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
136
}
137
- /* Then decrease the refcount */
138
- qcow2_free_any_cluster(bs, old_l2_entry, type);
139
+ if (!keep_reference) {
140
+ /* Then decrease the refcount */
141
+ qcow2_free_any_cluster(bs, old_l2_entry, type);
142
+ } else if (s->discard_passthrough[type] &&
143
+ (cluster_type == QCOW2_CLUSTER_NORMAL ||
144
+ cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
145
+ /* If we keep the reference, pass on the discard still */
146
+ bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
147
+ s->cluster_size);
148
+ }
149
}
150
151
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
152
diff --git a/block/qcow2.c b/block/qcow2.c
153
index XXXXXXX..XXXXXXX 100644
154
--- a/block/qcow2.c
155
+++ b/block/qcow2.c
156
@@ -XXX,XX +XXX,XX @@ static const char *const mutable_opts[] = {
157
QCOW2_OPT_DISCARD_REQUEST,
158
QCOW2_OPT_DISCARD_SNAPSHOT,
159
QCOW2_OPT_DISCARD_OTHER,
160
+ QCOW2_OPT_DISCARD_NO_UNREF,
161
QCOW2_OPT_OVERLAP,
162
QCOW2_OPT_OVERLAP_TEMPLATE,
163
QCOW2_OPT_OVERLAP_MAIN_HEADER,
164
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qcow2_runtime_opts = {
165
.type = QEMU_OPT_BOOL,
166
.help = "Generate discard requests when other clusters are freed",
167
},
168
+ {
169
+ .name = QCOW2_OPT_DISCARD_NO_UNREF,
170
+ .type = QEMU_OPT_BOOL,
171
+ .help = "Do not unreference discarded clusters",
172
+ },
173
{
174
.name = QCOW2_OPT_OVERLAP,
175
.type = QEMU_OPT_STRING,
176
@@ -XXX,XX +XXX,XX @@ typedef struct Qcow2ReopenState {
177
bool use_lazy_refcounts;
178
int overlap_check;
179
bool discard_passthrough[QCOW2_DISCARD_MAX];
180
+ bool discard_no_unref;
181
uint64_t cache_clean_interval;
182
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
183
} Qcow2ReopenState;
184
@@ -XXX,XX +XXX,XX @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
185
r->discard_passthrough[QCOW2_DISCARD_OTHER] =
186
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
187
188
+ r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
189
+ false);
190
+ if (r->discard_no_unref && s->qcow_version < 3) {
191
+ error_setg(errp,
192
+ "discard-no-unref is only supported since qcow2 version 3");
193
+ ret = -EINVAL;
194
+ goto fail;
195
+ }
196
+
197
switch (s->crypt_method_header) {
198
case QCOW_CRYPT_NONE:
199
if (encryptfmt) {
200
@@ -XXX,XX +XXX,XX @@ static void qcow2_update_options_commit(BlockDriverState *bs,
201
s->discard_passthrough[i] = r->discard_passthrough[i];
202
}
203
204
+ s->discard_no_unref = r->discard_no_unref;
205
+
206
if (s->cache_clean_interval != r->cache_clean_interval) {
207
cache_clean_timer_del(bs);
208
s->cache_clean_interval = r->cache_clean_interval;
209
diff --git a/qemu-options.hx b/qemu-options.hx
210
index XXXXXXX..XXXXXXX 100644
211
--- a/qemu-options.hx
212
+++ b/qemu-options.hx
213
@@ -XXX,XX +XXX,XX @@ SRST
214
issued on other occasions where a cluster gets freed
215
(on/off; default: off)
216
217
+ ``discard-no-unref``
218
+ When enabled, discards from the guest will not cause cluster
219
+ allocations to be relinquished. This prevents qcow2 fragmentation
220
+ that would be caused by such discards. Besides potential
221
+ performance degradation, such fragmentation can lead to increased
222
+ allocation of clusters past the end of the image file,
223
+ resulting in image files whose file length can grow much larger
224
+ than their guest disk size would suggest.
225
+ If image file length is of concern (e.g. when storing qcow2
226
+ images directly on block devices), you should consider enabling
227
+ this option.
228
+
229
``overlap-check``
230
Which overlap checks to perform for writes to the image
231
(none/constant/cached/all; default: cached). For details or
232
--
233
2.40.1
diff view generated by jsdifflib