The following changes since commit 848a6caa88b9f082c89c9b41afa975761262981d:

  Merge tag 'migration-20230602-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-06-02 17:33:29 -0700)

are available in the Git repository at:

  https://gitlab.com/hreitz/qemu.git tags/pull-block-2023-06-05

for you to fetch changes up to 42a2890a76f4783cd1c212f27856edcf2b5e8a75:

  qcow2: add discard-no-unref option (2023-06-05 13:15:42 +0200)

----------------------------------------------------------------
Block patches

- Fix padding of unaligned vectored requests to match the host alignment
  for vectors with 1023 or 1024 buffers
- Refactor and fix bugs in parallels's image check functionality
- Add an option to the qcow2 driver to retain (qcow2-level) allocations
  on discard requests from the guest (while still forwarding the discard
  to the lower level and marking the range as zero)

----------------------------------------------------------------
Alexander Ivanov (12):
      parallels: Out of image offset in BAT leads to image inflation
      parallels: Fix high_off calculation in parallels_co_check()
      parallels: Fix image_end_offset and data_end after out-of-image check
      parallels: create parallels_set_bat_entry_helper() to assign BAT value
      parallels: Use generic infrastructure for BAT writing in
        parallels_co_check()
      parallels: Move check of unclean image to a separate function
      parallels: Move check of cluster outside image to a separate function
      parallels: Fix statistics calculation
      parallels: Move check of leaks to a separate function
      parallels: Move statistic collection to a separate function
      parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD
      parallels: Incorrect condition in out-of-image check

Hanna Czenczek (4):
      util/iov: Make qiov_slice() public
      block: Collapse padded I/O vecs exceeding IOV_MAX
      util/iov: Remove qemu_iovec_init_extended()
      iotests/iov-padding: New test

Jean-Louis Dupond (1):
      qcow2: add discard-no-unref option

 qapi/block-core.json                     |  12 ++
 block/qcow2.h                            |   3 +
 include/qemu/iov.h                       |   8 +-
 block/io.c                               | 166 ++++++++++++++++++--
 block/parallels.c                        | 190 ++++++++++++++++-------
 block/qcow2-cluster.c                    |  32 +++-
 block/qcow2.c                            |  18 +++
 util/iov.c                               |  89 ++---------
 qemu-options.hx                          |  12 ++
 tests/qemu-iotests/tests/iov-padding     |  85 ++++++++++
 tests/qemu-iotests/tests/iov-padding.out |  59 +++++++
 11 files changed, 523 insertions(+), 151 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/iov-padding
 create mode 100644 tests/qemu-iotests/tests/iov-padding.out

--
2.40.1

We want to inline qemu_iovec_init_extended() in block/io.c for padding
requests, and having access to qiov_slice() is useful for this. As a
public function, it is renamed to qemu_iovec_slice().

(We will need to count the number of I/O vector elements of a slice
there, and then later process this slice. Without qiov_slice(), we
would need to call qemu_iovec_subvec_niov(), and all further
IOV-processing functions may need to skip prefixing elements to
accommodate for a qiov_offset. Because qemu_iovec_subvec_niov()
internally calls qiov_slice(), we can just have the block/io.c code call
qiov_slice() itself, thus get the number of elements, and also create an
iovec array with the superfluous prefixing elements stripped, so the
following processing functions no longer need to skip them.)

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-2-hreitz@redhat.com>
---
 include/qemu/iov.h |  3 +++
 util/iov.c         | 14 +++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

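[Editor's note] For illustration only: a minimal sketch of how a caller might use the function made public above. The caller process_subrange() is hypothetical and not part of the series; it only follows the prototype and the @head/@tail/@niov semantics documented in the patch.

/* Hypothetical caller, for illustration only -- not part of this patch. */
#include "qemu/osdep.h"
#include "qemu/iov.h"

static void process_subrange(QEMUIOVector *qiov, size_t offset, size_t len)
{
    size_t head, tail;
    int niov;
    struct iovec *iov;

    /*
     * iov points into qiov's iovec array and covers [offset, offset + len):
     * the first element is entered at byte offset @head, and the last
     * element contains @tail extra bytes beyond the requested range.
     */
    iov = qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);

    /* hand (iov, niov, head, len) to code that walks raw iovec arrays */
    (void)iov; (void)niov; (void)head; (void)tail;
}
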
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/iov.h
+++ b/include/qemu/iov.h
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended(
void *tail_buf, size_t tail_len);
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
size_t offset, size_t len);
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
+ size_t offset, size_t len,
+ size_t *head, size_t *tail, int *niov);
int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len);
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
void qemu_iovec_concat(QEMUIOVector *dst,
diff --git a/util/iov.c b/util/iov.c
index XXXXXXX..XXXXXXX 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -XXX,XX +XXX,XX @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
}

/*
- * qiov_slice
+ * qemu_iovec_slice
*
* Find subarray of iovec's, containing requested range. @head would
* be offset in first iov (returned by the function), @tail would be
* count of extra bytes in last iovec (returned iov + @niov - 1).
*/
-static struct iovec *qiov_slice(QEMUIOVector *qiov,
- size_t offset, size_t len,
- size_t *head, size_t *tail, int *niov)
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
+ size_t offset, size_t len,
+ size_t *head, size_t *tail, int *niov)
{
struct iovec *iov, *end_iov;

@@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
size_t head, tail;
int niov;

- qiov_slice(qiov, offset, len, &head, &tail, &niov);
+ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);

return niov;
}
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended(
}

if (mid_len) {
- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
- &mid_head, &mid_tail, &mid_niov);
+ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len,
+ &mid_head, &mid_tail, &mid_niov);
}

total_niov = !!head_len + mid_niov + !!tail_len;
--
2.40.1

When processing vectored guest requests that are not aligned to the
storage request alignment, we pad them by adding head and/or tail
buffers for a read-modify-write cycle.

The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
with this padding, the vector can exceed that limit. As of
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
limit, instead returning an error to the guest.

To the guest, this appears as a random I/O error. We should not return
an I/O error to the guest when it issued a perfectly valid request.

Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
longer than IOV_MAX, which generally seems to work (because the guest
assumes a smaller alignment than we really have, file-posix's
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
so emulate the request, so that the IOV_MAX does not matter). However,
that does not seem exactly great.

I see two ways to fix this problem:
1. We split such long requests into two requests.
2. We join some elements of the vector into new buffers to make it
   shorter.

I am wary of (1), because it seems like it may have unintended side
effects.

(2) on the other hand seems relatively simple to implement, with
hopefully few side effects, so this patch does that.

To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
is effectively replaced by the new function bdrv_create_padded_qiov(),
which not only wraps the request IOV with padding head/tail, but also
ensures that the resulting vector will not have more than IOV_MAX
elements. Putting that functionality into qemu_iovec_init_extended() is
infeasible because it requires allocating a bounce buffer; doing so
would require many more parameters (buffer alignment, how to initialize
the buffer, and out parameters like the buffer, its length, and the
original elements), which is not reasonable.

Conversely, it is not difficult to move qemu_iovec_init_extended()'s
functionality into bdrv_create_padded_qiov() by using public
qemu_iovec_* functions, so that is what this patch does.

Because bdrv_pad_request() was the only "serious" user of
qemu_iovec_init_extended(), the next patch will remove the latter
function, so the functionality is not implemented twice.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 151 insertions(+), 15 deletions(-)

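[Editor's note] The counting at the heart of the patch below, restated as a stand-alone sketch with assumed names, for illustration only: only the padding head and tail can push the element count past IOV_MAX, so at most surplus + 1 of the guest's elements ever need to be merged into one bounce buffer.

/* Illustrative sketch only -- mirrors the arithmetic used by the patch below. */
#include <stdbool.h>

enum { IOV_MAX_SKETCH = 1024 };   /* stand-in for the system IOV_MAX */

/*
 * Number of leading guest iovec elements that must be collapsed into a
 * single bounce buffer so that head + guest vector + tail fits IOV_MAX.
 */
static int collapse_count_sketch(int guest_niov, bool has_head, bool has_tail)
{
    int padded_niov = (has_head ? 1 : 0) + guest_niov + (has_tail ? 1 : 0);
    int surplus;

    if (padded_niov <= IOV_MAX_SKETCH) {
        return 0;                               /* no merging needed */
    }
    surplus = padded_niov - IOV_MAX_SKETCH;     /* at most 2: head and tail */
    return surplus + 1;                         /* merging N elements removes N - 1 */
}
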
10
diff --git a/block/io.c b/block/io.c
58
diff --git a/block/io.c b/block/io.c
11
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
12
--- a/block/io.c
60
--- a/block/io.c
13
+++ b/block/io.c
61
+++ b/block/io.c
14
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
62
@@ -XXX,XX +XXX,XX @@ out:
15
63
* @merge_reads is true for small requests,
16
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
64
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
65
* head and tail exist but @buf_len == align and @tail_buf == @buf.
66
+ *
67
+ * @write is true for write requests, false for read requests.
68
+ *
69
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
70
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
71
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
72
+ * I/O vector elements so for read requests, the data can be copied back after
73
+ * the read is done.
74
*/
75
typedef struct BdrvRequestPadding {
76
uint8_t *buf;
77
@@ -XXX,XX +XXX,XX @@ typedef struct BdrvRequestPadding {
78
size_t head;
79
size_t tail;
80
bool merge_reads;
81
+ bool write;
82
QEMUIOVector local_qiov;
83
+
84
+ uint8_t *collapse_bounce_buf;
85
+ size_t collapse_len;
86
+ QEMUIOVector pre_collapse_qiov;
87
} BdrvRequestPadding;
88
89
static bool bdrv_init_padding(BlockDriverState *bs,
90
int64_t offset, int64_t bytes,
91
+ bool write,
92
BdrvRequestPadding *pad)
17
{
93
{
18
+ dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
94
int64_t align = bs->bl.request_alignment;
19
+ src->pdiscard_alignment);
95
@@ -XXX,XX +XXX,XX @@ static bool bdrv_init_padding(BlockDriverState *bs,
20
dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
96
pad->tail_buf = pad->buf + pad->buf_len - align;
21
dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
97
}
22
dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
98
99
+ pad->write = write;
100
+
101
return true;
102
}
103
104
@@ -XXX,XX +XXX,XX @@ zero_mem:
105
return 0;
106
}
107
108
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
109
+/**
110
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
111
+ */
112
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
113
{
114
+ if (pad->collapse_bounce_buf) {
115
+ if (!pad->write) {
116
+ /*
117
+ * If padding required elements in the vector to be collapsed into a
118
+ * bounce buffer, copy the bounce buffer content back
119
+ */
120
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
121
+ pad->collapse_bounce_buf, pad->collapse_len);
122
+ }
123
+ qemu_vfree(pad->collapse_bounce_buf);
124
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
125
+ }
126
if (pad->buf) {
127
qemu_vfree(pad->buf);
128
qemu_iovec_destroy(&pad->local_qiov);
129
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
130
memset(pad, 0, sizeof(*pad));
131
}
132
133
+/*
134
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
135
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
136
+ *
137
+ * To ensure this, when necessary, the first two or three elements of @iov are
138
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
139
+ * bounce buffer in pad->local_qiov.
140
+ *
141
+ * After performing a read request, the data from the bounce buffer must be
142
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
143
+ */
144
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
145
+ BdrvRequestPadding *pad,
146
+ struct iovec *iov, int niov,
147
+ size_t iov_offset, size_t bytes)
148
+{
149
+ int padded_niov, surplus_count, collapse_count;
150
+
151
+ /* Assert this invariant */
152
+ assert(niov <= IOV_MAX);
153
+
154
+ /*
155
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
156
+ * to the guest is not ideal, but there is little else we can do. At least
157
+ * this will practically never happen on 64-bit systems.
158
+ */
159
+ if (SIZE_MAX - pad->head < bytes ||
160
+ SIZE_MAX - pad->head - bytes < pad->tail)
161
+ {
162
+ return -EINVAL;
163
+ }
164
+
165
+ /* Length of the resulting IOV if we just concatenated everything */
166
+ padded_niov = !!pad->head + niov + !!pad->tail;
167
+
168
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
169
+
170
+ if (pad->head) {
171
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
172
+ }
173
+
174
+ /*
175
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
176
+ * Instead, merge the first two or three elements of @iov to reduce the
177
+ * number of vector elements as necessary.
178
+ */
179
+ if (padded_niov > IOV_MAX) {
180
+ /*
181
+ * Only head and tail can have lead to the number of entries exceeding
182
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
183
+ * to reduce the number of elements by `surplus_count`, so we merge that
184
+ * many elements plus one into one element.
185
+ */
186
+ surplus_count = padded_niov - IOV_MAX;
187
+ assert(surplus_count <= !!pad->head + !!pad->tail);
188
+ collapse_count = surplus_count + 1;
189
+
190
+ /*
191
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
192
+ * advance `iov` (and associated variables) by those elements.
193
+ */
194
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
195
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
196
+ collapse_count, iov_offset, SIZE_MAX);
197
+ iov += collapse_count;
198
+ iov_offset = 0;
199
+ niov -= collapse_count;
200
+ bytes -= pad->pre_collapse_qiov.size;
201
+
202
+ /*
203
+ * Construct the bounce buffer to match the length of the to-collapse
204
+ * vector elements, and for write requests, initialize it with the data
205
+ * from those elements. Then add it to `pad->local_qiov`.
206
+ */
207
+ pad->collapse_len = pad->pre_collapse_qiov.size;
208
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
209
+ if (pad->write) {
210
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
211
+ pad->collapse_bounce_buf, pad->collapse_len);
212
+ }
213
+ qemu_iovec_add(&pad->local_qiov,
214
+ pad->collapse_bounce_buf, pad->collapse_len);
215
+ }
216
+
217
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
218
+
219
+ if (pad->tail) {
220
+ qemu_iovec_add(&pad->local_qiov,
221
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
222
+ }
223
+
224
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
225
+ return 0;
226
+}
227
+
228
/*
229
* bdrv_pad_request
230
*
231
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
232
* read of padding, bdrv_padding_rmw_read() should be called separately if
233
* needed.
234
*
235
+ * @write is true for write requests, false for read requests.
236
+ *
237
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
238
* - on function start they represent original request
239
* - on failure or when padding is not needed they are unchanged
240
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
241
static int bdrv_pad_request(BlockDriverState *bs,
242
QEMUIOVector **qiov, size_t *qiov_offset,
243
int64_t *offset, int64_t *bytes,
244
+ bool write,
245
BdrvRequestPadding *pad, bool *padded,
246
BdrvRequestFlags *flags)
247
{
248
int ret;
249
+ struct iovec *sliced_iov;
250
+ int sliced_niov;
251
+ size_t sliced_head, sliced_tail;
252
253
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
254
255
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
256
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
257
if (padded) {
258
*padded = false;
259
}
260
return 0;
261
}
262
263
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
264
- *qiov, *qiov_offset, *bytes,
265
- pad->buf + pad->buf_len - pad->tail,
266
- pad->tail);
267
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
268
+ &sliced_head, &sliced_tail,
269
+ &sliced_niov);
270
+
271
+ /* Guaranteed by bdrv_check_qiov_request() */
272
+ assert(*bytes <= SIZE_MAX);
273
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
274
+ sliced_head, *bytes);
275
if (ret < 0) {
276
- bdrv_padding_destroy(pad);
277
+ bdrv_padding_finalize(pad);
278
return ret;
279
}
280
*bytes += pad->head + pad->tail;
281
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
282
flags |= BDRV_REQ_COPY_ON_READ;
283
}
284
285
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
286
- NULL, &flags);
287
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
288
+ &pad, NULL, &flags);
289
if (ret < 0) {
290
goto fail;
291
}
292
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
293
bs->bl.request_alignment,
294
qiov, qiov_offset, flags);
295
tracked_request_end(&req);
296
- bdrv_padding_destroy(&pad);
297
+ bdrv_padding_finalize(&pad);
298
299
fail:
300
bdrv_dec_in_flight(bs);
301
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
302
/* This flag doesn't make sense for padding or zero writes */
303
flags &= ~BDRV_REQ_REGISTERED_BUF;
304
305
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
306
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
307
if (padding) {
308
assert(!(flags & BDRV_REQ_NO_WAIT));
309
bdrv_make_request_serialising(req, align);
310
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
311
}
312
313
out:
314
- bdrv_padding_destroy(&pad);
315
+ bdrv_padding_finalize(&pad);
316
317
return ret;
318
}
319
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
320
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
321
* alignment only if there is no ZERO flag.
322
*/
323
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
324
- &padded, &flags);
325
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
326
+ &pad, &padded, &flags);
327
if (ret < 0) {
328
return ret;
329
}
330
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
331
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
332
qiov, qiov_offset, flags);
333
334
- bdrv_padding_destroy(&pad);
335
+ bdrv_padding_finalize(&pad);
336
337
out:
338
tracked_request_end(&req);
--
2.40.1

bdrv_pad_request() was the main user of qemu_iovec_init_extended().
HEAD^ has removed that use, so we can remove qemu_iovec_init_extended()
now.

The only remaining user is qemu_iovec_init_slice(), which can easily
inline the small part it really needs.

Note that qemu_iovec_init_extended() offered a memcpy() optimization to
initialize the new I/O vector. qemu_iovec_concat_iov(), which is used
to replace its functionality, does not, but calls qemu_iovec_add() for
every single element. If we decide this optimization was important, we
will need to re-implement it in qemu_iovec_concat_iov(), which might
also benefit its pre-existing users.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-4-hreitz@redhat.com>
---
 include/qemu/iov.h |  5 ---
 util/iov.c         | 79 +++++++---------------------------------------
 2 files changed, 11 insertions(+), 73 deletions(-)

diff --git a/include/qemu/iov.h b/include/qemu/iov.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/qemu/iov.h
27
+++ b/include/qemu/iov.h
28
@@ -XXX,XX +XXX,XX @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
29
30
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
31
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
32
-int qemu_iovec_init_extended(
33
- QEMUIOVector *qiov,
34
- void *head_buf, size_t head_len,
35
- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
36
- void *tail_buf, size_t tail_len);
37
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
38
size_t offset, size_t len);
39
struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
40
diff --git a/util/iov.c b/util/iov.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/util/iov.c
43
+++ b/util/iov.c
44
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
45
return niov;
46
}
47
48
-/*
49
- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
50
- * and @tail_buf buffer into new qiov.
51
- */
52
-int qemu_iovec_init_extended(
53
- QEMUIOVector *qiov,
54
- void *head_buf, size_t head_len,
55
- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
56
- void *tail_buf, size_t tail_len)
57
-{
58
- size_t mid_head, mid_tail;
59
- int total_niov, mid_niov = 0;
60
- struct iovec *p, *mid_iov = NULL;
61
-
62
- assert(mid_qiov->niov <= IOV_MAX);
63
-
64
- if (SIZE_MAX - head_len < mid_len ||
65
- SIZE_MAX - head_len - mid_len < tail_len)
66
- {
67
- return -EINVAL;
68
- }
69
-
70
- if (mid_len) {
71
- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len,
72
- &mid_head, &mid_tail, &mid_niov);
73
- }
74
-
75
- total_niov = !!head_len + mid_niov + !!tail_len;
76
- if (total_niov > IOV_MAX) {
77
- return -EINVAL;
78
- }
79
-
80
- if (total_niov == 1) {
81
- qemu_iovec_init_buf(qiov, NULL, 0);
82
- p = &qiov->local_iov;
83
- } else {
84
- qiov->niov = qiov->nalloc = total_niov;
85
- qiov->size = head_len + mid_len + tail_len;
86
- p = qiov->iov = g_new(struct iovec, qiov->niov);
87
- }
88
-
89
- if (head_len) {
90
- p->iov_base = head_buf;
91
- p->iov_len = head_len;
92
- p++;
93
- }
94
-
95
- assert(!mid_niov == !mid_len);
96
- if (mid_niov) {
97
- memcpy(p, mid_iov, mid_niov * sizeof(*p));
98
- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
99
- p[0].iov_len -= mid_head;
100
- p[mid_niov - 1].iov_len -= mid_tail;
101
- p += mid_niov;
102
- }
103
-
104
- if (tail_len) {
105
- p->iov_base = tail_buf;
106
- p->iov_len = tail_len;
107
- }
108
-
109
- return 0;
110
-}
111
-
112
/*
113
* Check if the contents of subrange of qiov data is all zeroes.
114
*/
115
@@ -XXX,XX +XXX,XX @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
116
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
117
size_t offset, size_t len)
118
{
119
- int ret;
120
+ struct iovec *slice_iov;
121
+ int slice_niov;
122
+ size_t slice_head, slice_tail;
123
124
assert(source->size >= len);
125
assert(source->size - len >= offset);
126
127
- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */
128
- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
129
- assert(ret == 0);
130
+ slice_iov = qemu_iovec_slice(source, offset, len,
131
+ &slice_head, &slice_tail, &slice_niov);
132
+ if (slice_niov == 1) {
133
+ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len);
134
+ } else {
135
+ qemu_iovec_init(qiov, slice_niov);
136
+ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len);
137
+ }
138
}
139
140
void qemu_iovec_destroy(QEMUIOVector *qiov)
141
--
142
2.40.1

Test that even vectored IO requests with 1024 vector elements that are
not aligned to the device's request alignment will succeed.

Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
---
 tests/qemu-iotests/tests/iov-padding     | 85 ++++++++++++++++++++++++
 tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/iov-padding
 create mode 100644 tests/qemu-iotests/tests/iov-padding.out

diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
16
new file mode 100755
17
index XXXXXXX..XXXXXXX
18
--- /dev/null
19
+++ b/tests/qemu-iotests/tests/iov-padding
20
@@ -XXX,XX +XXX,XX @@
21
+#!/usr/bin/env bash
22
+# group: rw quick
23
+#
24
+# Check the interaction of request padding (to fit alignment restrictions) with
25
+# vectored I/O from the guest
26
+#
27
+# Copyright Red Hat
28
+#
29
+# This program is free software; you can redistribute it and/or modify
30
+# it under the terms of the GNU General Public License as published by
31
+# the Free Software Foundation; either version 2 of the License, or
32
+# (at your option) any later version.
33
+#
34
+# This program is distributed in the hope that it will be useful,
35
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
36
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37
+# GNU General Public License for more details.
38
+#
39
+# You should have received a copy of the GNU General Public License
40
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
41
+#
42
+
43
+seq=$(basename $0)
44
+echo "QA output created by $seq"
45
+
46
+status=1    # failure is the default!
47
+
48
+_cleanup()
49
+{
50
+ _cleanup_test_img
51
+}
52
+trap "_cleanup; exit \$status" 0 1 2 3 15
53
+
54
+# get standard environment, filters and checks
55
+cd ..
56
+. ./common.rc
57
+. ./common.filter
58
+
59
+_supported_fmt raw
60
+_supported_proto file
61
+
62
+_make_test_img 1M
63
+
64
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
65
+
66
+# Four combinations:
67
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
68
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
69
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
70
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
71
+for start_offset in 4096 512; do
72
+ for last_element_length in 512 4096; do
73
+ length=$((1023 * 512 + $last_element_length))
74
+
75
+ echo
76
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
77
+
78
+ # Fill with data for testing
79
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
80
+
81
+ # 1023 512-byte buffers, and then one with length $last_element_length
82
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
83
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
84
+ -c "writev $cmd_params" \
85
+ --image-opts \
86
+ "$IMGSPEC" \
87
+ | _filter_qemu_io
88
+
89
+ # Read all patterns -- read the part we just wrote with writev twice,
90
+ # once "normally", and once with a readv, so we see that that works, too
91
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
92
+ -c "read -P 1 0 $start_offset" \
93
+ -c "read -P 2 $start_offset $length" \
94
+ -c "readv $cmd_params" \
95
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
96
+ --image-opts \
97
+ "$IMGSPEC" \
98
+ | _filter_qemu_io
99
+ done
100
+done
101
+
102
+# success, all done
103
+echo "*** done"
104
+rm -f $seq.full
105
+status=0
106
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
107
new file mode 100644
108
index XXXXXXX..XXXXXXX
109
--- /dev/null
110
+++ b/tests/qemu-iotests/tests/iov-padding.out
111
@@ -XXX,XX +XXX,XX @@
112
+QA output created by iov-padding
113
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
114
+
115
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
116
+wrote 1048576/1048576 bytes at offset 0
117
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
118
+wrote 524288/524288 bytes at offset 4096
119
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
120
+read 4096/4096 bytes at offset 0
121
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+read 524288/524288 bytes at offset 4096
123
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
124
+read 524288/524288 bytes at offset 4096
125
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
126
+read 520192/520192 bytes at offset 528384
127
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
128
+
129
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
130
+wrote 1048576/1048576 bytes at offset 0
131
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
132
+wrote 527872/527872 bytes at offset 4096
133
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
134
+read 4096/4096 bytes at offset 0
135
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
136
+read 527872/527872 bytes at offset 4096
137
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
138
+read 527872/527872 bytes at offset 4096
139
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
140
+read 516608/516608 bytes at offset 531968
141
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
142
+
143
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
144
+wrote 1048576/1048576 bytes at offset 0
145
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
146
+wrote 524288/524288 bytes at offset 512
147
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
148
+read 512/512 bytes at offset 0
149
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
150
+read 524288/524288 bytes at offset 512
151
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
152
+read 524288/524288 bytes at offset 512
153
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
154
+read 523776/523776 bytes at offset 524800
155
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
156
+
157
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
158
+wrote 1048576/1048576 bytes at offset 0
159
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
160
+wrote 527872/527872 bytes at offset 512
161
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
162
+read 512/512 bytes at offset 0
163
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
164
+read 527872/527872 bytes at offset 512
165
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
166
+read 527872/527872 bytes at offset 512
167
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
168
+read 520192/520192 bytes at offset 528384
169
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
170
+*** done
171
--
172
2.40.1

From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>

The data_end field in BDRVParallelsState is set to the biggest offset
present in the BAT. If this offset is outside of the image, any further
write will create the cluster at this offset and/or the image will be
truncated to this offset on close. This is definitely not correct.

Raise an error in parallels_open() if data_end points outside the image
and we are not performing a check (let the check repair the image). Set
data_end to the end of the cluster with the last correct offset.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Message-Id: <20230424093147.197643-2-alexander.ivanov@virtuozzo.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/parallels.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
BDRVParallelsState *s = bs->opaque;
ParallelsHeader ph;
int ret, size, i;
+ int64_t file_nb_sectors;
QemuOpts *opts = NULL;
Error *local_err = NULL;
char *buf;
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}

+ file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
+ if (file_nb_sectors < 0) {
+ return -EINVAL;
+ }
+
ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
if (ret < 0) {
goto fail;
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,

for (i = 0; i < s->bat_size; i++) {
int64_t off = bat2sect(s, i);
+ if (off >= file_nb_sectors) {
+ if (flags & BDRV_O_CHECK) {
+ continue;
+ }
+ error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
+ "is larger than file size (%" PRIi64 ")",
+ off << BDRV_SECTOR_BITS, i,
+ file_nb_sectors << BDRV_SECTOR_BITS);
+ ret = -EINVAL;
+ goto fail;
+ }
if (off >= s->data_end) {
s->data_end = off + s->tracks;
}
--
2.40.1
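[Editor's note] For context, a rough sketch of how a parallels BAT entry maps to a position in the image file, which is why an entry pointing past the end of the file inflates the image on the next write. The helper names and exact scaling below are assumptions based on this patch, not a verbatim copy of the driver.

/* Editorial sketch only -- approximate BAT-entry interpretation. */
#include <stdbool.h>
#include <stdint.h>

/* A BAT entry holds a sector number, scaled by the image's off_multiplier. */
static inline uint64_t bat_entry_to_sector(uint32_t bat_entry,
                                           uint32_t off_multiplier)
{
    return (uint64_t)bat_entry * off_multiplier;
}

/*
 * The check added above, in isolation: an entry whose cluster would start at
 * or beyond the end of the file is rejected (or skipped when the image is
 * only being opened for a check).
 */
static inline bool bat_entry_is_outside_file(uint64_t entry_sector,
                                             int64_t file_nb_sectors)
{
    return entry_sector >= (uint64_t)file_nb_sectors;
}
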

From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>

Don't let high_off be more than the file size even if we don't fix the
image.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230424093147.197643-3-alexander.ivanov@virtuozzo.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/parallels.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
res->corruptions++;
if (fix & BDRV_FIX_ERRORS) {
- prev_off = 0;
s->bat_bitmap[i] = 0;
res->corruptions_fixed++;
flush_bat = true;
- continue;
}
+ prev_off = 0;
+ continue;
}

res->bfi.allocated_clusters++;
--
2.40.1

From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>

Set data_end to the end of the last cluster inside the image. This way
we can be sure that corrupted offsets in the BAT can't affect the image
size. If there are no allocated clusters, set image_end_offset from
data_end.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Message-Id: <20230424093147.197643-4-alexander.ivanov@virtuozzo.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/parallels.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
}
}

- res->image_end_offset = high_off + s->cluster_size;
+ if (high_off == 0) {
+ res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
+ } else {
+ res->image_end_offset = high_off + s->cluster_size;
+ s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
+ }
+
if (size > res->image_end_offset) {
int64_t count;
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
--
2.40.1

From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>

This helper will be reused in the next patches during the
parallels_co_check rework to simplify its code.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230424093147.197643-5-alexander.ivanov@virtuozzo.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/parallels.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
return start_off;
}

+static void parallels_set_bat_entry(BDRVParallelsState *s,
+ uint32_t index, uint32_t offset)
+{
+ s->bat_bitmap[index] = cpu_to_le32(offset);
+ bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
+}
+
static int64_t coroutine_fn GRAPH_RDLOCK
allocate_clusters(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
@@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
}

for (i = 0; i < to_allocate; i++) {
- s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
+ parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
s->data_end += s->tracks;
- bitmap_set(s->bat_dirty_bmap,
- bat_entry_off(idx + i) / s->bat_dirty_block, 1);
}

return bat2sect(s, idx) + sector_num % s->tracks;
--
2.40.1

From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>

The BAT is written in the context of conventional operations over the
image, inside bdrv_co_flush(), when it calls the
parallels_co_flush_to_os() callback. Thus we should not modify the BAT
array directly, but call the parallels_set_bat_entry() helper and
bdrv_co_flush() further on. After that there is no need to manually
write the BAT and track its modification.

This makes the code more generic and allows splitting
parallels_set_bat_entry() for independent pieces.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Message-Id: <20230424093147.197643-6-alexander.ivanov@virtuozzo.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/parallels.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
{
BDRVParallelsState *s = bs->opaque;
int64_t size, prev_off, high_off;
- int ret;
+ int ret = 0;
uint32_t i;
- bool flush_bat = false;

size = bdrv_getlength(bs->file->bs);
if (size < 0) {
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
res->corruptions++;
if (fix & BDRV_FIX_ERRORS) {
- s->bat_bitmap[i] = 0;
+ parallels_set_bat_entry(s, i, 0);
res->corruptions_fixed++;
- flush_bat = true;
}
prev_off = 0;
continue;
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
prev_off = off;
}

- ret = 0;
- if (flush_bat) {
- ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
- if (ret < 0) {
- res->check_errors++;
- goto out;
- }
- }
-
if (high_off == 0) {
res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
} else {
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,

out:
qemu_co_mutex_unlock(&s->lock);
+
+ if (ret == 0) {
+ ret = bdrv_co_flush(bs);
+ if (ret < 0) {
+ res->check_errors++;
+ }
+ }
+
return ret;
--
2.40.1

From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>

We will add more and more checks, so we need a better code structure
in parallels_co_check. Let each check be performed in a separate loop
in a separate helper.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Message-Id: <20230424093147.197643-7-alexander.ivanov@virtuozzo.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
 block/parallels.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
return ret;
}

+static void parallels_check_unclean(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVParallelsState *s = bs->opaque;
+
+ if (!s->header_unclean) {
+ return;
+ }
+
+ fprintf(stderr, "%s image was not closed correctly\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+ res->corruptions++;
+ if (fix & BDRV_FIX_ERRORS) {
+ /* parallels_close will do the job right */
+ res->corruptions_fixed++;
+ s->header_unclean = false;
+ }
+}

static int coroutine_fn GRAPH_RDLOCK
parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
}

qemu_co_mutex_lock(&s->lock);
- if (s->header_unclean) {
- fprintf(stderr, "%s image was not closed correctly\n",
- fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
- res->corruptions++;
- if (fix & BDRV_FIX_ERRORS) {
- /* parallels_close will do the job right */
- res->corruptions_fixed++;
- s->header_unclean = false;
- }
- }
+
+ parallels_check_unclean(bs, res, fix);

res->bfi.total_clusters = s->bat_size;
res->bfi.compressed_clusters = 0; /* compression is not supported */
--
2.40.1
1
From: Akihiko Odaki <akihiko.odaki@gmail.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
This commit introduces "punch hole" operation and optimizes transfer
3
We will add more and more checks so we need a better code structure in
4
block size for macOS.
4
parallels_co_check. Let each check performs in a separate loop in a
5
separate helper.
5
6
6
Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
old version of this change:
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667
9
Message-Id: <20230424093147.197643-8-alexander.ivanov@virtuozzo.com>
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
13
block/parallels.c | 75 +++++++++++++++++++++++++++++++----------------
14
1 file changed, 49 insertions(+), 26 deletions(-)
9
15
10
Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
16
diff --git a/block/parallels.c b/block/parallels.c
11
Message-id: 20210705130458.97642-1-akihiko.odaki@gmail.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
block/file-posix.c | 27 +++++++++++++++++++++++++--
15
1 file changed, 25 insertions(+), 2 deletions(-)
16
17
diff --git a/block/file-posix.c b/block/file-posix.c
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/block/file-posix.c
18
--- a/block/parallels.c
20
+++ b/block/file-posix.c
19
+++ b/block/parallels.c
21
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs,
22
#if defined(HAVE_HOST_BLOCK_DEVICE)
23
#include <paths.h>
24
#include <sys/param.h>
25
+#include <sys/mount.h>
26
#include <IOKit/IOKitLib.h>
27
#include <IOKit/IOBSD.h>
28
#include <IOKit/storage/IOMediaBSDClient.h>
29
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
30
return;
31
}
32
33
+#if defined(__APPLE__) && (__MACH__)
34
+ struct statfs buf;
35
+
36
+ if (!fstatfs(s->fd, &buf)) {
37
+ bs->bl.opt_transfer = buf.f_iosize;
38
+ bs->bl.pdiscard_alignment = buf.f_bsize;
39
+ }
40
+#endif
41
+
42
if (bs->sg || S_ISBLK(st.st_mode)) {
43
int ret = hdev_get_max_hw_transfer(s->fd, &st);
44
45
@@ -XXX,XX +XXX,XX @@ out:
46
}
21
}
47
}
22
}
48
23
49
+#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD)
24
+static int coroutine_fn GRAPH_RDLOCK
50
static int translate_err(int err)
25
+parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
26
+ BdrvCheckMode fix)
27
+{
28
+ BDRVParallelsState *s = bs->opaque;
29
+ uint32_t i;
30
+ int64_t off, high_off, size;
31
+
32
+ size = bdrv_getlength(bs->file->bs);
33
+ if (size < 0) {
34
+ res->check_errors++;
35
+ return size;
36
+ }
37
+
38
+ high_off = 0;
39
+ for (i = 0; i < s->bat_size; i++) {
40
+ off = bat2sect(s, i) << BDRV_SECTOR_BITS;
41
+ if (off > size) {
42
+ fprintf(stderr, "%s cluster %u is outside image\n",
43
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
44
+ res->corruptions++;
45
+ if (fix & BDRV_FIX_ERRORS) {
46
+ parallels_set_bat_entry(s, i, 0);
47
+ res->corruptions_fixed++;
48
+ }
49
+ continue;
50
+ }
51
+ if (high_off < off) {
52
+ high_off = off;
53
+ }
54
+ }
55
+
56
+ if (high_off == 0) {
57
+ res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
58
+ } else {
59
+ res->image_end_offset = high_off + s->cluster_size;
60
+ s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
61
+ }
62
+
63
+ return 0;
64
+}
65
+
66
static int coroutine_fn GRAPH_RDLOCK
67
parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
68
BdrvCheckMode fix)
51
{
69
{
52
if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
70
BDRVParallelsState *s = bs->opaque;
53
@@ -XXX,XX +XXX,XX @@ static int translate_err(int err)
71
- int64_t size, prev_off, high_off;
72
- int ret = 0;
73
+ int64_t size, prev_off;
74
+ int ret;
75
uint32_t i;
76
77
size = bdrv_getlength(bs->file->bs);
78
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
79
80
parallels_check_unclean(bs, res, fix);
81
82
+ ret = parallels_check_outside_image(bs, res, fix);
83
+ if (ret < 0) {
84
+ goto out;
85
+ }
86
+
87
res->bfi.total_clusters = s->bat_size;
88
res->bfi.compressed_clusters = 0; /* compression is not supported */
89
90
- high_off = 0;
91
prev_off = 0;
92
for (i = 0; i < s->bat_size; i++) {
93
int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
94
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
95
continue;
96
}
97
98
- /* cluster outside the image */
99
- if (off > size) {
100
- fprintf(stderr, "%s cluster %u is outside image\n",
101
- fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
102
- res->corruptions++;
103
- if (fix & BDRV_FIX_ERRORS) {
104
- parallels_set_bat_entry(s, i, 0);
105
- res->corruptions_fixed++;
106
- }
107
- prev_off = 0;
108
- continue;
109
- }
110
-
111
res->bfi.allocated_clusters++;
112
- if (off > high_off) {
113
- high_off = off;
114
- }
115
116
if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
117
res->bfi.fragmented_clusters++;
118
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
119
prev_off = off;
54
}
120
}
55
return err;
121
56
}
122
- if (high_off == 0) {
57
+#endif
123
- res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
58
124
- } else {
59
#ifdef CONFIG_FALLOCATE
125
- res->image_end_offset = high_off + s->cluster_size;
60
static int do_fallocate(int fd, int mode, off_t offset, off_t len)
126
- s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
61
@@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque)
127
- }
62
}
128
-
63
} while (errno == EINTR);
129
if (size > res->image_end_offset) {
64
130
int64_t count;
65
- ret = -errno;
131
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
66
+ ret = translate_err(-errno);
67
#endif
68
} else {
69
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
70
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
71
aiocb->aio_offset, aiocb->aio_nbytes);
72
+ ret = translate_err(-errno);
73
+#elif defined(__APPLE__) && (__MACH__)
74
+ fpunchhole_t fpunchhole;
75
+ fpunchhole.fp_flags = 0;
76
+ fpunchhole.reserved = 0;
77
+ fpunchhole.fp_offset = aiocb->aio_offset;
78
+ fpunchhole.fp_length = aiocb->aio_nbytes;
79
+ if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
80
+ ret = errno == ENODEV ? -ENOTSUP : -errno;
81
+ } else {
82
+ ret = 0;
83
+ }
84
#endif
85
}
86
87
- ret = translate_err(ret);
88
if (ret == -ENOTSUP) {
89
s->has_discard = false;
90
}
91
--
132
--
92
2.31.1
133
2.40.1
93
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Exclude out-of-image clusters from the allocated and fragmented cluster
4
calculation.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Message-Id: <20230424093147.197643-9-alexander.ivanov@virtuozzo.com>
8
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
9
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
10
---
11
block/parallels.c | 6 +++++-
12
1 file changed, 5 insertions(+), 1 deletion(-)
13
14
diff --git a/block/parallels.c b/block/parallels.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/parallels.c
17
+++ b/block/parallels.c
18
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
19
prev_off = 0;
20
for (i = 0; i < s->bat_size; i++) {
21
int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
22
- if (off == 0) {
23
+ /*
24
+ * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
25
+ * fixed. Skip not allocated and out-of-image BAT entries.
26
+ */
27
+ if (off == 0 || off + s->cluster_size > res->image_end_offset) {
28
prev_off = 0;
29
continue;
30
}
31
--
32
2.40.1
1
From: Akihiko Odaki <akihiko.odaki@gmail.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
The backend_defaults property allows users to control whether default block
3
We will add more and more checks so we need a better code structure
4
properties should be decided with backend information.
4
in parallels_co_check. Let each check be performed in a separate loop
5
in a separate helper.
5
6
6
If it is off, any backend information will be discarded, which is
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
suitable if you plan to perform live migration to a different disk backend.
8
Message-Id: <20230424093147.197643-10-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
12
block/parallels.c | 74 ++++++++++++++++++++++++++++-------------------
13
1 file changed, 45 insertions(+), 29 deletions(-)
8
14
9
If it is on, a block device may utilize backend information more
15
diff --git a/block/parallels.c b/block/parallels.c
10
aggressively.
11
12
By default, it is auto, which uses backend information for block
13
sizes and ignores the others, which is consistent with the older
14
versions.
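
As a rough usage illustration (the device, drive name and spelling below are
only an example, not taken from this patch), the property can be set like any
other qdev property:

    -device floppy,drive=none0,backend_defaults=off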
15
16
Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
17
Message-id: 20210705130458.97642-2-akihiko.odaki@gmail.com
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
20
include/hw/block/block.h | 3 +++
21
hw/block/block.c | 42 ++++++++++++++++++++++++++++++++++----
22
tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++++++
23
3 files changed, 79 insertions(+), 4 deletions(-)
24
25
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
26
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
27
--- a/include/hw/block/block.h
17
--- a/block/parallels.c
28
+++ b/include/hw/block/block.h
18
+++ b/block/parallels.c
29
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
30
31
typedef struct BlockConf {
32
BlockBackend *blk;
33
+ OnOffAuto backend_defaults;
34
uint32_t physical_block_size;
35
uint32_t logical_block_size;
36
uint32_t min_io_size;
37
@@ -XXX,XX +XXX,XX @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
38
}
20
}
39
21
40
#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \
22
static int coroutine_fn GRAPH_RDLOCK
41
+ DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \
23
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
42
+ _conf.backend_defaults, ON_OFF_AUTO_AUTO), \
24
- BdrvCheckMode fix)
43
DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \
25
+parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
44
_conf.logical_block_size), \
26
+ BdrvCheckMode fix)
45
DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \
46
diff --git a/hw/block/block.c b/hw/block/block.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/hw/block/block.c
49
+++ b/hw/block/block.c
50
@@ -XXX,XX +XXX,XX @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
51
{
27
{
52
BlockBackend *blk = conf->blk;
28
BDRVParallelsState *s = bs->opaque;
53
BlockSizes blocksizes;
29
- int64_t size, prev_off;
54
- int backend_ret;
30
+ int64_t size;
55
+ BlockDriverState *bs;
31
int ret;
56
+ bool use_blocksizes;
32
- uint32_t i;
57
+ bool use_bs;
33
34
size = bdrv_getlength(bs->file->bs);
35
if (size < 0) {
36
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
37
return size;
38
}
39
40
+ if (size > res->image_end_offset) {
41
+ int64_t count;
42
+ count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
43
+ fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
44
+ fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
45
+ size - res->image_end_offset);
46
+ res->leaks += count;
47
+ if (fix & BDRV_FIX_LEAKS) {
48
+ Error *local_err = NULL;
58
+
49
+
59
+ switch (conf->backend_defaults) {
50
+ /*
60
+ case ON_OFF_AUTO_AUTO:
51
+ * In order to really repair the image, we must shrink it.
61
+ use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
52
+ * That means we have to pass exact=true.
62
+ use_bs = false;
53
+ */
63
+ break;
54
+ ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
64
+
55
+ PREALLOC_MODE_OFF, 0, &local_err);
65
+ case ON_OFF_AUTO_ON:
56
+ if (ret < 0) {
66
+ use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
57
+ error_report_err(local_err);
67
+ bs = blk_bs(blk);
58
+ res->check_errors++;
68
+ use_bs = bs;
59
+ return ret;
69
+ break;
70
+
71
+ case ON_OFF_AUTO_OFF:
72
+ use_blocksizes = false;
73
+ use_bs = false;
74
+ break;
75
+
76
+ default:
77
+ abort();
78
+ }
79
80
- backend_ret = blk_probe_blocksizes(blk, &blocksizes);
81
/* fill in detected values if they are not defined via qemu command line */
82
if (!conf->physical_block_size) {
83
- if (!backend_ret) {
84
+ if (use_blocksizes) {
85
conf->physical_block_size = blocksizes.phys;
86
} else {
87
conf->physical_block_size = BDRV_SECTOR_SIZE;
88
}
89
}
90
if (!conf->logical_block_size) {
91
- if (!backend_ret) {
92
+ if (use_blocksizes) {
93
conf->logical_block_size = blocksizes.log;
94
} else {
95
conf->logical_block_size = BDRV_SECTOR_SIZE;
96
}
97
}
98
+ if (use_bs) {
99
+ if (!conf->opt_io_size) {
100
+ conf->opt_io_size = bs->bl.opt_transfer;
101
+ }
102
+ if (conf->discard_granularity == -1) {
103
+ if (bs->bl.pdiscard_alignment) {
104
+ conf->discard_granularity = bs->bl.pdiscard_alignment;
105
+ } else if (bs->bl.request_alignment != 1) {
106
+ conf->discard_granularity = bs->bl.request_alignment;
107
+ }
60
+ }
61
+ res->leaks_fixed += count;
108
+ }
62
+ }
109
+ }
63
+ }
110
64
+
111
if (conf->logical_block_size > conf->physical_block_size) {
65
+ return 0;
112
error_setg(errp,
66
+}
113
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
67
+
114
index XXXXXXX..XXXXXXX 100644
68
+static int coroutine_fn GRAPH_RDLOCK
115
--- a/tests/qemu-iotests/172.out
69
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
116
+++ b/tests/qemu-iotests/172.out
70
+ BdrvCheckMode fix)
117
@@ -XXX,XX +XXX,XX @@ Testing:
71
+{
118
dev: floppy, id ""
72
+ BDRVParallelsState *s = bs->opaque;
119
unit = 0 (0x0)
73
+ int64_t prev_off;
120
drive = "floppy0"
74
+ int ret;
121
+ backend_defaults = "auto"
75
+ uint32_t i;
122
logical_block_size = 512 (512 B)
76
+
123
physical_block_size = 512 (512 B)
77
qemu_co_mutex_lock(&s->lock);
124
min_io_size = 0 (0 B)
78
125
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2
79
parallels_check_unclean(bs, res, fix);
126
dev: floppy, id ""
80
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
127
unit = 0 (0x0)
81
goto out;
128
drive = "floppy0"
82
}
129
+ backend_defaults = "auto"
83
130
logical_block_size = 512 (512 B)
84
+ ret = parallels_check_leak(bs, res, fix);
131
physical_block_size = 512 (512 B)
85
+ if (ret < 0) {
132
min_io_size = 0 (0 B)
86
+ goto out;
133
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2
87
+ }
134
dev: floppy, id ""
88
+
135
unit = 1 (0x1)
89
res->bfi.total_clusters = s->bat_size;
136
drive = "floppy1"
90
res->bfi.compressed_clusters = 0; /* compression is not supported */
137
+ backend_defaults = "auto"
91
138
logical_block_size = 512 (512 B)
92
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
139
physical_block_size = 512 (512 B)
93
prev_off = off;
140
min_io_size = 0 (0 B)
94
}
141
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2
95
142
dev: floppy, id ""
96
- if (size > res->image_end_offset) {
143
unit = 0 (0x0)
97
- int64_t count;
144
drive = "floppy0"
98
- count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
145
+ backend_defaults = "auto"
99
- fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
146
logical_block_size = 512 (512 B)
100
- fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
147
physical_block_size = 512 (512 B)
101
- size - res->image_end_offset);
148
min_io_size = 0 (0 B)
102
- res->leaks += count;
149
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
103
- if (fix & BDRV_FIX_LEAKS) {
150
dev: floppy, id ""
104
- Error *local_err = NULL;
151
unit = 1 (0x1)
105
-
152
drive = "floppy1"
106
- /*
153
+ backend_defaults = "auto"
107
- * In order to really repair the image, we must shrink it.
154
logical_block_size = 512 (512 B)
108
- * That means we have to pass exact=true.
155
physical_block_size = 512 (512 B)
109
- */
156
min_io_size = 0 (0 B)
110
- ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
157
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
111
- PREALLOC_MODE_OFF, 0, &local_err);
158
dev: floppy, id ""
112
- if (ret < 0) {
159
unit = 0 (0x0)
113
- error_report_err(local_err);
160
drive = "floppy0"
114
- res->check_errors++;
161
+ backend_defaults = "auto"
115
- goto out;
162
logical_block_size = 512 (512 B)
116
- }
163
physical_block_size = 512 (512 B)
117
- res->leaks_fixed += count;
164
min_io_size = 0 (0 B)
118
- }
165
@@ -XXX,XX +XXX,XX @@ Testing: -fdb
119
- }
166
dev: floppy, id ""
120
-
167
unit = 1 (0x1)
121
out:
168
drive = "floppy1"
122
qemu_co_mutex_unlock(&s->lock);
169
+ backend_defaults = "auto"
123
170
logical_block_size = 512 (512 B)
171
physical_block_size = 512 (512 B)
172
min_io_size = 0 (0 B)
173
@@ -XXX,XX +XXX,XX @@ Testing: -fdb
174
dev: floppy, id ""
175
unit = 0 (0x0)
176
drive = "floppy0"
177
+ backend_defaults = "auto"
178
logical_block_size = 512 (512 B)
179
physical_block_size = 512 (512 B)
180
min_io_size = 0 (0 B)
181
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
182
dev: floppy, id ""
183
unit = 0 (0x0)
184
drive = "floppy0"
185
+ backend_defaults = "auto"
186
logical_block_size = 512 (512 B)
187
physical_block_size = 512 (512 B)
188
min_io_size = 0 (0 B)
189
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
190
dev: floppy, id ""
191
unit = 1 (0x1)
192
drive = "floppy1"
193
+ backend_defaults = "auto"
194
logical_block_size = 512 (512 B)
195
physical_block_size = 512 (512 B)
196
min_io_size = 0 (0 B)
197
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
198
dev: floppy, id ""
199
unit = 0 (0x0)
200
drive = "floppy0"
201
+ backend_defaults = "auto"
202
logical_block_size = 512 (512 B)
203
physical_block_size = 512 (512 B)
204
min_io_size = 0 (0 B)
205
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
206
dev: floppy, id ""
207
unit = 1 (0x1)
208
drive = "floppy1"
209
+ backend_defaults = "auto"
210
logical_block_size = 512 (512 B)
211
physical_block_size = 512 (512 B)
212
min_io_size = 0 (0 B)
213
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
214
dev: floppy, id ""
215
unit = 0 (0x0)
216
drive = "floppy0"
217
+ backend_defaults = "auto"
218
logical_block_size = 512 (512 B)
219
physical_block_size = 512 (512 B)
220
min_io_size = 0 (0 B)
221
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
222
dev: floppy, id ""
223
unit = 0 (0x0)
224
drive = "none0"
225
+ backend_defaults = "auto"
226
logical_block_size = 512 (512 B)
227
physical_block_size = 512 (512 B)
228
min_io_size = 0 (0 B)
229
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
230
dev: floppy, id ""
231
unit = 1 (0x1)
232
drive = "none0"
233
+ backend_defaults = "auto"
234
logical_block_size = 512 (512 B)
235
physical_block_size = 512 (512 B)
236
min_io_size = 0 (0 B)
237
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
238
dev: floppy, id ""
239
unit = 1 (0x1)
240
drive = "none1"
241
+ backend_defaults = "auto"
242
logical_block_size = 512 (512 B)
243
physical_block_size = 512 (512 B)
244
min_io_size = 0 (0 B)
245
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
246
dev: floppy, id ""
247
unit = 0 (0x0)
248
drive = "none0"
249
+ backend_defaults = "auto"
250
logical_block_size = 512 (512 B)
251
physical_block_size = 512 (512 B)
252
min_io_size = 0 (0 B)
253
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
254
dev: floppy, id ""
255
unit = 1 (0x1)
256
drive = "none0"
257
+ backend_defaults = "auto"
258
logical_block_size = 512 (512 B)
259
physical_block_size = 512 (512 B)
260
min_io_size = 0 (0 B)
261
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
262
dev: floppy, id ""
263
unit = 0 (0x0)
264
drive = "floppy0"
265
+ backend_defaults = "auto"
266
logical_block_size = 512 (512 B)
267
physical_block_size = 512 (512 B)
268
min_io_size = 0 (0 B)
269
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
270
dev: floppy, id ""
271
unit = 1 (0x1)
272
drive = "none0"
273
+ backend_defaults = "auto"
274
logical_block_size = 512 (512 B)
275
physical_block_size = 512 (512 B)
276
min_io_size = 0 (0 B)
277
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
278
dev: floppy, id ""
279
unit = 0 (0x0)
280
drive = "floppy0"
281
+ backend_defaults = "auto"
282
logical_block_size = 512 (512 B)
283
physical_block_size = 512 (512 B)
284
min_io_size = 0 (0 B)
285
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
286
dev: floppy, id ""
287
unit = 0 (0x0)
288
drive = "none0"
289
+ backend_defaults = "auto"
290
logical_block_size = 512 (512 B)
291
physical_block_size = 512 (512 B)
292
min_io_size = 0 (0 B)
293
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
294
dev: floppy, id ""
295
unit = 1 (0x1)
296
drive = "floppy1"
297
+ backend_defaults = "auto"
298
logical_block_size = 512 (512 B)
299
physical_block_size = 512 (512 B)
300
min_io_size = 0 (0 B)
301
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
302
dev: floppy, id ""
303
unit = 0 (0x0)
304
drive = "none0"
305
+ backend_defaults = "auto"
306
logical_block_size = 512 (512 B)
307
physical_block_size = 512 (512 B)
308
min_io_size = 0 (0 B)
309
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
310
dev: floppy, id ""
311
unit = 1 (0x1)
312
drive = "floppy1"
313
+ backend_defaults = "auto"
314
logical_block_size = 512 (512 B)
315
physical_block_size = 512 (512 B)
316
min_io_size = 0 (0 B)
317
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
318
dev: floppy, id ""
319
unit = 1 (0x1)
320
drive = "none0"
321
+ backend_defaults = "auto"
322
logical_block_size = 512 (512 B)
323
physical_block_size = 512 (512 B)
324
min_io_size = 0 (0 B)
325
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
326
dev: floppy, id ""
327
unit = 0 (0x0)
328
drive = "floppy0"
329
+ backend_defaults = "auto"
330
logical_block_size = 512 (512 B)
331
physical_block_size = 512 (512 B)
332
min_io_size = 0 (0 B)
333
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
334
dev: floppy, id ""
335
unit = 1 (0x1)
336
drive = "none0"
337
+ backend_defaults = "auto"
338
logical_block_size = 512 (512 B)
339
physical_block_size = 512 (512 B)
340
min_io_size = 0 (0 B)
341
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
342
dev: floppy, id ""
343
unit = 0 (0x0)
344
drive = "floppy0"
345
+ backend_defaults = "auto"
346
logical_block_size = 512 (512 B)
347
physical_block_size = 512 (512 B)
348
min_io_size = 0 (0 B)
349
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global floppy.drive=none0 -device
350
dev: floppy, id ""
351
unit = 0 (0x0)
352
drive = "none0"
353
+ backend_defaults = "auto"
354
logical_block_size = 512 (512 B)
355
physical_block_size = 512 (512 B)
356
min_io_size = 0 (0 B)
357
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy
358
dev: floppy, id ""
359
unit = 0 (0x0)
360
drive = ""
361
+ backend_defaults = "auto"
362
logical_block_size = 512 (512 B)
363
physical_block_size = 512 (512 B)
364
min_io_size = 0 (0 B)
365
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=120
366
dev: floppy, id ""
367
unit = 0 (0x0)
368
drive = ""
369
+ backend_defaults = "auto"
370
logical_block_size = 512 (512 B)
371
physical_block_size = 512 (512 B)
372
min_io_size = 0 (0 B)
373
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=144
374
dev: floppy, id ""
375
unit = 0 (0x0)
376
drive = ""
377
+ backend_defaults = "auto"
378
logical_block_size = 512 (512 B)
379
physical_block_size = 512 (512 B)
380
min_io_size = 0 (0 B)
381
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=288
382
dev: floppy, id ""
383
unit = 0 (0x0)
384
drive = ""
385
+ backend_defaults = "auto"
386
logical_block_size = 512 (512 B)
387
physical_block_size = 512 (512 B)
388
min_io_size = 0 (0 B)
389
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
390
dev: floppy, id ""
391
unit = 0 (0x0)
392
drive = "none0"
393
+ backend_defaults = "auto"
394
logical_block_size = 512 (512 B)
395
physical_block_size = 512 (512 B)
396
min_io_size = 0 (0 B)
397
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
398
dev: floppy, id ""
399
unit = 0 (0x0)
400
drive = "none0"
401
+ backend_defaults = "auto"
402
logical_block_size = 512 (512 B)
403
physical_block_size = 512 (512 B)
404
min_io_size = 0 (0 B)
405
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical
406
dev: floppy, id ""
407
unit = 0 (0x0)
408
drive = "none0"
409
+ backend_defaults = "auto"
410
logical_block_size = 512 (512 B)
411
physical_block_size = 512 (512 B)
412
min_io_size = 0 (0 B)
413
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica
414
dev: floppy, id ""
415
unit = 0 (0x0)
416
drive = "none0"
417
+ backend_defaults = "auto"
418
logical_block_size = 512 (512 B)
419
physical_block_size = 512 (512 B)
420
min_io_size = 0 (0 B)
421
--
124
--
422
2.31.1
125
2.40.1
423
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
We will add more and more checks so we need a better code structure
4
in parallels_co_check. Let each check be performed in a separate loop
5
in a separate helper.
6
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
10
Message-Id: <20230424093147.197643-11-alexander.ivanov@virtuozzo.com>
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
---
14
block/parallels.c | 52 +++++++++++++++++++++++++++--------------------
15
1 file changed, 30 insertions(+), 22 deletions(-)
16
17
diff --git a/block/parallels.c b/block/parallels.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/parallels.c
20
+++ b/block/parallels.c
21
@@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
22
return 0;
23
}
24
25
-static int coroutine_fn GRAPH_RDLOCK
26
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
27
- BdrvCheckMode fix)
28
+static void parallels_collect_statistics(BlockDriverState *bs,
29
+ BdrvCheckResult *res,
30
+ BdrvCheckMode fix)
31
{
32
BDRVParallelsState *s = bs->opaque;
33
- int64_t prev_off;
34
- int ret;
35
+ int64_t off, prev_off;
36
uint32_t i;
37
38
- qemu_co_mutex_lock(&s->lock);
39
-
40
- parallels_check_unclean(bs, res, fix);
41
-
42
- ret = parallels_check_outside_image(bs, res, fix);
43
- if (ret < 0) {
44
- goto out;
45
- }
46
-
47
- ret = parallels_check_leak(bs, res, fix);
48
- if (ret < 0) {
49
- goto out;
50
- }
51
-
52
res->bfi.total_clusters = s->bat_size;
53
res->bfi.compressed_clusters = 0; /* compression is not supported */
54
55
prev_off = 0;
56
for (i = 0; i < s->bat_size; i++) {
57
- int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
58
+ off = bat2sect(s, i) << BDRV_SECTOR_BITS;
59
/*
60
* If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
61
* fixed. Skip not allocated and out-of-image BAT entries.
62
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
63
continue;
64
}
65
66
- res->bfi.allocated_clusters++;
67
-
68
if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
69
res->bfi.fragmented_clusters++;
70
}
71
prev_off = off;
72
+ res->bfi.allocated_clusters++;
73
}
74
+}
75
+
76
+static int coroutine_fn GRAPH_RDLOCK
77
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
78
+ BdrvCheckMode fix)
79
+{
80
+ BDRVParallelsState *s = bs->opaque;
81
+ int ret;
82
+
83
+ qemu_co_mutex_lock(&s->lock);
84
+
85
+ parallels_check_unclean(bs, res, fix);
86
+
87
+ ret = parallels_check_outside_image(bs, res, fix);
88
+ if (ret < 0) {
89
+ goto out;
90
+ }
91
+
92
+ ret = parallels_check_leak(bs, res, fix);
93
+ if (ret < 0) {
94
+ goto out;
95
+ }
96
+
97
+ parallels_collect_statistics(bs, res, fix);
98
99
out:
100
qemu_co_mutex_unlock(&s->lock);
101
--
102
2.40.1
1
BHs must be deleted before the AioContext is finalized. If not, it's a
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
bug and probably indicates that some part of the program still expects
3
the BH to run in the future. That can lead to memory leaks, inconsistent
4
state, or just hangs.
5
2
6
Unfortunately the assert(flags & BH_DELETED) call in aio_ctx_finalize()
3
Replace the way we use the mutex in parallels_co_check() for simpler
7
is difficult to debug because the assertion failure contains no
4
and less error-prone code.
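
A minimal sketch of the pattern this switches to (simplified, not the actual
parallels code; the helper is made up): leaving a WITH_QEMU_LOCK_GUARD block,
including via return, releases the lock automatically, so no separate
unlock/goto-out path is needed:

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"
    #include "qemu/lockable.h"

    /* Simplified illustration only -- not the actual parallels code. */
    static int coroutine_fn example_check(CoMutex *lock, int (*check_one)(void))
    {
        WITH_QEMU_LOCK_GUARD(lock) {
            int ret = check_one();
            if (ret < 0) {
                /* Returning from inside the block also releases the lock,
                 * so the old qemu_co_mutex_unlock()/goto-out path goes away. */
                return ret;
            }
        }
        /* The lock is likewise released on normal exit from the block. */
        return 0;
    }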
8
information about the BH!
9
5
10
Use the QEMUBH name field added in the previous patch to show a useful
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
11
error when a leaked BH is detected.
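
For context, a minimal sketch of the expected BH lifecycle (illustrative only;
everything except the qemu_bh_* calls is made up). The point is that
qemu_bh_delete() must have run before the owning AioContext is finalized,
otherwise the check added here reports the leaked BH by name and aborts:

    #include "qemu/osdep.h"
    #include "qemu/main-loop.h"

    static QEMUBH *my_bh;                 /* hypothetical example BH */

    static void my_bh_cb(void *opaque)
    {
        /* deferred work goes here */
    }

    static void example_setup(void *opaque)
    {
        my_bh = qemu_bh_new(my_bh_cb, opaque);
        qemu_bh_schedule(my_bh);
    }

    static void example_teardown(void)
    {
        /* Must run before the owning AioContext is finalized. */
        qemu_bh_delete(my_bh);
    }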
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Message-Id: <20230424093147.197643-12-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
12
block/parallels.c | 33 ++++++++++++++-------------------
13
1 file changed, 14 insertions(+), 19 deletions(-)
12
14
13
Suggested-by: Eric Ernst <eric.g.ernst@gmail.com>
15
diff --git a/block/parallels.c b/block/parallels.c
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-Id: <20210414200247.917496-3-stefanha@redhat.com>
16
---
17
util/async.c | 16 ++++++++++++++--
18
1 file changed, 14 insertions(+), 2 deletions(-)
19
20
diff --git a/util/async.c b/util/async.c
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/util/async.c
17
--- a/block/parallels.c
23
+++ b/util/async.c
18
+++ b/block/parallels.c
24
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
19
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
25
assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
20
BDRVParallelsState *s = bs->opaque;
26
21
int ret;
27
while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
22
28
- /* qemu_bh_delete() must have been called on BHs in this AioContext */
23
- qemu_co_mutex_lock(&s->lock);
29
- assert(flags & BH_DELETED);
24
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
30
+ /*
25
+ parallels_check_unclean(bs, res, fix);
31
+ * qemu_bh_delete() must have been called on BHs in this AioContext. In
26
32
+ * many cases memory leaks, hangs, or inconsistent state occur when a
27
- parallels_check_unclean(bs, res, fix);
33
+ * BH is leaked because something still expects it to run.
28
+ ret = parallels_check_outside_image(bs, res, fix);
34
+ *
29
+ if (ret < 0) {
35
+ * If you hit this, fix the lifecycle of the BH so that
30
+ return ret;
36
+ * qemu_bh_delete() and any associated cleanup is called before the
37
+ * AioContext is finalized.
38
+ */
39
+ if (unlikely(!(flags & BH_DELETED))) {
40
+ fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
41
+ __func__, bh->name);
42
+ abort();
43
+ }
31
+ }
44
32
45
g_free(bh);
33
- ret = parallels_check_outside_image(bs, res, fix);
34
- if (ret < 0) {
35
- goto out;
36
- }
37
+ ret = parallels_check_leak(bs, res, fix);
38
+ if (ret < 0) {
39
+ return ret;
40
+ }
41
42
- ret = parallels_check_leak(bs, res, fix);
43
- if (ret < 0) {
44
- goto out;
45
+ parallels_collect_statistics(bs, res, fix);
46
}
46
}
47
48
- parallels_collect_statistics(bs, res, fix);
49
-
50
-out:
51
- qemu_co_mutex_unlock(&s->lock);
52
-
53
- if (ret == 0) {
54
- ret = bdrv_co_flush(bs);
55
- if (ret < 0) {
56
- res->check_errors++;
57
- }
58
+ ret = bdrv_co_flush(bs);
59
+ if (ret < 0) {
60
+ res->check_errors++;
61
}
62
63
return ret;
47
--
64
--
48
2.31.1
65
2.40.1
49
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Every cluster referenced by the BAT must lie entirely within the image file.
4
Fix the check condition so that the whole cluster, not just its start offset, is verified.
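
A small self-contained illustration of the boundary case this fixes (sizes are
hypothetical): a cluster that starts before end-of-file but ends past it
slipped through the old "off > size" test:

    #include <assert.h>
    #include <stdint.h>

    /* The corrected test: the whole cluster, not just its start, must fit. */
    static int outside_image(int64_t off, int64_t cluster_size, int64_t size)
    {
        return off + cluster_size > size;
    }

    int main(void)
    {
        const int64_t MiB = 1024 * 1024;
        /* Entry at 9.5 MiB in a 10 MiB file: the old "off > size" test let it
         * pass although its last 0.5 MiB lies beyond end-of-file. */
        assert(outside_image(9 * MiB + MiB / 2, MiB, 10 * MiB));
        /* A cluster ending exactly at end-of-file is still fully inside. */
        assert(!outside_image(9 * MiB, MiB, 10 * MiB));
        return 0;
    }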
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Message-Id: <20230424093147.197643-13-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
---
12
block/parallels.c | 2 +-
13
1 file changed, 1 insertion(+), 1 deletion(-)
14
15
diff --git a/block/parallels.c b/block/parallels.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/parallels.c
18
+++ b/block/parallels.c
19
@@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
20
high_off = 0;
21
for (i = 0; i < s->bat_size; i++) {
22
off = bat2sect(s, i) << BDRV_SECTOR_BITS;
23
- if (off > size) {
24
+ if (off + s->cluster_size > size) {
25
fprintf(stderr, "%s cluster %u is outside image\n",
26
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
27
res->corruptions++;
28
--
29
2.40.1
New patch
1
1
From: Jean-Louis Dupond <jean-louis@dupond.be>
2
3
When we have, for example, a sparse qcow2 image and discard=unmap is enabled,
5
there can be a lot of fragmentation in the image after some time, especially on VMs
6
that do a lot of writes/deletes.
6
This causes the qcow2 image to grow even over 110% of its virtual size,
7
because the free gaps in the image get too small to allocate new
8
continuous clusters. So it allocates new space at the end of the image.
9
10
Disabling discard is not an option, as discard is needed to keep the
11
incremental backup size as low as possible. Without discard, the
12
incremental backups would become large, as qemu thinks it's just dirty
13
blocks but it doesn't know the blocks are unneeded.
14
So we need to avoid fragmentation but also 'empty' the unneeded blocks in
15
the image to have a small incremental backup.
16
17
In addition, we also want to send the discards further down the stack, so
18
the underlying blocks are still discarded.
19
20
Therefor we introduce a new qcow2 option "discard-no-unref".
21
When setting this option to true, discards will no longer have the qcow2
22
driver relinquish cluster allocations. Other than that, the request is
23
handled as normal: All clusters in range are marked as zero, and, if
24
pass-discard-request is true, it is passed further down the stack.
25
The only difference is that the now-zero clusters are preallocated
26
instead of being unallocated.
27
This will avoid fragmentation on the qcow2 image.
28
29
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1621
30
Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
31
Message-Id: <20230605084523.34134-2-jean-louis@dupond.be>
32
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
33
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
34
---
35
qapi/block-core.json | 12 ++++++++++++
36
block/qcow2.h | 3 +++
37
block/qcow2-cluster.c | 32 ++++++++++++++++++++++++++++----
38
block/qcow2.c | 18 ++++++++++++++++++
39
qemu-options.hx | 12 ++++++++++++
40
5 files changed, 73 insertions(+), 4 deletions(-)
41
42
diff --git a/qapi/block-core.json b/qapi/block-core.json
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qapi/block-core.json
45
+++ b/qapi/block-core.json
46
@@ -XXX,XX +XXX,XX @@
47
# @pass-discard-other: whether discard requests for the data source
48
# should be issued on other occasions where a cluster gets freed
49
#
50
+# @discard-no-unref: when enabled, discards from the guest will not cause
51
+# cluster allocations to be relinquished. This prevents qcow2 fragmentation
52
+# that would be caused by such discards. Besides potential
53
+# performance degradation, such fragmentation can lead to increased
54
+# allocation of clusters past the end of the image file,
55
+# resulting in image files whose file length can grow much larger
56
+# than their guest disk size would suggest.
57
+# If image file length is of concern (e.g. when storing qcow2
58
+# images directly on block devices), you should consider enabling
59
+# this option. (since 8.1)
60
+#
61
# @overlap-check: which overlap checks to perform for writes to the
62
# image, defaults to 'cached' (since 2.2)
63
#
64
@@ -XXX,XX +XXX,XX @@
65
'*pass-discard-request': 'bool',
66
'*pass-discard-snapshot': 'bool',
67
'*pass-discard-other': 'bool',
68
+ '*discard-no-unref': 'bool',
69
'*overlap-check': 'Qcow2OverlapChecks',
70
'*cache-size': 'int',
71
'*l2-cache-size': 'int',
72
diff --git a/block/qcow2.h b/block/qcow2.h
73
index XXXXXXX..XXXXXXX 100644
74
--- a/block/qcow2.h
75
+++ b/block/qcow2.h
76
@@ -XXX,XX +XXX,XX @@
77
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
78
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
79
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
80
+#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref"
81
#define QCOW2_OPT_OVERLAP "overlap-check"
82
#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
83
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
84
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State {
85
86
bool discard_passthrough[QCOW2_DISCARD_MAX];
87
88
+ bool discard_no_unref;
89
+
90
int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
91
bool signaled_corruption;
92
93
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/block/qcow2-cluster.c
96
+++ b/block/qcow2-cluster.c
97
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
98
uint64_t new_l2_bitmap = old_l2_bitmap;
99
QCow2ClusterType cluster_type =
100
qcow2_get_cluster_type(bs, old_l2_entry);
101
+ bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
102
+ !full_discard &&
103
+ (s->discard_no_unref &&
104
+ type == QCOW2_DISCARD_REQUEST);
105
106
/*
107
* If full_discard is true, the cluster should not read back as zeroes,
108
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
109
new_l2_entry = new_l2_bitmap = 0;
110
} else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
111
if (has_subclusters(s)) {
112
- new_l2_entry = 0;
113
+ if (keep_reference) {
114
+ new_l2_entry = old_l2_entry;
115
+ } else {
116
+ new_l2_entry = 0;
117
+ }
118
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
119
} else {
120
- new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
121
+ if (s->qcow_version >= 3) {
122
+ if (keep_reference) {
123
+ new_l2_entry |= QCOW_OFLAG_ZERO;
124
+ } else {
125
+ new_l2_entry = QCOW_OFLAG_ZERO;
126
+ }
127
+ } else {
128
+ new_l2_entry = 0;
129
+ }
130
}
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
134
if (has_subclusters(s)) {
135
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
136
}
137
- /* Then decrease the refcount */
138
- qcow2_free_any_cluster(bs, old_l2_entry, type);
139
+ if (!keep_reference) {
140
+ /* Then decrease the refcount */
141
+ qcow2_free_any_cluster(bs, old_l2_entry, type);
142
+ } else if (s->discard_passthrough[type] &&
143
+ (cluster_type == QCOW2_CLUSTER_NORMAL ||
144
+ cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
145
+ /* If we keep the reference, pass on the discard still */
146
+ bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
147
+ s->cluster_size);
148
+ }
149
}
150
151
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
152
diff --git a/block/qcow2.c b/block/qcow2.c
153
index XXXXXXX..XXXXXXX 100644
154
--- a/block/qcow2.c
155
+++ b/block/qcow2.c
156
@@ -XXX,XX +XXX,XX @@ static const char *const mutable_opts[] = {
157
QCOW2_OPT_DISCARD_REQUEST,
158
QCOW2_OPT_DISCARD_SNAPSHOT,
159
QCOW2_OPT_DISCARD_OTHER,
160
+ QCOW2_OPT_DISCARD_NO_UNREF,
161
QCOW2_OPT_OVERLAP,
162
QCOW2_OPT_OVERLAP_TEMPLATE,
163
QCOW2_OPT_OVERLAP_MAIN_HEADER,
164
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qcow2_runtime_opts = {
165
.type = QEMU_OPT_BOOL,
166
.help = "Generate discard requests when other clusters are freed",
167
},
168
+ {
169
+ .name = QCOW2_OPT_DISCARD_NO_UNREF,
170
+ .type = QEMU_OPT_BOOL,
171
+ .help = "Do not unreference discarded clusters",
172
+ },
173
{
174
.name = QCOW2_OPT_OVERLAP,
175
.type = QEMU_OPT_STRING,
176
@@ -XXX,XX +XXX,XX @@ typedef struct Qcow2ReopenState {
177
bool use_lazy_refcounts;
178
int overlap_check;
179
bool discard_passthrough[QCOW2_DISCARD_MAX];
180
+ bool discard_no_unref;
181
uint64_t cache_clean_interval;
182
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
183
} Qcow2ReopenState;
184
@@ -XXX,XX +XXX,XX @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
185
r->discard_passthrough[QCOW2_DISCARD_OTHER] =
186
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
187
188
+ r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
189
+ false);
190
+ if (r->discard_no_unref && s->qcow_version < 3) {
191
+ error_setg(errp,
192
+ "discard-no-unref is only supported since qcow2 version 3");
193
+ ret = -EINVAL;
194
+ goto fail;
195
+ }
196
+
197
switch (s->crypt_method_header) {
198
case QCOW_CRYPT_NONE:
199
if (encryptfmt) {
200
@@ -XXX,XX +XXX,XX @@ static void qcow2_update_options_commit(BlockDriverState *bs,
201
s->discard_passthrough[i] = r->discard_passthrough[i];
202
}
203
204
+ s->discard_no_unref = r->discard_no_unref;
205
+
206
if (s->cache_clean_interval != r->cache_clean_interval) {
207
cache_clean_timer_del(bs);
208
s->cache_clean_interval = r->cache_clean_interval;
209
diff --git a/qemu-options.hx b/qemu-options.hx
210
index XXXXXXX..XXXXXXX 100644
211
--- a/qemu-options.hx
212
+++ b/qemu-options.hx
213
@@ -XXX,XX +XXX,XX @@ SRST
214
issued on other occasions where a cluster gets freed
215
(on/off; default: off)
216
217
+ ``discard-no-unref``
218
+ When enabled, discards from the guest will not cause cluster
219
+ allocations to be relinquished. This prevents qcow2 fragmentation
220
+ that would be caused by such discards. Besides potential
221
+ performance degradation, such fragmentation can lead to increased
222
+ allocation of clusters past the end of the image file,
223
+ resulting in image files whose file length can grow much larger
224
+ than their guest disk size would suggest.
225
+ If image file length is of concern (e.g. when storing qcow2
226
+ images directly on block devices), you should consider enabling
227
+ this option.
228
+
229
``overlap-check``
230
Which overlap checks to perform for writes to the image
231
(none/constant/cached/all; default: cached). For details or
232
--
233
2.40.1