1
The following changes since commit 0b6206b9c6825619cd721085fe082d7a0abc9af4:
1
The following changes since commit 474f3938d79ab36b9231c9ad3b5a9314c2aeacde:
2
2
3
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210914-4' into staging (2021-09-15 13:27:49 +0100)
3
Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-jun-21-2019' into staging (2019-06-21 15:40:50 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/XanClic/qemu.git tags/pull-block-2021-09-15
7
https://github.com/XanClic/qemu.git tags/pull-block-2019-06-24
8
8
9
for you to fetch changes up to 1899bf47375ad40555dcdff12ba49b4b8b82df38:
9
for you to fetch changes up to ab5d4a30f7f3803ca5106b370969c1b7b54136f8:
10
10
11
qemu-img: Add -F shorthand to convert (2021-09-15 18:42:38 +0200)
11
iotests: Fix 205 for concurrent runs (2019-06-24 16:01:40 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches:
14
Block patches:
15
- Block-status cache for data regions
15
- The SSH block driver now uses libssh instead of libssh2
16
- qcow2 optimization (when using subclusters)
16
- The VMDK block driver gets read-only support for the seSparse
17
- iotests delinting, and let 297 (lint checker) cover named iotests
17
subformat
18
- qcow2 check improvements
18
- Various fixes
19
- Added -F (target backing file format) option to qemu-img convert
19
20
- Mirror job fix
20
---
21
- Fix for when a migration is initiated while a backup job runs
21
22
- Fix for uncached qemu-img convert to a volume with 4k sectors (for an
22
v2:
23
unaligned image)
23
- Squashed Pino's fix for pre-0.8 libssh into the libssh patch
24
- Minor gluster driver fix
25
24
26
----------------------------------------------------------------
25
----------------------------------------------------------------
27
Eric Blake (1):
26
Anton Nefedov (1):
28
qemu-img: Add -F shorthand to convert
27
iotest 134: test cluster-misaligned encrypted write
29
28
30
Hanna Reitz (15):
29
Klaus Birkelund Jensen (1):
31
gluster: Align block-status tail
30
nvme: do not advertise support for unsupported arbitration mechanism
32
block: Drop BDS comment regarding bdrv_append()
33
block: block-status cache for data regions
34
block: Clarify that @bytes is no limit on *pnum
35
block/file-posix: Do not force-cap *pnum
36
block/gluster: Do not force-cap *pnum
37
block/iscsi: Do not force-cap *pnum
38
iotests: Fix unspecified-encoding pylint warnings
39
iotests: Fix use-{list,dict}-literal warnings
40
iotests/297: Drop 169 and 199 from the skip list
41
migrate-bitmaps-postcopy-test: Fix pylint warnings
42
migrate-bitmaps-test: Fix pylint warnings
43
mirror-top-perms: Fix AbnormalShutdown path
44
iotests/297: Cover tests/
45
qemu-img: Allow target be aligned to sector size
46
31
47
Stefano Garzarella (1):
32
Max Reitz (1):
48
block/mirror: fix NULL pointer dereference in
33
iotests: Fix 205 for concurrent runs
49
mirror_wait_on_conflicts()
50
34
51
Vladimir Sementsov-Ogievskiy (15):
35
Pino Toscano (1):
52
tests: add migrate-during-backup
36
ssh: switch from libssh2 to libssh
53
block: bdrv_inactivate_recurse(): check for permissions and fix crash
54
simplebench: add img_bench_templater.py
55
qcow2: refactor handle_dependencies() loop body
56
qcow2: handle_dependencies(): relax conflict detection
57
qcow2-refcount: improve style of check_refcounts_l2()
58
qcow2: compressed read: simplify cluster descriptor passing
59
qcow2: introduce qcow2_parse_compressed_l2_entry() helper
60
qcow2-refcount: introduce fix_l2_entry_by_zero()
61
qcow2-refcount: fix_l2_entry_by_zero(): also zero L2 entry bitmap
62
qcow2-refcount: check_refcounts_l2(): check l2_bitmap
63
qcow2-refcount: check_refcounts_l2(): check reserved bits
64
qcow2-refcount: improve style of check_refcounts_l1()
65
qcow2-refcount: check_refcounts_l1(): check reserved bits
66
qcow2-refcount: check_refblocks(): add separate message for reserved
67
37
68
docs/tools/qemu-img.rst | 4 +-
38
Sam Eiderman (3):
69
block/qcow2.h | 7 +-
39
vmdk: Fix comment regarding max l1_size coverage
70
include/block/block_int.h | 61 +++-
40
vmdk: Reduce the max bound for L1 table size
71
block.c | 88 +++++
41
vmdk: Add read-only support for seSparse snapshots
72
block/file-posix.c | 7 +-
42
73
block/gluster.c | 23 +-
43
Vladimir Sementsov-Ogievskiy (1):
74
block/io.c | 68 +++-
44
blockdev: enable non-root nodes for transaction drive-backup source
75
block/iscsi.c | 3 -
45
76
block/mirror.c | 25 +-
46
configure | 65 +-
77
block/qcow2-cluster.c | 78 +++--
47
block/Makefile.objs | 6 +-
78
block/qcow2-refcount.c | 326 ++++++++++++------
48
block/ssh.c | 652 ++++++++++--------
79
block/qcow2.c | 13 +-
49
block/vmdk.c | 372 +++++++++-
80
qemu-img.c | 18 +-
50
blockdev.c | 2 +-
81
qemu-img-cmds.hx | 2 +-
51
hw/block/nvme.c | 1 -
82
scripts/simplebench/img_bench_templater.py | 95 +++++
52
.travis.yml | 4 +-
83
scripts/simplebench/table_templater.py | 62 ++++
53
block/trace-events | 14 +-
84
tests/qemu-iotests/122 | 2 +-
54
docs/qemu-block-drivers.texi | 2 +-
85
tests/qemu-iotests/271 | 5 +-
55
.../dockerfiles/debian-win32-cross.docker | 1 -
86
tests/qemu-iotests/271.out | 4 +-
56
.../dockerfiles/debian-win64-cross.docker | 1 -
87
tests/qemu-iotests/297 | 9 +-
57
tests/docker/dockerfiles/fedora.docker | 4 +-
88
tests/qemu-iotests/iotests.py | 12 +-
58
tests/docker/dockerfiles/ubuntu.docker | 2 +-
89
.../tests/migrate-bitmaps-postcopy-test | 13 +-
59
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
90
tests/qemu-iotests/tests/migrate-bitmaps-test | 43 ++-
60
tests/qemu-iotests/059.out | 2 +-
91
.../qemu-iotests/tests/migrate-during-backup | 97 ++++++
61
tests/qemu-iotests/134 | 9 +
92
.../tests/migrate-during-backup.out | 5 +
62
tests/qemu-iotests/134.out | 10 +
93
tests/qemu-iotests/tests/mirror-top-perms | 2 +-
63
tests/qemu-iotests/205 | 2 +-
94
26 files changed, 855 insertions(+), 217 deletions(-)
64
tests/qemu-iotests/207 | 54 +-
95
create mode 100755 scripts/simplebench/img_bench_templater.py
65
tests/qemu-iotests/207.out | 2 +-
96
create mode 100644 scripts/simplebench/table_templater.py
66
20 files changed, 823 insertions(+), 384 deletions(-)
97
create mode 100755 tests/qemu-iotests/tests/migrate-during-backup
98
create mode 100644 tests/qemu-iotests/tests/migrate-during-backup.out
99
67
100
--
68
--
101
2.31.1
69
2.21.0
102
70
103
71
diff view generated by jsdifflib
Deleted patch
1
From: Max Reitz <mreitz@redhat.com>
2
1
3
gluster's block-status implementation is basically a copy of that in
4
block/file-posix.c; there is only one thing missing, and that is
5
aligning trailing data extents to the request alignment (as added by
6
commit 9c3db310ff0).
7
8
Note that 9c3db310ff0 mentions that "there seems to be no other block
9
driver that sets request_alignment and [...]", but while block/gluster.c
10
does indeed not set request_alignment, block/io.c's
11
bdrv_refresh_limits() will still default to an alignment of 512 because
12
block/gluster.c does not provide a byte-aligned read function.
13
Therefore, unaligned tails can conceivably occur, and so we should apply
14
the change from 9c3db310ff0 to gluster's block-status implementation.
15
16
Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
17
Signed-off-by: Max Reitz <mreitz@redhat.com>
18
Message-Id: <20210805143603.59503-1-mreitz@redhat.com>
19
Reviewed-by: Eric Blake <eblake@redhat.com>
20
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
21
---
22
block/gluster.c | 16 ++++++++++++++++
23
1 file changed, 16 insertions(+)
24
25
diff --git a/block/gluster.c b/block/gluster.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/block/gluster.c
28
+++ b/block/gluster.c
29
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
30
off_t data = 0, hole = 0;
31
int ret = -EINVAL;
32
33
+ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
34
+
35
if (!s->fd) {
36
return ret;
37
}
38
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
39
/* On a data extent, compute bytes to the end of the extent,
40
* possibly including a partial sector at EOF. */
41
*pnum = MIN(bytes, hole - offset);
42
+
43
+ /*
44
+ * We are not allowed to return partial sectors, though, so
45
+ * round up if necessary.
46
+ */
47
+ if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
48
+ int64_t file_length = qemu_gluster_getlength(bs);
49
+ if (file_length > 0) {
50
+ /* Ignore errors, this is just a safeguard */
51
+ assert(hole == file_length);
52
+ }
53
+ *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
54
+ }
55
+
56
ret = BDRV_BLOCK_DATA;
57
} else {
58
/* On a hole, compute bytes to the beginning of the next extent. */
59
--
60
2.31.1
61
62
diff view generated by jsdifflib
Deleted patch
1
There is a comment above the BDS definition stating care must be taken
2
to consider handling newly added fields in bdrv_append().
3
1
4
Actually, this comment should have said "bdrv_swap()" as of 4ddc07cac
5
(nine years ago), and in any case, bdrv_swap() was dropped in
6
8e419aefa (six years ago). So no such care is necessary anymore.
7
8
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
12
Message-Id: <20210812084148.14458-2-hreitz@redhat.com>
13
---
14
include/block/block_int.h | 6 ------
15
1 file changed, 6 deletions(-)
16
17
diff --git a/include/block/block_int.h b/include/block/block_int.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/block/block_int.h
20
+++ b/include/block/block_int.h
21
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
22
QLIST_ENTRY(BdrvChild) next_parent;
23
};
24
25
-/*
26
- * Note: the function bdrv_append() copies and swaps contents of
27
- * BlockDriverStates, so if you add new fields to this struct, please
28
- * inspect bdrv_append() to determine if the new fields need to be
29
- * copied as well.
30
- */
31
struct BlockDriverState {
32
/* Protected by big QEMU lock or read-only after opening. No special
33
* locking needed during I/O...
34
--
35
2.31.1
36
37
diff view generated by jsdifflib
Deleted patch
1
As we have attempted before
2
(https://lists.gnu.org/archive/html/qemu-devel/2019-01/msg06451.html,
3
"file-posix: Cache lseek result for data regions";
4
https://lists.nongnu.org/archive/html/qemu-block/2021-02/msg00934.html,
5
"file-posix: Cache next hole"), this patch seeks to reduce the number of
6
SEEK_DATA/HOLE operations the file-posix driver has to perform. The
7
main difference is that this time it is implemented as part of the
8
general block layer code.
9
1
10
The problem we face is that on some filesystems or in some
11
circumstances, SEEK_DATA/HOLE is unreasonably slow. Given the
12
implementation is outside of qemu, there is little we can do about its
13
performance.
14
15
We have already introduced the want_zero parameter to
16
bdrv_co_block_status() to reduce the number of SEEK_DATA/HOLE calls
17
unless we really want zero information; but sometimes we do want that
18
information, because for files that consist largely of zero areas,
19
special-casing those areas can give large performance boosts. So the
20
real problem is with files that consist largely of data, so that
21
inquiring the block status does not gain us much performance, but where
22
such an inquiry itself takes a lot of time.
23
24
To address this, we want to cache data regions. Most of the time, when
25
bad performance is reported, it is in places where the image is iterated
26
over from start to end (qemu-img convert or the mirror job), so a simple
27
yet effective solution is to cache only the current data region.
28
29
(Note that only caching data regions but not zero regions means that
30
returning false information from the cache is not catastrophic: Treating
31
zeroes as data is fine. While we try to invalidate the cache on zero
32
writes and discards, such incongruences may still occur when there are
33
other processes writing to the image.)
34
35
We only use the cache for nodes without children (i.e. protocol nodes),
36
because that is where the problem is: Drivers that rely on block-status
37
implementations outside of qemu (e.g. SEEK_DATA/HOLE).
38
39
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/307
40
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
41
Message-Id: <20210812084148.14458-3-hreitz@redhat.com>
42
Reviewed-by: Eric Blake <eblake@redhat.com>
43
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
44
[hreitz: Added `local_file == bs` assertion, as suggested by Vladimir]
45
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
46
---
47
include/block/block_int.h | 50 ++++++++++++++++++++++++
48
block.c | 80 +++++++++++++++++++++++++++++++++++++++
49
block/io.c | 68 +++++++++++++++++++++++++++++++--
50
3 files changed, 195 insertions(+), 3 deletions(-)
51
52
diff --git a/include/block/block_int.h b/include/block/block_int.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/include/block/block_int.h
55
+++ b/include/block/block_int.h
56
@@ -XXX,XX +XXX,XX @@
57
#include "qemu/hbitmap.h"
58
#include "block/snapshot.h"
59
#include "qemu/throttle.h"
60
+#include "qemu/rcu.h"
61
62
#define BLOCK_FLAG_LAZY_REFCOUNTS 8
63
64
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
65
QLIST_ENTRY(BdrvChild) next_parent;
66
};
67
68
+/*
69
+ * Allows bdrv_co_block_status() to cache one data region for a
70
+ * protocol node.
71
+ *
72
+ * @valid: Whether the cache is valid (should be accessed with atomic
73
+ * functions so this can be reset by RCU readers)
74
+ * @data_start: Offset at which we know (or strongly assume) there is data
75
+ * @data_end: Offset where the data region ends (which is not necessarily
76
+ * the start of a zeroed region)
77
+ */
78
+typedef struct BdrvBlockStatusCache {
79
+ struct rcu_head rcu;
80
+
81
+ bool valid;
82
+ int64_t data_start;
83
+ int64_t data_end;
84
+} BdrvBlockStatusCache;
85
+
86
struct BlockDriverState {
87
/* Protected by big QEMU lock or read-only after opening. No special
88
* locking needed during I/O...
89
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
90
91
/* BdrvChild links to this node may never be frozen */
92
bool never_freeze;
93
+
94
+ /* Lock for block-status cache RCU writers */
95
+ CoMutex bsc_modify_lock;
96
+ /* Always non-NULL, but must only be dereferenced under an RCU read guard */
97
+ BdrvBlockStatusCache *block_status_cache;
98
};
99
100
struct BlockBackendRootState {
101
@@ -XXX,XX +XXX,XX @@ static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
102
*/
103
void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
104
105
+/**
106
+ * Check whether the given offset is in the cached block-status data
107
+ * region.
108
+ *
109
+ * If it is, and @pnum is not NULL, *pnum is set to
110
+ * `bsc.data_end - offset`, i.e. how many bytes, starting from
111
+ * @offset, are data (according to the cache).
112
+ * Otherwise, *pnum is not touched.
113
+ */
114
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
115
+
116
+/**
117
+ * If [offset, offset + bytes) overlaps with the currently cached
118
+ * block-status region, invalidate the cache.
119
+ *
120
+ * (To be used by I/O paths that cause data regions to be zero or
121
+ * holes.)
122
+ */
123
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
124
+ int64_t offset, int64_t bytes);
125
+
126
+/**
127
+ * Mark the range [offset, offset + bytes) as a data region.
128
+ */
129
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
130
+
131
#endif /* BLOCK_INT_H */
132
diff --git a/block.c b/block.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/block.c
135
+++ b/block.c
136
@@ -XXX,XX +XXX,XX @@
137
#include "qemu/timer.h"
138
#include "qemu/cutils.h"
139
#include "qemu/id.h"
140
+#include "qemu/range.h"
141
+#include "qemu/rcu.h"
142
#include "block/coroutines.h"
143
144
#ifdef CONFIG_BSD
145
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_new(void)
146
147
qemu_co_queue_init(&bs->flush_queue);
148
149
+ qemu_co_mutex_init(&bs->bsc_modify_lock);
150
+ bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
151
+
152
for (i = 0; i < bdrv_drain_all_count; i++) {
153
bdrv_drained_begin(bs);
154
}
155
@@ -XXX,XX +XXX,XX @@ static void bdrv_close(BlockDriverState *bs)
156
bs->explicit_options = NULL;
157
qobject_unref(bs->full_open_options);
158
bs->full_open_options = NULL;
159
+ g_free(bs->block_status_cache);
160
+ bs->block_status_cache = NULL;
161
162
bdrv_release_named_dirty_bitmaps(bs);
163
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
164
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
165
{
166
return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
167
}
168
+
169
+/**
170
+ * Check whether [offset, offset + bytes) overlaps with the cached
171
+ * block-status data region.
172
+ *
173
+ * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
174
+ * which is what bdrv_bsc_is_data()'s interface needs.
175
+ * Otherwise, *pnum is not touched.
176
+ */
177
+static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
178
+ int64_t offset, int64_t bytes,
179
+ int64_t *pnum)
180
+{
181
+ BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
182
+ bool overlaps;
183
+
184
+ overlaps =
185
+ qatomic_read(&bsc->valid) &&
186
+ ranges_overlap(offset, bytes, bsc->data_start,
187
+ bsc->data_end - bsc->data_start);
188
+
189
+ if (overlaps && pnum) {
190
+ *pnum = bsc->data_end - offset;
191
+ }
192
+
193
+ return overlaps;
194
+}
195
+
196
+/**
197
+ * See block_int.h for this function's documentation.
198
+ */
199
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
200
+{
201
+ RCU_READ_LOCK_GUARD();
202
+
203
+ return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
204
+}
205
+
206
+/**
207
+ * See block_int.h for this function's documentation.
208
+ */
209
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
210
+ int64_t offset, int64_t bytes)
211
+{
212
+ RCU_READ_LOCK_GUARD();
213
+
214
+ if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
215
+ qatomic_set(&bs->block_status_cache->valid, false);
216
+ }
217
+}
218
+
219
+/**
220
+ * See block_int.h for this function's documentation.
221
+ */
222
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
223
+{
224
+ BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
225
+ BdrvBlockStatusCache *old_bsc;
226
+
227
+ *new_bsc = (BdrvBlockStatusCache) {
228
+ .valid = true,
229
+ .data_start = offset,
230
+ .data_end = offset + bytes,
231
+ };
232
+
233
+ QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
234
+
235
+ old_bsc = qatomic_rcu_read(&bs->block_status_cache);
236
+ qatomic_rcu_set(&bs->block_status_cache, new_bsc);
237
+ if (old_bsc) {
238
+ g_free_rcu(old_bsc, rcu);
239
+ }
240
+}
241
diff --git a/block/io.c b/block/io.c
242
index XXXXXXX..XXXXXXX 100644
243
--- a/block/io.c
244
+++ b/block/io.c
245
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
246
return -ENOTSUP;
247
}
248
249
+ /* Invalidate the cached block-status data range if this write overlaps */
250
+ bdrv_bsc_invalidate_range(bs, offset, bytes);
251
+
252
assert(alignment % bs->bl.request_alignment == 0);
253
head = offset % alignment;
254
tail = (offset + bytes) % alignment;
255
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
256
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
257
258
if (bs->drv->bdrv_co_block_status) {
259
- ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
260
- aligned_bytes, pnum, &local_map,
261
- &local_file);
262
+ /*
263
+ * Use the block-status cache only for protocol nodes: Format
264
+ * drivers are generally quick to inquire the status, but protocol
265
+ * drivers often need to get information from outside of qemu, so
266
+ * we do not have control over the actual implementation. There
267
+ * have been cases where inquiring the status took an unreasonably
268
+ * long time, and we can do nothing in qemu to fix it.
269
+ * This is especially problematic for images with large data areas,
270
+ * because finding the few holes in them and giving them special
271
+ * treatment does not gain much performance. Therefore, we try to
272
+ * cache the last-identified data region.
273
+ *
274
+ * Second, limiting ourselves to protocol nodes allows us to assume
275
+ * the block status for data regions to be DATA | OFFSET_VALID, and
276
+ * that the host offset is the same as the guest offset.
277
+ *
278
+ * Note that it is possible that external writers zero parts of
279
+ * the cached regions without the cache being invalidated, and so
280
+ * we may report zeroes as data. This is not catastrophic,
281
+ * however, because reporting zeroes as data is fine.
282
+ */
283
+ if (QLIST_EMPTY(&bs->children) &&
284
+ bdrv_bsc_is_data(bs, aligned_offset, pnum))
285
+ {
286
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
287
+ local_file = bs;
288
+ local_map = aligned_offset;
289
+ } else {
290
+ ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
291
+ aligned_bytes, pnum, &local_map,
292
+ &local_file);
293
+
294
+ /*
295
+ * Note that checking QLIST_EMPTY(&bs->children) is also done when
296
+ * the cache is queried above. Technically, we do not need to check
297
+ * it here; the worst that can happen is that we fill the cache for
298
+ * non-protocol nodes, and then it is never used. However, filling
299
+ * the cache requires an RCU update, so double check here to avoid
300
+ * such an update if possible.
301
+ */
302
+ if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
303
+ QLIST_EMPTY(&bs->children))
304
+ {
305
+ /*
306
+ * When a protocol driver reports BLOCK_OFFSET_VALID, the
307
+ * returned local_map value must be the same as the offset we
308
+ * have passed (aligned_offset), and local_bs must be the node
309
+ * itself.
310
+ * Assert this, because we follow this rule when reading from
311
+ * the cache (see the `local_file = bs` and
312
+ * `local_map = aligned_offset` assignments above), and the
313
+ * result the cache delivers must be the same as the driver
314
+ * would deliver.
315
+ */
316
+ assert(local_file == bs);
317
+ assert(local_map == aligned_offset);
318
+ bdrv_bsc_fill(bs, aligned_offset, *pnum);
319
+ }
320
+ }
321
} else {
322
/* Default code for filters */
323
324
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
325
return 0;
326
}
327
328
+ /* Invalidate the cached block-status data range if this discard overlaps */
329
+ bdrv_bsc_invalidate_range(bs, offset, bytes);
330
+
331
/* Discard is advisory, but some devices track and coalesce
332
* unaligned requests, so we must pass everything down rather than
333
* round here. Still, most devices will just silently ignore
334
--
335
2.31.1
336
337
diff view generated by jsdifflib
Deleted patch
1
.bdrv_co_block_status() implementations are free to return a *pnum that
2
exceeds @bytes, because bdrv_co_block_status() in block/io.c will clamp
3
*pnum as necessary.
4
1
5
On the other hand, if drivers' implementations return values for *pnum
6
that are as large as possible, our recently introduced block-status
7
cache will become more effective.
8
9
So, make a note in block_int.h that @bytes is no upper limit for *pnum.
10
11
Suggested-by: Eric Blake <eblake@redhat.com>
12
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
13
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
14
Message-Id: <20210812084148.14458-4-hreitz@redhat.com>
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
---
17
include/block/block_int.h | 9 +++++++++
18
1 file changed, 9 insertions(+)
19
20
diff --git a/include/block/block_int.h b/include/block/block_int.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/block/block_int.h
23
+++ b/include/block/block_int.h
24
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
25
* clamped to bdrv_getlength() and aligned to request_alignment,
26
* as well as non-NULL pnum, map, and file; in turn, the driver
27
* must return an error or set pnum to an aligned non-zero value.
28
+ *
29
+ * Note that @bytes is just a hint on how big of a region the
30
+ * caller wants to inspect. It is not a limit on *pnum.
31
+ * Implementations are free to return larger values of *pnum if
32
+ * doing so does not incur a performance penalty.
33
+ *
34
+ * block/io.c's bdrv_co_block_status() will utilize an unclamped
35
+ * *pnum value for the block-status cache on protocol nodes, prior
36
+ * to clamping *pnum for return to its caller.
37
*/
38
int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
39
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
40
--
41
2.31.1
42
43
diff view generated by jsdifflib
Deleted patch
1
bdrv_co_block_status() does it for us, so we do not need to do it here.
2
1
3
The advantage of not capping *pnum is that bdrv_co_block_status() can
4
cache larger data regions than requested by its caller.
5
6
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
10
Message-Id: <20210812084148.14458-5-hreitz@redhat.com>
11
---
12
block/file-posix.c | 7 ++++---
13
1 file changed, 4 insertions(+), 3 deletions(-)
14
15
diff --git a/block/file-posix.c b/block/file-posix.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/file-posix.c
18
+++ b/block/file-posix.c
19
@@ -XXX,XX +XXX,XX @@ static int find_allocation(BlockDriverState *bs, off_t start,
20
* the specified offset) that are known to be in the same
21
* allocated/unallocated state.
22
*
23
- * 'bytes' is the max value 'pnum' should be set to.
24
+ * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may
25
+ * well exceed it.
26
*/
27
static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
28
bool want_zero,
29
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
30
} else if (data == offset) {
31
/* On a data extent, compute bytes to the end of the extent,
32
* possibly including a partial sector at EOF. */
33
- *pnum = MIN(bytes, hole - offset);
34
+ *pnum = hole - offset;
35
36
/*
37
* We are not allowed to return partial sectors, though, so
38
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
39
} else {
40
/* On a hole, compute bytes to the beginning of the next extent. */
41
assert(hole == offset);
42
- *pnum = MIN(bytes, data - offset);
43
+ *pnum = data - offset;
44
ret = BDRV_BLOCK_ZERO;
45
}
46
*map = offset;
47
--
48
2.31.1
49
50
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
2
2
3
Although we have long supported 'qemu-img convert -o
3
The device mistakenly reports that the Weighted Round Robin with Urgent
4
backing_file=foo,backing_fmt=bar', the fact that we have a shortcut -B
4
Priority Class arbitration mechanism is supported.
5
for backing_file but none for backing_fmt has made it more likely that
6
users accidentally run into:
7
5
8
qemu-img: warning: Deprecated use of backing file without explicit backing format
6
It is not.
9
7
10
when using -B instead of -o. For similarity with other qemu-img
8
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
11
commands, such as create and compare, add '-F $fmt' as the shorthand
9
Message-id: 20190606092530.14206-1-klaus@birkelund.eu
12
for '-o backing_fmt=$fmt'. Update iotest 122 for coverage of both
10
Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
13
spellings.
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
---
13
hw/block/nvme.c | 1 -
14
1 file changed, 1 deletion(-)
14
15
15
Signed-off-by: Eric Blake <eblake@redhat.com>
16
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
16
Message-Id: <20210913131735.1948339-1-eblake@redhat.com>
17
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
18
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
19
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
20
---
21
docs/tools/qemu-img.rst | 4 ++--
22
qemu-img.c | 10 +++++++---
23
qemu-img-cmds.hx | 2 +-
24
tests/qemu-iotests/122 | 2 +-
25
4 files changed, 11 insertions(+), 7 deletions(-)
26
27
diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
28
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/tools/qemu-img.rst
18
--- a/hw/block/nvme.c
30
+++ b/docs/tools/qemu-img.rst
19
+++ b/hw/block/nvme.c
31
@@ -XXX,XX +XXX,XX @@ Command description:
20
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
32
4
21
n->bar.cap = 0;
33
Error on reading data
22
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
34
23
NVME_CAP_SET_CQR(n->bar.cap, 1);
35
-.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
24
- NVME_CAP_SET_AMS(n->bar.cap, 1);
36
+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE [-F backing_fmt]] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
25
NVME_CAP_SET_TO(n->bar.cap, 0xf);
37
26
NVME_CAP_SET_CSS(n->bar.cap, 1);
38
Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM*
27
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
39
to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can
40
@@ -XXX,XX +XXX,XX @@ Command description:
41
You can use the *BACKING_FILE* option to force the output image to be
42
created as a copy on write image of the specified base image; the
43
*BACKING_FILE* should have the same content as the input's base image,
44
- however the path, image format, etc may differ.
45
+ however the path, image format (as given by *BACKING_FMT*), etc may differ.
46
47
If a relative path name is given, the backing file is looked up relative to
48
the directory containing *OUTPUT_FILENAME*.
49
diff --git a/qemu-img.c b/qemu-img.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/qemu-img.c
52
+++ b/qemu-img.c
53
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
54
int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
55
const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
56
*src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
57
- *out_filename, *out_baseimg_param, *snapshot_name = NULL;
58
+ *out_filename, *out_baseimg_param, *snapshot_name = NULL,
59
+ *backing_fmt = NULL;
60
BlockDriver *drv = NULL, *proto_drv = NULL;
61
BlockDriverInfo bdi;
62
BlockDriverState *out_bs;
63
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
64
{"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
65
{0, 0, 0, 0}
66
};
67
- c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:",
68
+ c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
69
long_options, NULL);
70
if (c == -1) {
71
break;
72
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
73
case 'c':
74
s.compressed = true;
75
break;
76
+ case 'F':
77
+ backing_fmt = optarg;
78
+ break;
79
case 'o':
80
if (accumulate_options(&options, optarg) < 0) {
81
goto fail_getopt;
82
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
83
84
qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
85
s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
86
- ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
87
+ ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
88
if (ret < 0) {
89
goto out;
90
}
91
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
92
index XXXXXXX..XXXXXXX 100644
93
--- a/qemu-img-cmds.hx
94
+++ b/qemu-img-cmds.hx
95
@@ -XXX,XX +XXX,XX @@ SRST
96
ERST
97
98
DEF("convert", img_convert,
99
- "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
100
+ "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file [-F backing_fmt]] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
101
SRST
102
.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
103
ERST
104
diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122
105
index XXXXXXX..XXXXXXX 100755
106
--- a/tests/qemu-iotests/122
107
+++ b/tests/qemu-iotests/122
108
@@ -XXX,XX +XXX,XX @@ echo
109
_make_test_img -b "$TEST_IMG".base -F $IMGFMT
110
111
$QEMU_IO -c "write -P 0 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
112
-$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base -o backing_fmt=$IMGFMT \
113
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base -F $IMGFMT \
114
"$TEST_IMG" "$TEST_IMG".orig
115
$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
116
$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base -o backing_fmt=$IMGFMT \
117
--
28
--
118
2.31.1
29
2.21.0
119
30
120
31
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Split checking for reserved bits out of aligned offset check.
3
We forgot to enable it for transaction .prepare, while it is already
4
enabled in do_drive_backup since commit a2d665c1bc362
5
"blockdev: loosen restrictions on drive-backup source node"
4
6
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Message-id: 20190618140804.59214-1-vsementsov@virtuozzo.com
7
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
9
Reviewed-by: John Snow <jsnow@redhat.com>
8
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
Message-Id: <20210914122454.141075-11-vsementsov@virtuozzo.com>
10
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
11
---
11
---
12
block/qcow2.h | 1 +
12
blockdev.c | 2 +-
13
block/qcow2-refcount.c | 10 +++++++++-
13
1 file changed, 1 insertion(+), 1 deletion(-)
14
2 files changed, 10 insertions(+), 1 deletion(-)
15
14
16
diff --git a/block/qcow2.h b/block/qcow2.h
15
diff --git a/blockdev.c b/blockdev.c
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.h
17
--- a/blockdev.c
19
+++ b/block/qcow2.h
18
+++ b/blockdev.c
20
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
19
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
21
#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
20
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
22
21
backup = common->action->u.drive_backup.data;
23
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
22
24
+#define REFT_RESERVED_MASK 0x1ffULL
23
- bs = qmp_get_root_bs(backup->device, errp);
25
24
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
26
#define INV_OFFSET (-1ULL)
25
if (!bs) {
27
26
return;
28
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
27
}
29
index XXXXXXX..XXXXXXX 100644
30
--- a/block/qcow2-refcount.c
31
+++ b/block/qcow2-refcount.c
32
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
33
34
for(i = 0; i < s->refcount_table_size; i++) {
35
uint64_t offset, cluster;
36
- offset = s->refcount_table[i];
37
+ offset = s->refcount_table[i] & REFT_OFFSET_MASK;
38
cluster = offset >> s->cluster_bits;
39
40
+ if (s->refcount_table[i] & REFT_RESERVED_MASK) {
41
+ fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
42
+ "reserved bits set\n", i);
43
+ res->corruptions++;
44
+ *rebuild = true;
45
+ continue;
46
+ }
47
+
48
/* Refcount blocks are cluster aligned */
49
if (offset_into_cluster(s, offset)) {
50
fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
51
--
28
--
52
2.31.1
29
2.21.0
53
30
54
31
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
2
2
3
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
3
COW (even empty/zero) areas require encryption too
4
5
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
4
Reviewed-by: Eric Blake <eblake@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
5
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
7
Message-Id: <20210914122454.141075-10-vsementsov@virtuozzo.com>
9
Message-id: 20190516143028.81155-1-anton.nefedov@virtuozzo.com
8
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
---
11
---
10
block/qcow2.h | 1 +
12
tests/qemu-iotests/134 | 9 +++++++++
11
block/qcow2-refcount.c | 6 ++++++
13
tests/qemu-iotests/134.out | 10 ++++++++++
12
2 files changed, 7 insertions(+)
14
2 files changed, 19 insertions(+)
13
15
14
diff --git a/block/qcow2.h b/block/qcow2.h
16
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/134
19
+++ b/tests/qemu-iotests/134
20
@@ -XXX,XX +XXX,XX @@ echo
21
echo "== reading whole image =="
22
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
23
24
+echo
25
+echo "== rewriting cluster part =="
26
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
27
+
28
+echo
29
+echo "== verify pattern =="
30
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
31
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
32
+
33
echo
34
echo "== rewriting whole image =="
35
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
36
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
15
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
16
--- a/block/qcow2.h
38
--- a/tests/qemu-iotests/134.out
17
+++ b/block/qcow2.h
39
+++ b/tests/qemu-iotests/134.out
18
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
40
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
19
(QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
41
read 134217728/134217728 bytes at offset 0
20
42
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
21
#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
43
22
+#define L1E_RESERVED_MASK 0x7f000000000001ffULL
44
+== rewriting cluster part ==
23
#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
45
+wrote 512/512 bytes at offset 512
24
#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
46
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
25
26
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/block/qcow2-refcount.c
29
+++ b/block/qcow2-refcount.c
30
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l1(BlockDriverState *bs,
31
continue;
32
}
33
34
+ if (l1_table[i] & L1E_RESERVED_MASK) {
35
+ fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
36
+ "%" PRIx64 "\n", l1_table[i]);
37
+ res->corruptions++;
38
+ }
39
+
47
+
40
l2_offset = l1_table[i] & L1E_OFFSET_MASK;
48
+== verify pattern ==
41
49
+read 512/512 bytes at offset 0
42
/* Mark L2 table as used */
50
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
51
+read 512/512 bytes at offset 512
52
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
53
+
54
== rewriting whole image ==
55
wrote 134217728/134217728 bytes at offset 0
56
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
43
--
57
--
44
2.31.1
58
2.21.0
45
59
46
60
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
- use g_autofree for l1_table
3
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
4
- better name for size in bytes variable
4
extended the l1_size check from VMDK4 to VMDK3 but did not update the
5
- reduce code blocks nesting
5
default coverage in the moved comment.
6
- whitespaces, braces, newlines
7
6
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
The previous vmdk4 calculation:
9
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
8
10
Message-Id: <20210914122454.141075-9-vsementsov@virtuozzo.com>
9
(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB
11
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
10
11
The added vmdk3 calculation:
12
13
(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB
14
15
Adding the calculation of vmdk3 to the comment.
16
17
In any case, VMware does not offer virtual disks more than 2TB for
18
vmdk4/vmdk3 or 64TB for the new undocumented seSparse format which is
19
not implemented yet in qemu.
20
21
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
22
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
23
Reviewed-by: Liran Alon <liran.alon@oracle.com>
24
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
25
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
26
Message-id: 20190620091057.47441-2-shmuel.eiderman@oracle.com
27
Reviewed-by: yuchenlin <yuchenlin@synology.com>
28
Reviewed-by: Max Reitz <mreitz@redhat.com>
29
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
---
30
---
13
block/qcow2-refcount.c | 98 +++++++++++++++++++++---------------------
31
block/vmdk.c | 11 ++++++++---
14
1 file changed, 50 insertions(+), 48 deletions(-)
32
1 file changed, 8 insertions(+), 3 deletions(-)
15
33
16
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
34
diff --git a/block/vmdk.c b/block/vmdk.c
17
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2-refcount.c
36
--- a/block/vmdk.c
19
+++ b/block/qcow2-refcount.c
37
+++ b/block/vmdk.c
20
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l1(BlockDriverState *bs,
38
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
21
int flags, BdrvCheckMode fix, bool active)
39
return -EFBIG;
22
{
23
BDRVQcow2State *s = bs->opaque;
24
- uint64_t *l1_table = NULL, l2_offset, l1_size2;
25
+ size_t l1_size_bytes = l1_size * L1E_SIZE;
26
+ g_autofree uint64_t *l1_table = NULL;
27
+ uint64_t l2_offset;
28
int i, ret;
29
30
- l1_size2 = l1_size * L1E_SIZE;
31
+ if (!l1_size) {
32
+ return 0;
33
+ }
34
35
/* Mark L1 table as used */
36
ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
37
- l1_table_offset, l1_size2);
38
+ l1_table_offset, l1_size_bytes);
39
if (ret < 0) {
40
- goto fail;
41
+ return ret;
42
+ }
43
+
44
+ l1_table = g_try_malloc(l1_size_bytes);
45
+ if (l1_table == NULL) {
46
+ res->check_errors++;
47
+ return -ENOMEM;
48
}
40
}
49
41
if (l1_size > 512 * 1024 * 1024) {
50
/* Read L1 table entries from disk */
42
- /* Although with big capacity and small l1_entry_sectors, we can get a
51
- if (l1_size2 > 0) {
43
+ /*
52
- l1_table = g_try_malloc(l1_size2);
44
+ * Although with big capacity and small l1_entry_sectors, we can get a
53
- if (l1_table == NULL) {
45
* big l1_size, we don't want unbounded value to allocate the table.
54
- ret = -ENOMEM;
46
- * Limit it to 512M, which is 16PB for default cluster and L2 table
55
- res->check_errors++;
47
- * size */
56
- goto fail;
48
+ * Limit it to 512M, which is:
57
- }
49
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
58
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
50
+ * cluster size: 64KB, L2 table size: 512 entries
59
- if (ret < 0) {
51
+ * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
60
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
52
+ * cluster size: 512B, L2 table size: 4096 entries
61
- res->check_errors++;
53
+ */
62
- goto fail;
54
error_setg(errp, "L1 size too big");
63
- }
55
return -EFBIG;
64
- for(i = 0;i < l1_size; i++)
65
- be64_to_cpus(&l1_table[i]);
66
+ ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size_bytes);
67
+ if (ret < 0) {
68
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
69
+ res->check_errors++;
70
+ return ret;
71
+ }
72
+
73
+ for (i = 0; i < l1_size; i++) {
74
+ be64_to_cpus(&l1_table[i]);
75
}
56
}
76
77
/* Do the actual checks */
78
- for(i = 0; i < l1_size; i++) {
79
- l2_offset = l1_table[i];
80
- if (l2_offset) {
81
- /* Mark L2 table as used */
82
- l2_offset &= L1E_OFFSET_MASK;
83
- ret = qcow2_inc_refcounts_imrt(bs, res,
84
- refcount_table, refcount_table_size,
85
- l2_offset, s->cluster_size);
86
- if (ret < 0) {
87
- goto fail;
88
- }
89
+ for (i = 0; i < l1_size; i++) {
90
+ if (!l1_table[i]) {
91
+ continue;
92
+ }
93
94
- /* L2 tables are cluster aligned */
95
- if (offset_into_cluster(s, l2_offset)) {
96
- fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
97
- "cluster aligned; L1 entry corrupted\n", l2_offset);
98
- res->corruptions++;
99
- }
100
+ l2_offset = l1_table[i] & L1E_OFFSET_MASK;
101
102
- /* Process and check L2 entries */
103
- ret = check_refcounts_l2(bs, res, refcount_table,
104
- refcount_table_size, l2_offset, flags,
105
- fix, active);
106
- if (ret < 0) {
107
- goto fail;
108
- }
109
+ /* Mark L2 table as used */
110
+ ret = qcow2_inc_refcounts_imrt(bs, res,
111
+ refcount_table, refcount_table_size,
112
+ l2_offset, s->cluster_size);
113
+ if (ret < 0) {
114
+ return ret;
115
+ }
116
+
117
+ /* L2 tables are cluster aligned */
118
+ if (offset_into_cluster(s, l2_offset)) {
119
+ fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
120
+ "cluster aligned; L1 entry corrupted\n", l2_offset);
121
+ res->corruptions++;
122
+ }
123
+
124
+ /* Process and check L2 entries */
125
+ ret = check_refcounts_l2(bs, res, refcount_table,
126
+ refcount_table_size, l2_offset, flags,
127
+ fix, active);
128
+ if (ret < 0) {
129
+ return ret;
130
}
131
}
132
- g_free(l1_table);
133
- return 0;
134
135
-fail:
136
- g_free(l1_table);
137
- return ret;
138
+ return 0;
139
}
140
141
/*
142
--
57
--
143
2.31.1
58
2.21.0
144
59
145
60
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
There is no conflict and no dependency if we have parallel writes to
3
512M of L1 entries is a very loose bound, only 32M are required to store
4
different subclusters of one cluster when the cluster itself is already
4
the maximal supported VMDK file size of 2TB.
5
allocated. So, relax extra dependency.
6
5
7
Measure performance:
6
Fixed qemu-iotest 59# - now the failure occurs earlier, on an impossible L1
8
First, prepare build/qemu-img-old and build/qemu-img-new images.
7
table size.
9
8
10
cd scripts/simplebench
9
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
11
./img_bench_templater.py
10
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
11
Reviewed-by: Liran Alon <liran.alon@oracle.com>
12
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
13
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
14
Message-id: 20190620091057.47441-3-shmuel.eiderman@oracle.com
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
17
---
18
block/vmdk.c | 13 +++++++------
19
tests/qemu-iotests/059.out | 2 +-
20
2 files changed, 8 insertions(+), 7 deletions(-)
12
21
13
Paste the following to stdin of running script:
22
diff --git a/block/vmdk.c b/block/vmdk.c
14
15
qemu_img=../../build/qemu-img-{old|new}
16
$qemu_img create -f qcow2 -o extended_l2=on /ssd/x.qcow2 1G
17
$qemu_img bench -c 100000 -d 8 [-s 2K|-s 2K -o 512|-s $((1024*2+512))] \
18
-w -t none -n /ssd/x.qcow2
19
20
The result:
21
22
All results are in seconds
23
24
------------------ --------- ---------
25
old new
26
-s 2K 6.7 ± 15% 6.2 ± 12%
27
-7%
28
-s 2K -o 512 13 ± 3% 11 ± 5%
29
-16%
30
-s $((1024*2+512)) 9.5 ± 4% 8.4
31
-12%
32
------------------ --------- ---------
33
34
So small writes are more independent now and that helps to keep deeper
35
io queue which improves performance.
36
37
271 iotest output becomes racy for three allocations in one cluster.
38
Second and third writes may finish in different order. Second and
39
third requests don't depend on each other any more. Still they both
40
depend on first request anyway. Filter out second and third write
41
offsets to cover both possible outputs.
42
43
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
44
Message-Id: <20210824101517.59802-4-vsementsov@virtuozzo.com>
45
Reviewed-by: Eric Blake <eblake@redhat.com>
46
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
47
[hreitz: s/ an / and /]
48
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
49
---
50
block/qcow2-cluster.c | 11 +++++++++++
51
tests/qemu-iotests/271 | 5 ++++-
52
tests/qemu-iotests/271.out | 4 ++--
53
3 files changed, 17 insertions(+), 3 deletions(-)
54
55
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
56
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
57
--- a/block/qcow2-cluster.c
24
--- a/block/vmdk.c
58
+++ b/block/qcow2-cluster.c
25
+++ b/block/vmdk.c
59
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
26
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
60
continue;
27
error_setg(errp, "Invalid granularity, image may be corrupt");
61
}
28
return -EFBIG;
62
29
}
63
+ if (old_alloc->keep_old_clusters &&
30
- if (l1_size > 512 * 1024 * 1024) {
64
+ (end <= l2meta_cow_start(old_alloc) ||
31
+ if (l1_size > 32 * 1024 * 1024) {
65
+ start >= l2meta_cow_end(old_alloc)))
32
/*
66
+ {
33
* Although with big capacity and small l1_entry_sectors, we can get a
67
+ /*
34
* big l1_size, we don't want unbounded value to allocate the table.
68
+ * Clusters intersect but COW areas don't. And cluster itself is
35
- * Limit it to 512M, which is:
69
+ * already allocated. So, there is no actual conflict.
36
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
70
+ */
37
- * cluster size: 64KB, L2 table size: 512 entries
71
+ continue;
38
- * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
72
+ }
39
- * cluster size: 512B, L2 table size: 4096 entries
73
+
40
+ * Limit it to 32M, which is enough to store:
74
/* Conflict */
41
+ * 8TB - for both VMDK3 & VMDK4 with
75
42
+ * minimal cluster size: 512B
76
if (start < old_start) {
43
+ * minimal L2 table size: 512 entries
77
diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271
44
+ * 8 TB is still more than the maximal value supported for
78
index XXXXXXX..XXXXXXX 100755
45
+ * VMDK3 & VMDK4 which is 2TB.
79
--- a/tests/qemu-iotests/271
46
*/
80
+++ b/tests/qemu-iotests/271
47
error_setg(errp, "L1 size too big");
81
@@ -XXX,XX +XXX,XX @@ EOF
48
return -EFBIG;
82
}
49
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
83
84
_make_test_img -o extended_l2=on 1M
85
-_concurrent_io | $QEMU_IO | _filter_qemu_io
86
+# Second and third writes in _concurrent_io() are independent and may finish in
87
+# different order. So, filter offset out to match both possible variants.
88
+_concurrent_io | $QEMU_IO | _filter_qemu_io | \
89
+ $SED -e 's/\(20480\|40960\)/OFFSET/'
90
_concurrent_verify | $QEMU_IO | _filter_qemu_io
91
92
# success, all done
93
diff --git a/tests/qemu-iotests/271.out b/tests/qemu-iotests/271.out
94
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
95
--- a/tests/qemu-iotests/271.out
51
--- a/tests/qemu-iotests/059.out
96
+++ b/tests/qemu-iotests/271.out
52
+++ b/tests/qemu-iotests/059.out
97
@@ -XXX,XX +XXX,XX @@ blkdebug: Suspended request 'A'
53
@@ -XXX,XX +XXX,XX @@ Offset Length Mapped to File
98
blkdebug: Resuming request 'A'
54
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
99
wrote 2048/2048 bytes at offset 30720
55
100
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
56
=== Testing afl image with a very large capacity ===
101
-wrote 2048/2048 bytes at offset 20480
57
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
102
+wrote 2048/2048 bytes at offset OFFSET
58
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
103
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
104
-wrote 2048/2048 bytes at offset 40960
105
+wrote 2048/2048 bytes at offset OFFSET
106
2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
107
*** done
59
*** done
108
--
60
--
109
2.31.1
61
2.21.0
110
62
111
63
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Split fix_l2_entry_by_zero() out of check_refcounts_l2() to be
3
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
4
reused in further patch.
4
QEMU).
5
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
This format was lacking in the following:
7
Reviewed-by: Eric Blake <eblake@redhat.com>
7
8
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
8
* Grain directory (L1) and grain table (L2) entries were 32-bit,
9
Message-Id: <20210914122454.141075-5-vsementsov@virtuozzo.com>
9
allowing access to only 2TB (slightly less) of data.
10
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
10
* The grain size (default) was 512 bytes - leading to data
11
fragmentation and many grain tables.
12
* For space reclamation purposes, it was necessary to find all the
13
grains which are not pointed to by any grain table - so a reverse
14
mapping of "offset of grain in vmdk" to "grain table" must be
15
constructed - which takes large amounts of CPU/RAM.
16
17
The format specification can be found in VMware's documentation:
18
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf
19
20
In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
21
introduced: SESparse (Space Efficient).
22
23
This format fixes the above issues:
24
25
* All entries are now 64-bit.
26
* The grain size (default) is 4KB.
27
* Grain directory and grain tables are now located at the beginning
28
of the file.
29
+ seSparse format reserves space for all grain tables.
30
+ Grain tables can be addressed using an index.
31
+ Grains are located at the end of the file and can also be
32
addressed with an index.
33
- seSparse vmdks of large disks (64TB) have huge preallocated
34
headers - mainly due to L2 tables, even for empty snapshots.
35
* The header contains a reverse mapping ("backmap") of "offset of
36
grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
37
specifies for each grain - whether it is allocated or not.
38
Using these data structures we can implement space reclamation
39
efficiently.
40
* Due to the fact that the header now maintains two mappings:
41
* The regular one (grain directory & grain tables)
42
* A reverse one (backmap and free bitmap)
43
These data structures can lose consistency upon crash and result
44
in a corrupted VMDK.
45
Therefore, a journal is also added to the VMDK and is replayed
46
when VMware reopens the file after a crash.
47
48
Since ESXi 6.7 - SESparse is the only snapshot format available.
49
50
Unfortunately, VMware does not provide documentation regarding the new
51
seSparse format.
52
53
This commit is based on black-box research of the seSparse format.
54
Various in-guest block operations and their effect on the snapshot file
55
were tested.
56
57
The only VMware provided source of information (regarding the underlying
58
implementation) was a log file on the ESXi:
59
60
/var/log/hostd.log
61
62
Whenever an seSparse snapshot is created - the log is being populated
63
with seSparse records.
64
65
Relevant log records are of the form:
66
67
[...] Const Header:
68
[...] constMagic = 0xcafebabe
69
[...] version = 2.1
70
[...] capacity = 204800
71
[...] grainSize = 8
72
[...] grainTableSize = 64
73
[...] flags = 0
74
[...] Extents:
75
[...] Header : <1 : 1>
76
[...] JournalHdr : <2 : 2>
77
[...] Journal : <2048 : 2048>
78
[...] GrainDirectory : <4096 : 2048>
79
[...] GrainTables : <6144 : 2048>
80
[...] FreeBitmap : <8192 : 2048>
81
[...] BackMap : <10240 : 2048>
82
[...] Grain : <12288 : 204800>
83
[...] Volatile Header:
84
[...] volatileMagic = 0xcafecafe
85
[...] FreeGTNumber = 0
86
[...] nextTxnSeqNumber = 0
87
[...] replayJournal = 0
88
89
The sizes that are seen in the log file are in sectors.
90
Extents are of the following format: <offset : size>
91
92
This commit is a strict implementation which enforces:
93
* magics
94
* version number 2.1
95
* grain size of 8 sectors (4KB)
96
* grain table size of 64 sectors
97
* zero flags
98
* extent locations
99
100
Additionally, this commit provides only a subset of the functionality
101
offered by seSparse's format:
102
* Read-only
103
* No journal replay
104
* No space reclamation
105
* No unmap support
106
107
Hence, journal header, journal, free bitmap and backmap extents are
108
unused, only the "classic" (L1 -> L2 -> data) grain access is
109
implemented.
110
111
However there are several differences in the grain access itself.
112
Grain directory (L1):
113
* Grain directory entries are indexes (not offsets) to grain
114
tables.
115
* Valid grain directory entries have their highest nibble set to
116
0x1.
117
* Since grain tables are always located at the beginning of the
118
file - the index can fit into 32 bits - so we can use its low
119
part if it's valid.
120
Grain table (L2):
121
* Grain table entries are indexes (not offsets) to grains.
122
* If the highest nibble of the entry is:
123
0x0:
124
The grain is not allocated.
125
The rest of the bytes are 0.
126
0x1:
127
The grain is unmapped - guest sees a zero grain.
128
The rest of the bits point to the previously mapped grain,
129
see 0x3 case.
130
0x2:
131
The grain is zero.
132
0x3:
133
The grain is allocated - to get the index calculate:
134
((entry & 0x0fff000000000000) >> 48) |
135
((entry & 0x0000ffffffffffff) << 12)
136
* The difference between 0x1 and 0x2 is that 0x1 is an unallocated
137
grain which results from the guest using sg_unmap to unmap the
138
grain - but the grain itself still exists in the grain extent - a
139
space reclamation procedure should delete it.
140
Unmapping a zero grain has no effect (0x2 will not change to 0x1)
141
but unmapping an unallocated grain will (0x0 to 0x1) - naturally.
142
143
In order to implement seSparse some fields had to be changed to support
144
both 32-bit and 64-bit entry sizes.
145
146
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
147
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
148
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
149
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
150
Message-id: 20190620091057.47441-4-shmuel.eiderman@oracle.com
151
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
152
---
12
block/qcow2-refcount.c | 87 +++++++++++++++++++++++++++++-------------
153
block/vmdk.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++---
13
1 file changed, 60 insertions(+), 27 deletions(-)
154
1 file changed, 342 insertions(+), 16 deletions(-)
14
155
15
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
156
diff --git a/block/vmdk.c b/block/vmdk.c
16
index XXXXXXX..XXXXXXX 100644
157
index XXXXXXX..XXXXXXX 100644
17
--- a/block/qcow2-refcount.c
158
--- a/block/vmdk.c
18
+++ b/block/qcow2-refcount.c
159
+++ b/block/vmdk.c
19
@@ -XXX,XX +XXX,XX @@ enum {
160
@@ -XXX,XX +XXX,XX @@ typedef struct {
20
CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
161
uint16_t compressAlgorithm;
21
};
162
} QEMU_PACKED VMDK4Header;
22
163
23
+/*
164
+typedef struct VMDKSESparseConstHeader {
24
+ * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN.
165
+ uint64_t magic;
25
+ *
166
+ uint64_t version;
26
+ * This function decrements res->corruptions on success, so the caller is
167
+ uint64_t capacity;
27
+ * responsible to increment res->corruptions prior to the call.
168
+ uint64_t grain_size;
28
+ *
169
+ uint64_t grain_table_size;
29
+ * On failure in-memory @l2_table may be modified.
170
+ uint64_t flags;
30
+ */
171
+ uint64_t reserved1;
31
+static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
172
+ uint64_t reserved2;
32
+ uint64_t l2_offset,
173
+ uint64_t reserved3;
33
+ uint64_t *l2_table, int l2_index, bool active,
174
+ uint64_t reserved4;
34
+ bool *metadata_overlap)
175
+ uint64_t volatile_header_offset;
176
+ uint64_t volatile_header_size;
177
+ uint64_t journal_header_offset;
178
+ uint64_t journal_header_size;
179
+ uint64_t journal_offset;
180
+ uint64_t journal_size;
181
+ uint64_t grain_dir_offset;
182
+ uint64_t grain_dir_size;
183
+ uint64_t grain_tables_offset;
184
+ uint64_t grain_tables_size;
185
+ uint64_t free_bitmap_offset;
186
+ uint64_t free_bitmap_size;
187
+ uint64_t backmap_offset;
188
+ uint64_t backmap_size;
189
+ uint64_t grains_offset;
190
+ uint64_t grains_size;
191
+ uint8_t pad[304];
192
+} QEMU_PACKED VMDKSESparseConstHeader;
193
+
194
+typedef struct VMDKSESparseVolatileHeader {
195
+ uint64_t magic;
196
+ uint64_t free_gt_number;
197
+ uint64_t next_txn_seq_number;
198
+ uint64_t replay_journal;
199
+ uint8_t pad[480];
200
+} QEMU_PACKED VMDKSESparseVolatileHeader;
201
+
202
#define L2_CACHE_SIZE 16
203
204
typedef struct VmdkExtent {
205
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkExtent {
206
bool compressed;
207
bool has_marker;
208
bool has_zero_grain;
209
+ bool sesparse;
210
+ uint64_t sesparse_l2_tables_offset;
211
+ uint64_t sesparse_clusters_offset;
212
+ int32_t entry_size;
213
int version;
214
int64_t sectors;
215
int64_t end_sector;
216
int64_t flat_start_offset;
217
int64_t l1_table_offset;
218
int64_t l1_backup_table_offset;
219
- uint32_t *l1_table;
220
+ void *l1_table;
221
uint32_t *l1_backup_table;
222
unsigned int l1_size;
223
uint32_t l1_entry_sectors;
224
225
unsigned int l2_size;
226
- uint32_t *l2_cache;
227
+ void *l2_cache;
228
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
229
uint32_t l2_cache_counts[L2_CACHE_SIZE];
230
231
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
232
* minimal L2 table size: 512 entries
233
* 8 TB is still more than the maximal value supported for
234
* VMDK3 & VMDK4 which is 2TB.
235
+ * 64TB - for "ESXi seSparse Extent"
236
+ * minimal cluster size: 512B (default is 4KB)
237
+ * L2 table size: 4096 entries (const).
238
+ * 64TB is more than the maximal value supported for
239
+ * seSparse VMDKs (which is slightly less than 64TB)
240
*/
241
error_setg(errp, "L1 size too big");
242
return -EFBIG;
243
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
244
extent->l2_size = l2_size;
245
extent->cluster_sectors = flat ? sectors : cluster_sectors;
246
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
247
+ extent->entry_size = sizeof(uint32_t);
248
249
if (s->num_extents > 1) {
250
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
251
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
252
int i;
253
254
/* read the L1 table */
255
- l1_size = extent->l1_size * sizeof(uint32_t);
256
+ l1_size = extent->l1_size * extent->entry_size;
257
extent->l1_table = g_try_malloc(l1_size);
258
if (l1_size && extent->l1_table == NULL) {
259
return -ENOMEM;
260
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
261
goto fail_l1;
262
}
263
for (i = 0; i < extent->l1_size; i++) {
264
- le32_to_cpus(&extent->l1_table[i]);
265
+ if (extent->entry_size == sizeof(uint64_t)) {
266
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
267
+ } else {
268
+ assert(extent->entry_size == sizeof(uint32_t));
269
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
270
+ }
271
}
272
273
if (extent->l1_backup_table_offset) {
274
+ assert(!extent->sesparse);
275
extent->l1_backup_table = g_try_malloc(l1_size);
276
if (l1_size && extent->l1_backup_table == NULL) {
277
ret = -ENOMEM;
278
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
279
}
280
281
extent->l2_cache =
282
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
283
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
284
return 0;
285
fail_l1b:
286
g_free(extent->l1_backup_table);
287
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
288
return ret;
289
}
290
291
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
292
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
293
+
294
+/* Strict checks - format not officially documented */
295
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
296
+ Error **errp)
35
+{
297
+{
36
+ BDRVQcow2State *s = bs->opaque;
298
+ header->magic = le64_to_cpu(header->magic);
299
+ header->version = le64_to_cpu(header->version);
300
+ header->grain_size = le64_to_cpu(header->grain_size);
301
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
302
+ header->flags = le64_to_cpu(header->flags);
303
+ header->reserved1 = le64_to_cpu(header->reserved1);
304
+ header->reserved2 = le64_to_cpu(header->reserved2);
305
+ header->reserved3 = le64_to_cpu(header->reserved3);
306
+ header->reserved4 = le64_to_cpu(header->reserved4);
307
+
308
+ header->volatile_header_offset =
309
+ le64_to_cpu(header->volatile_header_offset);
310
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
311
+
312
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
313
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
314
+
315
+ header->journal_offset = le64_to_cpu(header->journal_offset);
316
+ header->journal_size = le64_to_cpu(header->journal_size);
317
+
318
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
319
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
320
+
321
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
322
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
323
+
324
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
325
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
326
+
327
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
328
+ header->backmap_size = le64_to_cpu(header->backmap_size);
329
+
330
+ header->grains_offset = le64_to_cpu(header->grains_offset);
331
+ header->grains_size = le64_to_cpu(header->grains_size);
332
+
333
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
334
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
335
+ header->magic);
336
+ return -EINVAL;
337
+ }
338
+
339
+ if (header->version != 0x0000000200000001) {
340
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
341
+ header->version);
342
+ return -ENOTSUP;
343
+ }
344
+
345
+ if (header->grain_size != 8) {
346
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
347
+ header->grain_size);
348
+ return -ENOTSUP;
349
+ }
350
+
351
+ if (header->grain_table_size != 64) {
352
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
353
+ header->grain_table_size);
354
+ return -ENOTSUP;
355
+ }
356
+
357
+ if (header->flags != 0) {
358
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
359
+ header->flags);
360
+ return -ENOTSUP;
361
+ }
362
+
363
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
364
+ header->reserved3 != 0 || header->reserved4 != 0) {
365
+ error_setg(errp, "Unsupported reserved bits:"
366
+ " 0x%016" PRIx64 " 0x%016" PRIx64
367
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
368
+ header->reserved1, header->reserved2,
369
+ header->reserved3, header->reserved4);
370
+ return -ENOTSUP;
371
+ }
372
+
373
+ /* check that padding is 0 */
374
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
375
+ error_setg(errp, "Unsupported non-zero const header padding");
376
+ return -ENOTSUP;
377
+ }
378
+
379
+ return 0;
380
+}
381
+
382
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
383
+ Error **errp)
384
+{
385
+ header->magic = le64_to_cpu(header->magic);
386
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
387
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
388
+ header->replay_journal = le64_to_cpu(header->replay_journal);
389
+
390
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
391
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
392
+ header->magic);
393
+ return -EINVAL;
394
+ }
395
+
396
+ if (header->replay_journal) {
397
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
398
+ return -ENOTSUP;
399
+ }
400
+
401
+ /* check that padding is 0 */
402
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
403
+ error_setg(errp, "Unsupported non-zero volatile header padding");
404
+ return -ENOTSUP;
405
+ }
406
+
407
+ return 0;
408
+}
409
+
410
+static int vmdk_open_se_sparse(BlockDriverState *bs,
411
+ BdrvChild *file,
412
+ int flags, Error **errp)
413
+{
37
+ int ret;
414
+ int ret;
38
+ int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
415
+ VMDKSESparseConstHeader const_header;
39
+ uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
416
+ VMDKSESparseVolatileHeader volatile_header;
40
+ int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
417
+ VmdkExtent *extent;
41
+ uint64_t l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
418
+
42
+
419
+ ret = bdrv_apply_auto_read_only(bs,
43
+ set_l2_entry(s, l2_table, l2_index, l2_entry);
420
+ "No write support for seSparse images available", errp);
44
+ ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
45
+ false);
46
+ if (metadata_overlap) {
47
+ *metadata_overlap = ret < 0;
48
+ }
49
+ if (ret < 0) {
421
+ if (ret < 0) {
50
+ fprintf(stderr, "ERROR: Overlap check failed\n");
422
+ return ret;
51
+ goto fail;
423
+ }
52
+ }
424
+
53
+
425
+ assert(sizeof(const_header) == SECTOR_SIZE);
54
+ ret = bdrv_pwrite_sync(bs->file, l2e_offset, &l2_table[idx],
426
+
55
+ l2_entry_size(s));
427
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
56
+ if (ret < 0) {
428
+ if (ret < 0) {
57
+ fprintf(stderr, "ERROR: Failed to overwrite L2 "
429
+ bdrv_refresh_filename(file->bs);
58
+ "table entry: %s\n", strerror(-ret));
430
+ error_setg_errno(errp, -ret,
59
+ goto fail;
431
+ "Could not read const header from file '%s'",
60
+ }
432
+ file->bs->filename);
61
+
433
+ return ret;
62
+ res->corruptions--;
434
+ }
63
+ res->corruptions_fixed++;
435
+
64
+ return 0;
436
+ /* check const header */
65
+
437
+ ret = check_se_sparse_const_header(&const_header, errp);
66
+fail:
438
+ if (ret < 0) {
67
+ res->check_errors++;
439
+ return ret;
440
+ }
441
+
442
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
443
+
444
+ ret = bdrv_pread(file,
445
+ const_header.volatile_header_offset * SECTOR_SIZE,
446
+ &volatile_header, sizeof(volatile_header));
447
+ if (ret < 0) {
448
+ bdrv_refresh_filename(file->bs);
449
+ error_setg_errno(errp, -ret,
450
+ "Could not read volatile header from file '%s'",
451
+ file->bs->filename);
452
+ return ret;
453
+ }
454
+
455
+ /* check volatile header */
456
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
457
+ if (ret < 0) {
458
+ return ret;
459
+ }
460
+
461
+ ret = vmdk_add_extent(bs, file, false,
462
+ const_header.capacity,
463
+ const_header.grain_dir_offset * SECTOR_SIZE,
464
+ 0,
465
+ const_header.grain_dir_size *
466
+ SECTOR_SIZE / sizeof(uint64_t),
467
+ const_header.grain_table_size *
468
+ SECTOR_SIZE / sizeof(uint64_t),
469
+ const_header.grain_size,
470
+ &extent,
471
+ errp);
472
+ if (ret < 0) {
473
+ return ret;
474
+ }
475
+
476
+ extent->sesparse = true;
477
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
478
+ extent->sesparse_clusters_offset = const_header.grains_offset;
479
+ extent->entry_size = sizeof(uint64_t);
480
+
481
+ ret = vmdk_init_tables(bs, extent, errp);
482
+ if (ret) {
483
+ /* free extent allocated by vmdk_add_extent */
484
+ vmdk_free_last_extent(bs);
485
+ }
486
+
68
+ return ret;
487
+ return ret;
69
+}
488
+}
70
+
489
+
71
/*
490
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
72
* Increases the refcount in the given refcount table for the all clusters
491
QDict *options, Error **errp);
73
* referenced in the L2 table. While doing so, performs some checks on L2
492
74
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
493
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
75
int i, ret;
494
* RW [size in sectors] SPARSE "file-name.vmdk"
76
size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
495
* RW [size in sectors] VMFS "file-name.vmdk"
77
g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
496
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
78
+ bool metadata_overlap;
497
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
79
498
*/
80
/* Read L2 table from disk */
499
flat_offset = -1;
81
ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes);
500
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
82
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
501
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
83
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
502
84
offset);
503
if (sectors <= 0 ||
85
if (fix & BDRV_FIX_ERRORS) {
504
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
86
- int idx = i * (l2_entry_size(s) / sizeof(uint64_t));
505
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
87
- uint64_t l2e_offset =
506
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
88
- l2_offset + (uint64_t)i * l2_entry_size(s);
507
+ strcmp(type, "SESPARSE")) ||
89
- int ign = active ? QCOW2_OL_ACTIVE_L2 :
508
(strcmp(access, "RW"))) {
90
- QCOW2_OL_INACTIVE_L2;
509
continue;
91
-
510
}
92
- l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
511
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
93
- set_l2_entry(s, l2_table, i, l2_entry);
512
return ret;
94
- ret = qcow2_pre_write_overlap_check(bs, ign,
513
}
95
- l2e_offset, l2_entry_size(s), false);
514
extent = &s->extents[s->num_extents - 1];
96
- if (ret < 0) {
515
+ } else if (!strcmp(type, "SESPARSE")) {
97
- fprintf(stderr, "ERROR: Overlap check failed\n");
516
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
98
- res->check_errors++;
517
+ if (ret) {
99
+ ret = fix_l2_entry_by_zero(bs, res, l2_offset,
518
+ bdrv_unref_child(bs, extent_file);
100
+ l2_table, i, active,
519
+ return ret;
101
+ &metadata_overlap);
520
+ }
102
+ if (metadata_overlap) {
521
+ extent = &s->extents[s->num_extents - 1];
103
/*
522
} else {
104
* Something is seriously wrong, so abort checking
523
error_setg(errp, "Unsupported extent type '%s'", type);
105
* this L2 table.
524
bdrv_unref_child(bs, extent_file);
106
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
525
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
107
return ret;
526
if (strcmp(ct, "monolithicFlat") &&
108
}
527
strcmp(ct, "vmfs") &&
109
528
strcmp(ct, "vmfsSparse") &&
110
- ret = bdrv_pwrite_sync(bs->file, l2e_offset,
529
+ strcmp(ct, "seSparse") &&
111
- &l2_table[idx],
530
strcmp(ct, "twoGbMaxExtentSparse") &&
112
- l2_entry_size(s));
531
strcmp(ct, "twoGbMaxExtentFlat")) {
113
- if (ret < 0) {
532
error_setg(errp, "Unsupported image type '%s'", ct);
114
- fprintf(stderr, "ERROR: Failed to overwrite L2 "
533
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
115
- "table entry: %s\n", strerror(-ret));
534
{
116
- res->check_errors++;
535
unsigned int l1_index, l2_offset, l2_index;
117
- /*
536
int min_index, i, j;
118
- * Do not abort, continue checking the rest of this
537
- uint32_t min_count, *l2_table;
119
- * L2 table's entries.
538
+ uint32_t min_count;
120
- */
539
+ void *l2_table;
121
- } else {
540
bool zeroed = false;
122
- res->corruptions--;
541
int64_t ret;
123
- res->corruptions_fixed++;
542
int64_t cluster_sector;
124
+ if (ret == 0) {
543
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
125
/*
544
126
* Skip marking the cluster as used
545
if (m_data) {
127
* (it is unused now).
546
m_data->valid = 0;
128
*/
547
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
129
continue;
548
if (l1_index >= extent->l1_size) {
130
}
549
return VMDK_ERROR;
131
+
550
}
132
+ /*
551
- l2_offset = extent->l1_table[l1_index];
133
+ * Failed to fix.
552
+ if (extent->sesparse) {
134
+ * Do not abort, continue checking the rest of this
553
+ uint64_t l2_offset_u64;
135
+ * L2 table's entries.
554
+
136
+ */
555
+ assert(extent->entry_size == sizeof(uint64_t));
137
}
556
+
138
} else {
557
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
139
fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
558
+ if (l2_offset_u64 == 0) {
559
+ l2_offset = 0;
560
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
561
+ /*
562
+ * Top most nibble is 0x1 if grain table is allocated.
563
+ * strict check - top most 4 bytes must be 0x10000000 since max
564
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
565
+ * grain directories which is smaller than uint32,
566
+ * where 16MB is the only supported default grain table coverage.
567
+ */
568
+ return VMDK_ERROR;
569
+ } else {
570
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
571
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
572
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
573
+ if (l2_offset_u64 > 0x00000000ffffffff) {
574
+ return VMDK_ERROR;
575
+ }
576
+ l2_offset = (unsigned int)(l2_offset_u64);
577
+ }
578
+ } else {
579
+ assert(extent->entry_size == sizeof(uint32_t));
580
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
581
+ }
582
if (!l2_offset) {
583
return VMDK_UNALLOC;
584
}
585
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
586
extent->l2_cache_counts[j] >>= 1;
587
}
588
}
589
- l2_table = extent->l2_cache + (i * extent->l2_size);
590
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
591
goto found;
592
}
593
}
594
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
595
min_index = i;
596
}
597
}
598
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
599
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
600
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
601
if (bdrv_pread(extent->file,
602
(int64_t)l2_offset * 512,
603
l2_table,
604
- extent->l2_size * sizeof(uint32_t)
605
- ) != extent->l2_size * sizeof(uint32_t)) {
606
+ l2_size_bytes
607
+ ) != l2_size_bytes) {
608
return VMDK_ERROR;
609
}
610
611
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
612
extent->l2_cache_counts[min_index] = 1;
613
found:
614
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
615
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
616
617
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
618
- zeroed = true;
619
+ if (extent->sesparse) {
620
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
621
+ switch (cluster_sector & 0xf000000000000000) {
622
+ case 0x0000000000000000:
623
+ /* unallocated grain */
624
+ if (cluster_sector != 0) {
625
+ return VMDK_ERROR;
626
+ }
627
+ break;
628
+ case 0x1000000000000000:
629
+ /* scsi-unmapped grain - fallthrough */
630
+ case 0x2000000000000000:
631
+ /* zero grain */
632
+ zeroed = true;
633
+ break;
634
+ case 0x3000000000000000:
635
+ /* allocated grain */
636
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
637
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
638
+ cluster_sector = extent->sesparse_clusters_offset +
639
+ cluster_sector * extent->cluster_sectors;
640
+ break;
641
+ default:
642
+ return VMDK_ERROR;
643
+ }
644
+ } else {
645
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
646
+
647
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
648
+ zeroed = true;
649
+ }
650
}
651
652
if (!cluster_sector || zeroed) {
653
if (!allocate) {
654
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
655
}
656
+ assert(!extent->sesparse);
657
658
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
659
return VMDK_ERROR;
660
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
661
m_data->l1_index = l1_index;
662
m_data->l2_index = l2_index;
663
m_data->l2_offset = l2_offset;
664
- m_data->l2_cache_entry = &l2_table[l2_index];
665
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
666
}
667
}
668
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
669
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
670
if (!extent) {
671
return -EIO;
672
}
673
+ if (extent->sesparse) {
674
+ return -ENOTSUP;
675
+ }
676
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
677
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
678
- offset_in_cluster);
140
--
679
--
141
2.31.1
680
2.21.0
142
681
143
682
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Pino Toscano <ptoscano@redhat.com>
2
2
3
Add helper to parse compressed l2_entry and use it everywhere instead
3
Rewrite the implementation of the ssh block driver to use libssh instead
4
of open-coding.
4
of libssh2. The libssh library has various advantages over libssh2:
5
- easier API for authentication (for example for using ssh-agent)
6
- easier API for known_hosts handling
7
- supports newer types of keys in known_hosts
5
8
6
Note that in most places we move to precise coffset/csize instead of
9
Use APIs/features available in libssh 0.8 conditionally, to support
7
sector-aligned. Still, it should work well enough for updating
10
older versions (which are not recommended though).
8
refcounts.
9
11
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Adjust the iotest 207 according to the different error message, and to
11
Reviewed-by: Eric Blake <eblake@redhat.com>
13
find the default key type for localhost (to properly compare the
12
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
14
fingerprint with).
13
Message-Id: <20210914122454.141075-4-vsementsov@virtuozzo.com>
15
Contributed-by: Max Reitz <mreitz@redhat.com>
14
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
16
17
Adjust the various Docker/Travis scripts to use libssh when available
18
instead of libssh2. The mingw/mxe testing is dropped for now, as there
19
are no packages for it.
20
21
Signed-off-by: Pino Toscano <ptoscano@redhat.com>
22
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
Acked-by: Alex Bennée <alex.bennee@linaro.org>
24
Message-id: 20190620200840.17655-1-ptoscano@redhat.com
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Message-id: 5873173.t2JhDm7DL7@lindworm.usersys.redhat.com
27
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
---
28
---
16
block/qcow2.h | 3 ++-
29
configure | 65 +-
17
block/qcow2-cluster.c | 15 +++++++++++++++
30
block/Makefile.objs | 6 +-
18
block/qcow2-refcount.c | 36 +++++++++++++++++-------------------
31
block/ssh.c | 652 ++++++++++--------
19
block/qcow2.c | 9 ++-------
32
.travis.yml | 4 +-
20
4 files changed, 36 insertions(+), 27 deletions(-)
33
block/trace-events | 14 +-
34
docs/qemu-block-drivers.texi | 2 +-
35
.../dockerfiles/debian-win32-cross.docker | 1 -
36
.../dockerfiles/debian-win64-cross.docker | 1 -
37
tests/docker/dockerfiles/fedora.docker | 4 +-
38
tests/docker/dockerfiles/ubuntu.docker | 2 +-
39
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
40
tests/qemu-iotests/207 | 54 +-
41
tests/qemu-iotests/207.out | 2 +-
42
13 files changed, 449 insertions(+), 360 deletions(-)
21
43
22
diff --git a/block/qcow2.h b/block/qcow2.h
44
diff --git a/configure b/configure
45
index XXXXXXX..XXXXXXX 100755
46
--- a/configure
47
+++ b/configure
48
@@ -XXX,XX +XXX,XX @@ auth_pam=""
49
vte=""
50
virglrenderer=""
51
tpm=""
52
-libssh2=""
53
+libssh=""
54
live_block_migration="yes"
55
numa=""
56
tcmalloc="no"
57
@@ -XXX,XX +XXX,XX @@ for opt do
58
;;
59
--enable-tpm) tpm="yes"
60
;;
61
- --disable-libssh2) libssh2="no"
62
+ --disable-libssh) libssh="no"
63
;;
64
- --enable-libssh2) libssh2="yes"
65
+ --enable-libssh) libssh="yes"
66
;;
67
--disable-live-block-migration) live_block_migration="no"
68
;;
69
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
70
coroutine-pool coroutine freelist (better performance)
71
glusterfs GlusterFS backend
72
tpm TPM support
73
- libssh2 ssh block device support
74
+ libssh ssh block device support
75
numa libnuma support
76
libxml2 for Parallels image format
77
tcmalloc tcmalloc support
78
@@ -XXX,XX +XXX,XX @@ EOF
79
fi
80
81
##########################################
82
-# libssh2 probe
83
-min_libssh2_version=1.2.8
84
-if test "$libssh2" != "no" ; then
85
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
86
- libssh2_cflags=$($pkg_config libssh2 --cflags)
87
- libssh2_libs=$($pkg_config libssh2 --libs)
88
- libssh2=yes
89
+# libssh probe
90
+if test "$libssh" != "no" ; then
91
+ if $pkg_config --exists libssh; then
92
+ libssh_cflags=$($pkg_config libssh --cflags)
93
+ libssh_libs=$($pkg_config libssh --libs)
94
+ libssh=yes
95
else
96
- if test "$libssh2" = "yes" ; then
97
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
98
+ if test "$libssh" = "yes" ; then
99
+ error_exit "libssh required for --enable-libssh"
100
fi
101
- libssh2=no
102
+ libssh=no
103
fi
104
fi
105
106
##########################################
107
-# libssh2_sftp_fsync probe
108
+# Check for libssh 0.8
109
+# This is done like this instead of using the LIBSSH_VERSION_* and
110
+# SSH_VERSION_* macros because some distributions in the past shipped
111
+# snapshots of the future 0.8 from Git, and those snapshots did not
112
+# have updated version numbers (still referring to 0.7.0).
113
114
-if test "$libssh2" = "yes"; then
115
+if test "$libssh" = "yes"; then
116
cat > $TMPC <<EOF
117
-#include <stdio.h>
118
-#include <libssh2.h>
119
-#include <libssh2_sftp.h>
120
-int main(void) {
121
- LIBSSH2_SESSION *session;
122
- LIBSSH2_SFTP *sftp;
123
- LIBSSH2_SFTP_HANDLE *sftp_handle;
124
- session = libssh2_session_init ();
125
- sftp = libssh2_sftp_init (session);
126
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
127
- libssh2_sftp_fsync (sftp_handle);
128
- return 0;
129
-}
130
+#include <libssh/libssh.h>
131
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
132
EOF
133
- # libssh2_cflags/libssh2_libs defined in previous test.
134
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
135
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
136
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
137
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
138
fi
139
fi
140
141
@@ -XXX,XX +XXX,XX @@ echo "GlusterFS support $glusterfs"
142
echo "gcov $gcov_tool"
143
echo "gcov enabled $gcov"
144
echo "TPM support $tpm"
145
-echo "libssh2 support $libssh2"
146
+echo "libssh support $libssh"
147
echo "QOM debugging $qom_cast_debug"
148
echo "Live block migration $live_block_migration"
149
echo "lzo support $lzo"
150
@@ -XXX,XX +XXX,XX @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
151
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
152
fi
153
154
-if test "$libssh2" = "yes" ; then
155
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
156
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
157
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
158
+if test "$libssh" = "yes" ; then
159
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
160
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
161
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
162
fi
163
164
if test "$live_block_migration" = "yes" ; then
165
diff --git a/block/Makefile.objs b/block/Makefile.objs
23
index XXXXXXX..XXXXXXX 100644
166
index XXXXXXX..XXXXXXX 100644
24
--- a/block/qcow2.h
167
--- a/block/Makefile.objs
25
+++ b/block/qcow2.h
168
+++ b/block/Makefile.objs
169
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_CURL) += curl.o
170
block-obj-$(CONFIG_RBD) += rbd.o
171
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
172
block-obj-$(CONFIG_VXHS) += vxhs.o
173
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
174
+block-obj-$(CONFIG_LIBSSH) += ssh.o
175
block-obj-y += accounting.o dirty-bitmap.o
176
block-obj-y += write-threshold.o
177
block-obj-y += backup.o
178
@@ -XXX,XX +XXX,XX @@ rbd.o-libs := $(RBD_LIBS)
179
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
180
gluster.o-libs := $(GLUSTERFS_LIBS)
181
vxhs.o-libs := $(VXHS_LIBS)
182
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
183
-ssh.o-libs := $(LIBSSH2_LIBS)
184
+ssh.o-cflags := $(LIBSSH_CFLAGS)
185
+ssh.o-libs := $(LIBSSH_LIBS)
186
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
187
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
188
dmg-bz2.o-libs := $(BZIP2_LIBS)
189
diff --git a/block/ssh.c b/block/ssh.c
190
index XXXXXXX..XXXXXXX 100644
191
--- a/block/ssh.c
192
+++ b/block/ssh.c
26
@@ -XXX,XX +XXX,XX @@
193
@@ -XXX,XX +XXX,XX @@
27
194
28
/* Defined in the qcow2 spec (compressed cluster descriptor) */
195
#include "qemu/osdep.h"
29
#define QCOW2_COMPRESSED_SECTOR_SIZE 512U
196
30
-#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL))
197
-#include <libssh2.h>
31
198
-#include <libssh2_sftp.h>
32
/* Must be at least 2 to cover COW */
199
+#include <libssh/libssh.h>
33
#define MIN_L2_CACHE_SIZE 2 /* cache entries */
200
+#include <libssh/sftp.h>
34
@@ -XXX,XX +XXX,XX @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
201
35
uint64_t offset,
202
#include "block/block_int.h"
36
int compressed_size,
203
#include "block/qdict.h"
37
uint64_t *host_offset);
204
@@ -XXX,XX +XXX,XX @@
38
+void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
205
#include "trace.h"
39
+ uint64_t *coffset, int *csize);
206
40
207
/*
41
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
208
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
42
void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
209
- * that this requires that libssh2 was specially compiled with the
43
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
210
- * `./configure --enable-debug' option, so most likely you will have
44
index XXXXXXX..XXXXXXX 100644
211
- * to compile it yourself. The meaning of <bitmask> is described
45
--- a/block/qcow2-cluster.c
212
- * here: http://www.libssh2.org/libssh2_trace.html
46
+++ b/block/qcow2-cluster.c
213
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
47
@@ -XXX,XX +XXX,XX @@ fail:
214
+ * The meaning of <level> is described here:
48
g_free(l1_table);
215
+ * http://api.libssh.org/master/group__libssh__log.html
216
*/
217
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
218
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
219
220
typedef struct BDRVSSHState {
221
/* Coroutine. */
222
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSSHState {
223
224
/* SSH connection. */
225
int sock; /* socket */
226
- LIBSSH2_SESSION *session; /* ssh session */
227
- LIBSSH2_SFTP *sftp; /* sftp session */
228
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
229
+ ssh_session session; /* ssh session */
230
+ sftp_session sftp; /* sftp session */
231
+ sftp_file sftp_handle; /* sftp remote file handle */
232
233
- /* See ssh_seek() function below. */
234
- int64_t offset;
235
- bool offset_op_read;
236
-
237
- /* File attributes at open. We try to keep the .filesize field
238
+ /*
239
+ * File attributes at open. We try to keep the .size field
240
* updated if it changes (eg by writing at the end of the file).
241
*/
242
- LIBSSH2_SFTP_ATTRIBUTES attrs;
243
+ sftp_attributes attrs;
244
245
InetSocketAddress *inet;
246
247
@@ -XXX,XX +XXX,XX @@ static void ssh_state_init(BDRVSSHState *s)
248
{
249
memset(s, 0, sizeof *s);
250
s->sock = -1;
251
- s->offset = -1;
252
qemu_co_mutex_init(&s->lock);
253
}
254
255
@@ -XXX,XX +XXX,XX @@ static void ssh_state_free(BDRVSSHState *s)
256
{
257
g_free(s->user);
258
259
+ if (s->attrs) {
260
+ sftp_attributes_free(s->attrs);
261
+ }
262
if (s->sftp_handle) {
263
- libssh2_sftp_close(s->sftp_handle);
264
+ sftp_close(s->sftp_handle);
265
}
266
if (s->sftp) {
267
- libssh2_sftp_shutdown(s->sftp);
268
+ sftp_free(s->sftp);
269
}
270
if (s->session) {
271
- libssh2_session_disconnect(s->session,
272
- "from qemu ssh client: "
273
- "user closed the connection");
274
- libssh2_session_free(s->session);
275
- }
276
- if (s->sock >= 0) {
277
- close(s->sock);
278
+ ssh_disconnect(s->session);
279
+ ssh_free(s->session); /* This frees s->sock */
280
}
281
}
282
283
@@ -XXX,XX +XXX,XX @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
284
va_end(args);
285
286
if (s->session) {
287
- char *ssh_err;
288
+ const char *ssh_err;
289
int ssh_err_code;
290
291
- /* This is not an errno. See <libssh2.h>. */
292
- ssh_err_code = libssh2_session_last_error(s->session,
293
- &ssh_err, NULL, 0);
294
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
295
+ /* This is not an errno. See <libssh/libssh.h>. */
296
+ ssh_err = ssh_get_error(s->session);
297
+ ssh_err_code = ssh_get_error_code(s->session);
298
+ error_setg(errp, "%s: %s (libssh error code: %d)",
299
msg, ssh_err, ssh_err_code);
300
} else {
301
error_setg(errp, "%s", msg);
302
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
303
va_end(args);
304
305
if (s->sftp) {
306
- char *ssh_err;
307
+ const char *ssh_err;
308
int ssh_err_code;
309
- unsigned long sftp_err_code;
310
+ int sftp_err_code;
311
312
- /* This is not an errno. See <libssh2.h>. */
313
- ssh_err_code = libssh2_session_last_error(s->session,
314
- &ssh_err, NULL, 0);
315
- /* See <libssh2_sftp.h>. */
316
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
317
+ /* This is not an errno. See <libssh/libssh.h>. */
318
+ ssh_err = ssh_get_error(s->session);
319
+ ssh_err_code = ssh_get_error_code(s->session);
320
+ /* See <libssh/sftp.h>. */
321
+ sftp_err_code = sftp_get_error(s->sftp);
322
323
error_setg(errp,
324
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
325
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
326
msg, ssh_err, ssh_err_code, sftp_err_code);
327
} else {
328
error_setg(errp, "%s", msg);
329
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
330
331
static void sftp_error_trace(BDRVSSHState *s, const char *op)
332
{
333
- char *ssh_err;
334
+ const char *ssh_err;
335
int ssh_err_code;
336
- unsigned long sftp_err_code;
337
+ int sftp_err_code;
338
339
- /* This is not an errno. See <libssh2.h>. */
340
- ssh_err_code = libssh2_session_last_error(s->session,
341
- &ssh_err, NULL, 0);
342
- /* See <libssh2_sftp.h>. */
343
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
344
+ /* This is not an errno. See <libssh/libssh.h>. */
345
+ ssh_err = ssh_get_error(s->session);
346
+ ssh_err_code = ssh_get_error_code(s->session);
347
+ /* See <libssh/sftp.h>. */
348
+ sftp_err_code = sftp_get_error(s->sftp);
349
350
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
351
}
352
@@ -XXX,XX +XXX,XX @@ static void ssh_parse_filename(const char *filename, QDict *options,
353
parse_uri(filename, options, errp);
354
}
355
356
-static int check_host_key_knownhosts(BDRVSSHState *s,
357
- const char *host, int port, Error **errp)
358
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
359
{
360
- const char *home;
361
- char *knh_file = NULL;
362
- LIBSSH2_KNOWNHOSTS *knh = NULL;
363
- struct libssh2_knownhost *found;
364
- int ret, r;
365
- const char *hostkey;
366
- size_t len;
367
- int type;
368
-
369
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
370
- if (!hostkey) {
371
+ int ret;
372
+#ifdef HAVE_LIBSSH_0_8
373
+ enum ssh_known_hosts_e state;
374
+ int r;
375
+ ssh_key pubkey;
376
+ enum ssh_keytypes_e pubkey_type;
377
+ unsigned char *server_hash = NULL;
378
+ size_t server_hash_len;
379
+ char *fingerprint = NULL;
380
+
381
+ state = ssh_session_is_known_server(s->session);
382
+ trace_ssh_server_status(state);
383
+
384
+ switch (state) {
385
+ case SSH_KNOWN_HOSTS_OK:
386
+ /* OK */
387
+ trace_ssh_check_host_key_knownhosts();
388
+ break;
389
+ case SSH_KNOWN_HOSTS_CHANGED:
390
ret = -EINVAL;
391
- session_error_setg(errp, s, "failed to read remote host key");
392
+ r = ssh_get_server_publickey(s->session, &pubkey);
393
+ if (r == 0) {
394
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
395
+ &server_hash, &server_hash_len);
396
+ pubkey_type = ssh_key_type(pubkey);
397
+ ssh_key_free(pubkey);
398
+ }
399
+ if (r == 0) {
400
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
401
+ server_hash,
402
+ server_hash_len);
403
+ ssh_clean_pubkey_hash(&server_hash);
404
+ }
405
+ if (fingerprint) {
406
+ error_setg(errp,
407
+ "host key (%s key with fingerprint %s) does not match "
408
+ "the one in known_hosts; this may be a possible attack",
409
+ ssh_key_type_to_char(pubkey_type), fingerprint);
410
+ ssh_string_free_char(fingerprint);
411
+ } else {
412
+ error_setg(errp,
413
+ "host key does not match the one in known_hosts; this "
414
+ "may be a possible attack");
415
+ }
416
goto out;
417
- }
418
-
419
- knh = libssh2_knownhost_init(s->session);
420
- if (!knh) {
421
+ case SSH_KNOWN_HOSTS_OTHER:
422
ret = -EINVAL;
423
- session_error_setg(errp, s,
424
- "failed to initialize known hosts support");
425
+ error_setg(errp,
426
+ "host key for this server not found, another type exists");
427
+ goto out;
428
+ case SSH_KNOWN_HOSTS_UNKNOWN:
429
+ ret = -EINVAL;
430
+ error_setg(errp, "no host key was found in known_hosts");
431
+ goto out;
432
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
433
+ ret = -ENOENT;
434
+ error_setg(errp, "known_hosts file not found");
435
+ goto out;
436
+ case SSH_KNOWN_HOSTS_ERROR:
437
+ ret = -EINVAL;
438
+ error_setg(errp, "error while checking the host");
439
+ goto out;
440
+ default:
441
+ ret = -EINVAL;
442
+ error_setg(errp, "error while checking for known server (%d)", state);
443
goto out;
444
}
445
+#else /* !HAVE_LIBSSH_0_8 */
446
+ int state;
447
448
- home = getenv("HOME");
449
- if (home) {
450
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
451
- } else {
452
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
453
- }
454
-
455
- /* Read all known hosts from OpenSSH-style known_hosts file. */
456
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
457
+ state = ssh_is_server_known(s->session);
458
+ trace_ssh_server_status(state);
459
460
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
461
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
462
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
463
- &found);
464
- switch (r) {
465
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
466
+ switch (state) {
467
+ case SSH_SERVER_KNOWN_OK:
468
/* OK */
469
- trace_ssh_check_host_key_knownhosts(found->key);
470
+ trace_ssh_check_host_key_knownhosts();
471
break;
472
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
473
+ case SSH_SERVER_KNOWN_CHANGED:
474
ret = -EINVAL;
475
- session_error_setg(errp, s,
476
- "host key does not match the one in known_hosts"
477
- " (found key %s)", found->key);
478
+ error_setg(errp,
479
+ "host key does not match the one in known_hosts; this "
480
+ "may be a possible attack");
481
goto out;
482
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
483
+ case SSH_SERVER_FOUND_OTHER:
484
ret = -EINVAL;
485
- session_error_setg(errp, s, "no host key was found in known_hosts");
486
+ error_setg(errp,
487
+ "host key for this server not found, another type exists");
488
+ goto out;
489
+ case SSH_SERVER_FILE_NOT_FOUND:
490
+ ret = -ENOENT;
491
+ error_setg(errp, "known_hosts file not found");
492
goto out;
493
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
494
+ case SSH_SERVER_NOT_KNOWN:
495
ret = -EINVAL;
496
- session_error_setg(errp, s,
497
- "failure matching the host key with known_hosts");
498
+ error_setg(errp, "no host key was found in known_hosts");
499
+ goto out;
500
+ case SSH_SERVER_ERROR:
501
+ ret = -EINVAL;
502
+ error_setg(errp, "server error");
503
goto out;
504
default:
505
ret = -EINVAL;
506
- session_error_setg(errp, s, "unknown error matching the host key"
507
- " with known_hosts (%d)", r);
508
+ error_setg(errp, "error while checking for known server (%d)", state);
509
goto out;
510
}
511
+#endif /* !HAVE_LIBSSH_0_8 */
512
513
/* known_hosts checking successful. */
514
ret = 0;
515
516
out:
517
- if (knh != NULL) {
518
- libssh2_knownhost_free(knh);
519
- }
520
- g_free(knh_file);
49
return ret;
521
return ret;
50
}
522
}
51
+
523
52
+void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
524
@@ -XXX,XX +XXX,XX @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
53
+ uint64_t *coffset, int *csize)
525
54
+{
526
static int
55
+ BDRVQcow2State *s = bs->opaque;
527
check_host_key_hash(BDRVSSHState *s, const char *hash,
56
+ int nb_csectors;
528
- int hash_type, size_t fingerprint_len, Error **errp)
57
+
529
+ enum ssh_publickey_hash_type type, Error **errp)
58
+ assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
530
{
59
+
531
- const char *fingerprint;
60
+ *coffset = l2_entry & s->cluster_offset_mask;
532
-
61
+
533
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
62
+ nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
534
- if (!fingerprint) {
63
+ *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
535
+ int r;
64
+ (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1));
536
+ ssh_key pubkey;
65
+}
537
+ unsigned char *server_hash;
66
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
538
+ size_t server_hash_len;
539
+
540
+#ifdef HAVE_LIBSSH_0_8
541
+ r = ssh_get_server_publickey(s->session, &pubkey);
542
+#else
543
+ r = ssh_get_publickey(s->session, &pubkey);
544
+#endif
545
+ if (r != SSH_OK) {
546
session_error_setg(errp, s, "failed to read remote host key");
547
return -EINVAL;
548
}
549
550
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
551
- hash) != 0) {
552
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
553
+ ssh_key_free(pubkey);
554
+ if (r != 0) {
555
+ session_error_setg(errp, s,
556
+ "failed reading the hash of the server SSH key");
557
+ return -EINVAL;
558
+ }
559
+
560
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
561
+ ssh_clean_pubkey_hash(&server_hash);
562
+ if (r != 0) {
563
error_setg(errp, "remote host key does not match host_key_check '%s'",
564
hash);
565
return -EPERM;
566
@@ -XXX,XX +XXX,XX @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
567
return 0;
568
}
569
570
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
571
- SshHostKeyCheck *hkc, Error **errp)
572
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
573
{
574
SshHostKeyCheckMode mode;
575
576
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
577
case SSH_HOST_KEY_CHECK_MODE_HASH:
578
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
579
return check_host_key_hash(s, hkc->u.hash.hash,
580
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
581
+ SSH_PUBLICKEY_HASH_MD5, errp);
582
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
583
return check_host_key_hash(s, hkc->u.hash.hash,
584
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
585
+ SSH_PUBLICKEY_HASH_SHA1, errp);
586
}
587
g_assert_not_reached();
588
break;
589
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
590
- return check_host_key_knownhosts(s, host, port, errp);
591
+ return check_host_key_knownhosts(s, errp);
592
default:
593
g_assert_not_reached();
594
}
595
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
596
return -EINVAL;
597
}
598
599
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
600
+static int authenticate(BDRVSSHState *s, Error **errp)
601
{
602
int r, ret;
603
- const char *userauthlist;
604
- LIBSSH2_AGENT *agent = NULL;
605
- struct libssh2_agent_publickey *identity;
606
- struct libssh2_agent_publickey *prev_identity = NULL;
607
+ int method;
608
609
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
610
- if (strstr(userauthlist, "publickey") == NULL) {
611
+ /* Try to authenticate with the "none" method. */
612
+ r = ssh_userauth_none(s->session, NULL);
613
+ if (r == SSH_AUTH_ERROR) {
614
ret = -EPERM;
615
- error_setg(errp,
616
- "remote server does not support \"publickey\" authentication");
617
+ session_error_setg(errp, s, "failed to authenticate using none "
618
+ "authentication");
619
goto out;
620
- }
621
-
622
- /* Connect to ssh-agent and try each identity in turn. */
623
- agent = libssh2_agent_init(s->session);
624
- if (!agent) {
625
- ret = -EINVAL;
626
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
627
- goto out;
628
- }
629
- if (libssh2_agent_connect(agent)) {
630
- ret = -ECONNREFUSED;
631
- session_error_setg(errp, s, "failed to connect to ssh-agent");
632
- goto out;
633
- }
634
- if (libssh2_agent_list_identities(agent)) {
635
- ret = -EINVAL;
636
- session_error_setg(errp, s,
637
- "failed requesting identities from ssh-agent");
638
+ } else if (r == SSH_AUTH_SUCCESS) {
639
+ /* Authenticated! */
640
+ ret = 0;
641
goto out;
642
}
643
644
- for(;;) {
645
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
646
- if (r == 1) { /* end of list */
647
- break;
648
- }
649
- if (r < 0) {
650
+ method = ssh_userauth_list(s->session, NULL);
651
+ trace_ssh_auth_methods(method);
652
+
653
+ /*
654
+ * Try to authenticate with publickey, using the ssh-agent
655
+ * if available.
656
+ */
657
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
658
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
659
+ if (r == SSH_AUTH_ERROR) {
660
ret = -EINVAL;
661
- session_error_setg(errp, s,
662
- "failed to obtain identity from ssh-agent");
663
+ session_error_setg(errp, s, "failed to authenticate using "
664
+ "publickey authentication");
665
goto out;
666
- }
667
- r = libssh2_agent_userauth(agent, user, identity);
668
- if (r == 0) {
669
+ } else if (r == SSH_AUTH_SUCCESS) {
670
/* Authenticated! */
671
ret = 0;
672
goto out;
673
}
674
- /* Failed to authenticate with this identity, try the next one. */
675
- prev_identity = identity;
676
}
677
678
ret = -EPERM;
679
@@ -XXX,XX +XXX,XX @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
680
"and the identities held by your ssh-agent");
681
682
out:
683
- if (agent != NULL) {
684
- /* Note: libssh2 implementation implicitly calls
685
- * libssh2_agent_disconnect if necessary.
686
- */
687
- libssh2_agent_free(agent);
688
- }
689
-
690
return ret;
691
}
692
693
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
694
int ssh_flags, int creat_mode, Error **errp)
695
{
696
int r, ret;
697
- long port = 0;
698
+ unsigned int port = 0;
699
+ int new_sock = -1;
700
701
if (opts->has_user) {
702
s->user = g_strdup(opts->user);
703
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
704
s->inet = opts->server;
705
opts->server = NULL;
706
707
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
708
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
709
error_setg(errp, "Use only numeric port value");
710
ret = -EINVAL;
711
goto err;
712
}
713
714
/* Open the socket and connect. */
715
- s->sock = inet_connect_saddr(s->inet, errp);
716
- if (s->sock < 0) {
717
+ new_sock = inet_connect_saddr(s->inet, errp);
718
+ if (new_sock < 0) {
719
ret = -EIO;
720
goto err;
721
}
722
723
+ /*
724
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
725
+ * but do not fail if it cannot be disabled.
726
+ */
727
+ r = socket_set_nodelay(new_sock);
728
+ if (r < 0) {
729
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
730
+ s->inet->host, strerror(errno));
731
+ }
732
+
733
/* Create SSH session. */
734
- s->session = libssh2_session_init();
735
+ s->session = ssh_new();
736
if (!s->session) {
737
ret = -EINVAL;
738
- session_error_setg(errp, s, "failed to initialize libssh2 session");
739
+ session_error_setg(errp, s, "failed to initialize libssh session");
740
goto err;
741
}
742
743
-#if TRACE_LIBSSH2 != 0
744
- libssh2_trace(s->session, TRACE_LIBSSH2);
745
-#endif
746
+ /*
747
+ * Make sure we are in blocking mode during the connection and
748
+ * authentication phases.
749
+ */
750
+ ssh_set_blocking(s->session, 1);
751
752
- r = libssh2_session_handshake(s->session, s->sock);
753
- if (r != 0) {
754
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
755
+ if (r < 0) {
756
+ ret = -EINVAL;
757
+ session_error_setg(errp, s,
758
+ "failed to set the user in the libssh session");
759
+ goto err;
760
+ }
761
+
762
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
763
+ if (r < 0) {
764
+ ret = -EINVAL;
765
+ session_error_setg(errp, s,
766
+ "failed to set the host in the libssh session");
767
+ goto err;
768
+ }
769
+
770
+ if (port > 0) {
771
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
772
+ if (r < 0) {
773
+ ret = -EINVAL;
774
+ session_error_setg(errp, s,
775
+ "failed to set the port in the libssh session");
776
+ goto err;
777
+ }
778
+ }
779
+
780
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
781
+ if (r < 0) {
782
+ ret = -EINVAL;
783
+ session_error_setg(errp, s,
784
+ "failed to disable the compression in the libssh "
785
+ "session");
786
+ goto err;
787
+ }
788
+
789
+ /* Read ~/.ssh/config. */
790
+ r = ssh_options_parse_config(s->session, NULL);
791
+ if (r < 0) {
792
+ ret = -EINVAL;
793
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
794
+ goto err;
795
+ }
796
+
797
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
798
+ if (r < 0) {
799
+ ret = -EINVAL;
800
+ session_error_setg(errp, s,
801
+ "failed to set the socket in the libssh session");
802
+ goto err;
803
+ }
804
+ /* libssh took ownership of the socket. */
805
+ s->sock = new_sock;
806
+ new_sock = -1;
807
+
808
+ /* Connect. */
809
+ r = ssh_connect(s->session);
810
+ if (r != SSH_OK) {
811
ret = -EINVAL;
812
session_error_setg(errp, s, "failed to establish SSH session");
813
goto err;
814
}
815
816
/* Check the remote host's key against known_hosts. */
817
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
818
+ ret = check_host_key(s, opts->host_key_check, errp);
819
if (ret < 0) {
820
goto err;
821
}
822
823
/* Authenticate. */
824
- ret = authenticate(s, s->user, errp);
825
+ ret = authenticate(s, errp);
826
if (ret < 0) {
827
goto err;
828
}
829
830
/* Start SFTP. */
831
- s->sftp = libssh2_sftp_init(s->session);
832
+ s->sftp = sftp_new(s->session);
833
if (!s->sftp) {
834
- session_error_setg(errp, s, "failed to initialize sftp handle");
835
+ session_error_setg(errp, s, "failed to create sftp handle");
836
+ ret = -EINVAL;
837
+ goto err;
838
+ }
839
+
840
+ r = sftp_init(s->sftp);
841
+ if (r < 0) {
842
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
843
ret = -EINVAL;
844
goto err;
845
}
846
847
/* Open the remote file. */
848
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
849
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
850
- creat_mode);
851
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
852
if (!s->sftp_handle) {
853
- session_error_setg(errp, s, "failed to open remote file '%s'",
854
- opts->path);
855
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
856
+ opts->path);
857
ret = -EINVAL;
858
goto err;
859
}
860
861
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
862
- if (r < 0) {
863
+ /* Make sure the SFTP file is handled in blocking mode. */
864
+ sftp_file_set_blocking(s->sftp_handle);
865
+
866
+ s->attrs = sftp_fstat(s->sftp_handle);
867
+ if (!s->attrs) {
868
sftp_error_setg(errp, s, "failed to read file attributes");
869
return -EINVAL;
870
}
871
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
872
return 0;
873
874
err:
875
+ if (s->attrs) {
876
+ sftp_attributes_free(s->attrs);
877
+ }
878
+ s->attrs = NULL;
879
if (s->sftp_handle) {
880
- libssh2_sftp_close(s->sftp_handle);
881
+ sftp_close(s->sftp_handle);
882
}
883
s->sftp_handle = NULL;
884
if (s->sftp) {
885
- libssh2_sftp_shutdown(s->sftp);
886
+ sftp_free(s->sftp);
887
}
888
s->sftp = NULL;
889
if (s->session) {
890
- libssh2_session_disconnect(s->session,
891
- "from qemu ssh client: "
892
- "error opening connection");
893
- libssh2_session_free(s->session);
894
+ ssh_disconnect(s->session);
895
+ ssh_free(s->session);
896
}
897
s->session = NULL;
898
+ s->sock = -1;
899
+ if (new_sock >= 0) {
900
+ close(new_sock);
901
+ }
902
903
return ret;
904
}
905
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
906
907
ssh_state_init(s);
908
909
- ssh_flags = LIBSSH2_FXF_READ;
910
+ ssh_flags = 0;
911
if (bdrv_flags & BDRV_O_RDWR) {
912
- ssh_flags |= LIBSSH2_FXF_WRITE;
913
+ ssh_flags |= O_RDWR;
914
+ } else {
915
+ ssh_flags |= O_RDONLY;
916
}
917
918
opts = ssh_parse_options(options, errp);
919
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
920
}
921
922
/* Go non-blocking. */
923
- libssh2_session_set_blocking(s->session, 0);
924
+ ssh_set_blocking(s->session, 0);
925
926
qapi_free_BlockdevOptionsSsh(opts);
927
928
return 0;
929
930
err:
931
- if (s->sock >= 0) {
932
- close(s->sock);
933
- }
934
- s->sock = -1;
935
-
936
qapi_free_BlockdevOptionsSsh(opts);
937
938
return ret;
939
@@ -XXX,XX +XXX,XX @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
940
{
941
ssize_t ret;
942
char c[1] = { '\0' };
943
- int was_blocking = libssh2_session_get_blocking(s->session);
944
+ int was_blocking = ssh_is_blocking(s->session);
945
946
/* offset must be strictly greater than the current size so we do
947
* not overwrite anything */
948
- assert(offset > 0 && offset > s->attrs.filesize);
949
+ assert(offset > 0 && offset > s->attrs->size);
950
951
- libssh2_session_set_blocking(s->session, 1);
952
+ ssh_set_blocking(s->session, 1);
953
954
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
955
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
956
+ sftp_seek64(s->sftp_handle, offset - 1);
957
+ ret = sftp_write(s->sftp_handle, c, 1);
958
959
- libssh2_session_set_blocking(s->session, was_blocking);
960
+ ssh_set_blocking(s->session, was_blocking);
961
962
if (ret < 0) {
963
sftp_error_setg(errp, s, "Failed to grow file");
964
return -EIO;
965
}
966
967
- s->attrs.filesize = offset;
968
+ s->attrs->size = offset;
969
return 0;
970
}
971
972
@@ -XXX,XX +XXX,XX @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
973
ssh_state_init(&s);
974
975
ret = connect_to_ssh(&s, opts->location,
976
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
977
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
978
+ O_RDWR | O_CREAT | O_TRUNC,
979
0644, errp);
980
if (ret < 0) {
981
goto fail;
982
@@ -XXX,XX +XXX,XX @@ static int ssh_has_zero_init(BlockDriverState *bs)
983
/* Assume false, unless we can positively prove it's true. */
984
int has_zero_init = 0;
985
986
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
987
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
988
- has_zero_init = 1;
989
- }
990
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
991
+ has_zero_init = 1;
992
}
993
994
return has_zero_init;
995
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
996
.co = qemu_coroutine_self()
997
};
998
999
- r = libssh2_session_block_directions(s->session);
1000
+ r = ssh_get_poll_flags(s->session);
1001
1002
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
1003
+ if (r & SSH_READ_PENDING) {
1004
rd_handler = restart_coroutine;
1005
}
1006
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
1007
+ if (r & SSH_WRITE_PENDING) {
1008
wr_handler = restart_coroutine;
1009
}
1010
1011
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1012
trace_ssh_co_yield_back(s->sock);
1013
}
1014
1015
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
1016
- * in the remote file. Notice that it just updates a field in the
1017
- * sftp_handle structure, so there is no network traffic and it cannot
1018
- * fail.
1019
- *
1020
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
1021
- * performance since it causes the handle to throw away all in-flight
1022
- * reads and buffered readahead data. Therefore this function tries
1023
- * to be intelligent about when to call the underlying libssh2 function.
1024
- */
1025
-#define SSH_SEEK_WRITE 0
1026
-#define SSH_SEEK_READ 1
1027
-#define SSH_SEEK_FORCE 2
1028
-
1029
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
1030
-{
1031
- bool op_read = (flags & SSH_SEEK_READ) != 0;
1032
- bool force = (flags & SSH_SEEK_FORCE) != 0;
1033
-
1034
- if (force || op_read != s->offset_op_read || offset != s->offset) {
1035
- trace_ssh_seek(offset);
1036
- libssh2_sftp_seek64(s->sftp_handle, offset);
1037
- s->offset = offset;
1038
- s->offset_op_read = op_read;
1039
- }
1040
-}
1041
-
1042
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1043
int64_t offset, size_t size,
1044
QEMUIOVector *qiov)
1045
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1046
1047
trace_ssh_read(offset, size);
1048
1049
- ssh_seek(s, offset, SSH_SEEK_READ);
1050
+ trace_ssh_seek(offset);
1051
+ sftp_seek64(s->sftp_handle, offset);
1052
1053
/* This keeps track of the current iovec element ('i'), where we
1054
* will write to next ('buf'), and the end of the current iovec
1055
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1056
buf = i->iov_base;
1057
end_of_vec = i->iov_base + i->iov_len;
1058
1059
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
1060
- * although it will also do readahead behind our backs. Therefore
1061
- * we may have to do repeated reads here until we have read 'size'
1062
- * bytes.
1063
- */
1064
for (got = 0; got < size; ) {
1065
+ size_t request_read_size;
1066
again:
1067
- trace_ssh_read_buf(buf, end_of_vec - buf);
1068
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
1069
- trace_ssh_read_return(r);
1070
+ /*
1071
+ * The size of SFTP packets is limited to 32K bytes, so limit
1072
+ * the amount of data requested to 16K, as libssh currently
1073
+ * does not handle multiple requests on its own.
1074
+ */
1075
+ request_read_size = MIN(end_of_vec - buf, 16384);
1076
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
1077
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
1078
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
1079
1080
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1081
+ if (r == SSH_AGAIN) {
1082
co_yield(s, bs);
1083
goto again;
1084
}
1085
- if (r < 0) {
1086
- sftp_error_trace(s, "read");
1087
- s->offset = -1;
1088
- return -EIO;
1089
- }
1090
- if (r == 0) {
1091
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
1092
/* EOF: Short read so pad the buffer with zeroes and return it. */
1093
qemu_iovec_memset(qiov, got, 0, size - got);
1094
return 0;
1095
}
1096
+ if (r <= 0) {
1097
+ sftp_error_trace(s, "read");
1098
+ return -EIO;
1099
+ }
1100
1101
got += r;
1102
buf += r;
1103
- s->offset += r;
1104
if (buf >= end_of_vec && got < size) {
1105
i++;
1106
buf = i->iov_base;
1107
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1108
1109
trace_ssh_write(offset, size);
1110
1111
- ssh_seek(s, offset, SSH_SEEK_WRITE);
1112
+ trace_ssh_seek(offset);
1113
+ sftp_seek64(s->sftp_handle, offset);
1114
1115
/* This keeps track of the current iovec element ('i'), where we
1116
* will read from next ('buf'), and the end of the current iovec
1117
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1118
end_of_vec = i->iov_base + i->iov_len;
1119
1120
for (written = 0; written < size; ) {
1121
+ size_t request_write_size;
1122
again:
1123
- trace_ssh_write_buf(buf, end_of_vec - buf);
1124
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
1125
- trace_ssh_write_return(r);
1126
+ /*
1127
+ * Avoid too large data packets, as libssh currently does not
1128
+ * handle multiple requests on its own.
1129
+ */
1130
+ request_write_size = MIN(end_of_vec - buf, 131072);
1131
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
1132
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
1133
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
1134
1135
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1136
+ if (r == SSH_AGAIN) {
1137
co_yield(s, bs);
1138
goto again;
1139
}
1140
if (r < 0) {
1141
sftp_error_trace(s, "write");
1142
- s->offset = -1;
1143
return -EIO;
1144
}
1145
- /* The libssh2 API is very unclear about this. A comment in
1146
- * the code says "nothing was acked, and no EAGAIN was
1147
- * received!" which apparently means that no data got sent
1148
- * out, and the underlying channel didn't return any EAGAIN
1149
- * indication. I think this is a bug in either libssh2 or
1150
- * OpenSSH (server-side). In any case, forcing a seek (to
1151
- * discard libssh2 internal buffers), and then trying again
1152
- * works for me.
1153
- */
1154
- if (r == 0) {
1155
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
1156
- co_yield(s, bs);
1157
- goto again;
1158
- }
1159
1160
written += r;
1161
buf += r;
1162
- s->offset += r;
1163
if (buf >= end_of_vec && written < size) {
1164
i++;
1165
buf = i->iov_base;
1166
end_of_vec = i->iov_base + i->iov_len;
1167
}
1168
1169
- if (offset + written > s->attrs.filesize)
1170
- s->attrs.filesize = offset + written;
1171
+ if (offset + written > s->attrs->size) {
1172
+ s->attrs->size = offset + written;
1173
+ }
1174
}
1175
1176
return 0;
1177
@@ -XXX,XX +XXX,XX @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
1178
}
1179
}
1180
1181
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
1182
+#ifdef HAVE_LIBSSH_0_8
1183
1184
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
1185
{
1186
int r;
1187
1188
trace_ssh_flush();
1189
+
1190
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
1191
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
1192
+ return 0;
1193
+ }
1194
again:
1195
- r = libssh2_sftp_fsync(s->sftp_handle);
1196
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1197
+ r = sftp_fsync(s->sftp_handle);
1198
+ if (r == SSH_AGAIN) {
1199
co_yield(s, bs);
1200
goto again;
1201
}
1202
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
1203
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
1204
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
1205
- return 0;
1206
- }
1207
if (r < 0) {
1208
sftp_error_trace(s, "fsync");
1209
return -EIO;
1210
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1211
return ret;
1212
}
1213
1214
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
1215
+#else /* !HAVE_LIBSSH_0_8 */
1216
1217
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1218
{
1219
BDRVSSHState *s = bs->opaque;
1220
1221
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
1222
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
1223
return 0;
1224
}
1225
1226
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
1227
+#endif /* !HAVE_LIBSSH_0_8 */
1228
1229
static int64_t ssh_getlength(BlockDriverState *bs)
1230
{
1231
BDRVSSHState *s = bs->opaque;
1232
int64_t length;
1233
1234
- /* Note we cannot make a libssh2 call here. */
1235
- length = (int64_t) s->attrs.filesize;
1236
+ /* Note we cannot make a libssh call here. */
1237
+ length = (int64_t) s->attrs->size;
1238
trace_ssh_getlength(length);
1239
1240
return length;
1241
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
1242
return -ENOTSUP;
1243
}
1244
1245
- if (offset < s->attrs.filesize) {
1246
+ if (offset < s->attrs->size) {
1247
error_setg(errp, "ssh driver does not support shrinking files");
1248
return -ENOTSUP;
1249
}
1250
1251
- if (offset == s->attrs.filesize) {
1252
+ if (offset == s->attrs->size) {
1253
return 0;
1254
}
1255
1256
@@ -XXX,XX +XXX,XX @@ static void bdrv_ssh_init(void)
1257
{
1258
int r;
1259
1260
- r = libssh2_init(0);
1261
+ r = ssh_init();
1262
if (r != 0) {
1263
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
1264
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
1265
exit(EXIT_FAILURE);
1266
}
1267
1268
+#if TRACE_LIBSSH != 0
1269
+ ssh_set_log_level(TRACE_LIBSSH);
1270
+#endif
1271
+
1272
bdrv_register(&bdrv_ssh);
1273
}
1274
1275
diff --git a/.travis.yml b/.travis.yml
67
index XXXXXXX..XXXXXXX 100644
1276
index XXXXXXX..XXXXXXX 100644
68
--- a/block/qcow2-refcount.c
1277
--- a/.travis.yml
69
+++ b/block/qcow2-refcount.c
1278
+++ b/.travis.yml
70
@@ -XXX,XX +XXX,XX @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
1279
@@ -XXX,XX +XXX,XX @@ addons:
71
switch (ctype) {
1280
- libseccomp-dev
72
case QCOW2_CLUSTER_COMPRESSED:
1281
- libspice-protocol-dev
73
{
1282
- libspice-server-dev
74
- int64_t offset = (l2_entry & s->cluster_offset_mask)
1283
- - libssh2-1-dev
75
- & QCOW2_COMPRESSED_SECTOR_MASK;
1284
+ - libssh-dev
76
- int size = QCOW2_COMPRESSED_SECTOR_SIZE *
1285
- liburcu-dev
77
- (((l2_entry >> s->csize_shift) & s->csize_mask) + 1);
1286
- libusb-1.0-0-dev
78
- qcow2_free_clusters(bs, offset, size, type);
1287
- libvte-2.91-dev
79
+ uint64_t coffset;
1288
@@ -XXX,XX +XXX,XX @@ matrix:
80
+ int csize;
1289
- libseccomp-dev
81
+
1290
- libspice-protocol-dev
82
+ qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
1291
- libspice-server-dev
83
+ qcow2_free_clusters(bs, coffset, csize, type);
1292
- - libssh2-1-dev
84
}
1293
+ - libssh-dev
85
break;
1294
- liburcu-dev
86
case QCOW2_CLUSTER_NORMAL:
1295
- libusb-1.0-0-dev
87
@@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
1296
- libvte-2.91-dev
88
bool l1_allocated = false;
1297
diff --git a/block/trace-events b/block/trace-events
89
int64_t old_entry, old_l2_offset;
90
unsigned slice, slice_size2, n_slices;
91
- int i, j, l1_modified = 0, nb_csectors;
92
+ int i, j, l1_modified = 0;
93
int ret;
94
95
assert(addend >= -1 && addend <= 1);
96
@@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
97
98
switch (qcow2_get_cluster_type(bs, entry)) {
99
case QCOW2_CLUSTER_COMPRESSED:
100
- nb_csectors = ((entry >> s->csize_shift) &
101
- s->csize_mask) + 1;
102
if (addend != 0) {
103
- uint64_t coffset = (entry & s->cluster_offset_mask)
104
- & QCOW2_COMPRESSED_SECTOR_MASK;
105
+ uint64_t coffset;
106
+ int csize;
107
+
108
+ qcow2_parse_compressed_l2_entry(bs, entry,
109
+ &coffset, &csize);
110
ret = update_refcount(
111
- bs, coffset,
112
- nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE,
113
+ bs, coffset, csize,
114
abs(addend), addend < 0,
115
QCOW2_DISCARD_SNAPSHOT);
116
if (ret < 0) {
117
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
118
BDRVQcow2State *s = bs->opaque;
119
uint64_t l2_entry;
120
uint64_t next_contiguous_offset = 0;
121
- int i, nb_csectors, ret;
122
+ int i, ret;
123
size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
124
g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
125
126
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
127
128
/* Do the actual checks */
129
for (i = 0; i < s->l2_size; i++) {
130
+ uint64_t coffset;
131
+ int csize;
132
l2_entry = get_l2_entry(s, l2_table, i);
133
134
switch (qcow2_get_cluster_type(bs, l2_entry)) {
135
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
136
}
137
138
/* Mark cluster as used */
139
- nb_csectors = ((l2_entry >> s->csize_shift) &
140
- s->csize_mask) + 1;
141
- l2_entry &= s->cluster_offset_mask;
142
+ qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
143
ret = qcow2_inc_refcounts_imrt(
144
- bs, res, refcount_table, refcount_table_size,
145
- l2_entry & QCOW2_COMPRESSED_SECTOR_MASK,
146
- nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE);
147
+ bs, res, refcount_table, refcount_table_size, coffset, csize);
148
if (ret < 0) {
149
return ret;
150
}
151
diff --git a/block/qcow2.c b/block/qcow2.c
152
index XXXXXXX..XXXXXXX 100644
1298
index XXXXXXX..XXXXXXX 100644
153
--- a/block/qcow2.c
1299
--- a/block/trace-events
154
+++ b/block/qcow2.c
1300
+++ b/block/trace-events
155
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
1301
@@ -XXX,XX +XXX,XX @@ nbd_client_connect_success(const char *export_name) "export '%s'"
156
size_t qiov_offset)
1302
# ssh.c
157
{
1303
ssh_restart_coroutine(void *co) "co=%p"
158
BDRVQcow2State *s = bs->opaque;
1304
ssh_flush(void) "fsync"
159
- int ret = 0, csize, nb_csectors;
1305
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
160
+ int ret = 0, csize;
1306
+ssh_check_host_key_knownhosts(void) "host key OK"
161
uint64_t coffset;
1307
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
162
uint8_t *buf, *out_buf;
1308
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
163
int offset_in_cluster = offset_into_cluster(s, offset);
1309
ssh_co_yield_back(int sock) "s->sock=%d - back"
164
1310
ssh_getlength(int64_t length) "length=%" PRIi64
165
- assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
1311
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
1312
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1313
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
1314
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
1315
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
1316
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
1317
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1318
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
1319
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
1320
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
1321
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
1322
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
1323
+ssh_auth_methods(int methods) "auth methods=0x%x"
1324
+ssh_server_status(int status) "server status=%d"
1325
1326
# curl.c
1327
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
1328
@@ -XXX,XX +XXX,XX @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
1329
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
1330
1331
# ssh.c
1332
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
1333
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
1334
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
1335
index XXXXXXX..XXXXXXX 100644
1336
--- a/docs/qemu-block-drivers.texi
1337
+++ b/docs/qemu-block-drivers.texi
1338
@@ -XXX,XX +XXX,XX @@ print a warning when @code{fsync} is not supported:
1339
1340
warning: ssh server @code{ssh.example.com:22} does not support fsync
1341
1342
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
1343
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
1344
supported.
1345
1346
@node disk_images_nvme
1347
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
1348
index XXXXXXX..XXXXXXX 100644
1349
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
1350
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
1351
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1352
mxe-$TARGET-w64-mingw32.shared-curl \
1353
mxe-$TARGET-w64-mingw32.shared-glib \
1354
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1355
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1356
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1357
mxe-$TARGET-w64-mingw32.shared-lzo \
1358
mxe-$TARGET-w64-mingw32.shared-nettle \
1359
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
1360
index XXXXXXX..XXXXXXX 100644
1361
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
1362
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
1363
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1364
mxe-$TARGET-w64-mingw32.shared-curl \
1365
mxe-$TARGET-w64-mingw32.shared-glib \
1366
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1367
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1368
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1369
mxe-$TARGET-w64-mingw32.shared-lzo \
1370
mxe-$TARGET-w64-mingw32.shared-nettle \
1371
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
1372
index XXXXXXX..XXXXXXX 100644
1373
--- a/tests/docker/dockerfiles/fedora.docker
1374
+++ b/tests/docker/dockerfiles/fedora.docker
1375
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1376
libpng-devel \
1377
librbd-devel \
1378
libseccomp-devel \
1379
- libssh2-devel \
1380
+ libssh-devel \
1381
libubsan \
1382
libusbx-devel \
1383
libxml2-devel \
1384
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1385
mingw32-gtk3 \
1386
mingw32-libjpeg-turbo \
1387
mingw32-libpng \
1388
- mingw32-libssh2 \
1389
mingw32-libtasn1 \
1390
mingw32-nettle \
1391
mingw32-pixman \
1392
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1393
mingw64-gtk3 \
1394
mingw64-libjpeg-turbo \
1395
mingw64-libpng \
1396
- mingw64-libssh2 \
1397
mingw64-libtasn1 \
1398
mingw64-nettle \
1399
mingw64-pixman \
1400
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
1401
index XXXXXXX..XXXXXXX 100644
1402
--- a/tests/docker/dockerfiles/ubuntu.docker
1403
+++ b/tests/docker/dockerfiles/ubuntu.docker
1404
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1405
libsnappy-dev \
1406
libspice-protocol-dev \
1407
libspice-server-dev \
1408
- libssh2-1-dev \
1409
+ libssh-dev \
1410
libusb-1.0-0-dev \
1411
libusbredirhost-dev \
1412
libvdeplug-dev \
1413
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
1414
index XXXXXXX..XXXXXXX 100644
1415
--- a/tests/docker/dockerfiles/ubuntu1804.docker
1416
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
1417
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1418
libsnappy-dev \
1419
libspice-protocol-dev \
1420
libspice-server-dev \
1421
- libssh2-1-dev \
1422
+ libssh-dev \
1423
libusb-1.0-0-dev \
1424
libusbredirhost-dev \
1425
libvdeplug-dev \
1426
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
1427
index XXXXXXX..XXXXXXX 100755
1428
--- a/tests/qemu-iotests/207
1429
+++ b/tests/qemu-iotests/207
1430
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1431
1432
iotests.img_info_log(remote_path)
1433
1434
- md5_key = subprocess.check_output(
1435
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1436
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
1437
- shell=True).rstrip().decode('ascii')
1438
+ keys = subprocess.check_output(
1439
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1440
+ 'cut -d" " -f3',
1441
+ shell=True).rstrip().decode('ascii').split('\n')
1442
+
1443
+ # Mappings of base64 representations to digests
1444
+ md5_keys = {}
1445
+ sha1_keys = {}
1446
+
1447
+ for key in keys:
1448
+ md5_keys[key] = subprocess.check_output(
1449
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
1450
+ shell=True).rstrip().decode('ascii')
1451
+
1452
+ sha1_keys[key] = subprocess.check_output(
1453
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
1454
+ shell=True).rstrip().decode('ascii')
1455
1456
vm.launch()
1457
+
1458
+ # Find correct key first
1459
+ matching_key = None
1460
+ for key in keys:
1461
+ result = vm.qmp('blockdev-add',
1462
+ driver='ssh', node_name='node0', path=disk_path,
1463
+ server={
1464
+ 'host': '127.0.0.1',
1465
+ 'port': '22',
1466
+ }, host_key_check={
1467
+ 'mode': 'hash',
1468
+ 'type': 'md5',
1469
+ 'hash': md5_keys[key],
1470
+ })
1471
+
1472
+ if 'error' not in result:
1473
+ vm.qmp('blockdev-del', node_name='node0')
1474
+ matching_key = key
1475
+ break
1476
+
1477
+ if matching_key is None:
1478
+ vm.shutdown()
1479
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
1480
+
1481
blockdev_create(vm, { 'driver': 'ssh',
1482
'location': {
1483
'path': disk_path,
1484
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1485
'host-key-check': {
1486
'mode': 'hash',
1487
'type': 'md5',
1488
- 'hash': md5_key,
1489
+ 'hash': md5_keys[matching_key],
1490
}
1491
},
1492
'size': 8388608 })
1493
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1494
1495
iotests.img_info_log(remote_path)
1496
1497
- sha1_key = subprocess.check_output(
1498
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1499
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
1500
- shell=True).rstrip().decode('ascii')
166
-
1501
-
167
- coffset = l2_entry & s->cluster_offset_mask;
1502
vm.launch()
168
- nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
1503
blockdev_create(vm, { 'driver': 'ssh',
169
- csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
1504
'location': {
170
- (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
1505
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
171
+ qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
1506
'host-key-check': {
172
1507
'mode': 'hash',
173
buf = g_try_malloc(csize);
1508
'type': 'sha1',
174
if (!buf) {
1509
- 'hash': sha1_key,
1510
+ 'hash': sha1_keys[matching_key],
1511
}
1512
},
1513
'size': 4194304 })
1514
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
1515
index XXXXXXX..XXXXXXX 100644
1516
--- a/tests/qemu-iotests/207.out
1517
+++ b/tests/qemu-iotests/207.out
1518
@@ -XXX,XX +XXX,XX @@ virtual size: 4 MiB (4194304 bytes)
1519
1520
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
1521
{"return": {}}
1522
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
1523
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
1524
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
1525
{"return": {}}
1526
175
--
1527
--
176
2.31.1
1528
2.21.0
177
1529
178
1530
diff view generated by jsdifflib
1
bdrv_co_block_status() does it for us, we do not need to do it here.
1
Tests should place their files into the test directory. This includes
2
Unix sockets. 205 currently fails to do so, which prevents it from
3
being run concurrently.
2
4
3
The advantage of not capping *pnum is that bdrv_co_block_status() can
5
Signed-off-by: Max Reitz <mreitz@redhat.com>
4
cache larger data regions than requested by its caller.
6
Message-id: 20190618210238.9524-1-mreitz@redhat.com
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
---
10
tests/qemu-iotests/205 | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
5
12
6
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
13
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
7
Reviewed-by: Eric Blake <eblake@redhat.com>
14
index XXXXXXX..XXXXXXX 100755
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
15
--- a/tests/qemu-iotests/205
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
16
+++ b/tests/qemu-iotests/205
10
Message-Id: <20210812084148.14458-6-hreitz@redhat.com>
17
@@ -XXX,XX +XXX,XX @@ import iotests
11
---
18
import time
12
block/gluster.c | 7 ++++---
19
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
13
1 file changed, 4 insertions(+), 3 deletions(-)
20
14
21
-nbd_sock = 'nbd_sock'
15
diff --git a/block/gluster.c b/block/gluster.c
22
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
16
index XXXXXXX..XXXXXXX 100644
23
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
17
--- a/block/gluster.c
24
disk = os.path.join(iotests.test_dir, 'disk')
18
+++ b/block/gluster.c
19
@@ -XXX,XX +XXX,XX @@ exit:
20
* the specified offset) that are known to be in the same
21
* allocated/unallocated state.
22
*
23
- * 'bytes' is the max value 'pnum' should be set to.
24
+ * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may
25
+ * well exceed it.
26
*
27
* (Based on raw_co_block_status() from file-posix.c.)
28
*/
29
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
30
} else if (data == offset) {
31
/* On a data extent, compute bytes to the end of the extent,
32
* possibly including a partial sector at EOF. */
33
- *pnum = MIN(bytes, hole - offset);
34
+ *pnum = hole - offset;
35
36
/*
37
* We are not allowed to return partial sectors, though, so
38
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
39
} else {
40
/* On a hole, compute bytes to the beginning of the next extent. */
41
assert(hole == offset);
42
- *pnum = MIN(bytes, data - offset);
43
+ *pnum = data - offset;
44
ret = BDRV_BLOCK_ZERO;
45
}
46
25
47
--
26
--
48
2.31.1
27
2.21.0
49
28
50
29
diff view generated by jsdifflib
Deleted patch
1
bdrv_co_block_status() does it for us, we do not need to do it here.
2
1
3
The advantage of not capping *pnum is that bdrv_co_block_status() can
4
cache larger data regions than requested by its caller.
5
6
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
10
Message-Id: <20210812084148.14458-7-hreitz@redhat.com>
11
---
12
block/iscsi.c | 3 ---
13
1 file changed, 3 deletions(-)
14
15
diff --git a/block/iscsi.c b/block/iscsi.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/iscsi.c
18
+++ b/block/iscsi.c
19
@@ -XXX,XX +XXX,XX @@ retry:
20
iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
21
}
22
23
- if (*pnum > bytes) {
24
- *pnum = bytes;
25
- }
26
out_unlock:
27
qemu_mutex_unlock(&iscsilun->mutex);
28
g_free(iTask.err_str);
29
--
30
2.31.1
31
32
diff view generated by jsdifflib
Deleted patch
1
As of recently, pylint complains when `open()` calls are missing an
2
`encoding=` specified. Everything we have should be UTF-8 (and in fact,
3
everything should be UTF-8, period (exceptions apply)), so use that.
4
1
5
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
6
Message-Id: <20210824153540.177128-2-hreitz@redhat.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Reviewed-by: John Snow <jsnow@redhat.com>
9
---
10
tests/qemu-iotests/297 | 2 +-
11
tests/qemu-iotests/iotests.py | 8 +++++---
12
2 files changed, 6 insertions(+), 4 deletions(-)
13
14
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
15
index XXXXXXX..XXXXXXX 100755
16
--- a/tests/qemu-iotests/297
17
+++ b/tests/qemu-iotests/297
18
@@ -XXX,XX +XXX,XX @@ def is_python_file(filename):
19
if filename.endswith('.py'):
20
return True
21
22
- with open(filename) as f:
23
+ with open(filename, encoding='utf-8') as f:
24
try:
25
first_line = f.readline()
26
return re.match('^#!.*python', first_line) is not None
27
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tests/qemu-iotests/iotests.py
30
+++ b/tests/qemu-iotests/iotests.py
31
@@ -XXX,XX +XXX,XX @@ def _post_shutdown(self) -> None:
32
return
33
valgrind_filename = f"{test_dir}/{self._popen.pid}.valgrind"
34
if self.exitcode() == 99:
35
- with open(valgrind_filename) as f:
36
+ with open(valgrind_filename, encoding='utf-8') as f:
37
print(f.read())
38
else:
39
os.remove(valgrind_filename)
40
@@ -XXX,XX +XXX,XX @@ def notrun(reason):
41
# Each test in qemu-iotests has a number ("seq")
42
seq = os.path.basename(sys.argv[0])
43
44
- with open('%s/%s.notrun' % (output_dir, seq), 'w') as outfile:
45
+ with open('%s/%s.notrun' % (output_dir, seq), 'w', encoding='utf-8') \
46
+ as outfile:
47
outfile.write(reason + '\n')
48
logger.warning("%s not run: %s", seq, reason)
49
sys.exit(0)
50
@@ -XXX,XX +XXX,XX @@ def case_notrun(reason):
51
# Each test in qemu-iotests has a number ("seq")
52
seq = os.path.basename(sys.argv[0])
53
54
- with open('%s/%s.casenotrun' % (output_dir, seq), 'a') as outfile:
55
+ with open('%s/%s.casenotrun' % (output_dir, seq), 'a', encoding='utf-8') \
56
+ as outfile:
57
outfile.write(' [case not run] ' + reason + '\n')
58
59
def _verify_image_format(supported_fmts: Sequence[str] = (),
60
--
61
2.31.1
62
63
diff view generated by jsdifflib
Deleted patch
1
pylint proposes using `[]` instead of `list()` and `{}` instead of
2
`dict()`, because it is faster. That seems simple enough, so heed its
3
advice.
4
1
5
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
6
Message-Id: <20210824153540.177128-3-hreitz@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
---
9
tests/qemu-iotests/iotests.py | 4 ++--
10
1 file changed, 2 insertions(+), 2 deletions(-)
11
12
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tests/qemu-iotests/iotests.py
15
+++ b/tests/qemu-iotests/iotests.py
16
@@ -XXX,XX +XXX,XX @@ def hmp_qemu_io(self, drive: str, cmd: str,
17
18
def flatten_qmp_object(self, obj, output=None, basestr=''):
19
if output is None:
20
- output = dict()
21
+ output = {}
22
if isinstance(obj, list):
23
for i, item in enumerate(obj):
24
self.flatten_qmp_object(item, output, basestr + str(i) + '.')
25
@@ -XXX,XX +XXX,XX @@ def flatten_qmp_object(self, obj, output=None, basestr=''):
26
27
def qmp_to_opts(self, obj):
28
obj = self.flatten_qmp_object(obj)
29
- output_list = list()
30
+ output_list = []
31
for key in obj:
32
output_list += [key + '=' + obj[key]]
33
return ','.join(output_list)
34
--
35
2.31.1
36
37
diff view generated by jsdifflib
Deleted patch
1
169 and 199 have been renamed and moved to tests/ (commit a44be0334be:
2
"iotests: rename and move 169 and 199 tests"), so we can drop them from
3
the skip list.
4
1
5
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
6
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
9
Message-Id: <20210902094017.32902-2-hreitz@redhat.com>
10
---
11
tests/qemu-iotests/297 | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
14
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
15
index XXXXXXX..XXXXXXX 100755
16
--- a/tests/qemu-iotests/297
17
+++ b/tests/qemu-iotests/297
18
@@ -XXX,XX +XXX,XX @@ import iotests
19
SKIP_FILES = (
20
'030', '040', '041', '044', '045', '055', '056', '057', '065', '093',
21
'096', '118', '124', '132', '136', '139', '147', '148', '149',
22
- '151', '152', '155', '163', '165', '169', '194', '196', '199', '202',
23
+ '151', '152', '155', '163', '165', '194', '196', '202',
24
'203', '205', '206', '207', '208', '210', '211', '212', '213', '216',
25
'218', '219', '224', '228', '234', '235', '236', '237', '238',
26
'240', '242', '245', '246', '248', '255', '256', '257', '258', '260',
27
--
28
2.31.1
29
30
diff view generated by jsdifflib
Deleted patch
1
pylint complains that discards1_sha256 and all_discards_sha256 are first
2
set in non-__init__ methods.
3
1
4
These variables are not really class-variables anyway, so let them
5
instead be returned by start_postcopy(), thus silencing pylint.
6
7
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Message-Id: <20210902094017.32902-3-hreitz@redhat.com>
11
---
12
.../tests/migrate-bitmaps-postcopy-test | 13 +++++++------
13
1 file changed, 7 insertions(+), 6 deletions(-)
14
15
diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
16
index XXXXXXX..XXXXXXX 100755
17
--- a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
18
+++ b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
19
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):
20
21
result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
22
node='drive0', name='bitmap0')
23
- self.discards1_sha256 = result['return']['sha256']
24
+ discards1_sha256 = result['return']['sha256']
25
26
# Check, that updating the bitmap by discards works
27
- assert self.discards1_sha256 != empty_sha256
28
+ assert discards1_sha256 != empty_sha256
29
30
# We want to calculate resulting sha256. Do it in bitmap0, so, disable
31
# other bitmaps
32
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):
33
34
result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
35
node='drive0', name='bitmap0')
36
- self.all_discards_sha256 = result['return']['sha256']
37
+ all_discards_sha256 = result['return']['sha256']
38
39
# Now, enable some bitmaps, to be updated during migration
40
for i in range(2, nb_bitmaps, 2):
41
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):
42
43
event_resume = self.vm_b.event_wait('RESUME')
44
self.vm_b_events.append(event_resume)
45
- return event_resume
46
+ return (event_resume, discards1_sha256, all_discards_sha256)
47
48
def test_postcopy_success(self):
49
- event_resume = self.start_postcopy()
50
+ event_resume, discards1_sha256, all_discards_sha256 = \
51
+ self.start_postcopy()
52
53
# enabled bitmaps should be updated
54
apply_discards(self.vm_b, discards2)
55
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):
56
for i in range(0, nb_bitmaps, 5):
57
result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
58
node='drive0', name='bitmap{}'.format(i))
59
- sha = self.discards1_sha256 if i % 2 else self.all_discards_sha256
60
+ sha = discards1_sha256 if i % 2 else all_discards_sha256
61
self.assert_qmp(result, 'return/sha256', sha)
62
63
def test_early_shutdown_destination(self):
64
--
65
2.31.1
66
67
diff view generated by jsdifflib
Deleted patch
1
There are a couple of things pylint takes issue with:
2
- The "time" import is unused
3
- The import order (iotests should come last)
4
- get_bitmap_hash() doesn't use @self and so should be a function
5
- Semicolons at the end of some lines
6
- Parentheses after "if"
7
- Some lines are too long (80 characters instead of 79)
8
- inject_test_case()'s @name parameter shadows a top-level @name
9
variable
10
- "lambda self: mc(self)" were equivalent to just "mc", but in
11
inject_test_case(), it is not equivalent, so add a comment and disable
12
the warning locally
13
- Always put two empty lines after a function
14
- f'exec: cat > /dev/null' does not need to be an f-string
15
1
16
Fix them.
17
18
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
19
Message-Id: <20210902094017.32902-4-hreitz@redhat.com>
20
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
21
---
22
tests/qemu-iotests/tests/migrate-bitmaps-test | 43 +++++++++++--------
23
1 file changed, 25 insertions(+), 18 deletions(-)
24
25
diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-test b/tests/qemu-iotests/tests/migrate-bitmaps-test
26
index XXXXXXX..XXXXXXX 100755
27
--- a/tests/qemu-iotests/tests/migrate-bitmaps-test
28
+++ b/tests/qemu-iotests/tests/migrate-bitmaps-test
29
@@ -XXX,XX +XXX,XX @@
30
#
31
32
import os
33
-import iotests
34
-import time
35
import itertools
36
import operator
37
import re
38
+import iotests
39
from iotests import qemu_img, qemu_img_create, Timeout
40
41
42
@@ -XXX,XX +XXX,XX @@ mig_cmd = 'exec: cat > ' + mig_file
43
incoming_cmd = 'exec: cat ' + mig_file
44
45
46
+def get_bitmap_hash(vm):
47
+ result = vm.qmp('x-debug-block-dirty-bitmap-sha256',
48
+ node='drive0', name='bitmap0')
49
+ return result['return']['sha256']
50
+
51
+
52
class TestDirtyBitmapMigration(iotests.QMPTestCase):
53
def tearDown(self):
54
self.vm_a.shutdown()
55
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
56
params['persistent'] = True
57
58
result = vm.qmp('block-dirty-bitmap-add', **params)
59
- self.assert_qmp(result, 'return', {});
60
-
61
- def get_bitmap_hash(self, vm):
62
- result = vm.qmp('x-debug-block-dirty-bitmap-sha256',
63
- node='drive0', name='bitmap0')
64
- return result['return']['sha256']
65
+ self.assert_qmp(result, 'return', {})
66
67
def check_bitmap(self, vm, sha256):
68
result = vm.qmp('x-debug-block-dirty-bitmap-sha256',
69
node='drive0', name='bitmap0')
70
if sha256:
71
- self.assert_qmp(result, 'return/sha256', sha256);
72
+ self.assert_qmp(result, 'return/sha256', sha256)
73
else:
74
self.assert_qmp(result, 'error/desc',
75
- "Dirty bitmap 'bitmap0' not found");
76
+ "Dirty bitmap 'bitmap0' not found")
77
78
def do_test_migration_resume_source(self, persistent, migrate_bitmaps):
79
granularity = 512
80
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
81
self.add_bitmap(self.vm_a, granularity, persistent)
82
for r in regions:
83
self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % r)
84
- sha256 = self.get_bitmap_hash(self.vm_a)
85
+ sha256 = get_bitmap_hash(self.vm_a)
86
87
result = self.vm_a.qmp('migrate', uri=mig_cmd)
88
while True:
89
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
90
break
91
while True:
92
result = self.vm_a.qmp('query-status')
93
- if (result['return']['status'] == 'postmigrate'):
94
+ if result['return']['status'] == 'postmigrate':
95
break
96
97
# test that bitmap is still here
98
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
99
self.add_bitmap(self.vm_a, granularity, persistent)
100
for r in regions:
101
self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % r)
102
- sha256 = self.get_bitmap_hash(self.vm_a)
103
+ sha256 = get_bitmap_hash(self.vm_a)
104
105
if pre_shutdown:
106
self.vm_a.shutdown()
107
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
108
self.check_bitmap(self.vm_b, sha256 if persistent else False)
109
110
111
-def inject_test_case(klass, name, method, *args, **kwargs):
112
+def inject_test_case(klass, suffix, method, *args, **kwargs):
113
mc = operator.methodcaller(method, *args, **kwargs)
114
- setattr(klass, 'test_' + method + name, lambda self: mc(self))
115
+ # We want to add a function attribute to `klass`, so that it is
116
+ # correctly converted to a method on instantiation. The
117
+ # methodcaller object `mc` is a callable, not a function, so we
118
+ # need the lambda to turn it into a function.
119
+ # pylint: disable=unnecessary-lambda
120
+ setattr(klass, 'test_' + method + suffix, lambda self: mc(self))
121
+
122
123
for cmb in list(itertools.product((True, False), repeat=5)):
124
name = ('_' if cmb[0] else '_not_') + 'persistent_'
125
name += ('_' if cmb[1] else '_not_') + 'migbitmap_'
126
name += '_online' if cmb[2] else '_offline'
127
name += '_shared' if cmb[3] else '_nonshared'
128
- if (cmb[4]):
129
+ if cmb[4]:
130
name += '__pre_shutdown'
131
132
inject_test_case(TestDirtyBitmapMigration, name, 'do_test_migration',
133
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapBackingMigration(iotests.QMPTestCase):
134
self.assert_qmp(result, 'return', {})
135
136
# Check that the bitmaps are there
137
- for node in self.vm.qmp('query-named-block-nodes', flat=True)['return']:
138
+ nodes = self.vm.qmp('query-named-block-nodes', flat=True)['return']
139
+ for node in nodes:
140
if 'node0' in node['node-name']:
141
self.assert_qmp(node, 'dirty-bitmaps[0]/name', 'bmap0')
142
143
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapBackingMigration(iotests.QMPTestCase):
144
"""
145
Continue the source after migration.
146
"""
147
- result = self.vm.qmp('migrate', uri=f'exec: cat > /dev/null')
148
+ result = self.vm.qmp('migrate', uri='exec: cat > /dev/null')
149
self.assert_qmp(result, 'return', {})
150
151
with Timeout(10, 'Migration timeout'):
152
--
153
2.31.1
154
155
diff view generated by jsdifflib
Deleted patch
1
The AbnormalShutdown exception class is not in qemu.machine, but in
2
qemu.machine.machine. (qemu.machine.AbnormalShutdown was enough for
3
Python to find it in order to run this test, but pylint complains about
4
it.)
5
1
6
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
7
Message-Id: <20210902094017.32902-5-hreitz@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
---
10
tests/qemu-iotests/tests/mirror-top-perms | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
12
13
diff --git a/tests/qemu-iotests/tests/mirror-top-perms b/tests/qemu-iotests/tests/mirror-top-perms
14
index XXXXXXX..XXXXXXX 100755
15
--- a/tests/qemu-iotests/tests/mirror-top-perms
16
+++ b/tests/qemu-iotests/tests/mirror-top-perms
17
@@ -XXX,XX +XXX,XX @@ class TestMirrorTopPerms(iotests.QMPTestCase):
18
def tearDown(self):
19
try:
20
self.vm.shutdown()
21
- except qemu.machine.AbnormalShutdown:
22
+ except qemu.machine.machine.AbnormalShutdown:
23
pass
24
25
if self.vm_b is not None:
26
--
27
2.31.1
28
29
diff view generated by jsdifflib
Deleted patch
1
297 so far does not check the named tests, which reside in the tests/
2
directory (i.e. full path tests/qemu-iotests/tests). Fix it.
3
1
4
Thanks to the previous two commits, all named tests pass its scrutiny,
5
so we do not have to add anything to SKIP_FILES.
6
7
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
8
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
11
Message-Id: <20210902094017.32902-6-hreitz@redhat.com>
12
---
13
tests/qemu-iotests/297 | 5 +++--
14
1 file changed, 3 insertions(+), 2 deletions(-)
15
16
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/297
19
+++ b/tests/qemu-iotests/297
20
@@ -XXX,XX +XXX,XX @@ def is_python_file(filename):
21
22
23
def run_linters():
24
- files = [filename for filename in (set(os.listdir('.')) - set(SKIP_FILES))
25
- if is_python_file(filename)]
26
+ named_tests = [f'tests/{entry}' for entry in os.listdir('tests')]
27
+ check_tests = set(os.listdir('.') + named_tests) - set(SKIP_FILES)
28
+ files = [filename for filename in check_tests if is_python_file(filename)]
29
30
iotests.logger.debug('Files to be checked:')
31
iotests.logger.debug(', '.join(sorted(files)))
32
--
33
2.31.1
34
35
diff view generated by jsdifflib
Deleted patch
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
1
3
In mirror_iteration() we call mirror_wait_on_conflicts() with
4
`self` parameter set to NULL.
5
6
Starting from commit d44dae1a7c we dereference the `self` pointer in
7
mirror_wait_on_conflicts() without checking whether it is NULL.
8
9
Backtrace:
10
Program terminated with signal SIGSEGV, Segmentation fault.
11
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
12
at ../block/mirror.c:172
13
172     self->waiting_for_op = op;
14
[Current thread is 1 (Thread 0x7f0908931ec0 (LWP 380249))]
15
(gdb) bt
16
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
17
at ../block/mirror.c:172
18
#1 0x00005610c5d9d631 in mirror_run (job=0x5610c76a2c00, errp=<optimized out>) at ../block/mirror.c:491
19
#2 0x00005610c5d58726 in job_co_entry (opaque=0x5610c76a2c00) at ../job.c:917
20
#3 0x00005610c5f046c6 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>)
21
at ../util/coroutine-ucontext.c:173
22
#4 0x00007f0909975820 in ?? () at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91
23
from /usr/lib64/libc.so.6
24
25
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001404
26
Fixes: d44dae1a7c ("block/mirror: fix active mirror dead-lock in mirror_wait_on_conflicts")
27
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
28
Message-Id: <20210910124533.288318-1-sgarzare@redhat.com>
29
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
30
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
31
---
32
block/mirror.c | 25 ++++++++++++++++---------
33
1 file changed, 16 insertions(+), 9 deletions(-)
34
35
diff --git a/block/mirror.c b/block/mirror.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/block/mirror.c
38
+++ b/block/mirror.c
39
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
40
if (ranges_overlap(self_start_chunk, self_nb_chunks,
41
op_start_chunk, op_nb_chunks))
42
{
43
- /*
44
- * If the operation is already (indirectly) waiting for us, or
45
- * will wait for us as soon as it wakes up, then just go on
46
- * (instead of producing a deadlock in the former case).
47
- */
48
- if (op->waiting_for_op) {
49
- continue;
50
+ if (self) {
51
+ /*
52
+ * If the operation is already (indirectly) waiting for us,
53
+ * or will wait for us as soon as it wakes up, then just go
54
+ * on (instead of producing a deadlock in the former case).
55
+ */
56
+ if (op->waiting_for_op) {
57
+ continue;
58
+ }
59
+
60
+ self->waiting_for_op = op;
61
}
62
63
- self->waiting_for_op = op;
64
qemu_co_queue_wait(&op->waiting_requests, NULL);
65
- self->waiting_for_op = NULL;
66
+
67
+ if (self) {
68
+ self->waiting_for_op = NULL;
69
+ }
70
+
71
break;
72
}
73
}
74
--
75
2.31.1
76
77
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Add a simple test which tries to run migration during backup.
4
bdrv_inactivate_all() should fail. But due to a bug (see the next commit
5
with the fix) it doesn't: nodes are inactivated and the continued backup crashes
6
on assertion "assert(!(bs->open_flags & BDRV_O_INACTIVE));" in
7
bdrv_co_write_req_prepare().
8
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Message-Id: <20210911120027.8063-2-vsementsov@virtuozzo.com>
11
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
12
---
13
.../qemu-iotests/tests/migrate-during-backup | 97 +++++++++++++++++++
14
.../tests/migrate-during-backup.out | 5 +
15
2 files changed, 102 insertions(+)
16
create mode 100755 tests/qemu-iotests/tests/migrate-during-backup
17
create mode 100644 tests/qemu-iotests/tests/migrate-during-backup.out
18
19
diff --git a/tests/qemu-iotests/tests/migrate-during-backup b/tests/qemu-iotests/tests/migrate-during-backup
20
new file mode 100755
21
index XXXXXXX..XXXXXXX
22
--- /dev/null
23
+++ b/tests/qemu-iotests/tests/migrate-during-backup
24
@@ -XXX,XX +XXX,XX @@
25
+#!/usr/bin/env python3
26
+# group: migration disabled
27
+#
28
+# Copyright (c) 2021 Virtuozzo International GmbH
29
+#
30
+# This program is free software; you can redistribute it and/or modify
31
+# it under the terms of the GNU General Public License as published by
32
+# the Free Software Foundation; either version 2 of the License, or
33
+# (at your option) any later version.
34
+#
35
+# This program is distributed in the hope that it will be useful,
36
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
37
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38
+# GNU General Public License for more details.
39
+#
40
+# You should have received a copy of the GNU General Public License
41
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
42
+#
43
+
44
+import os
45
+import iotests
46
+from iotests import qemu_img_create, qemu_io
47
+
48
+
49
+disk_a = os.path.join(iotests.test_dir, 'disk_a')
50
+disk_b = os.path.join(iotests.test_dir, 'disk_b')
51
+size = '1M'
52
+mig_file = os.path.join(iotests.test_dir, 'mig_file')
53
+mig_cmd = 'exec: cat > ' + mig_file
54
+
55
+
56
+class TestMigrateDuringBackup(iotests.QMPTestCase):
57
+ def tearDown(self):
58
+ self.vm.shutdown()
59
+ os.remove(disk_a)
60
+ os.remove(disk_b)
61
+ os.remove(mig_file)
62
+
63
+ def setUp(self):
64
+ qemu_img_create('-f', iotests.imgfmt, disk_a, size)
65
+ qemu_img_create('-f', iotests.imgfmt, disk_b, size)
66
+ qemu_io('-c', f'write 0 {size}', disk_a)
67
+
68
+ self.vm = iotests.VM().add_drive(disk_a)
69
+ self.vm.launch()
70
+ result = self.vm.qmp('blockdev-add', {
71
+ 'node-name': 'target',
72
+ 'driver': iotests.imgfmt,
73
+ 'file': {
74
+ 'driver': 'file',
75
+ 'filename': disk_b
76
+ }
77
+ })
78
+ self.assert_qmp(result, 'return', {})
79
+
80
+ def test_migrate(self):
81
+ result = self.vm.qmp('blockdev-backup', device='drive0',
82
+ target='target', sync='full',
83
+ speed=1, x_perf={
84
+ 'max-workers': 1,
85
+ 'max-chunk': 64 * 1024
86
+ })
87
+ self.assert_qmp(result, 'return', {})
88
+
89
+ result = self.vm.qmp('job-pause', id='drive0')
90
+ self.assert_qmp(result, 'return', {})
91
+
92
+ result = self.vm.qmp('migrate-set-capabilities',
93
+ capabilities=[{'capability': 'events',
94
+ 'state': True}])
95
+ self.assert_qmp(result, 'return', {})
96
+ result = self.vm.qmp('migrate', uri=mig_cmd)
97
+ self.assert_qmp(result, 'return', {})
98
+
99
+ e = self.vm.events_wait((('MIGRATION',
100
+ {'data': {'status': 'completed'}}),
101
+ ('MIGRATION',
102
+ {'data': {'status': 'failed'}})))
103
+
104
+ # Don't assert that e is 'failed' now: this way we'll miss
105
+ # possible crash when backup continues :)
106
+
107
+ result = self.vm.qmp('block-job-set-speed', device='drive0',
108
+ speed=0)
109
+ self.assert_qmp(result, 'return', {})
110
+ result = self.vm.qmp('job-resume', id='drive0')
111
+ self.assert_qmp(result, 'return', {})
112
+
113
+ # For future: if something changes so that both migration
114
+ # and backup pass, let's not miss that moment, as it may
115
+ # be a bug as well as improvement.
116
+ self.assert_qmp(e, 'data/status', 'failed')
117
+
118
+
119
+if __name__ == '__main__':
120
+ iotests.main(supported_fmts=['qcow2'],
121
+ supported_protocols=['file'])
122
diff --git a/tests/qemu-iotests/tests/migrate-during-backup.out b/tests/qemu-iotests/tests/migrate-during-backup.out
123
new file mode 100644
124
index XXXXXXX..XXXXXXX
125
--- /dev/null
126
+++ b/tests/qemu-iotests/tests/migrate-during-backup.out
127
@@ -XXX,XX +XXX,XX @@
128
+.
129
+----------------------------------------------------------------------
130
+Ran 1 tests
131
+
132
+OK
133
--
134
2.31.1
135
136
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
We must not inactivate a child when its parent has write permissions on
4
it.
5
6
Calling .bdrv_inactivate() doesn't help: actually only qcow2 has this
7
handler and it is used to flush caches, not for permission
8
manipulations.
9
10
So, let's simply check cumulative parent permissions before
11
inactivating the node.
12
13
This commit fixes a crash when we do migration during backup: prior to
14
the commit nothing prevents inactivation of all nodes when migration finishes,
15
and the following backup write to the target crashes on assertion
16
"assert(!(bs->open_flags & BDRV_O_INACTIVE));" in
17
bdrv_co_write_req_prepare().
18
19
After the commit, we rely on the fact that copy-before-write filter
20
keeps write permission on target node to be able to write to it. So
21
inactivation fails and migration fails as expected.
22
23
Corresponding test now passes, so, enable it.
24
25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
27
Message-Id: <20210911120027.8063-3-vsementsov@virtuozzo.com>
28
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
29
---
30
block.c | 8 ++++++++
31
tests/qemu-iotests/tests/migrate-during-backup | 2 +-
32
2 files changed, 9 insertions(+), 1 deletion(-)
33
34
diff --git a/block.c b/block.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/block.c
37
+++ b/block.c
38
@@ -XXX,XX +XXX,XX @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
39
{
40
BdrvChild *child, *parent;
41
int ret;
42
+ uint64_t cumulative_perms, cumulative_shared_perms;
43
44
if (!bs->drv) {
45
return -ENOMEDIUM;
46
@@ -XXX,XX +XXX,XX @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
47
}
48
}
49
50
+ bdrv_get_cumulative_perm(bs, &cumulative_perms,
51
+ &cumulative_shared_perms);
52
+ if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
53
+ /* Our inactive parents still need write access. Inactivation failed. */
54
+ return -EPERM;
55
+ }
56
+
57
bs->open_flags |= BDRV_O_INACTIVE;
58
59
/*
60
diff --git a/tests/qemu-iotests/tests/migrate-during-backup b/tests/qemu-iotests/tests/migrate-during-backup
61
index XXXXXXX..XXXXXXX 100755
62
--- a/tests/qemu-iotests/tests/migrate-during-backup
63
+++ b/tests/qemu-iotests/tests/migrate-during-backup
64
@@ -XXX,XX +XXX,XX @@
65
#!/usr/bin/env python3
66
-# group: migration disabled
67
+# group: migration
68
#
69
# Copyright (c) 2021 Virtuozzo International GmbH
70
#
71
--
72
2.31.1
73
74
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Add a simple grammar-parsing template benchmark. The new tool consumes a test
4
template written in bash with some special grammar injections and
5
produces multiple tests, runs them and finally prints a performance
6
comparison table of different tests produced from one template.
7
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Message-Id: <20210824101517.59802-2-vsementsov@virtuozzo.com>
10
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
11
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
12
---
13
scripts/simplebench/img_bench_templater.py | 95 ++++++++++++++++++++++
14
scripts/simplebench/table_templater.py | 62 ++++++++++++++
15
2 files changed, 157 insertions(+)
16
create mode 100755 scripts/simplebench/img_bench_templater.py
17
create mode 100644 scripts/simplebench/table_templater.py
18
19
diff --git a/scripts/simplebench/img_bench_templater.py b/scripts/simplebench/img_bench_templater.py
20
new file mode 100755
21
index XXXXXXX..XXXXXXX
22
--- /dev/null
23
+++ b/scripts/simplebench/img_bench_templater.py
24
@@ -XXX,XX +XXX,XX @@
25
+#!/usr/bin/env python3
26
+#
27
+# Process img-bench test templates
28
+#
29
+# Copyright (c) 2021 Virtuozzo International GmbH.
30
+#
31
+# This program is free software; you can redistribute it and/or modify
32
+# it under the terms of the GNU General Public License as published by
33
+# the Free Software Foundation; either version 2 of the License, or
34
+# (at your option) any later version.
35
+#
36
+# This program is distributed in the hope that it will be useful,
37
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
38
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39
+# GNU General Public License for more details.
40
+#
41
+# You should have received a copy of the GNU General Public License
42
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
43
+#
44
+
45
+
46
+import sys
47
+import subprocess
48
+import re
49
+import json
50
+
51
+import simplebench
52
+from results_to_text import results_to_text
53
+from table_templater import Templater
54
+
55
+
56
+def bench_func(env, case):
57
+ test = templater.gen(env['data'], case['data'])
58
+
59
+ p = subprocess.run(test, shell=True, stdout=subprocess.PIPE,
60
+ stderr=subprocess.STDOUT, universal_newlines=True)
61
+
62
+ if p.returncode == 0:
63
+ try:
64
+ m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
65
+ return {'seconds': float(m.group(1))}
66
+ except Exception:
67
+ return {'error': f'failed to parse qemu-img output: {p.stdout}'}
68
+ else:
69
+ return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
70
+
71
+
72
+if __name__ == '__main__':
73
+ if len(sys.argv) > 1:
74
+ print("""
75
+Usage: img_bench_templater.py < path/to/test-template.sh
76
+
77
+This script generates performance tests from a test template (example below),
78
+runs them, and displays the results in a table. The template is read from
79
+stdin. It must be written in bash and end with a `qemu-img bench` invocation
80
+(whose result is parsed to get the test instance’s result).
81
+
82
+Use the following syntax in the template to create the various different test
83
+instances:
84
+
85
+ column templating: {var1|var2|...} - test will use different values in
86
+ different columns. You may use several {} constructions in the test, in this
87
+ case product of all choice-sets will be used.
88
+
89
+ row templating: [var1|var2|...] - similar thing to define rows (test-cases)
90
+
91
+Test template example:
92
+
93
+Assume you want to compare two qemu-img binaries, called qemu-img-old and
94
+qemu-img-new in your build directory in two test-cases with 4K writes and 64K
95
+writes. The template may look like this:
96
+
97
+qemu_img=/path/to/qemu/build/qemu-img-{old|new}
98
+$qemu_img create -f qcow2 /ssd/x.qcow2 1G
99
+$qemu_img bench -c 100 -d 8 [-s 4K|-s 64K] -w -t none -n /ssd/x.qcow2
100
+
101
+When passing this to stdin of img_bench_templater.py, the resulting comparison
102
+table will contain two columns (for two binaries) and two rows (for two
103
+test-cases).
104
+
105
+In addition to displaying the results, script also stores results in JSON
106
+format into results.json file in current directory.
107
+""")
108
+ sys.exit()
109
+
110
+ templater = Templater(sys.stdin.read())
111
+
112
+ envs = [{'id': ' / '.join(x), 'data': x} for x in templater.columns]
113
+ cases = [{'id': ' / '.join(x), 'data': x} for x in templater.rows]
114
+
115
+ result = simplebench.bench(bench_func, envs, cases, count=5,
116
+ initial_run=False)
117
+ print(results_to_text(result))
118
+ with open('results.json', 'w') as f:
119
+ json.dump(result, f, indent=4)
120
diff --git a/scripts/simplebench/table_templater.py b/scripts/simplebench/table_templater.py
121
new file mode 100644
122
index XXXXXXX..XXXXXXX
123
--- /dev/null
124
+++ b/scripts/simplebench/table_templater.py
125
@@ -XXX,XX +XXX,XX @@
126
+# Parser for test templates
127
+#
128
+# Copyright (c) 2021 Virtuozzo International GmbH.
129
+#
130
+# This program is free software; you can redistribute it and/or modify
131
+# it under the terms of the GNU General Public License as published by
132
+# the Free Software Foundation; either version 2 of the License, or
133
+# (at your option) any later version.
134
+#
135
+# This program is distributed in the hope that it will be useful,
136
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
137
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
138
+# GNU General Public License for more details.
139
+#
140
+# You should have received a copy of the GNU General Public License
141
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
142
+#
143
+
144
+import itertools
145
+from lark import Lark
146
+
147
+grammar = """
148
+start: ( text | column_switch | row_switch )+
149
+
150
+column_switch: "{" text ["|" text]+ "}"
151
+row_switch: "[" text ["|" text]+ "]"
152
+text: /[^|{}\[\]]+/
153
+"""
154
+
155
+parser = Lark(grammar)
156
+
157
+class Templater:
158
+ def __init__(self, template):
159
+ self.tree = parser.parse(template)
160
+
161
+ c_switches = []
162
+ r_switches = []
163
+ for x in self.tree.children:
164
+ if x.data == 'column_switch':
165
+ c_switches.append([el.children[0].value for el in x.children])
166
+ elif x.data == 'row_switch':
167
+ r_switches.append([el.children[0].value for el in x.children])
168
+
169
+ self.columns = list(itertools.product(*c_switches))
170
+ self.rows = list(itertools.product(*r_switches))
171
+
172
+ def gen(self, column, row):
173
+ i = 0
174
+ j = 0
175
+ result = []
176
+
177
+ for x in self.tree.children:
178
+ if x.data == 'text':
179
+ result.append(x.children[0].value)
180
+ elif x.data == 'column_switch':
181
+ result.append(column[i])
182
+ i += 1
183
+ elif x.data == 'row_switch':
184
+ result.append(row[j])
185
+ j += 1
186
+
187
+ return ''.join(result)
188
--
189
2.31.1
190
191
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
No logic change, just prepare for the following commit. While we are
4
here, also make a small grammar fix in a comment.
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
9
Message-Id: <20210824101517.59802-3-vsementsov@virtuozzo.com>
10
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
11
---
12
block/qcow2-cluster.c | 49 ++++++++++++++++++++++++-------------------
13
1 file changed, 28 insertions(+), 21 deletions(-)
14
15
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/qcow2-cluster.c
18
+++ b/block/qcow2-cluster.c
19
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
20
21
if (end <= old_start || start >= old_end) {
22
/* No intersection */
23
- } else {
24
- if (start < old_start) {
25
- /* Stop at the start of a running allocation */
26
- bytes = old_start - start;
27
- } else {
28
- bytes = 0;
29
- }
30
+ continue;
31
+ }
32
33
- /* Stop if already an l2meta exists. After yielding, it wouldn't
34
- * be valid any more, so we'd have to clean up the old L2Metas
35
- * and deal with requests depending on them before starting to
36
- * gather new ones. Not worth the trouble. */
37
- if (bytes == 0 && *m) {
38
- *cur_bytes = 0;
39
- return 0;
40
- }
41
+ /* Conflict */
42
43
- if (bytes == 0) {
44
- /* Wait for the dependency to complete. We need to recheck
45
- * the free/allocated clusters when we continue. */
46
- qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
47
- return -EAGAIN;
48
- }
49
+ if (start < old_start) {
50
+ /* Stop at the start of a running allocation */
51
+ bytes = old_start - start;
52
+ } else {
53
+ bytes = 0;
54
+ }
55
+
56
+ /*
57
+ * Stop if an l2meta already exists. After yielding, it wouldn't
58
+ * be valid any more, so we'd have to clean up the old L2Metas
59
+ * and deal with requests depending on them before starting to
60
+ * gather new ones. Not worth the trouble.
61
+ */
62
+ if (bytes == 0 && *m) {
63
+ *cur_bytes = 0;
64
+ return 0;
65
+ }
66
+
67
+ if (bytes == 0) {
68
+ /*
69
+ * Wait for the dependency to complete. We need to recheck
70
+ * the free/allocated clusters when we continue.
71
+ */
72
+ qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
73
+ return -EAGAIN;
74
}
75
}
76
77
--
78
2.31.1
79
80
diff view generated by jsdifflib
Deleted patch
1
We cannot write to images opened with O_DIRECT unless we allow them to
2
be resized so they are aligned to the sector size: Since 9c60a5d1978,
3
bdrv_node_refresh_perm() ensures that for nodes whose length is not
4
aligned to the request alignment and where someone has taken a WRITE
5
permission, the RESIZE permission is taken, too.
6
1
7
Let qemu-img convert pass the BDRV_O_RESIZE flag (which causes
8
blk_new_open() to take the RESIZE permission) when using cache=none for
9
the target, so that when writing to it, it can be aligned to the target
10
sector size.
11
12
Without this patch, an error is returned:
13
14
$ qemu-img convert -f raw -O raw -t none foo.img /mnt/tmp/foo.img
15
qemu-img: Could not open '/mnt/tmp/foo.img': Cannot get 'write'
16
permission without 'resize': Image size is not a multiple of request
17
alignment
18
19
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1994266
20
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
21
Message-Id: <20210819101200.64235-1-hreitz@redhat.com>
22
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
23
---
24
qemu-img.c | 8 ++++++++
25
1 file changed, 8 insertions(+)
26
27
diff --git a/qemu-img.c b/qemu-img.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/qemu-img.c
30
+++ b/qemu-img.c
31
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
32
goto out;
33
}
34
35
+ if (flags & BDRV_O_NOCACHE) {
36
+ /*
37
+ * If we open the target with O_DIRECT, it may be necessary to
38
+ * extend its size to align to the physical sector size.
39
+ */
40
+ flags |= BDRV_O_RESIZE;
41
+ }
42
+
43
if (skip_create) {
44
s.target = img_open(tgt_image_opts, out_filename, out_fmt,
45
flags, writethrough, s.quiet, false);
46
--
47
2.31.1
48
49
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
- don't use same name for size in bytes and in entries
4
- use g_autofree for l2_table
5
- add whitespace
6
- fix block comment style
7
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
11
Message-Id: <20210914122454.141075-2-vsementsov@virtuozzo.com>
12
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
13
---
14
block/qcow2-refcount.c | 47 +++++++++++++++++++++---------------------
15
1 file changed, 24 insertions(+), 23 deletions(-)
16
17
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/qcow2-refcount.c
20
+++ b/block/qcow2-refcount.c
21
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
22
int flags, BdrvCheckMode fix, bool active)
23
{
24
BDRVQcow2State *s = bs->opaque;
25
- uint64_t *l2_table, l2_entry;
26
+ uint64_t l2_entry;
27
uint64_t next_contiguous_offset = 0;
28
- int i, l2_size, nb_csectors, ret;
29
+ int i, nb_csectors, ret;
30
+ size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
31
+ g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
32
33
/* Read L2 table from disk */
34
- l2_size = s->l2_size * l2_entry_size(s);
35
- l2_table = g_malloc(l2_size);
36
-
37
- ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
38
+ ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes);
39
if (ret < 0) {
40
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
41
res->check_errors++;
42
- goto fail;
43
+ return ret;
44
}
45
46
/* Do the actual checks */
47
- for(i = 0; i < s->l2_size; i++) {
48
+ for (i = 0; i < s->l2_size; i++) {
49
l2_entry = get_l2_entry(s, l2_table, i);
50
51
switch (qcow2_get_cluster_type(bs, l2_entry)) {
52
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
53
l2_entry & QCOW2_COMPRESSED_SECTOR_MASK,
54
nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE);
55
if (ret < 0) {
56
- goto fail;
57
+ return ret;
58
}
59
60
if (flags & CHECK_FRAG_INFO) {
61
res->bfi.allocated_clusters++;
62
res->bfi.compressed_clusters++;
63
64
- /* Compressed clusters are fragmented by nature. Since they
65
+ /*
66
+ * Compressed clusters are fragmented by nature. Since they
67
* take up sub-sector space but we only have sector granularity
68
* I/O we need to re-read the same sectors even for adjacent
69
* compressed clusters.
70
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
71
if (ret < 0) {
72
fprintf(stderr, "ERROR: Overlap check failed\n");
73
res->check_errors++;
74
- /* Something is seriously wrong, so abort checking
75
- * this L2 table */
76
- goto fail;
77
+ /*
78
+ * Something is seriously wrong, so abort checking
79
+ * this L2 table.
80
+ */
81
+ return ret;
82
}
83
84
ret = bdrv_pwrite_sync(bs->file, l2e_offset,
85
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
86
fprintf(stderr, "ERROR: Failed to overwrite L2 "
87
"table entry: %s\n", strerror(-ret));
88
res->check_errors++;
89
- /* Do not abort, continue checking the rest of this
90
- * L2 table's entries */
91
+ /*
92
+ * Do not abort, continue checking the rest of this
93
+ * L2 table's entries.
94
+ */
95
} else {
96
res->corruptions--;
97
res->corruptions_fixed++;
98
- /* Skip marking the cluster as used
99
- * (it is unused now) */
100
+ /*
101
+ * Skip marking the cluster as used
102
+ * (it is unused now).
103
+ */
104
continue;
105
}
106
}
107
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
108
refcount_table_size,
109
offset, s->cluster_size);
110
if (ret < 0) {
111
- goto fail;
112
+ return ret;
113
}
114
}
115
break;
116
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
117
}
118
}
119
120
- g_free(l2_table);
121
return 0;
122
-
123
-fail:
124
- g_free(l2_table);
125
- return ret;
126
}
127
128
/*
129
--
130
2.31.1
131
132
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Let's pass the whole L2 entry and not bother with
4
L2E_COMPRESSED_OFFSET_SIZE_MASK.
5
6
It also helps further refactoring that adds generic
7
qcow2_parse_compressed_l2_entry() helper.
8
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Eric Blake <eblake@redhat.com>
11
Reviewed-by: Alberto Garcia <berto@igalia.com>
12
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
13
Message-Id: <20210914122454.141075-3-vsementsov@virtuozzo.com>
14
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
15
---
16
block/qcow2.h | 1 -
17
block/qcow2-cluster.c | 5 ++---
18
block/qcow2.c | 12 +++++++-----
19
3 files changed, 9 insertions(+), 9 deletions(-)
20
21
diff --git a/block/qcow2.h b/block/qcow2.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/block/qcow2.h
24
+++ b/block/qcow2.h
25
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
26
27
#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
28
#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
29
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
30
31
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
32
33
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/block/qcow2-cluster.c
36
+++ b/block/qcow2-cluster.c
37
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
38
* offset needs to be aligned to a cluster boundary.
39
*
40
* If the cluster is unallocated then *host_offset will be 0.
41
- * If the cluster is compressed then *host_offset will contain the
42
- * complete compressed cluster descriptor.
43
+ * If the cluster is compressed then *host_offset will contain the l2 entry.
44
*
45
* On entry, *bytes is the maximum number of contiguous bytes starting at
46
* offset that we are interested in.
47
@@ -XXX,XX +XXX,XX @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
48
ret = -EIO;
49
goto fail;
50
}
51
- *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK;
52
+ *host_offset = l2_entry;
53
break;
54
case QCOW2_SUBCLUSTER_ZERO_PLAIN:
55
case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
56
diff --git a/block/qcow2.c b/block/qcow2.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/block/qcow2.c
59
+++ b/block/qcow2.c
60
@@ -XXX,XX +XXX,XX @@ typedef struct {
61
62
static int coroutine_fn
63
qcow2_co_preadv_compressed(BlockDriverState *bs,
64
- uint64_t cluster_descriptor,
65
+ uint64_t l2_entry,
66
uint64_t offset,
67
uint64_t bytes,
68
QEMUIOVector *qiov,
69
@@ -XXX,XX +XXX,XX @@ typedef struct Qcow2AioTask {
70
71
BlockDriverState *bs;
72
QCow2SubclusterType subcluster_type; /* only for read */
73
- uint64_t host_offset; /* or full descriptor in compressed clusters */
74
+ uint64_t host_offset; /* or l2_entry for compressed read */
75
uint64_t offset;
76
uint64_t bytes;
77
QEMUIOVector *qiov;
78
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
79
80
static int coroutine_fn
81
qcow2_co_preadv_compressed(BlockDriverState *bs,
82
- uint64_t cluster_descriptor,
83
+ uint64_t l2_entry,
84
uint64_t offset,
85
uint64_t bytes,
86
QEMUIOVector *qiov,
87
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
88
uint8_t *buf, *out_buf;
89
int offset_in_cluster = offset_into_cluster(s, offset);
90
91
- coffset = cluster_descriptor & s->cluster_offset_mask;
92
- nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1;
93
+ assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
94
+
95
+ coffset = l2_entry & s->cluster_offset_mask;
96
+ nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
97
csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
98
(coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
99
100
--
101
2.31.1
102
103
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
We'll reuse the function to fix a wrong L2 entry bitmap. Support it now.
4
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
8
Message-Id: <20210914122454.141075-6-vsementsov@virtuozzo.com>
9
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
10
---
11
block/qcow2-refcount.c | 18 +++++++++++++++---
12
1 file changed, 15 insertions(+), 3 deletions(-)
13
14
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/qcow2-refcount.c
17
+++ b/block/qcow2-refcount.c
18
@@ -XXX,XX +XXX,XX @@ enum {
19
};
20
21
/*
22
- * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN.
23
+ * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN (or making all its present
24
+ * subclusters QCOW2_SUBCLUSTER_ZERO_PLAIN).
25
*
26
* This function decrements res->corruptions on success, so the caller is
27
* responsible to increment res->corruptions prior to the call.
28
@@ -XXX,XX +XXX,XX @@ static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
29
int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
30
uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
31
int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
32
- uint64_t l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
33
34
- set_l2_entry(s, l2_table, l2_index, l2_entry);
35
+ if (has_subclusters(s)) {
36
+ uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);
37
+
38
+ /* Allocated subclusters become zero */
39
+ l2_bitmap |= l2_bitmap << 32;
40
+ l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;
41
+
42
+ set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
43
+ set_l2_entry(s, l2_table, l2_index, 0);
44
+ } else {
45
+ set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
46
+ }
47
+
48
ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
49
false);
50
if (metadata_overlap) {
51
--
52
2.31.1
53
54
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Check the subcluster bitmap of the L2 entry for different types of
4
clusters:
5
6
- for compressed clusters, it must be zero
7
- for allocated clusters, check that the two halves of the bitmap are consistent
8
- for unallocated clusters, all subclusters should be unallocated
9
(or zero-plain)
10
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
13
Message-Id: <20210914122454.141075-7-vsementsov@virtuozzo.com>
14
Reviewed-by: Eric Blake <eblake@redhat.com>
15
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
16
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
17
---
18
block/qcow2-refcount.c | 28 ++++++++++++++++++++++++++--
19
1 file changed, 26 insertions(+), 2 deletions(-)
20
21
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/block/qcow2-refcount.c
24
+++ b/block/qcow2-refcount.c
25
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
26
int flags, BdrvCheckMode fix, bool active)
27
{
28
BDRVQcow2State *s = bs->opaque;
29
- uint64_t l2_entry;
30
+ uint64_t l2_entry, l2_bitmap;
31
uint64_t next_contiguous_offset = 0;
32
int i, ret;
33
size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
34
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
35
uint64_t coffset;
36
int csize;
37
l2_entry = get_l2_entry(s, l2_table, i);
38
+ l2_bitmap = get_l2_bitmap(s, l2_table, i);
39
40
switch (qcow2_get_cluster_type(bs, l2_entry)) {
41
case QCOW2_CLUSTER_COMPRESSED:
42
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
43
break;
44
}
45
46
+ if (l2_bitmap) {
47
+ fprintf(stderr, "ERROR compressed cluster %d with non-zero "
48
+ "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
49
+ i, l2_entry);
50
+ res->corruptions++;
51
+ break;
52
+ }
53
+
54
/* Mark cluster as used */
55
qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
56
ret = qcow2_inc_refcounts_imrt(
57
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
58
{
59
uint64_t offset = l2_entry & L2E_OFFSET_MASK;
60
61
+ if ((l2_bitmap >> 32) & l2_bitmap) {
62
+ res->corruptions++;
63
+ fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
64
+ "cluster has corrupted subcluster allocation bitmap\n",
65
+ offset);
66
+ }
67
+
68
/* Correct offsets are cluster aligned */
69
if (offset_into_cluster(s, offset)) {
70
bool contains_data;
71
res->corruptions++;
72
73
if (has_subclusters(s)) {
74
- uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i);
75
contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
76
} else {
77
contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
78
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
79
}
80
81
case QCOW2_CLUSTER_ZERO_PLAIN:
82
+ /* Impossible when image has subclusters */
83
+ assert(!l2_bitmap);
84
+ break;
85
+
86
case QCOW2_CLUSTER_UNALLOCATED:
87
+ if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
88
+ res->corruptions++;
89
+ fprintf(stderr, "ERROR: Unallocated "
90
+ "cluster has non-zero subcluster allocation map\n");
91
+ }
92
break;
93
94
default:
95
--
96
2.31.1
97
98
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
4
Reviewed-by: Eric Blake <eblake@redhat.com>
5
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
6
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
7
Message-Id: <20210914122454.141075-8-vsementsov@virtuozzo.com>
8
[hreitz: Separated `type` declaration from statements]
9
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
10
---
11
block/qcow2.h | 1 +
12
block/qcow2-refcount.c | 14 +++++++++++++-
13
2 files changed, 14 insertions(+), 1 deletion(-)
14
15
diff --git a/block/qcow2.h b/block/qcow2.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/qcow2.h
18
+++ b/block/qcow2.h
19
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
20
21
#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
22
#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
23
+#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
24
25
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
26
27
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/block/qcow2-refcount.c
30
+++ b/block/qcow2-refcount.c
31
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
32
for (i = 0; i < s->l2_size; i++) {
33
uint64_t coffset;
34
int csize;
35
+ QCow2ClusterType type;
36
+
37
l2_entry = get_l2_entry(s, l2_table, i);
38
l2_bitmap = get_l2_bitmap(s, l2_table, i);
39
+ type = qcow2_get_cluster_type(bs, l2_entry);
40
+
41
+ if (type != QCOW2_CLUSTER_COMPRESSED) {
42
+ /* Check reserved bits of Standard Cluster Descriptor */
43
+ if (l2_entry & L2E_STD_RESERVED_MASK) {
44
+ fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
45
+ "%" PRIx64 "\n", l2_entry);
46
+ res->corruptions++;
47
+ }
48
+ }
49
50
- switch (qcow2_get_cluster_type(bs, l2_entry)) {
51
+ switch (type) {
52
case QCOW2_CLUSTER_COMPRESSED:
53
/* Compressed clusters don't have QCOW_OFLAG_COPIED */
54
if (l2_entry & QCOW_OFLAG_COPIED) {
55
--
56
2.31.1
57
58
diff view generated by jsdifflib