1
The following changes since commit dac03af5d5482ec7ee9c23db467bb7230b33c0d9:
1
The following changes since commit 474f3938d79ab36b9231c9ad3b5a9314c2aeacde:
2
2
3
Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20190825' into staging (2019-08-27 10:00:51 +0100)
3
Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-jun-21-2019' into staging (2019-06-21 15:40:50 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/stefanha/qemu.git tags/block-pull-request
7
https://github.com/XanClic/qemu.git tags/pull-block-2019-06-24
8
8
9
for you to fetch changes up to 5396234b96a2ac743f48644529771498e036e698:
9
for you to fetch changes up to ab5d4a30f7f3803ca5106b370969c1b7b54136f8:
10
10
11
block/qcow2: implement .bdrv_co_pwritev(_compressed)_part (2019-08-27 14:58:42 +0100)
11
iotests: Fix 205 for concurrent runs (2019-06-24 16:01:40 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Block patches:
15
- The SSH block driver now uses libssh instead of libssh2
16
- The VMDK block driver gets read-only support for the seSparse
17
subformat
18
- Various fixes
19
20
---
21
22
v2:
23
- Squashed Pino's fix for pre-0.8 libssh into the libssh patch
15
24
16
----------------------------------------------------------------
25
----------------------------------------------------------------
26
Anton Nefedov (1):
27
iotest 134: test cluster-misaligned encrypted write
17
28
18
Vladimir Sementsov-Ogievskiy (12):
29
Klaus Birkelund Jensen (1):
19
util/iov: introduce qemu_iovec_init_extended
30
nvme: do not advertise support for unsupported arbitration mechanism
20
util/iov: improve qemu_iovec_is_zero
21
block/io: refactor padding
22
block: define .*_part io handlers in BlockDriver
23
block/io: bdrv_co_do_copy_on_readv: use and support qiov_offset
24
block/io: bdrv_co_do_copy_on_readv: lazy allocation
25
block/io: bdrv_aligned_preadv: use and support qiov_offset
26
block/io: bdrv_aligned_pwritev: use and support qiov_offset
27
block/io: introduce bdrv_co_p{read, write}v_part
28
block/qcow2: refactor qcow2_co_preadv to use buffer-based io
29
block/qcow2: implement .bdrv_co_preadv_part
30
block/qcow2: implement .bdrv_co_pwritev(_compressed)_part
31
31
32
block/qcow2.h | 1 +
32
Max Reitz (1):
33
include/block/block_int.h | 21 ++
33
iotests: Fix 205 for concurrent runs
34
include/qemu/iov.h | 10 +-
34
35
block/backup.c | 2 +-
35
Pino Toscano (1):
36
block/io.c | 541 +++++++++++++++++++++++---------------
36
ssh: switch from libssh2 to libssh
37
block/qcow2-cluster.c | 14 +-
37
38
block/qcow2.c | 131 +++++----
38
Sam Eiderman (3):
39
qemu-img.c | 4 +-
39
vmdk: Fix comment regarding max l1_size coverage
40
util/iov.c | 153 +++++++++--
40
vmdk: Reduce the max bound for L1 table size
41
9 files changed, 568 insertions(+), 309 deletions(-)
41
vmdk: Add read-only support for seSparse snapshots
42
43
Vladimir Sementsov-Ogievskiy (1):
44
blockdev: enable non-root nodes for transaction drive-backup source
45
46
configure | 65 +-
47
block/Makefile.objs | 6 +-
48
block/ssh.c | 652 ++++++++++--------
49
block/vmdk.c | 372 +++++++++-
50
blockdev.c | 2 +-
51
hw/block/nvme.c | 1 -
52
.travis.yml | 4 +-
53
block/trace-events | 14 +-
54
docs/qemu-block-drivers.texi | 2 +-
55
.../dockerfiles/debian-win32-cross.docker | 1 -
56
.../dockerfiles/debian-win64-cross.docker | 1 -
57
tests/docker/dockerfiles/fedora.docker | 4 +-
58
tests/docker/dockerfiles/ubuntu.docker | 2 +-
59
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
60
tests/qemu-iotests/059.out | 2 +-
61
tests/qemu-iotests/134 | 9 +
62
tests/qemu-iotests/134.out | 10 +
63
tests/qemu-iotests/205 | 2 +-
64
tests/qemu-iotests/207 | 54 +-
65
tests/qemu-iotests/207.out | 2 +-
66
20 files changed, 823 insertions(+), 384 deletions(-)
42
67
43
--
68
--
44
2.21.0
69
2.21.0
45
70
46
71
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
2
2
3
Implement and use new interface to get rid of hd_qiov.
3
The device mistakenly reports that the Weighted Round Robin with Urgent
4
Priority Class arbitration mechanism is supported.
4
5
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
It is not.
6
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
7
7
Message-id: 20190604161514.262241-13-vsementsov@virtuozzo.com
8
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
8
Message-Id: <20190604161514.262241-13-vsementsov@virtuozzo.com>
9
Message-id: 20190606092530.14206-1-klaus@birkelund.eu
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
12
---
11
block/qcow2.h | 1 +
13
hw/block/nvme.c | 1 -
12
include/qemu/iov.h | 1 +
14
1 file changed, 1 deletion(-)
13
block/qcow2-cluster.c | 9 ++++---
14
block/qcow2.c | 60 +++++++++++++++++++++----------------------
15
util/iov.c | 10 ++++++++
16
5 files changed, 48 insertions(+), 33 deletions(-)
17
15
18
diff --git a/block/qcow2.h b/block/qcow2.h
16
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
20
--- a/block/qcow2.h
18
--- a/hw/block/nvme.c
21
+++ b/block/qcow2.h
19
+++ b/hw/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ typedef struct QCowL2Meta
20
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
23
* from @cow_start and @cow_end into one single write operation.
21
n->bar.cap = 0;
24
*/
22
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
25
QEMUIOVector *data_qiov;
23
NVME_CAP_SET_CQR(n->bar.cap, 1);
26
+ size_t data_qiov_offset;
24
- NVME_CAP_SET_AMS(n->bar.cap, 1);
27
25
NVME_CAP_SET_TO(n->bar.cap, 0xf);
28
/** Pointer to next L2Meta of the same write request */
26
NVME_CAP_SET_CSS(n->bar.cap, 1);
29
struct QCowL2Meta *next;
27
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
30
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/include/qemu/iov.h
33
+++ b/include/qemu/iov.h
34
@@ -XXX,XX +XXX,XX @@ void qemu_iovec_init_extended(
35
void *tail_buf, size_t tail_len);
36
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
37
size_t offset, size_t len);
38
+int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len);
39
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
40
void qemu_iovec_concat(QEMUIOVector *dst,
41
QEMUIOVector *src, size_t soffset, size_t sbytes);
42
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/block/qcow2-cluster.c
45
+++ b/block/qcow2-cluster.c
46
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
47
assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
48
assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
49
assert(start->offset + start->nb_bytes <= end->offset);
50
- assert(!m->data_qiov || m->data_qiov->size == data_bytes);
51
52
if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
53
return 0;
54
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
55
/* The part of the buffer where the end region is located */
56
end_buffer = start_buffer + buffer_size - end->nb_bytes;
57
58
- qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0));
59
+ qemu_iovec_init(&qiov, 2 + (m->data_qiov ?
60
+ qemu_iovec_subvec_niov(m->data_qiov,
61
+ m->data_qiov_offset,
62
+ data_bytes)
63
+ : 0));
64
65
qemu_co_mutex_unlock(&s->lock);
66
/* First we read the existing data from both COW regions. We
67
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
68
if (start->nb_bytes) {
69
qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
70
}
71
- qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);
72
+ qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes);
73
if (end->nb_bytes) {
74
qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
75
}
76
diff --git a/block/qcow2.c b/block/qcow2.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/block/qcow2.c
79
+++ b/block/qcow2.c
80
@@ -XXX,XX +XXX,XX @@ fail:
81
/* Check if it's possible to merge a write request with the writing of
82
* the data from the COW regions */
83
static bool merge_cow(uint64_t offset, unsigned bytes,
84
- QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
85
+ QEMUIOVector *qiov, size_t qiov_offset,
86
+ QCowL2Meta *l2meta)
87
{
88
QCowL2Meta *m;
89
90
@@ -XXX,XX +XXX,XX @@ static bool merge_cow(uint64_t offset, unsigned bytes,
91
92
/* Make sure that adding both COW regions to the QEMUIOVector
93
* does not exceed IOV_MAX */
94
- if (hd_qiov->niov > IOV_MAX - 2) {
95
+ if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
96
continue;
97
}
98
99
- m->data_qiov = hd_qiov;
100
+ m->data_qiov = qiov;
101
+ m->data_qiov_offset = qiov_offset;
102
return true;
103
}
104
105
@@ -XXX,XX +XXX,XX @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
106
return 0;
107
}
108
109
-static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
110
- uint64_t bytes, QEMUIOVector *qiov,
111
- int flags)
112
+static coroutine_fn int qcow2_co_pwritev_part(
113
+ BlockDriverState *bs, uint64_t offset, uint64_t bytes,
114
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
115
{
116
BDRVQcow2State *s = bs->opaque;
117
int offset_in_cluster;
118
int ret;
119
unsigned int cur_bytes; /* number of sectors in current iteration */
120
uint64_t cluster_offset;
121
- QEMUIOVector hd_qiov;
122
+ QEMUIOVector encrypted_qiov;
123
uint64_t bytes_done = 0;
124
uint8_t *cluster_data = NULL;
125
QCowL2Meta *l2meta = NULL;
126
127
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
128
129
- qemu_iovec_init(&hd_qiov, qiov->niov);
130
-
131
qemu_co_mutex_lock(&s->lock);
132
133
while (bytes != 0) {
134
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
135
136
qemu_co_mutex_unlock(&s->lock);
137
138
- qemu_iovec_reset(&hd_qiov);
139
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
140
-
141
if (bs->encrypted) {
142
assert(s->crypto);
143
if (!cluster_data) {
144
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
145
}
146
}
147
148
- assert(hd_qiov.size <=
149
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
150
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
151
+ assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
152
+ qemu_iovec_to_buf(qiov, qiov_offset + bytes_done,
153
+ cluster_data, cur_bytes);
154
155
if (qcow2_co_encrypt(bs, cluster_offset, offset,
156
cluster_data, cur_bytes) < 0) {
157
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
158
goto out_unlocked;
159
}
160
161
- qemu_iovec_reset(&hd_qiov);
162
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
163
+ qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes);
164
}
165
166
/* Try to efficiently initialize the physical space with zeroes */
167
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
168
* writing of the guest data together with that of the COW regions.
169
* If it's not possible (or not necessary) then write the
170
* guest data now. */
171
- if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
172
+ if (!merge_cow(offset, cur_bytes,
173
+ bs->encrypted ? &encrypted_qiov : qiov,
174
+ bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta))
175
+ {
176
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
177
trace_qcow2_writev_data(qemu_coroutine_self(),
178
cluster_offset + offset_in_cluster);
179
- ret = bdrv_co_pwritev(s->data_file,
180
- cluster_offset + offset_in_cluster,
181
- cur_bytes, &hd_qiov, 0);
182
+ ret = bdrv_co_pwritev_part(
183
+ s->data_file, cluster_offset + offset_in_cluster, cur_bytes,
184
+ bs->encrypted ? &encrypted_qiov : qiov,
185
+ bs->encrypted ? 0 : qiov_offset + bytes_done, 0);
186
if (ret < 0) {
187
goto out_unlocked;
188
}
189
@@ -XXX,XX +XXX,XX @@ out_locked:
190
191
qemu_co_mutex_unlock(&s->lock);
192
193
- qemu_iovec_destroy(&hd_qiov);
194
qemu_vfree(cluster_data);
195
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
196
197
@@ -XXX,XX +XXX,XX @@ fail:
198
/* XXX: put compressed sectors first, then all the cluster aligned
199
tables to avoid losing bytes in alignment */
200
static coroutine_fn int
201
-qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
202
- uint64_t bytes, QEMUIOVector *qiov)
203
+qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
204
+ uint64_t offset, uint64_t bytes,
205
+ QEMUIOVector *qiov, size_t qiov_offset)
206
{
207
BDRVQcow2State *s = bs->opaque;
208
int ret;
209
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
210
/* Zero-pad last write if image size is not cluster aligned */
211
memset(buf + bytes, 0, s->cluster_size - bytes);
212
}
213
- qemu_iovec_to_buf(qiov, 0, buf, bytes);
214
+ qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
215
216
out_buf = g_malloc(s->cluster_size);
217
218
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
219
buf, s->cluster_size);
220
if (out_len == -ENOMEM) {
221
/* could not compress: write normal cluster */
222
- ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
223
+ ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
224
if (ret < 0) {
225
goto fail;
226
}
227
@@ -XXX,XX +XXX,XX @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
228
BDRVQcow2State *s = bs->opaque;
229
230
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
231
- return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
232
- qiov->size, qiov, 0);
233
+ return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
234
+ qiov->size, qiov, 0, 0);
235
}
236
237
static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
238
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
239
.bdrv_co_block_status = qcow2_co_block_status,
240
241
.bdrv_co_preadv_part = qcow2_co_preadv_part,
242
- .bdrv_co_pwritev = qcow2_co_pwritev,
243
+ .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
244
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
245
246
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
247
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
248
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
249
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
250
.bdrv_co_truncate = qcow2_co_truncate,
251
- .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
252
+ .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
253
.bdrv_make_empty = qcow2_make_empty,
254
255
.bdrv_snapshot_create = qcow2_snapshot_create,
256
diff --git a/util/iov.c b/util/iov.c
257
index XXXXXXX..XXXXXXX 100644
258
--- a/util/iov.c
259
+++ b/util/iov.c
260
@@ -XXX,XX +XXX,XX @@ static struct iovec *qiov_slice(QEMUIOVector *qiov,
261
return iov;
262
}
263
264
+int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
265
+{
266
+ size_t head, tail;
267
+ int niov;
268
+
269
+ qiov_slice(qiov, offset, len, &head, &tail, &niov);
270
+
271
+ return niov;
272
+}
273
+
274
/*
275
* Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
276
* and @tail_buf buffer into new qiov.
277
--
28
--
278
2.21.0
29
2.21.0
279
30
280
31
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Implement and use new interface to get rid of hd_qiov.
3
We forgot to enable it for transaction .prepare, while it is already
4
enabled in do_drive_backup since commit a2d665c1bc362
5
"blockdev: loosen restrictions on drive-backup source node"
4
6
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20190618140804.59214-1-vsementsov@virtuozzo.com
7
Message-id: 20190604161514.262241-12-vsementsov@virtuozzo.com
9
Reviewed-by: John Snow <jsnow@redhat.com>
8
Message-Id: <20190604161514.262241-12-vsementsov@virtuozzo.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
---
11
block/qcow2-cluster.c | 5 +++--
12
blockdev.c | 2 +-
12
block/qcow2.c | 49 +++++++++++++++++++------------------------
13
1 file changed, 1 insertion(+), 1 deletion(-)
13
2 files changed, 25 insertions(+), 29 deletions(-)
14
14
15
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
15
diff --git a/blockdev.c b/blockdev.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/qcow2-cluster.c
17
--- a/blockdev.c
18
+++ b/block/qcow2-cluster.c
18
+++ b/blockdev.c
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
19
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
20
* interface. This avoids double I/O throttling and request tracking,
20
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
21
* which can lead to deadlock when block layer copy-on-read is enabled.
21
backup = common->action->u.drive_backup.data;
22
*/
22
23
- ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
23
- bs = qmp_get_root_bs(backup->device, errp);
24
- qiov->size, qiov, 0);
24
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
25
+ ret = bs->drv->bdrv_co_preadv_part(bs,
25
if (!bs) {
26
+ src_cluster_offset + offset_in_cluster,
26
return;
27
+ qiov->size, qiov, 0, 0);
28
if (ret < 0) {
29
return ret;
30
}
27
}
31
diff --git a/block/qcow2.c b/block/qcow2.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/block/qcow2.c
34
+++ b/block/qcow2.c
35
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
36
uint64_t file_cluster_offset,
37
uint64_t offset,
38
uint64_t bytes,
39
- QEMUIOVector *qiov);
40
+ QEMUIOVector *qiov,
41
+ size_t qiov_offset);
42
43
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
44
{
45
@@ -XXX,XX +XXX,XX @@ out:
46
return ret;
47
}
48
49
-static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
50
- uint64_t bytes, QEMUIOVector *qiov,
51
- int flags)
52
+static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
53
+ uint64_t offset, uint64_t bytes,
54
+ QEMUIOVector *qiov,
55
+ size_t qiov_offset, int flags)
56
{
57
BDRVQcow2State *s = bs->opaque;
58
int offset_in_cluster;
59
int ret;
60
unsigned int cur_bytes; /* number of bytes in current iteration */
61
uint64_t cluster_offset = 0;
62
- uint64_t bytes_done = 0;
63
- QEMUIOVector hd_qiov;
64
uint8_t *cluster_data = NULL;
65
66
- qemu_iovec_init(&hd_qiov, qiov->niov);
67
-
68
while (bytes != 0) {
69
70
/* prepare next request */
71
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
72
73
offset_in_cluster = offset_into_cluster(s, offset);
74
75
- qemu_iovec_reset(&hd_qiov);
76
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
77
-
78
switch (ret) {
79
case QCOW2_CLUSTER_UNALLOCATED:
80
81
if (bs->backing) {
82
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
83
- ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
84
- &hd_qiov, 0);
85
+ ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes,
86
+ qiov, qiov_offset, 0);
87
if (ret < 0) {
88
goto fail;
89
}
90
} else {
91
/* Note: in this case, no need to wait */
92
- qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
93
+ qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
94
}
95
break;
96
97
case QCOW2_CLUSTER_ZERO_PLAIN:
98
case QCOW2_CLUSTER_ZERO_ALLOC:
99
- qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
100
+ qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
101
break;
102
103
case QCOW2_CLUSTER_COMPRESSED:
104
ret = qcow2_co_preadv_compressed(bs, cluster_offset,
105
offset, cur_bytes,
106
- &hd_qiov);
107
+ qiov, qiov_offset);
108
if (ret < 0) {
109
goto fail;
110
}
111
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
112
ret = -EIO;
113
goto fail;
114
}
115
- qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
116
+ qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes);
117
} else {
118
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
119
- ret = bdrv_co_preadv(s->data_file,
120
- cluster_offset + offset_in_cluster,
121
- cur_bytes, &hd_qiov, 0);
122
+ ret = bdrv_co_preadv_part(s->data_file,
123
+ cluster_offset + offset_in_cluster,
124
+ cur_bytes, qiov, qiov_offset, 0);
125
if (ret < 0) {
126
goto fail;
127
}
128
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
129
130
bytes -= cur_bytes;
131
offset += cur_bytes;
132
- bytes_done += cur_bytes;
133
+ qiov_offset += cur_bytes;
134
}
135
ret = 0;
136
137
fail:
138
- qemu_iovec_destroy(&hd_qiov);
139
qemu_vfree(cluster_data);
140
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
143
uint64_t file_cluster_offset,
144
uint64_t offset,
145
uint64_t bytes,
146
- QEMUIOVector *qiov)
147
+ QEMUIOVector *qiov,
148
+ size_t qiov_offset)
149
{
150
BDRVQcow2State *s = bs->opaque;
151
int ret = 0, csize, nb_csectors;
152
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
153
goto fail;
154
}
155
156
- qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes);
157
+ qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
158
159
fail:
160
qemu_vfree(out_buf);
161
@@ -XXX,XX +XXX,XX @@ static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
162
BDRVQcow2State *s = bs->opaque;
163
164
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
165
- return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
166
- qiov->size, qiov, 0);
167
+ return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
168
+ qiov->size, qiov, 0, 0);
169
}
170
171
/*
172
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
173
.bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
174
.bdrv_co_block_status = qcow2_co_block_status,
175
176
- .bdrv_co_preadv = qcow2_co_preadv,
177
+ .bdrv_co_preadv_part = qcow2_co_preadv_part,
178
.bdrv_co_pwritev = qcow2_co_pwritev,
179
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
180
181
--
28
--
182
2.21.0
29
2.21.0
183
30
184
31
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
2
2
3
Use buffer based io in encrypted case.
3
COW (even empty/zero) areas require encryption too
4
4
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
6
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Message-id: 20190604161514.262241-11-vsementsov@virtuozzo.com
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-Id: <20190604161514.262241-11-vsementsov@virtuozzo.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 20190516143028.81155-1-anton.nefedov@virtuozzo.com
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
11
---
11
block/qcow2.c | 28 ++++++++++++++++------------
12
tests/qemu-iotests/134 | 9 +++++++++
12
1 file changed, 16 insertions(+), 12 deletions(-)
13
tests/qemu-iotests/134.out | 10 ++++++++++
14
2 files changed, 19 insertions(+)
13
15
14
diff --git a/block/qcow2.c b/block/qcow2.c
16
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/134
19
+++ b/tests/qemu-iotests/134
20
@@ -XXX,XX +XXX,XX @@ echo
21
echo "== reading whole image =="
22
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
23
24
+echo
25
+echo "== rewriting cluster part =="
26
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
27
+
28
+echo
29
+echo "== verify pattern =="
30
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
31
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
32
+
33
echo
34
echo "== rewriting whole image =="
35
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
36
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
15
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
16
--- a/block/qcow2.c
38
--- a/tests/qemu-iotests/134.out
17
+++ b/block/qcow2.c
39
+++ b/tests/qemu-iotests/134.out
18
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
40
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
19
}
41
read 134217728/134217728 bytes at offset 0
20
42
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
21
assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
43
22
- qemu_iovec_reset(&hd_qiov);
44
+== rewriting cluster part ==
23
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
45
+wrote 512/512 bytes at offset 512
24
- }
46
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
25
26
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
27
- ret = bdrv_co_preadv(s->data_file,
28
- cluster_offset + offset_in_cluster,
29
- cur_bytes, &hd_qiov, 0);
30
- if (ret < 0) {
31
- goto fail;
32
- }
33
- if (bs->encrypted) {
34
- assert(s->crypto);
35
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
36
+ ret = bdrv_co_pread(s->data_file,
37
+ cluster_offset + offset_in_cluster,
38
+ cur_bytes, cluster_data, 0);
39
+ if (ret < 0) {
40
+ goto fail;
41
+ }
42
+
47
+
43
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
48
+== verify pattern ==
44
assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
49
+read 512/512 bytes at offset 0
45
if (qcow2_co_decrypt(bs, cluster_offset, offset,
50
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
46
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
51
+read 512/512 bytes at offset 512
47
goto fail;
52
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
48
}
53
+
49
qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
54
== rewriting whole image ==
50
+ } else {
55
wrote 134217728/134217728 bytes at offset 0
51
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
56
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
52
+ ret = bdrv_co_preadv(s->data_file,
53
+ cluster_offset + offset_in_cluster,
54
+ cur_bytes, &hd_qiov, 0);
55
+ if (ret < 0) {
56
+ goto fail;
57
+ }
58
}
59
break;
60
61
--
57
--
62
2.21.0
58
2.21.0
63
59
64
60
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Introduce extended variants of bdrv_co_preadv and bdrv_co_pwritev
3
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
4
with qiov_offset parameter.
4
extended the l1_size check from VMDK4 to VMDK3 but did not update the
5
default coverage in the moved comment.
5
6
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
The previous vmdk4 calculation:
7
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
8
8
Message-id: 20190604161514.262241-10-vsementsov@virtuozzo.com
9
(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB
9
Message-Id: <20190604161514.262241-10-vsementsov@virtuozzo.com>
10
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
The added vmdk3 calculation:
12
13
(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB
14
15
Add the vmdk3 calculation to the comment.
16
17
In any case, VMware does not offer virtual disks larger than 2TB for
18
vmdk4/vmdk3 or 64TB for the new undocumented seSparse format which is
19
not implemented yet in qemu.
20
21
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
22
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
23
Reviewed-by: Liran Alon <liran.alon@oracle.com>
24
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
25
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
26
Message-id: 20190620091057.47441-2-shmuel.eiderman@oracle.com
27
Reviewed-by: yuchenlin <yuchenlin@synology.com>
28
Reviewed-by: Max Reitz <mreitz@redhat.com>
29
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
30
---
12
include/block/block_int.h | 6 ++++++
31
block/vmdk.c | 11 ++++++++---
13
block/io.c | 29 +++++++++++++++++++++++------
32
1 file changed, 8 insertions(+), 3 deletions(-)
14
2 files changed, 29 insertions(+), 6 deletions(-)
15
33
16
diff --git a/include/block/block_int.h b/include/block/block_int.h
34
diff --git a/block/vmdk.c b/block/vmdk.c
17
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
18
--- a/include/block/block_int.h
36
--- a/block/vmdk.c
19
+++ b/include/block/block_int.h
37
+++ b/block/vmdk.c
20
@@ -XXX,XX +XXX,XX @@ extern BlockDriver bdrv_qcow2;
38
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
21
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
39
return -EFBIG;
22
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
23
BdrvRequestFlags flags);
24
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
25
+ int64_t offset, unsigned int bytes,
26
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
27
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
28
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
29
BdrvRequestFlags flags);
30
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
31
+ int64_t offset, unsigned int bytes,
32
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
33
34
static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
35
int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
36
diff --git a/block/io.c b/block/io.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/io.c
39
+++ b/block/io.c
40
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
41
*
42
* Function always succeeds.
43
*/
44
-static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
45
+static bool bdrv_pad_request(BlockDriverState *bs,
46
+ QEMUIOVector **qiov, size_t *qiov_offset,
47
int64_t *offset, unsigned int *bytes,
48
BdrvRequestPadding *pad)
49
{
50
@@ -XXX,XX +XXX,XX @@ static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
51
}
40
}
52
41
if (l1_size > 512 * 1024 * 1024) {
53
qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
42
- /* Although with big capacity and small l1_entry_sectors, we can get a
54
- *qiov, 0, *bytes,
43
+ /*
55
+ *qiov, *qiov_offset, *bytes,
44
+ * Although with big capacity and small l1_entry_sectors, we can get a
56
pad->buf + pad->buf_len - pad->tail, pad->tail);
45
* big l1_size, we don't want unbounded value to allocate the table.
57
*bytes += pad->head + pad->tail;
46
- * Limit it to 512M, which is 16PB for default cluster and L2 table
58
*offset -= pad->head;
47
- * size */
59
*qiov = &pad->local_qiov;
48
+ * Limit it to 512M, which is:
60
+ *qiov_offset = 0;
49
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
61
50
+ * cluster size: 64KB, L2 table size: 512 entries
62
return true;
51
+ * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
63
}
52
+ * cluster size: 512B, L2 table size: 4096 entries
64
@@ -XXX,XX +XXX,XX @@ static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
53
+ */
65
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
54
error_setg(errp, "L1 size too big");
66
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
55
return -EFBIG;
67
BdrvRequestFlags flags)
68
+{
69
+ return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
70
+}
71
+
72
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
73
+ int64_t offset, unsigned int bytes,
74
+ QEMUIOVector *qiov, size_t qiov_offset,
75
+ BdrvRequestFlags flags)
76
{
77
BlockDriverState *bs = child->bs;
78
BdrvTrackedRequest req;
79
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
80
flags |= BDRV_REQ_COPY_ON_READ;
81
}
56
}
82
83
- bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
84
+ bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
85
86
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
87
ret = bdrv_aligned_preadv(child, &req, offset, bytes,
88
bs->bl.request_alignment,
89
- qiov, 0, flags);
90
+ qiov, qiov_offset, flags);
91
tracked_request_end(&req);
92
bdrv_dec_in_flight(bs);
93
94
@@ -XXX,XX +XXX,XX @@ out:
95
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
96
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
97
BdrvRequestFlags flags)
98
+{
99
+ return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
100
+}
101
+
102
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
103
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
104
+ BdrvRequestFlags flags)
105
{
106
BlockDriverState *bs = child->bs;
107
BdrvTrackedRequest req;
108
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
109
goto out;
110
}
111
112
- if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
113
+ if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
114
mark_request_serialising(&req, align);
115
wait_serialising_requests(&req);
116
bdrv_padding_rmw_read(child, &req, &pad, false);
117
}
118
119
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
120
- qiov, 0, flags);
121
+ qiov, qiov_offset, flags);
122
123
bdrv_padding_destroy(&pad);
124
125
--
57
--
126
2.21.0
58
2.21.0
127
59
128
60
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Use and support new API in bdrv_aligned_pwritev.
3
512M of L1 entries is a very loose bound, only 32M are required to store
4
the maximal supported VMDK file size of 2TB.
4
5
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Fixed qemu-iotest 59# - the failure now occurs earlier, on an impossible L1
6
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
7
table size.
7
Message-id: 20190604161514.262241-9-vsementsov@virtuozzo.com
8
8
Message-Id: <20190604161514.262241-9-vsementsov@virtuozzo.com>
9
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
11
Reviewed-by: Liran Alon <liran.alon@oracle.com>
12
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
13
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
14
Message-id: 20190620091057.47441-3-shmuel.eiderman@oracle.com
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
17
---
11
block/io.c | 27 +++++++++++++--------------
18
block/vmdk.c | 13 +++++++------
12
1 file changed, 13 insertions(+), 14 deletions(-)
19
tests/qemu-iotests/059.out | 2 +-
20
2 files changed, 8 insertions(+), 7 deletions(-)
13
21
14
diff --git a/block/io.c b/block/io.c
22
diff --git a/block/vmdk.c b/block/vmdk.c
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/block/io.c
24
--- a/block/vmdk.c
17
+++ b/block/io.c
25
+++ b/block/vmdk.c
18
@@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
26
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
19
*/
27
error_setg(errp, "Invalid granularity, image may be corrupt");
20
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
28
return -EFBIG;
21
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
22
- int64_t align, QEMUIOVector *qiov, int flags)
23
+ int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
24
{
25
BlockDriverState *bs = child->bs;
26
BlockDriver *drv = bs->drv;
27
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
28
assert(is_power_of_2(align));
29
assert((offset & (align - 1)) == 0);
30
assert((bytes & (align - 1)) == 0);
31
- assert(!qiov || bytes == qiov->size);
32
+ assert(!qiov || qiov_offset + bytes <= qiov->size);
33
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
34
align);
35
36
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
37
38
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
39
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
40
- qemu_iovec_is_zero(qiov, 0, qiov->size)) {
41
+ qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
42
flags |= BDRV_REQ_ZERO_WRITE;
43
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
44
flags |= BDRV_REQ_MAY_UNMAP;
45
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
46
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
47
ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
48
} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
49
- ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov, 0);
50
+ ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
51
+ qiov, qiov_offset);
52
} else if (bytes <= max_transfer) {
53
bdrv_debug_event(bs, BLKDBG_PWRITEV);
54
- ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, 0, flags);
55
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
56
} else {
57
bdrv_debug_event(bs, BLKDBG_PWRITEV);
58
while (bytes_remaining) {
59
int num = MIN(bytes_remaining, max_transfer);
60
- QEMUIOVector local_qiov;
61
int local_flags = flags;
62
63
assert(num);
64
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
65
* need to flush on the last iteration */
66
local_flags &= ~BDRV_REQ_FUA;
67
}
68
- qemu_iovec_init(&local_qiov, qiov->niov);
69
- qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
70
71
ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
72
- num, &local_qiov, 0, local_flags);
73
- qemu_iovec_destroy(&local_qiov);
74
+ num, qiov, bytes - bytes_remaining,
75
+ local_flags);
76
if (ret < 0) {
77
break;
78
}
79
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
80
81
qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
82
ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
83
- align, &local_qiov,
84
+ align, &local_qiov, 0,
85
flags & ~BDRV_REQ_ZERO_WRITE);
86
if (ret < 0 || pad.merge_reads) {
87
/* Error or all work is done */
88
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
89
/* Write the aligned part in the middle. */
90
uint64_t aligned_bytes = bytes & ~(align - 1);
91
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
92
- NULL, flags);
93
+ NULL, 0, flags);
94
if (ret < 0) {
95
goto out;
96
}
97
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
98
99
qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
100
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
101
- &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
102
+ &local_qiov, 0,
103
+ flags & ~BDRV_REQ_ZERO_WRITE);
104
}
29
}
105
30
- if (l1_size > 512 * 1024 * 1024) {
106
out:
31
+ if (l1_size > 32 * 1024 * 1024) {
107
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
32
/*
108
}
33
* Although with big capacity and small l1_entry_sectors, we can get a
109
34
* big l1_size, we don't want unbounded value to allocate the table.
110
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
35
- * Limit it to 512M, which is:
111
- qiov, flags);
36
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
112
+ qiov, 0, flags);
37
- * cluster size: 64KB, L2 table size: 512 entries
113
38
- * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
114
bdrv_padding_destroy(&pad);
39
- * cluster size: 512B, L2 table size: 4096 entries
115
40
+ * Limit it to 32M, which is enough to store:
41
+ * 8TB - for both VMDK3 & VMDK4 with
42
+ * minimal cluster size: 512B
43
+ * minimal L2 table size: 512 entries
44
+ * 8 TB is still more than the maximal value supported for
45
+ * VMDK3 & VMDK4 which is 2TB.
46
*/
47
error_setg(errp, "L1 size too big");
48
return -EFBIG;
49
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tests/qemu-iotests/059.out
52
+++ b/tests/qemu-iotests/059.out
53
@@ -XXX,XX +XXX,XX @@ Offset Length Mapped to File
54
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
55
56
=== Testing afl image with a very large capacity ===
57
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
58
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
59
*** done
116
--
60
--
117
2.21.0
61
2.21.0
118
62
119
63
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Introduce new initialization API, to create requests with padding. Will
3
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
4
be used in the following patch. New API uses qemu_iovec_init_buf if
4
QEMU).
5
resulting io vector has only one element, to avoid extra allocations.
5
6
So, we need to update qemu_iovec_destroy to support destroying such
6
This format was lacking in the following:
7
QIOVs.
7
8
8
* Grain directory (L1) and grain table (L2) entries were 32-bit,
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
allowing access to only 2TB (slightly less) of data.
10
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
10
* The grain size (default) was 512 bytes - leading to data
11
Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com
11
fragmentation and many grain tables.
12
Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com>
12
* For space reclamation purposes, it was necessary to find all the
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
grains which are not pointed to by any grain table - so a reverse
14
mapping of "offset of grain in vmdk" to "grain table" must be
15
constructed - which takes large amounts of CPU/RAM.
16
17
The format specification can be found in VMware's documentation:
18
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf
19
20
In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
21
introduced: SESparse (Space Efficient).
22
23
This format fixes the above issues:
24
25
* All entries are now 64-bit.
26
* The grain size (default) is 4KB.
27
* Grain directory and grain tables are now located at the beginning
28
of the file.
29
+ seSparse format reserves space for all grain tables.
30
+ Grain tables can be addressed using an index.
31
+ Grains are located at the end of the file and can also be
32
addressed with an index.
33
- seSparse vmdks of large disks (64TB) have huge preallocated
34
headers - mainly due to L2 tables, even for empty snapshots.
35
* The header contains a reverse mapping ("backmap") of "offset of
36
grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
37
specifies for each grain - whether it is allocated or not.
38
Using these data structures we can implement space reclamation
39
efficiently.
40
* Due to the fact that the header now maintains two mappings:
41
* The regular one (grain directory & grain tables)
42
* A reverse one (backmap and free bitmap)
43
These data structures can lose consistency upon crash and result
44
in a corrupted VMDK.
45
Therefore, a journal is also added to the VMDK and is replayed
46
when VMware reopens the file after a crash.
47
48
Since ESXi 6.7 - SESparse is the only snapshot format available.
49
50
Unfortunately, VMware does not provide documentation regarding the new
51
seSparse format.
52
53
This commit is based on black-box research of the seSparse format.
54
Various in-guest block operations and their effect on the snapshot file
55
were tested.
56
57
The only VMware provided source of information (regarding the underlying
58
implementation) was a log file on the ESXi:
59
60
/var/log/hostd.log
61
62
Whenever an seSparse snapshot is created - the log is being populated
63
with seSparse records.
64
65
Relevant log records are of the form:
66
67
[...] Const Header:
68
[...] constMagic = 0xcafebabe
69
[...] version = 2.1
70
[...] capacity = 204800
71
[...] grainSize = 8
72
[...] grainTableSize = 64
73
[...] flags = 0
74
[...] Extents:
75
[...] Header : <1 : 1>
76
[...] JournalHdr : <2 : 2>
77
[...] Journal : <2048 : 2048>
78
[...] GrainDirectory : <4096 : 2048>
79
[...] GrainTables : <6144 : 2048>
80
[...] FreeBitmap : <8192 : 2048>
81
[...] BackMap : <10240 : 2048>
82
[...] Grain : <12288 : 204800>
83
[...] Volatile Header:
84
[...] volatileMagic = 0xcafecafe
85
[...] FreeGTNumber = 0
86
[...] nextTxnSeqNumber = 0
87
[...] replayJournal = 0
88
89
The sizes that are seen in the log file are in sectors.
90
Extents are of the following format: <offset : size>
91
92
This commit is a strict implementation which enforces:
93
* magics
94
* version number 2.1
95
* grain size of 8 sectors (4KB)
96
* grain table size of 64 sectors
97
* zero flags
98
* extent locations
99
100
Additionally, this commit provides only a subset of the functionality
101
offered by seSparse's format:
102
* Read-only
103
* No journal replay
104
* No space reclamation
105
* No unmap support
106
107
Hence, journal header, journal, free bitmap and backmap extents are
108
unused, only the "classic" (L1 -> L2 -> data) grain access is
109
implemented.
110
111
However there are several differences in the grain access itself.
112
Grain directory (L1):
113
* Grain directory entries are indexes (not offsets) to grain
114
tables.
115
* Valid grain directory entries have their highest nibble set to
116
0x1.
117
* Since grain tables are always located at the beginning of the
118
file - the index can fit into 32 bits - so we can use its low
119
part if it's valid.
120
Grain table (L2):
121
* Grain table entries are indexes (not offsets) to grains.
122
* If the highest nibble of the entry is:
123
0x0:
124
The grain is not allocated.
125
The rest of the bytes are 0.
126
0x1:
127
The grain is unmapped - guest sees a zero grain.
128
The rest of the bits point to the previously mapped grain,
129
see 0x3 case.
130
0x2:
131
The grain is zero.
132
0x3:
133
The grain is allocated - to get the index calculate:
134
((entry & 0x0fff000000000000) >> 48) |
135
((entry & 0x0000ffffffffffff) << 12)
136
* The difference between 0x1 and 0x2 is that 0x1 is an unallocated
137
grain which results from the guest using sg_unmap to unmap the
138
grain - but the grain itself still exists in the grain extent - a
139
space reclamation procedure should delete it.
140
Unmapping a zero grain has no effect (0x2 will not change to 0x1)
141
but unmapping an unallocated grain will (0x0 to 0x1) - naturally.
142
143
In order to implement seSparse some fields had to be changed to support
144
both 32-bit and 64-bit entry sizes.
145
146
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
147
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
148
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
149
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
150
Message-id: 20190620091057.47441-4-shmuel.eiderman@oracle.com
151
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
---
152
---
15
include/qemu/iov.h | 7 +++
153
block/vmdk.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++---
16
util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++--
154
1 file changed, 342 insertions(+), 16 deletions(-)
17
2 files changed, 114 insertions(+), 5 deletions(-)
155
18
156
diff --git a/block/vmdk.c b/block/vmdk.c
19
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
20
index XXXXXXX..XXXXXXX 100644
157
index XXXXXXX..XXXXXXX 100644
21
--- a/include/qemu/iov.h
158
--- a/block/vmdk.c
22
+++ b/include/qemu/iov.h
159
+++ b/block/vmdk.c
23
@@ -XXX,XX +XXX,XX @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
160
@@ -XXX,XX +XXX,XX @@ typedef struct {
24
161
uint16_t compressAlgorithm;
25
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
162
} QEMU_PACKED VMDK4Header;
26
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
163
27
+void qemu_iovec_init_extended(
164
+typedef struct VMDKSESparseConstHeader {
28
+ QEMUIOVector *qiov,
165
+ uint64_t magic;
29
+ void *head_buf, size_t head_len,
166
+ uint64_t version;
30
+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
167
+ uint64_t capacity;
31
+ void *tail_buf, size_t tail_len);
168
+ uint64_t grain_size;
32
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
169
+ uint64_t grain_table_size;
33
+ size_t offset, size_t len);
170
+ uint64_t flags;
34
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
171
+ uint64_t reserved1;
35
void qemu_iovec_concat(QEMUIOVector *dst,
172
+ uint64_t reserved2;
36
QEMUIOVector *src, size_t soffset, size_t sbytes);
173
+ uint64_t reserved3;
37
diff --git a/util/iov.c b/util/iov.c
174
+ uint64_t reserved4;
38
index XXXXXXX..XXXXXXX 100644
175
+ uint64_t volatile_header_offset;
39
--- a/util/iov.c
176
+ uint64_t volatile_header_size;
40
+++ b/util/iov.c
177
+ uint64_t journal_header_offset;
41
@@ -XXX,XX +XXX,XX @@ void qemu_iovec_concat(QEMUIOVector *dst,
178
+ uint64_t journal_header_size;
42
qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
179
+ uint64_t journal_offset;
180
+ uint64_t journal_size;
181
+ uint64_t grain_dir_offset;
182
+ uint64_t grain_dir_size;
183
+ uint64_t grain_tables_offset;
184
+ uint64_t grain_tables_size;
185
+ uint64_t free_bitmap_offset;
186
+ uint64_t free_bitmap_size;
187
+ uint64_t backmap_offset;
188
+ uint64_t backmap_size;
189
+ uint64_t grains_offset;
190
+ uint64_t grains_size;
191
+ uint8_t pad[304];
192
+} QEMU_PACKED VMDKSESparseConstHeader;
193
+
194
+typedef struct VMDKSESparseVolatileHeader {
195
+ uint64_t magic;
196
+ uint64_t free_gt_number;
197
+ uint64_t next_txn_seq_number;
198
+ uint64_t replay_journal;
199
+ uint8_t pad[480];
200
+} QEMU_PACKED VMDKSESparseVolatileHeader;
201
+
202
#define L2_CACHE_SIZE 16
203
204
typedef struct VmdkExtent {
205
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkExtent {
206
bool compressed;
207
bool has_marker;
208
bool has_zero_grain;
209
+ bool sesparse;
210
+ uint64_t sesparse_l2_tables_offset;
211
+ uint64_t sesparse_clusters_offset;
212
+ int32_t entry_size;
213
int version;
214
int64_t sectors;
215
int64_t end_sector;
216
int64_t flat_start_offset;
217
int64_t l1_table_offset;
218
int64_t l1_backup_table_offset;
219
- uint32_t *l1_table;
220
+ void *l1_table;
221
uint32_t *l1_backup_table;
222
unsigned int l1_size;
223
uint32_t l1_entry_sectors;
224
225
unsigned int l2_size;
226
- uint32_t *l2_cache;
227
+ void *l2_cache;
228
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
229
uint32_t l2_cache_counts[L2_CACHE_SIZE];
230
231
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
232
* minimal L2 table size: 512 entries
233
* 8 TB is still more than the maximal value supported for
234
* VMDK3 & VMDK4 which is 2TB.
235
+ * 64TB - for "ESXi seSparse Extent"
236
+ * minimal cluster size: 512B (default is 4KB)
237
+ * L2 table size: 4096 entries (const).
238
+ * 64TB is more than the maximal value supported for
239
+ * seSparse VMDKs (which is slightly less than 64TB)
240
*/
241
error_setg(errp, "L1 size too big");
242
return -EFBIG;
243
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
244
extent->l2_size = l2_size;
245
extent->cluster_sectors = flat ? sectors : cluster_sectors;
246
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
247
+ extent->entry_size = sizeof(uint32_t);
248
249
if (s->num_extents > 1) {
250
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
251
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
252
int i;
253
254
/* read the L1 table */
255
- l1_size = extent->l1_size * sizeof(uint32_t);
256
+ l1_size = extent->l1_size * extent->entry_size;
257
extent->l1_table = g_try_malloc(l1_size);
258
if (l1_size && extent->l1_table == NULL) {
259
return -ENOMEM;
260
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
261
goto fail_l1;
262
}
263
for (i = 0; i < extent->l1_size; i++) {
264
- le32_to_cpus(&extent->l1_table[i]);
265
+ if (extent->entry_size == sizeof(uint64_t)) {
266
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
267
+ } else {
268
+ assert(extent->entry_size == sizeof(uint32_t));
269
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
270
+ }
271
}
272
273
if (extent->l1_backup_table_offset) {
274
+ assert(!extent->sesparse);
275
extent->l1_backup_table = g_try_malloc(l1_size);
276
if (l1_size && extent->l1_backup_table == NULL) {
277
ret = -ENOMEM;
278
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
279
}
280
281
extent->l2_cache =
282
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
283
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
284
return 0;
285
fail_l1b:
286
g_free(extent->l1_backup_table);
287
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
288
return ret;
43
}
289
}
44
290
45
+/*
291
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
46
+ * qiov_find_iov
292
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
47
+ *
293
+
48
+ * Return pointer to iovec structure, where byte at @offset in original vector
294
+/* Strict checks - format not officially documented */
49
+ * @iov exactly is.
295
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
50
+ * Set @remaining_offset to be offset inside that iovec to the same byte.
296
+ Error **errp)
51
+ */
52
+static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
53
+ size_t *remaining_offset)
54
+{
297
+{
55
+ while (offset > 0 && offset >= iov->iov_len) {
298
+ header->magic = le64_to_cpu(header->magic);
56
+ offset -= iov->iov_len;
299
+ header->version = le64_to_cpu(header->version);
57
+ iov++;
300
+ header->grain_size = le64_to_cpu(header->grain_size);
58
+ }
301
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
59
+ *remaining_offset = offset;
302
+ header->flags = le64_to_cpu(header->flags);
60
+
303
+ header->reserved1 = le64_to_cpu(header->reserved1);
61
+ return iov;
304
+ header->reserved2 = le64_to_cpu(header->reserved2);
305
+ header->reserved3 = le64_to_cpu(header->reserved3);
306
+ header->reserved4 = le64_to_cpu(header->reserved4);
307
+
308
+ header->volatile_header_offset =
309
+ le64_to_cpu(header->volatile_header_offset);
310
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
311
+
312
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
313
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
314
+
315
+ header->journal_offset = le64_to_cpu(header->journal_offset);
316
+ header->journal_size = le64_to_cpu(header->journal_size);
317
+
318
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
319
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
320
+
321
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
322
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
323
+
324
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
325
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
326
+
327
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
328
+ header->backmap_size = le64_to_cpu(header->backmap_size);
329
+
330
+ header->grains_offset = le64_to_cpu(header->grains_offset);
331
+ header->grains_size = le64_to_cpu(header->grains_size);
332
+
333
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
334
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
335
+ header->magic);
336
+ return -EINVAL;
337
+ }
338
+
339
+ if (header->version != 0x0000000200000001) {
340
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
341
+ header->version);
342
+ return -ENOTSUP;
343
+ }
344
+
345
+ if (header->grain_size != 8) {
346
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
347
+ header->grain_size);
348
+ return -ENOTSUP;
349
+ }
350
+
351
+ if (header->grain_table_size != 64) {
352
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
353
+ header->grain_table_size);
354
+ return -ENOTSUP;
355
+ }
356
+
357
+ if (header->flags != 0) {
358
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
359
+ header->flags);
360
+ return -ENOTSUP;
361
+ }
362
+
363
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
364
+ header->reserved3 != 0 || header->reserved4 != 0) {
365
+ error_setg(errp, "Unsupported reserved bits:"
366
+ " 0x%016" PRIx64 " 0x%016" PRIx64
367
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
368
+ header->reserved1, header->reserved2,
369
+ header->reserved3, header->reserved4);
370
+ return -ENOTSUP;
371
+ }
372
+
373
+ /* check that padding is 0 */
374
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
375
+ error_setg(errp, "Unsupported non-zero const header padding");
376
+ return -ENOTSUP;
377
+ }
378
+
379
+ return 0;
62
+}
380
+}
63
+
381
+
64
+/*
382
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
65
+ * qiov_slice
383
+ Error **errp)
66
+ *
67
+ * Find subarray of iovec's, containing requested range. @head would
68
+ * be offset in first iov (returned by the function), @tail would be
69
+ * count of extra bytes in last iovec (returned iov + @niov - 1).
70
+ */
71
+static struct iovec *qiov_slice(QEMUIOVector *qiov,
72
+ size_t offset, size_t len,
73
+ size_t *head, size_t *tail, int *niov)
74
+{
384
+{
75
+ struct iovec *iov, *end_iov;
385
+ header->magic = le64_to_cpu(header->magic);
76
+
386
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
77
+ assert(offset + len <= qiov->size);
387
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
78
+
388
+ header->replay_journal = le64_to_cpu(header->replay_journal);
79
+ iov = iov_skip_offset(qiov->iov, offset, head);
389
+
80
+ end_iov = iov_skip_offset(iov, *head + len, tail);
390
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
81
+
391
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
82
+ if (*tail > 0) {
392
+ header->magic);
83
+ assert(*tail < end_iov->iov_len);
393
+ return -EINVAL;
84
+ *tail = end_iov->iov_len - *tail;
394
+ }
85
+ end_iov++;
395
+
86
+ }
396
+ if (header->replay_journal) {
87
+
397
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
88
+ *niov = end_iov - iov;
398
+ return -ENOTSUP;
89
+
399
+ }
90
+ return iov;
400
+
401
+ /* check that padding is 0 */
402
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
403
+ error_setg(errp, "Unsupported non-zero volatile header padding");
404
+ return -ENOTSUP;
405
+ }
406
+
407
+ return 0;
91
+}
408
+}
92
+
409
+
93
+/*
410
+static int vmdk_open_se_sparse(BlockDriverState *bs,
94
+ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
411
+ BdrvChild *file,
95
+ * and @tail_buf buffer into new qiov.
412
+ int flags, Error **errp)
96
+ */
97
+void qemu_iovec_init_extended(
98
+ QEMUIOVector *qiov,
99
+ void *head_buf, size_t head_len,
100
+ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
101
+ void *tail_buf, size_t tail_len)
102
+{
413
+{
103
+ size_t mid_head, mid_tail;
414
+ int ret;
104
+ int total_niov, mid_niov = 0;
415
+ VMDKSESparseConstHeader const_header;
105
+ struct iovec *p, *mid_iov;
416
+ VMDKSESparseVolatileHeader volatile_header;
106
+
417
+ VmdkExtent *extent;
107
+ if (mid_len) {
418
+
108
+ mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
419
+ ret = bdrv_apply_auto_read_only(bs,
109
+ &mid_head, &mid_tail, &mid_niov);
420
+ "No write support for seSparse images available", errp);
110
+ }
421
+ if (ret < 0) {
111
+
422
+ return ret;
112
+ total_niov = !!head_len + mid_niov + !!tail_len;
423
+ }
113
+ if (total_niov == 1) {
424
+
114
+ qemu_iovec_init_buf(qiov, NULL, 0);
425
+ assert(sizeof(const_header) == SECTOR_SIZE);
115
+ p = &qiov->local_iov;
426
+
427
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
428
+ if (ret < 0) {
429
+ bdrv_refresh_filename(file->bs);
430
+ error_setg_errno(errp, -ret,
431
+ "Could not read const header from file '%s'",
432
+ file->bs->filename);
433
+ return ret;
434
+ }
435
+
436
+ /* check const header */
437
+ ret = check_se_sparse_const_header(&const_header, errp);
438
+ if (ret < 0) {
439
+ return ret;
440
+ }
441
+
442
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
443
+
444
+ ret = bdrv_pread(file,
445
+ const_header.volatile_header_offset * SECTOR_SIZE,
446
+ &volatile_header, sizeof(volatile_header));
447
+ if (ret < 0) {
448
+ bdrv_refresh_filename(file->bs);
449
+ error_setg_errno(errp, -ret,
450
+ "Could not read volatile header from file '%s'",
451
+ file->bs->filename);
452
+ return ret;
453
+ }
454
+
455
+ /* check volatile header */
456
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
457
+ if (ret < 0) {
458
+ return ret;
459
+ }
460
+
461
+ ret = vmdk_add_extent(bs, file, false,
462
+ const_header.capacity,
463
+ const_header.grain_dir_offset * SECTOR_SIZE,
464
+ 0,
465
+ const_header.grain_dir_size *
466
+ SECTOR_SIZE / sizeof(uint64_t),
467
+ const_header.grain_table_size *
468
+ SECTOR_SIZE / sizeof(uint64_t),
469
+ const_header.grain_size,
470
+ &extent,
471
+ errp);
472
+ if (ret < 0) {
473
+ return ret;
474
+ }
475
+
476
+ extent->sesparse = true;
477
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
478
+ extent->sesparse_clusters_offset = const_header.grains_offset;
479
+ extent->entry_size = sizeof(uint64_t);
480
+
481
+ ret = vmdk_init_tables(bs, extent, errp);
482
+ if (ret) {
483
+ /* free extent allocated by vmdk_add_extent */
484
+ vmdk_free_last_extent(bs);
485
+ }
486
+
487
+ return ret;
488
+}
489
+
490
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
491
QDict *options, Error **errp);
492
493
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
494
* RW [size in sectors] SPARSE "file-name.vmdk"
495
* RW [size in sectors] VMFS "file-name.vmdk"
496
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
497
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
498
*/
499
flat_offset = -1;
500
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
501
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
502
503
if (sectors <= 0 ||
504
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
505
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
506
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
507
+ strcmp(type, "SESPARSE")) ||
508
(strcmp(access, "RW"))) {
509
continue;
510
}
511
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
512
return ret;
513
}
514
extent = &s->extents[s->num_extents - 1];
515
+ } else if (!strcmp(type, "SESPARSE")) {
516
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
517
+ if (ret) {
518
+ bdrv_unref_child(bs, extent_file);
519
+ return ret;
520
+ }
521
+ extent = &s->extents[s->num_extents - 1];
522
} else {
523
error_setg(errp, "Unsupported extent type '%s'", type);
524
bdrv_unref_child(bs, extent_file);
525
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
526
if (strcmp(ct, "monolithicFlat") &&
527
strcmp(ct, "vmfs") &&
528
strcmp(ct, "vmfsSparse") &&
529
+ strcmp(ct, "seSparse") &&
530
strcmp(ct, "twoGbMaxExtentSparse") &&
531
strcmp(ct, "twoGbMaxExtentFlat")) {
532
error_setg(errp, "Unsupported image type '%s'", ct);
533
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
534
{
535
unsigned int l1_index, l2_offset, l2_index;
536
int min_index, i, j;
537
- uint32_t min_count, *l2_table;
538
+ uint32_t min_count;
539
+ void *l2_table;
540
bool zeroed = false;
541
int64_t ret;
542
int64_t cluster_sector;
543
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
544
545
if (m_data) {
546
m_data->valid = 0;
547
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
548
if (l1_index >= extent->l1_size) {
549
return VMDK_ERROR;
550
}
551
- l2_offset = extent->l1_table[l1_index];
552
+ if (extent->sesparse) {
553
+ uint64_t l2_offset_u64;
554
+
555
+ assert(extent->entry_size == sizeof(uint64_t));
556
+
557
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
558
+ if (l2_offset_u64 == 0) {
559
+ l2_offset = 0;
560
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
561
+ /*
562
+ * Top most nibble is 0x1 if grain table is allocated.
563
+ * strict check - top most 4 bytes must be 0x10000000 since max
564
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
565
+ * grain directories which is smaller than uint32,
566
+ * where 16MB is the only supported default grain table coverage.
567
+ */
568
+ return VMDK_ERROR;
569
+ } else {
570
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
571
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
572
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
573
+ if (l2_offset_u64 > 0x00000000ffffffff) {
574
+ return VMDK_ERROR;
575
+ }
576
+ l2_offset = (unsigned int)(l2_offset_u64);
577
+ }
116
+ } else {
578
+ } else {
117
+ qiov->niov = qiov->nalloc = total_niov;
579
+ assert(extent->entry_size == sizeof(uint32_t));
118
+ qiov->size = head_len + mid_len + tail_len;
580
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
119
+ p = qiov->iov = g_new(struct iovec, qiov->niov);
581
+ }
120
+ }
582
if (!l2_offset) {
121
+
583
return VMDK_UNALLOC;
122
+ if (head_len) {
584
}
123
+ p->iov_base = head_buf;
585
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
124
+ p->iov_len = head_len;
586
extent->l2_cache_counts[j] >>= 1;
125
+ p++;
587
}
126
+ }
588
}
127
+
589
- l2_table = extent->l2_cache + (i * extent->l2_size);
128
+ if (mid_len) {
590
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
129
+ memcpy(p, mid_iov, mid_niov * sizeof(*p));
591
goto found;
130
+ p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
592
}
131
+ p[0].iov_len -= mid_head;
593
}
132
+ p[mid_niov - 1].iov_len -= mid_tail;
594
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
133
+ p += mid_niov;
595
min_index = i;
134
+ }
596
}
135
+
597
}
136
+ if (tail_len) {
598
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
137
+ p->iov_base = tail_buf;
599
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
138
+ p->iov_len = tail_len;
600
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
139
+ }
601
if (bdrv_pread(extent->file,
140
+}
602
(int64_t)l2_offset * 512,
141
+
603
l2_table,
142
/*
604
- extent->l2_size * sizeof(uint32_t)
143
* Check if the contents of the iovecs are all zero
605
- ) != extent->l2_size * sizeof(uint32_t)) {
144
*/
606
+ l2_size_bytes
145
@@ -XXX,XX +XXX,XX @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov)
607
+ ) != l2_size_bytes) {
146
return true;
608
return VMDK_ERROR;
147
}
609
}
148
610
149
+void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
611
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
150
+ size_t offset, size_t len)
612
extent->l2_cache_counts[min_index] = 1;
151
+{
613
found:
152
+ qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
614
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
153
+}
615
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
154
+
616
155
void qemu_iovec_destroy(QEMUIOVector *qiov)
617
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
156
{
618
- zeroed = true;
157
- assert(qiov->nalloc != -1);
619
+ if (extent->sesparse) {
158
+ if (qiov->nalloc != -1) {
620
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
159
+ g_free(qiov->iov);
621
+ switch (cluster_sector & 0xf000000000000000) {
160
+ }
622
+ case 0x0000000000000000:
161
623
+ /* unallocated grain */
162
- qemu_iovec_reset(qiov);
624
+ if (cluster_sector != 0) {
163
- g_free(qiov->iov);
625
+ return VMDK_ERROR;
164
- qiov->nalloc = 0;
626
+ }
165
- qiov->iov = NULL;
627
+ break;
166
+ memset(qiov, 0, sizeof(*qiov));
628
+ case 0x1000000000000000:
167
}
629
+ /* scsi-unmapped grain - fallthrough */
168
630
+ case 0x2000000000000000:
169
void qemu_iovec_reset(QEMUIOVector *qiov)
631
+ /* zero grain */
632
+ zeroed = true;
633
+ break;
634
+ case 0x3000000000000000:
635
+ /* allocated grain */
636
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
637
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
638
+ cluster_sector = extent->sesparse_clusters_offset +
639
+ cluster_sector * extent->cluster_sectors;
640
+ break;
641
+ default:
642
+ return VMDK_ERROR;
643
+ }
644
+ } else {
645
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
646
+
647
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
648
+ zeroed = true;
649
+ }
650
}
651
652
if (!cluster_sector || zeroed) {
653
if (!allocate) {
654
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
655
}
656
+ assert(!extent->sesparse);
657
658
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
659
return VMDK_ERROR;
660
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
661
m_data->l1_index = l1_index;
662
m_data->l2_index = l2_index;
663
m_data->l2_offset = l2_offset;
664
- m_data->l2_cache_entry = &l2_table[l2_index];
665
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
666
}
667
}
668
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
669
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
670
if (!extent) {
671
return -EIO;
672
}
673
+ if (extent->sesparse) {
674
+ return -ENOTSUP;
675
+ }
676
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
677
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
678
- offset_in_cluster);
170
--
679
--
171
2.21.0
680
2.21.0
172
681
173
682
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
We'll need to check a part of qiov soon, so implement it now.
4
5
Optimization with align down to 4 * sizeof(long) is dropped due to:
6
1. It is strange: it aligns length of the buffer, but where is a
7
guarantee that buffer pointer is aligned itself?
8
2. buffer_is_zero() is a better place for optimizations and it has
9
them.
10
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Message-id: 20190604161514.262241-3-vsementsov@virtuozzo.com
14
Message-Id: <20190604161514.262241-3-vsementsov@virtuozzo.com>
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
include/qemu/iov.h | 2 +-
18
block/io.c | 2 +-
19
util/iov.c | 31 +++++++++++++++++++------------
20
3 files changed, 21 insertions(+), 14 deletions(-)
21
22
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/iov.h
25
+++ b/include/qemu/iov.h
26
@@ -XXX,XX +XXX,XX @@ void qemu_iovec_concat(QEMUIOVector *dst,
27
size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
28
struct iovec *src_iov, unsigned int src_cnt,
29
size_t soffset, size_t sbytes);
30
-bool qemu_iovec_is_zero(QEMUIOVector *qiov);
31
+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes);
32
void qemu_iovec_destroy(QEMUIOVector *qiov);
33
void qemu_iovec_reset(QEMUIOVector *qiov);
34
size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
35
diff --git a/block/io.c b/block/io.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/block/io.c
38
+++ b/block/io.c
39
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
40
41
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
42
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
43
- qemu_iovec_is_zero(qiov)) {
44
+ qemu_iovec_is_zero(qiov, 0, qiov->size)) {
45
flags |= BDRV_REQ_ZERO_WRITE;
46
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
47
flags |= BDRV_REQ_MAY_UNMAP;
48
diff --git a/util/iov.c b/util/iov.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/util/iov.c
51
+++ b/util/iov.c
52
@@ -XXX,XX +XXX,XX @@ void qemu_iovec_init_extended(
53
}
54
55
/*
56
- * Check if the contents of the iovecs are all zero
57
+ * Check if the contents of subrange of qiov data is all zeroes.
58
*/
59
-bool qemu_iovec_is_zero(QEMUIOVector *qiov)
60
+bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
61
{
62
- int i;
63
- for (i = 0; i < qiov->niov; i++) {
64
- size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long));
65
- uint8_t *ptr = qiov->iov[i].iov_base;
66
- if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
67
+ struct iovec *iov;
68
+ size_t current_offset;
69
+
70
+ assert(offset + bytes <= qiov->size);
71
+
72
+ iov = iov_skip_offset(qiov->iov, offset, &current_offset);
73
+
74
+ while (bytes) {
75
+ uint8_t *base = (uint8_t *)iov->iov_base + current_offset;
76
+ size_t len = MIN(iov->iov_len - current_offset, bytes);
77
+
78
+ if (!buffer_is_zero(base, len)) {
79
return false;
80
}
81
- for (; offs < qiov->iov[i].iov_len; offs++) {
82
- if (ptr[offs]) {
83
- return false;
84
- }
85
- }
86
+
87
+ current_offset = 0;
88
+ bytes -= len;
89
+ iov++;
90
}
91
+
92
return true;
93
}
94
95
--
96
2.21.0
97
98
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Pino Toscano <ptoscano@redhat.com>
2
2
3
We have similar padding code in bdrv_co_pwritev,
3
Rewrite the implementation of the ssh block driver to use libssh instead
4
bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify
4
of libssh2. The libssh library has various advantages over libssh2:
5
it.
5
- easier API for authentication (for example for using ssh-agent)
6
- easier API for known_hosts handling
7
- supports newer types of keys in known_hosts
6
8
7
[Squashed in Vladimir's qemu-iotests 077 fix
9
Use APIs/features available in libssh 0.8 conditionally, to support
8
--Stefan]
10
older versions (which are not recommended though).
9
11
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Adjust the iotest 207 according to the different error message, and to
11
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
13
find the default key type for localhost (to properly compare the
12
Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com
14
fingerprint with).
13
Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com>
15
Contributed-by: Max Reitz <mreitz@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
17
Adjust the various Docker/Travis scripts to use libssh when available
18
instead of libssh2. The mingw/mxe testing is dropped for now, as there
19
are no packages for it.
20
21
Signed-off-by: Pino Toscano <ptoscano@redhat.com>
22
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
Acked-by: Alex Bennée <alex.bennee@linaro.org>
24
Message-id: 20190620200840.17655-1-ptoscano@redhat.com
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Message-id: 5873173.t2JhDm7DL7@lindworm.usersys.redhat.com
27
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
---
28
---
16
block/io.c | 365 +++++++++++++++++++++++++++++------------------------
29
configure | 65 +-
17
1 file changed, 200 insertions(+), 165 deletions(-)
30
block/Makefile.objs | 6 +-
31
block/ssh.c | 652 ++++++++++--------
32
.travis.yml | 4 +-
33
block/trace-events | 14 +-
34
docs/qemu-block-drivers.texi | 2 +-
35
.../dockerfiles/debian-win32-cross.docker | 1 -
36
.../dockerfiles/debian-win64-cross.docker | 1 -
37
tests/docker/dockerfiles/fedora.docker | 4 +-
38
tests/docker/dockerfiles/ubuntu.docker | 2 +-
39
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
40
tests/qemu-iotests/207 | 54 +-
41
tests/qemu-iotests/207.out | 2 +-
42
13 files changed, 449 insertions(+), 360 deletions(-)
18
43
19
diff --git a/block/io.c b/block/io.c
44
diff --git a/configure b/configure
45
index XXXXXXX..XXXXXXX 100755
46
--- a/configure
47
+++ b/configure
48
@@ -XXX,XX +XXX,XX @@ auth_pam=""
49
vte=""
50
virglrenderer=""
51
tpm=""
52
-libssh2=""
53
+libssh=""
54
live_block_migration="yes"
55
numa=""
56
tcmalloc="no"
57
@@ -XXX,XX +XXX,XX @@ for opt do
58
;;
59
--enable-tpm) tpm="yes"
60
;;
61
- --disable-libssh2) libssh2="no"
62
+ --disable-libssh) libssh="no"
63
;;
64
- --enable-libssh2) libssh2="yes"
65
+ --enable-libssh) libssh="yes"
66
;;
67
--disable-live-block-migration) live_block_migration="no"
68
;;
69
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
70
coroutine-pool coroutine freelist (better performance)
71
glusterfs GlusterFS backend
72
tpm TPM support
73
- libssh2 ssh block device support
74
+ libssh ssh block device support
75
numa libnuma support
76
libxml2 for Parallels image format
77
tcmalloc tcmalloc support
78
@@ -XXX,XX +XXX,XX @@ EOF
79
fi
80
81
##########################################
82
-# libssh2 probe
83
-min_libssh2_version=1.2.8
84
-if test "$libssh2" != "no" ; then
85
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
86
- libssh2_cflags=$($pkg_config libssh2 --cflags)
87
- libssh2_libs=$($pkg_config libssh2 --libs)
88
- libssh2=yes
89
+# libssh probe
90
+if test "$libssh" != "no" ; then
91
+ if $pkg_config --exists libssh; then
92
+ libssh_cflags=$($pkg_config libssh --cflags)
93
+ libssh_libs=$($pkg_config libssh --libs)
94
+ libssh=yes
95
else
96
- if test "$libssh2" = "yes" ; then
97
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
98
+ if test "$libssh" = "yes" ; then
99
+ error_exit "libssh required for --enable-libssh"
100
fi
101
- libssh2=no
102
+ libssh=no
103
fi
104
fi
105
106
##########################################
107
-# libssh2_sftp_fsync probe
108
+# Check for libssh 0.8
109
+# This is done like this instead of using the LIBSSH_VERSION_* and
110
+# SSH_VERSION_* macros because some distributions in the past shipped
111
+# snapshots of the future 0.8 from Git, and those snapshots did not
112
+# have updated version numbers (still referring to 0.7.0).
113
114
-if test "$libssh2" = "yes"; then
115
+if test "$libssh" = "yes"; then
116
cat > $TMPC <<EOF
117
-#include <stdio.h>
118
-#include <libssh2.h>
119
-#include <libssh2_sftp.h>
120
-int main(void) {
121
- LIBSSH2_SESSION *session;
122
- LIBSSH2_SFTP *sftp;
123
- LIBSSH2_SFTP_HANDLE *sftp_handle;
124
- session = libssh2_session_init ();
125
- sftp = libssh2_sftp_init (session);
126
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
127
- libssh2_sftp_fsync (sftp_handle);
128
- return 0;
129
-}
130
+#include <libssh/libssh.h>
131
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
132
EOF
133
- # libssh2_cflags/libssh2_libs defined in previous test.
134
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
135
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
136
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
137
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
138
fi
139
fi
140
141
@@ -XXX,XX +XXX,XX @@ echo "GlusterFS support $glusterfs"
142
echo "gcov $gcov_tool"
143
echo "gcov enabled $gcov"
144
echo "TPM support $tpm"
145
-echo "libssh2 support $libssh2"
146
+echo "libssh support $libssh"
147
echo "QOM debugging $qom_cast_debug"
148
echo "Live block migration $live_block_migration"
149
echo "lzo support $lzo"
150
@@ -XXX,XX +XXX,XX @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
151
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
152
fi
153
154
-if test "$libssh2" = "yes" ; then
155
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
156
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
157
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
158
+if test "$libssh" = "yes" ; then
159
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
160
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
161
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
162
fi
163
164
if test "$live_block_migration" = "yes" ; then
165
diff --git a/block/Makefile.objs b/block/Makefile.objs
20
index XXXXXXX..XXXXXXX 100644
166
index XXXXXXX..XXXXXXX 100644
21
--- a/block/io.c
167
--- a/block/Makefile.objs
22
+++ b/block/io.c
168
+++ b/block/Makefile.objs
23
@@ -XXX,XX +XXX,XX @@ out:
169
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_CURL) += curl.o
170
block-obj-$(CONFIG_RBD) += rbd.o
171
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
172
block-obj-$(CONFIG_VXHS) += vxhs.o
173
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
174
+block-obj-$(CONFIG_LIBSSH) += ssh.o
175
block-obj-y += accounting.o dirty-bitmap.o
176
block-obj-y += write-threshold.o
177
block-obj-y += backup.o
178
@@ -XXX,XX +XXX,XX @@ rbd.o-libs := $(RBD_LIBS)
179
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
180
gluster.o-libs := $(GLUSTERFS_LIBS)
181
vxhs.o-libs := $(VXHS_LIBS)
182
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
183
-ssh.o-libs := $(LIBSSH2_LIBS)
184
+ssh.o-cflags := $(LIBSSH_CFLAGS)
185
+ssh.o-libs := $(LIBSSH_LIBS)
186
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
187
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
188
dmg-bz2.o-libs := $(BZIP2_LIBS)
189
diff --git a/block/ssh.c b/block/ssh.c
190
index XXXXXXX..XXXXXXX 100644
191
--- a/block/ssh.c
192
+++ b/block/ssh.c
193
@@ -XXX,XX +XXX,XX @@
194
195
#include "qemu/osdep.h"
196
197
-#include <libssh2.h>
198
-#include <libssh2_sftp.h>
199
+#include <libssh/libssh.h>
200
+#include <libssh/sftp.h>
201
202
#include "block/block_int.h"
203
#include "block/qdict.h"
204
@@ -XXX,XX +XXX,XX @@
205
#include "trace.h"
206
207
/*
208
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
209
- * that this requires that libssh2 was specially compiled with the
210
- * `./configure --enable-debug' option, so most likely you will have
211
- * to compile it yourself. The meaning of <bitmask> is described
212
- * here: http://www.libssh2.org/libssh2_trace.html
213
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
214
+ * The meaning of <level> is described here:
215
+ * http://api.libssh.org/master/group__libssh__log.html
216
*/
217
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
218
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
219
220
typedef struct BDRVSSHState {
221
/* Coroutine. */
222
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSSHState {
223
224
/* SSH connection. */
225
int sock; /* socket */
226
- LIBSSH2_SESSION *session; /* ssh session */
227
- LIBSSH2_SFTP *sftp; /* sftp session */
228
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
229
+ ssh_session session; /* ssh session */
230
+ sftp_session sftp; /* sftp session */
231
+ sftp_file sftp_handle; /* sftp remote file handle */
232
233
- /* See ssh_seek() function below. */
234
- int64_t offset;
235
- bool offset_op_read;
236
-
237
- /* File attributes at open. We try to keep the .filesize field
238
+ /*
239
+ * File attributes at open. We try to keep the .size field
240
* updated if it changes (eg by writing at the end of the file).
241
*/
242
- LIBSSH2_SFTP_ATTRIBUTES attrs;
243
+ sftp_attributes attrs;
244
245
InetSocketAddress *inet;
246
247
@@ -XXX,XX +XXX,XX @@ static void ssh_state_init(BDRVSSHState *s)
248
{
249
memset(s, 0, sizeof *s);
250
s->sock = -1;
251
- s->offset = -1;
252
qemu_co_mutex_init(&s->lock);
24
}
253
}
25
254
26
/*
255
@@ -XXX,XX +XXX,XX @@ static void ssh_state_free(BDRVSSHState *s)
27
- * Handle a read request in coroutine context
256
{
28
+ * Request padding
257
g_free(s->user);
29
+ *
258
30
+ * |<---- align ----->| |<----- align ---->|
259
+ if (s->attrs) {
31
+ * |<- head ->|<------------- bytes ------------->|<-- tail -->|
260
+ sftp_attributes_free(s->attrs);
32
+ * | | | | | |
33
+ * -*----------$-------*-------- ... --------*-----$------------*---
34
+ * | | | | | |
35
+ * | offset | | end |
36
+ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end)
37
+ * [buf ... ) [tail_buf )
38
+ *
39
+ * @buf is an aligned allocation needed to store @head and @tail paddings. @head
40
+ * is placed at the beginning of @buf and @tail at the @end.
41
+ *
42
+ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
43
+ * around tail, if tail exists.
44
+ *
45
+ * @merge_reads is true for small requests,
46
+ * if @buf_len == @head + bytes + @tail. In this case it is possible that both
47
+ * head and tail exist but @buf_len == align and @tail_buf == @buf.
48
*/
49
+typedef struct BdrvRequestPadding {
50
+ uint8_t *buf;
51
+ size_t buf_len;
52
+ uint8_t *tail_buf;
53
+ size_t head;
54
+ size_t tail;
55
+ bool merge_reads;
56
+ QEMUIOVector local_qiov;
57
+} BdrvRequestPadding;
58
+
59
+static bool bdrv_init_padding(BlockDriverState *bs,
60
+ int64_t offset, int64_t bytes,
61
+ BdrvRequestPadding *pad)
62
+{
63
+ uint64_t align = bs->bl.request_alignment;
64
+ size_t sum;
65
+
66
+ memset(pad, 0, sizeof(*pad));
67
+
68
+ pad->head = offset & (align - 1);
69
+ pad->tail = ((offset + bytes) & (align - 1));
70
+ if (pad->tail) {
71
+ pad->tail = align - pad->tail;
72
+ }
261
+ }
73
+
262
if (s->sftp_handle) {
74
+ if ((!pad->head && !pad->tail) || !bytes) {
263
- libssh2_sftp_close(s->sftp_handle);
75
+ return false;
264
+ sftp_close(s->sftp_handle);
265
}
266
if (s->sftp) {
267
- libssh2_sftp_shutdown(s->sftp);
268
+ sftp_free(s->sftp);
269
}
270
if (s->session) {
271
- libssh2_session_disconnect(s->session,
272
- "from qemu ssh client: "
273
- "user closed the connection");
274
- libssh2_session_free(s->session);
275
- }
276
- if (s->sock >= 0) {
277
- close(s->sock);
278
+ ssh_disconnect(s->session);
279
+ ssh_free(s->session); /* This frees s->sock */
280
}
281
}
282
283
@@ -XXX,XX +XXX,XX @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
284
va_end(args);
285
286
if (s->session) {
287
- char *ssh_err;
288
+ const char *ssh_err;
289
int ssh_err_code;
290
291
- /* This is not an errno. See <libssh2.h>. */
292
- ssh_err_code = libssh2_session_last_error(s->session,
293
- &ssh_err, NULL, 0);
294
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
295
+ /* This is not an errno. See <libssh/libssh.h>. */
296
+ ssh_err = ssh_get_error(s->session);
297
+ ssh_err_code = ssh_get_error_code(s->session);
298
+ error_setg(errp, "%s: %s (libssh error code: %d)",
299
msg, ssh_err, ssh_err_code);
300
} else {
301
error_setg(errp, "%s", msg);
302
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
303
va_end(args);
304
305
if (s->sftp) {
306
- char *ssh_err;
307
+ const char *ssh_err;
308
int ssh_err_code;
309
- unsigned long sftp_err_code;
310
+ int sftp_err_code;
311
312
- /* This is not an errno. See <libssh2.h>. */
313
- ssh_err_code = libssh2_session_last_error(s->session,
314
- &ssh_err, NULL, 0);
315
- /* See <libssh2_sftp.h>. */
316
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
317
+ /* This is not an errno. See <libssh/libssh.h>. */
318
+ ssh_err = ssh_get_error(s->session);
319
+ ssh_err_code = ssh_get_error_code(s->session);
320
+ /* See <libssh/sftp.h>. */
321
+ sftp_err_code = sftp_get_error(s->sftp);
322
323
error_setg(errp,
324
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
325
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
326
msg, ssh_err, ssh_err_code, sftp_err_code);
327
} else {
328
error_setg(errp, "%s", msg);
329
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
330
331
static void sftp_error_trace(BDRVSSHState *s, const char *op)
332
{
333
- char *ssh_err;
334
+ const char *ssh_err;
335
int ssh_err_code;
336
- unsigned long sftp_err_code;
337
+ int sftp_err_code;
338
339
- /* This is not an errno. See <libssh2.h>. */
340
- ssh_err_code = libssh2_session_last_error(s->session,
341
- &ssh_err, NULL, 0);
342
- /* See <libssh2_sftp.h>. */
343
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
344
+ /* This is not an errno. See <libssh/libssh.h>. */
345
+ ssh_err = ssh_get_error(s->session);
346
+ ssh_err_code = ssh_get_error_code(s->session);
347
+ /* See <libssh/sftp.h>. */
348
+ sftp_err_code = sftp_get_error(s->sftp);
349
350
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
351
}
352
@@ -XXX,XX +XXX,XX @@ static void ssh_parse_filename(const char *filename, QDict *options,
353
parse_uri(filename, options, errp);
354
}
355
356
-static int check_host_key_knownhosts(BDRVSSHState *s,
357
- const char *host, int port, Error **errp)
358
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
359
{
360
- const char *home;
361
- char *knh_file = NULL;
362
- LIBSSH2_KNOWNHOSTS *knh = NULL;
363
- struct libssh2_knownhost *found;
364
- int ret, r;
365
- const char *hostkey;
366
- size_t len;
367
- int type;
368
-
369
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
370
- if (!hostkey) {
371
+ int ret;
372
+#ifdef HAVE_LIBSSH_0_8
373
+ enum ssh_known_hosts_e state;
374
+ int r;
375
+ ssh_key pubkey;
376
+ enum ssh_keytypes_e pubkey_type;
377
+ unsigned char *server_hash = NULL;
378
+ size_t server_hash_len;
379
+ char *fingerprint = NULL;
380
+
381
+ state = ssh_session_is_known_server(s->session);
382
+ trace_ssh_server_status(state);
383
+
384
+ switch (state) {
385
+ case SSH_KNOWN_HOSTS_OK:
386
+ /* OK */
387
+ trace_ssh_check_host_key_knownhosts();
388
+ break;
389
+ case SSH_KNOWN_HOSTS_CHANGED:
390
ret = -EINVAL;
391
- session_error_setg(errp, s, "failed to read remote host key");
392
+ r = ssh_get_server_publickey(s->session, &pubkey);
393
+ if (r == 0) {
394
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
395
+ &server_hash, &server_hash_len);
396
+ pubkey_type = ssh_key_type(pubkey);
397
+ ssh_key_free(pubkey);
398
+ }
399
+ if (r == 0) {
400
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
401
+ server_hash,
402
+ server_hash_len);
403
+ ssh_clean_pubkey_hash(&server_hash);
404
+ }
405
+ if (fingerprint) {
406
+ error_setg(errp,
407
+ "host key (%s key with fingerprint %s) does not match "
408
+ "the one in known_hosts; this may be a possible attack",
409
+ ssh_key_type_to_char(pubkey_type), fingerprint);
410
+ ssh_string_free_char(fingerprint);
411
+ } else {
412
+ error_setg(errp,
413
+ "host key does not match the one in known_hosts; this "
414
+ "may be a possible attack");
415
+ }
416
goto out;
417
- }
418
-
419
- knh = libssh2_knownhost_init(s->session);
420
- if (!knh) {
421
+ case SSH_KNOWN_HOSTS_OTHER:
422
ret = -EINVAL;
423
- session_error_setg(errp, s,
424
- "failed to initialize known hosts support");
425
+ error_setg(errp,
426
+ "host key for this server not found, another type exists");
427
+ goto out;
428
+ case SSH_KNOWN_HOSTS_UNKNOWN:
429
+ ret = -EINVAL;
430
+ error_setg(errp, "no host key was found in known_hosts");
431
+ goto out;
432
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
433
+ ret = -ENOENT;
434
+ error_setg(errp, "known_hosts file not found");
435
+ goto out;
436
+ case SSH_KNOWN_HOSTS_ERROR:
437
+ ret = -EINVAL;
438
+ error_setg(errp, "error while checking the host");
439
+ goto out;
440
+ default:
441
+ ret = -EINVAL;
442
+ error_setg(errp, "error while checking for known server (%d)", state);
443
goto out;
444
}
445
+#else /* !HAVE_LIBSSH_0_8 */
446
+ int state;
447
448
- home = getenv("HOME");
449
- if (home) {
450
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
451
- } else {
452
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
453
- }
454
-
455
- /* Read all known hosts from OpenSSH-style known_hosts file. */
456
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
457
+ state = ssh_is_server_known(s->session);
458
+ trace_ssh_server_status(state);
459
460
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
461
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
462
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
463
- &found);
464
- switch (r) {
465
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
466
+ switch (state) {
467
+ case SSH_SERVER_KNOWN_OK:
468
/* OK */
469
- trace_ssh_check_host_key_knownhosts(found->key);
470
+ trace_ssh_check_host_key_knownhosts();
471
break;
472
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
473
+ case SSH_SERVER_KNOWN_CHANGED:
474
ret = -EINVAL;
475
- session_error_setg(errp, s,
476
- "host key does not match the one in known_hosts"
477
- " (found key %s)", found->key);
478
+ error_setg(errp,
479
+ "host key does not match the one in known_hosts; this "
480
+ "may be a possible attack");
481
goto out;
482
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
483
+ case SSH_SERVER_FOUND_OTHER:
484
ret = -EINVAL;
485
- session_error_setg(errp, s, "no host key was found in known_hosts");
486
+ error_setg(errp,
487
+ "host key for this server not found, another type exists");
488
+ goto out;
489
+ case SSH_SERVER_FILE_NOT_FOUND:
490
+ ret = -ENOENT;
491
+ error_setg(errp, "known_hosts file not found");
492
goto out;
493
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
494
+ case SSH_SERVER_NOT_KNOWN:
495
ret = -EINVAL;
496
- session_error_setg(errp, s,
497
- "failure matching the host key with known_hosts");
498
+ error_setg(errp, "no host key was found in known_hosts");
499
+ goto out;
500
+ case SSH_SERVER_ERROR:
501
+ ret = -EINVAL;
502
+ error_setg(errp, "server error");
503
goto out;
504
default:
505
ret = -EINVAL;
506
- session_error_setg(errp, s, "unknown error matching the host key"
507
- " with known_hosts (%d)", r);
508
+ error_setg(errp, "error while checking for known server (%d)", state);
509
goto out;
510
}
511
+#endif /* !HAVE_LIBSSH_0_8 */
512
513
/* known_hosts checking successful. */
514
ret = 0;
515
516
out:
517
- if (knh != NULL) {
518
- libssh2_knownhost_free(knh);
519
- }
520
- g_free(knh_file);
521
return ret;
522
}
523
524
@@ -XXX,XX +XXX,XX @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
525
526
static int
527
check_host_key_hash(BDRVSSHState *s, const char *hash,
528
- int hash_type, size_t fingerprint_len, Error **errp)
529
+ enum ssh_publickey_hash_type type, Error **errp)
530
{
531
- const char *fingerprint;
532
-
533
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
534
- if (!fingerprint) {
535
+ int r;
536
+ ssh_key pubkey;
537
+ unsigned char *server_hash;
538
+ size_t server_hash_len;
539
+
540
+#ifdef HAVE_LIBSSH_0_8
541
+ r = ssh_get_server_publickey(s->session, &pubkey);
542
+#else
543
+ r = ssh_get_publickey(s->session, &pubkey);
544
+#endif
545
+ if (r != SSH_OK) {
546
session_error_setg(errp, s, "failed to read remote host key");
547
return -EINVAL;
548
}
549
550
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
551
- hash) != 0) {
552
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
553
+ ssh_key_free(pubkey);
554
+ if (r != 0) {
555
+ session_error_setg(errp, s,
556
+ "failed reading the hash of the server SSH key");
557
+ return -EINVAL;
76
+ }
558
+ }
77
+
559
+
78
+ sum = pad->head + bytes + pad->tail;
560
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
79
+ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
561
+ ssh_clean_pubkey_hash(&server_hash);
80
+ pad->buf = qemu_blockalign(bs, pad->buf_len);
562
+ if (r != 0) {
81
+ pad->merge_reads = sum == pad->buf_len;
563
error_setg(errp, "remote host key does not match host_key_check '%s'",
82
+ if (pad->tail) {
564
hash);
83
+ pad->tail_buf = pad->buf + pad->buf_len - align;
565
return -EPERM;
566
@@ -XXX,XX +XXX,XX @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
567
return 0;
568
}
569
570
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
571
- SshHostKeyCheck *hkc, Error **errp)
572
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
573
{
574
SshHostKeyCheckMode mode;
575
576
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
577
case SSH_HOST_KEY_CHECK_MODE_HASH:
578
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
579
return check_host_key_hash(s, hkc->u.hash.hash,
580
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
581
+ SSH_PUBLICKEY_HASH_MD5, errp);
582
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
583
return check_host_key_hash(s, hkc->u.hash.hash,
584
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
585
+ SSH_PUBLICKEY_HASH_SHA1, errp);
586
}
587
g_assert_not_reached();
588
break;
589
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
590
- return check_host_key_knownhosts(s, host, port, errp);
591
+ return check_host_key_knownhosts(s, errp);
592
default:
593
g_assert_not_reached();
594
}
595
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
596
return -EINVAL;
597
}
598
599
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
600
+static int authenticate(BDRVSSHState *s, Error **errp)
601
{
602
int r, ret;
603
- const char *userauthlist;
604
- LIBSSH2_AGENT *agent = NULL;
605
- struct libssh2_agent_publickey *identity;
606
- struct libssh2_agent_publickey *prev_identity = NULL;
607
+ int method;
608
609
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
610
- if (strstr(userauthlist, "publickey") == NULL) {
611
+ /* Try to authenticate with the "none" method. */
612
+ r = ssh_userauth_none(s->session, NULL);
613
+ if (r == SSH_AUTH_ERROR) {
614
ret = -EPERM;
615
- error_setg(errp,
616
- "remote server does not support \"publickey\" authentication");
617
+ session_error_setg(errp, s, "failed to authenticate using none "
618
+ "authentication");
619
goto out;
620
- }
621
-
622
- /* Connect to ssh-agent and try each identity in turn. */
623
- agent = libssh2_agent_init(s->session);
624
- if (!agent) {
625
- ret = -EINVAL;
626
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
627
- goto out;
628
- }
629
- if (libssh2_agent_connect(agent)) {
630
- ret = -ECONNREFUSED;
631
- session_error_setg(errp, s, "failed to connect to ssh-agent");
632
- goto out;
633
- }
634
- if (libssh2_agent_list_identities(agent)) {
635
- ret = -EINVAL;
636
- session_error_setg(errp, s,
637
- "failed requesting identities from ssh-agent");
638
+ } else if (r == SSH_AUTH_SUCCESS) {
639
+ /* Authenticated! */
640
+ ret = 0;
641
goto out;
642
}
643
644
- for(;;) {
645
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
646
- if (r == 1) { /* end of list */
647
- break;
648
- }
649
- if (r < 0) {
650
+ method = ssh_userauth_list(s->session, NULL);
651
+ trace_ssh_auth_methods(method);
652
+
653
+ /*
654
+ * Try to authenticate with publickey, using the ssh-agent
655
+ * if available.
656
+ */
657
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
658
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
659
+ if (r == SSH_AUTH_ERROR) {
660
ret = -EINVAL;
661
- session_error_setg(errp, s,
662
- "failed to obtain identity from ssh-agent");
663
+ session_error_setg(errp, s, "failed to authenticate using "
664
+ "publickey authentication");
665
goto out;
666
- }
667
- r = libssh2_agent_userauth(agent, user, identity);
668
- if (r == 0) {
669
+ } else if (r == SSH_AUTH_SUCCESS) {
670
/* Authenticated! */
671
ret = 0;
672
goto out;
673
}
674
- /* Failed to authenticate with this identity, try the next one. */
675
- prev_identity = identity;
676
}
677
678
ret = -EPERM;
679
@@ -XXX,XX +XXX,XX @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
680
"and the identities held by your ssh-agent");
681
682
out:
683
- if (agent != NULL) {
684
- /* Note: libssh2 implementation implicitly calls
685
- * libssh2_agent_disconnect if necessary.
686
- */
687
- libssh2_agent_free(agent);
688
- }
689
-
690
return ret;
691
}
692
693
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
694
int ssh_flags, int creat_mode, Error **errp)
695
{
696
int r, ret;
697
- long port = 0;
698
+ unsigned int port = 0;
699
+ int new_sock = -1;
700
701
if (opts->has_user) {
702
s->user = g_strdup(opts->user);
703
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
704
s->inet = opts->server;
705
opts->server = NULL;
706
707
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
708
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
709
error_setg(errp, "Use only numeric port value");
710
ret = -EINVAL;
711
goto err;
712
}
713
714
/* Open the socket and connect. */
715
- s->sock = inet_connect_saddr(s->inet, errp);
716
- if (s->sock < 0) {
717
+ new_sock = inet_connect_saddr(s->inet, errp);
718
+ if (new_sock < 0) {
719
ret = -EIO;
720
goto err;
721
}
722
723
+ /*
724
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
725
+ * but do not fail if it cannot be disabled.
726
+ */
727
+ r = socket_set_nodelay(new_sock);
728
+ if (r < 0) {
729
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
730
+ s->inet->host, strerror(errno));
84
+ }
731
+ }
85
+
732
+
86
+ return true;
733
/* Create SSH session. */
87
+}
734
- s->session = libssh2_session_init();
88
+
735
+ s->session = ssh_new();
89
+static int bdrv_padding_rmw_read(BdrvChild *child,
736
if (!s->session) {
90
+ BdrvTrackedRequest *req,
737
ret = -EINVAL;
91
+ BdrvRequestPadding *pad,
738
- session_error_setg(errp, s, "failed to initialize libssh2 session");
92
+ bool zero_middle)
739
+ session_error_setg(errp, s, "failed to initialize libssh session");
93
+{
740
goto err;
94
+ QEMUIOVector local_qiov;
741
}
95
+ BlockDriverState *bs = child->bs;
742
96
+ uint64_t align = bs->bl.request_alignment;
743
-#if TRACE_LIBSSH2 != 0
97
+ int ret;
744
- libssh2_trace(s->session, TRACE_LIBSSH2);
98
+
745
-#endif
99
+ assert(req->serialising && pad->buf);
746
+ /*
100
+
747
+ * Make sure we are in blocking mode during the connection and
101
+ if (pad->head || pad->merge_reads) {
748
+ * authentication phases.
102
+ uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
749
+ */
103
+
750
+ ssh_set_blocking(s->session, 1);
104
+ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
751
105
+
752
- r = libssh2_session_handshake(s->session, s->sock);
106
+ if (pad->head) {
753
- if (r != 0) {
107
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
754
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
108
+ }
755
+ if (r < 0) {
109
+ if (pad->merge_reads && pad->tail) {
756
+ ret = -EINVAL;
110
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
757
+ session_error_setg(errp, s,
111
+ }
758
+ "failed to set the user in the libssh session");
112
+ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
759
+ goto err;
113
+ align, &local_qiov, 0);
760
+ }
114
+ if (ret < 0) {
761
+
115
+ return ret;
762
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
116
+ }
763
+ if (r < 0) {
117
+ if (pad->head) {
764
+ ret = -EINVAL;
118
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
765
+ session_error_setg(errp, s,
119
+ }
766
+ "failed to set the host in the libssh session");
120
+ if (pad->merge_reads && pad->tail) {
767
+ goto err;
121
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
768
+ }
122
+ }
769
+
123
+
770
+ if (port > 0) {
124
+ if (pad->merge_reads) {
771
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
125
+ goto zero_mem;
772
+ if (r < 0) {
773
+ ret = -EINVAL;
774
+ session_error_setg(errp, s,
775
+ "failed to set the port in the libssh session");
776
+ goto err;
126
+ }
777
+ }
127
+ }
778
+ }
128
+
779
+
129
+ if (pad->tail) {
780
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
130
+ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
781
+ if (r < 0) {
131
+
782
+ ret = -EINVAL;
132
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
783
+ session_error_setg(errp, s,
133
+ ret = bdrv_aligned_preadv(
784
+ "failed to disable the compression in the libssh "
134
+ child, req,
785
+ "session");
135
+ req->overlap_offset + req->overlap_bytes - align,
786
+ goto err;
136
+ align, align, &local_qiov, 0);
137
+ if (ret < 0) {
138
+ return ret;
139
+ }
140
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
141
+ }
787
+ }
142
+
788
+
143
+zero_mem:
789
+ /* Read ~/.ssh/config. */
144
+ if (zero_middle) {
790
+ r = ssh_options_parse_config(s->session, NULL);
145
+ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
791
+ if (r < 0) {
792
+ ret = -EINVAL;
793
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
794
+ goto err;
146
+ }
795
+ }
147
+
796
+
148
+ return 0;
797
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
149
+}
798
+ if (r < 0) {
150
+
799
+ ret = -EINVAL;
151
+static void bdrv_padding_destroy(BdrvRequestPadding *pad)
800
+ session_error_setg(errp, s,
152
+{
801
+ "failed to set the socket in the libssh session");
153
+ if (pad->buf) {
802
+ goto err;
154
+ qemu_vfree(pad->buf);
155
+ qemu_iovec_destroy(&pad->local_qiov);
156
+ }
803
+ }
157
+}
804
+ /* libssh took ownership of the socket. */
158
+
805
+ s->sock = new_sock;
159
+/*
806
+ new_sock = -1;
160
+ * bdrv_pad_request
807
+
161
+ *
808
+ /* Connect. */
162
+ * Exchange request parameters with padded request if needed. Don't include RMW
809
+ r = ssh_connect(s->session);
163
+ * read of padding, bdrv_padding_rmw_read() should be called separately if
810
+ if (r != SSH_OK) {
164
+ * needed.
811
ret = -EINVAL;
165
+ *
812
session_error_setg(errp, s, "failed to establish SSH session");
166
+ * All parameters except @bs are in-out: they represent original request at
813
goto err;
167
+ * function call and padded (if padding needed) at function finish.
814
}
168
+ *
815
169
+ * Function always succeeds.
816
/* Check the remote host's key against known_hosts. */
170
+ */
817
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
171
+static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
818
+ ret = check_host_key(s, opts->host_key_check, errp);
172
+ int64_t *offset, unsigned int *bytes,
819
if (ret < 0) {
173
+ BdrvRequestPadding *pad)
820
goto err;
174
+{
821
}
175
+ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
822
176
+ return false;
823
/* Authenticate. */
824
- ret = authenticate(s, s->user, errp);
825
+ ret = authenticate(s, errp);
826
if (ret < 0) {
827
goto err;
828
}
829
830
/* Start SFTP. */
831
- s->sftp = libssh2_sftp_init(s->session);
832
+ s->sftp = sftp_new(s->session);
833
if (!s->sftp) {
834
- session_error_setg(errp, s, "failed to initialize sftp handle");
835
+ session_error_setg(errp, s, "failed to create sftp handle");
836
+ ret = -EINVAL;
837
+ goto err;
177
+ }
838
+ }
178
+
839
+
179
+ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
840
+ r = sftp_init(s->sftp);
180
+ *qiov, 0, *bytes,
841
+ if (r < 0) {
181
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
842
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
182
+ *bytes += pad->head + pad->tail;
843
ret = -EINVAL;
183
+ *offset -= pad->head;
844
goto err;
184
+ *qiov = &pad->local_qiov;
845
}
185
+
846
186
+ return true;
847
/* Open the remote file. */
187
+}
848
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
188
+
849
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
189
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
850
- creat_mode);
190
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
851
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
191
BdrvRequestFlags flags)
852
if (!s->sftp_handle) {
192
{
853
- session_error_setg(errp, s, "failed to open remote file '%s'",
193
BlockDriverState *bs = child->bs;
854
- opts->path);
194
- BlockDriver *drv = bs->drv;
855
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
195
BdrvTrackedRequest req;
856
+ opts->path);
196
-
857
ret = -EINVAL;
197
- uint64_t align = bs->bl.request_alignment;
858
goto err;
198
- uint8_t *head_buf = NULL;
859
}
199
- uint8_t *tail_buf = NULL;
860
200
- QEMUIOVector local_qiov;
861
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
201
- bool use_local_qiov = false;
862
- if (r < 0) {
202
+ BdrvRequestPadding pad;
863
+ /* Make sure the SFTP file is handled in blocking mode. */
203
int ret;
864
+ sftp_file_set_blocking(s->sftp_handle);
204
865
+
205
- trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
866
+ s->attrs = sftp_fstat(s->sftp_handle);
206
-
867
+ if (!s->attrs) {
207
- if (!drv) {
868
sftp_error_setg(errp, s, "failed to read file attributes");
208
- return -ENOMEDIUM;
869
return -EINVAL;
209
- }
870
}
210
+ trace_bdrv_co_preadv(bs, offset, bytes, flags);
871
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
211
872
return 0;
212
ret = bdrv_check_byte_request(bs, offset, bytes);
873
213
if (ret < 0) {
874
err:
214
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
875
+ if (s->attrs) {
215
flags |= BDRV_REQ_COPY_ON_READ;
876
+ sftp_attributes_free(s->attrs);
216
}
877
+ }
217
878
+ s->attrs = NULL;
218
- /* Align read if necessary by padding qiov */
879
if (s->sftp_handle) {
219
- if (offset & (align - 1)) {
880
- libssh2_sftp_close(s->sftp_handle);
220
- head_buf = qemu_blockalign(bs, align);
881
+ sftp_close(s->sftp_handle);
221
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
882
}
222
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
883
s->sftp_handle = NULL;
223
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
884
if (s->sftp) {
224
- use_local_qiov = true;
885
- libssh2_sftp_shutdown(s->sftp);
225
-
886
+ sftp_free(s->sftp);
226
- bytes += offset & (align - 1);
887
}
227
- offset = offset & ~(align - 1);
888
s->sftp = NULL;
228
- }
889
if (s->session) {
229
-
890
- libssh2_session_disconnect(s->session,
230
- if ((offset + bytes) & (align - 1)) {
891
- "from qemu ssh client: "
231
- if (!use_local_qiov) {
892
- "error opening connection");
232
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
893
- libssh2_session_free(s->session);
233
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
894
+ ssh_disconnect(s->session);
234
- use_local_qiov = true;
895
+ ssh_free(s->session);
235
- }
896
}
236
- tail_buf = qemu_blockalign(bs, align);
897
s->session = NULL;
237
- qemu_iovec_add(&local_qiov, tail_buf,
898
+ s->sock = -1;
238
- align - ((offset + bytes) & (align - 1)));
899
+ if (new_sock >= 0) {
239
-
900
+ close(new_sock);
240
- bytes = ROUND_UP(bytes, align);
901
+ }
241
- }
242
+ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
243
244
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
245
- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
246
- use_local_qiov ? &local_qiov : qiov,
247
- flags);
248
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes,
249
+ bs->bl.request_alignment,
250
+ qiov, flags);
251
tracked_request_end(&req);
252
bdrv_dec_in_flight(bs);
253
254
- if (use_local_qiov) {
255
- qemu_iovec_destroy(&local_qiov);
256
- qemu_vfree(head_buf);
257
- qemu_vfree(tail_buf);
258
- }
259
+ bdrv_padding_destroy(&pad);
260
902
261
return ret;
903
return ret;
262
}
904
}
263
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
905
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
264
BdrvTrackedRequest *req)
906
907
ssh_state_init(s);
908
909
- ssh_flags = LIBSSH2_FXF_READ;
910
+ ssh_flags = 0;
911
if (bdrv_flags & BDRV_O_RDWR) {
912
- ssh_flags |= LIBSSH2_FXF_WRITE;
913
+ ssh_flags |= O_RDWR;
914
+ } else {
915
+ ssh_flags |= O_RDONLY;
916
}
917
918
opts = ssh_parse_options(options, errp);
919
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
920
}
921
922
/* Go non-blocking. */
923
- libssh2_session_set_blocking(s->session, 0);
924
+ ssh_set_blocking(s->session, 0);
925
926
qapi_free_BlockdevOptionsSsh(opts);
927
928
return 0;
929
930
err:
931
- if (s->sock >= 0) {
932
- close(s->sock);
933
- }
934
- s->sock = -1;
935
-
936
qapi_free_BlockdevOptionsSsh(opts);
937
938
return ret;
939
@@ -XXX,XX +XXX,XX @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
265
{
940
{
266
BlockDriverState *bs = child->bs;
941
ssize_t ret;
267
- uint8_t *buf = NULL;
942
char c[1] = { '\0' };
268
QEMUIOVector local_qiov;
943
- int was_blocking = libssh2_session_get_blocking(s->session);
269
uint64_t align = bs->bl.request_alignment;
944
+ int was_blocking = ssh_is_blocking(s->session);
270
- unsigned int head_padding_bytes, tail_padding_bytes;
945
271
int ret = 0;
946
/* offset must be strictly greater than the current size so we do
272
+ bool padding;
947
* not overwrite anything */
273
+ BdrvRequestPadding pad;
948
- assert(offset > 0 && offset > s->attrs.filesize);
274
949
+ assert(offset > 0 && offset > s->attrs->size);
275
- head_padding_bytes = offset & (align - 1);
950
276
- tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
951
- libssh2_session_set_blocking(s->session, 1);
952
+ ssh_set_blocking(s->session, 1);
953
954
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
955
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
956
+ sftp_seek64(s->sftp_handle, offset - 1);
957
+ ret = sftp_write(s->sftp_handle, c, 1);
958
959
- libssh2_session_set_blocking(s->session, was_blocking);
960
+ ssh_set_blocking(s->session, was_blocking);
961
962
if (ret < 0) {
963
sftp_error_setg(errp, s, "Failed to grow file");
964
return -EIO;
965
}
966
967
- s->attrs.filesize = offset;
968
+ s->attrs->size = offset;
969
return 0;
970
}
971
972
@@ -XXX,XX +XXX,XX @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
973
ssh_state_init(&s);
974
975
ret = connect_to_ssh(&s, opts->location,
976
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
977
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
978
+ O_RDWR | O_CREAT | O_TRUNC,
979
0644, errp);
980
if (ret < 0) {
981
goto fail;
982
@@ -XXX,XX +XXX,XX @@ static int ssh_has_zero_init(BlockDriverState *bs)
983
/* Assume false, unless we can positively prove it's true. */
984
int has_zero_init = 0;
985
986
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
987
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
988
- has_zero_init = 1;
989
- }
990
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
991
+ has_zero_init = 1;
992
}
993
994
return has_zero_init;
995
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
996
.co = qemu_coroutine_self()
997
};
998
999
- r = libssh2_session_block_directions(s->session);
1000
+ r = ssh_get_poll_flags(s->session);
1001
1002
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
1003
+ if (r & SSH_READ_PENDING) {
1004
rd_handler = restart_coroutine;
1005
}
1006
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
1007
+ if (r & SSH_WRITE_PENDING) {
1008
wr_handler = restart_coroutine;
1009
}
1010
1011
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1012
trace_ssh_co_yield_back(s->sock);
1013
}
1014
1015
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
1016
- * in the remote file. Notice that it just updates a field in the
1017
- * sftp_handle structure, so there is no network traffic and it cannot
1018
- * fail.
1019
- *
1020
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
1021
- * performance since it causes the handle to throw away all in-flight
1022
- * reads and buffered readahead data. Therefore this function tries
1023
- * to be intelligent about when to call the underlying libssh2 function.
1024
- */
1025
-#define SSH_SEEK_WRITE 0
1026
-#define SSH_SEEK_READ 1
1027
-#define SSH_SEEK_FORCE 2
277
-
1028
-
1029
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
1030
-{
1031
- bool op_read = (flags & SSH_SEEK_READ) != 0;
1032
- bool force = (flags & SSH_SEEK_FORCE) != 0;
278
-
1033
-
279
- assert(flags & BDRV_REQ_ZERO_WRITE);
1034
- if (force || op_read != s->offset_op_read || offset != s->offset) {
280
- if (head_padding_bytes || tail_padding_bytes) {
1035
- trace_ssh_seek(offset);
281
- buf = qemu_blockalign(bs, align);
1036
- libssh2_sftp_seek64(s->sftp_handle, offset);
282
- qemu_iovec_init_buf(&local_qiov, buf, align);
1037
- s->offset = offset;
1038
- s->offset_op_read = op_read;
283
- }
1039
- }
284
- if (head_padding_bytes) {
1040
-}
285
- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
286
-
1041
-
287
- /* RMW the unaligned part before head. */
1042
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
288
+ padding = bdrv_init_padding(bs, offset, bytes, &pad);
1043
int64_t offset, size_t size,
289
+ if (padding) {
1044
QEMUIOVector *qiov)
290
mark_request_serialising(req, align);
1045
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
291
wait_serialising_requests(req);
1046
292
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
1047
trace_ssh_read(offset, size);
293
- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
1048
294
- align, &local_qiov, 0);
1049
- ssh_seek(s, offset, SSH_SEEK_READ);
295
- if (ret < 0) {
1050
+ trace_ssh_seek(offset);
296
- goto fail;
1051
+ sftp_seek64(s->sftp_handle, offset);
1052
1053
/* This keeps track of the current iovec element ('i'), where we
1054
* will write to next ('buf'), and the end of the current iovec
1055
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1056
buf = i->iov_base;
1057
end_of_vec = i->iov_base + i->iov_len;
1058
1059
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
1060
- * although it will also do readahead behind our backs. Therefore
1061
- * we may have to do repeated reads here until we have read 'size'
1062
- * bytes.
1063
- */
1064
for (got = 0; got < size; ) {
1065
+ size_t request_read_size;
1066
again:
1067
- trace_ssh_read_buf(buf, end_of_vec - buf);
1068
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
1069
- trace_ssh_read_return(r);
1070
+ /*
1071
+ * The size of SFTP packets is limited to 32K bytes, so limit
1072
+ * the amount of data requested to 16K, as libssh currently
1073
+ * does not handle multiple requests on its own.
1074
+ */
1075
+ request_read_size = MIN(end_of_vec - buf, 16384);
1076
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
1077
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
1078
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
1079
1080
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1081
+ if (r == SSH_AGAIN) {
1082
co_yield(s, bs);
1083
goto again;
1084
}
1085
- if (r < 0) {
1086
- sftp_error_trace(s, "read");
1087
- s->offset = -1;
1088
- return -EIO;
297
- }
1089
- }
298
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
1090
- if (r == 0) {
299
1091
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
300
- memset(buf + head_padding_bytes, 0, zero_bytes);
1092
/* EOF: Short read so pad the buffer with zeroes and return it. */
301
- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
1093
qemu_iovec_memset(qiov, got, 0, size - got);
302
- align, &local_qiov,
1094
return 0;
303
- flags & ~BDRV_REQ_ZERO_WRITE);
304
- if (ret < 0) {
305
- goto fail;
306
+ bdrv_padding_rmw_read(child, req, &pad, true);
307
+
308
+ if (pad.head || pad.merge_reads) {
309
+ int64_t aligned_offset = offset & ~(align - 1);
310
+ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
311
+
312
+ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
313
+ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
314
+ align, &local_qiov,
315
+ flags & ~BDRV_REQ_ZERO_WRITE);
316
+ if (ret < 0 || pad.merge_reads) {
317
+ /* Error or all work is done */
318
+ goto out;
319
+ }
320
+ offset += write_bytes - pad.head;
321
+ bytes -= write_bytes - pad.head;
322
}
1095
}
323
- offset += zero_bytes;
1096
+ if (r <= 0) {
324
- bytes -= zero_bytes;
1097
+ sftp_error_trace(s, "read");
325
}
1098
+ return -EIO;
326
1099
+ }
327
assert(!bytes || (offset & (align - 1)) == 0);
1100
328
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
1101
got += r;
329
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
1102
buf += r;
330
NULL, flags);
1103
- s->offset += r;
331
if (ret < 0) {
1104
if (buf >= end_of_vec && got < size) {
332
- goto fail;
1105
i++;
333
+ goto out;
1106
buf = i->iov_base;
1107
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1108
1109
trace_ssh_write(offset, size);
1110
1111
- ssh_seek(s, offset, SSH_SEEK_WRITE);
1112
+ trace_ssh_seek(offset);
1113
+ sftp_seek64(s->sftp_handle, offset);
1114
1115
/* This keeps track of the current iovec element ('i'), where we
1116
* will read from next ('buf'), and the end of the current iovec
1117
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1118
end_of_vec = i->iov_base + i->iov_len;
1119
1120
for (written = 0; written < size; ) {
1121
+ size_t request_write_size;
1122
again:
1123
- trace_ssh_write_buf(buf, end_of_vec - buf);
1124
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
1125
- trace_ssh_write_return(r);
1126
+ /*
1127
+ * Avoid too large data packets, as libssh currently does not
1128
+ * handle multiple requests on its own.
1129
+ */
1130
+ request_write_size = MIN(end_of_vec - buf, 131072);
1131
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
1132
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
1133
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
1134
1135
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1136
+ if (r == SSH_AGAIN) {
1137
co_yield(s, bs);
1138
goto again;
334
}
1139
}
335
bytes -= aligned_bytes;
1140
if (r < 0) {
336
offset += aligned_bytes;
1141
sftp_error_trace(s, "write");
337
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
1142
- s->offset = -1;
338
1143
return -EIO;
339
assert(!bytes || (offset & (align - 1)) == 0);
1144
}
340
if (bytes) {
1145
- /* The libssh2 API is very unclear about this. A comment in
341
- assert(align == tail_padding_bytes + bytes);
1146
- * the code says "nothing was acked, and no EAGAIN was
342
- /* RMW the unaligned part after tail. */
1147
- * received!" which apparently means that no data got sent
343
- mark_request_serialising(req, align);
1148
- * out, and the underlying channel didn't return any EAGAIN
344
- wait_serialising_requests(req);
1149
- * indication. I think this is a bug in either libssh2 or
345
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
1150
- * OpenSSH (server-side). In any case, forcing a seek (to
346
- ret = bdrv_aligned_preadv(child, req, offset, align,
1151
- * discard libssh2 internal buffers), and then trying again
347
- align, &local_qiov, 0);
1152
- * works for me.
348
- if (ret < 0) {
1153
- */
349
- goto fail;
1154
- if (r == 0) {
1155
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
1156
- co_yield(s, bs);
1157
- goto again;
350
- }
1158
- }
351
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
1159
352
+ assert(align == pad.tail + bytes);
1160
written += r;
353
1161
buf += r;
354
- memset(buf, 0, bytes);
1162
- s->offset += r;
355
+ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
1163
if (buf >= end_of_vec && written < size) {
356
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
1164
i++;
357
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
1165
buf = i->iov_base;
358
}
1166
end_of_vec = i->iov_base + i->iov_len;
359
-fail:
1167
}
360
- qemu_vfree(buf);
1168
361
+
1169
- if (offset + written > s->attrs.filesize)
362
+out:
1170
- s->attrs.filesize = offset + written;
363
+ bdrv_padding_destroy(&pad);
1171
+ if (offset + written > s->attrs->size) {
364
+
1172
+ s->attrs->size = offset + written;
365
return ret;
1173
+ }
366
-
1174
}
1175
1176
return 0;
1177
@@ -XXX,XX +XXX,XX @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
1178
}
367
}
1179
}
368
1180
369
/*
1181
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
370
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
1182
+#ifdef HAVE_LIBSSH_0_8
371
BlockDriverState *bs = child->bs;
1183
372
BdrvTrackedRequest req;
1184
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
373
uint64_t align = bs->bl.request_alignment;
1185
{
374
- uint8_t *head_buf = NULL;
1186
int r;
375
- uint8_t *tail_buf = NULL;
1187
376
- QEMUIOVector local_qiov;
1188
trace_ssh_flush();
377
- bool use_local_qiov = false;
1189
+
378
+ BdrvRequestPadding pad;
1190
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
379
int ret;
1191
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
380
1192
+ return 0;
381
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
1193
+ }
382
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
1194
again:
383
goto out;
1195
- r = libssh2_sftp_fsync(s->sftp_handle);
384
}
1196
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
385
1197
+ r = sftp_fsync(s->sftp_handle);
386
- if (offset & (align - 1)) {
1198
+ if (r == SSH_AGAIN) {
387
- QEMUIOVector head_qiov;
1199
co_yield(s, bs);
388
-
1200
goto again;
389
+ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
1201
}
390
mark_request_serialising(&req, align);
1202
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
391
wait_serialising_requests(&req);
1203
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
392
-
1204
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
393
- head_buf = qemu_blockalign(bs, align);
1205
- return 0;
394
- qemu_iovec_init_buf(&head_qiov, head_buf, align);
395
-
396
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
397
- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
398
- align, &head_qiov, 0);
399
- if (ret < 0) {
400
- goto fail;
401
- }
402
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
403
-
404
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
405
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
406
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
407
- use_local_qiov = true;
408
-
409
- bytes += offset & (align - 1);
410
- offset = offset & ~(align - 1);
411
-
412
- /* We have read the tail already if the request is smaller
413
- * than one aligned block.
414
- */
415
- if (bytes < align) {
416
- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
417
- bytes = align;
418
- }
419
- }
1206
- }
420
-
1207
if (r < 0) {
421
- if ((offset + bytes) & (align - 1)) {
1208
sftp_error_trace(s, "fsync");
422
- QEMUIOVector tail_qiov;
1209
return -EIO;
423
- size_t tail_bytes;
1210
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
424
- bool waited;
425
-
426
- mark_request_serialising(&req, align);
427
- waited = wait_serialising_requests(&req);
428
- assert(!waited || !use_local_qiov);
429
-
430
- tail_buf = qemu_blockalign(bs, align);
431
- qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
432
-
433
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
434
- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
435
- align, align, &tail_qiov, 0);
436
- if (ret < 0) {
437
- goto fail;
438
- }
439
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
440
-
441
- if (!use_local_qiov) {
442
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
443
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
444
- use_local_qiov = true;
445
- }
446
-
447
- tail_bytes = (offset + bytes) & (align - 1);
448
- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
449
-
450
- bytes = ROUND_UP(bytes, align);
451
+ bdrv_padding_rmw_read(child, &req, &pad, false);
452
}
453
454
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
455
- use_local_qiov ? &local_qiov : qiov,
456
- flags);
457
+ qiov, flags);
458
459
-fail:
460
+ bdrv_padding_destroy(&pad);
461
462
- if (use_local_qiov) {
463
- qemu_iovec_destroy(&local_qiov);
464
- }
465
- qemu_vfree(head_buf);
466
- qemu_vfree(tail_buf);
467
out:
468
tracked_request_end(&req);
469
bdrv_dec_in_flight(bs);
470
+
471
return ret;
1211
return ret;
472
}
1212
}
1213
1214
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
1215
+#else /* !HAVE_LIBSSH_0_8 */
1216
1217
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1218
{
1219
BDRVSSHState *s = bs->opaque;
1220
1221
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
1222
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
1223
return 0;
1224
}
1225
1226
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
1227
+#endif /* !HAVE_LIBSSH_0_8 */
1228
1229
static int64_t ssh_getlength(BlockDriverState *bs)
1230
{
1231
BDRVSSHState *s = bs->opaque;
1232
int64_t length;
1233
1234
- /* Note we cannot make a libssh2 call here. */
1235
- length = (int64_t) s->attrs.filesize;
1236
+ /* Note we cannot make a libssh call here. */
1237
+ length = (int64_t) s->attrs->size;
1238
trace_ssh_getlength(length);
1239
1240
return length;
1241
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
1242
return -ENOTSUP;
1243
}
1244
1245
- if (offset < s->attrs.filesize) {
1246
+ if (offset < s->attrs->size) {
1247
error_setg(errp, "ssh driver does not support shrinking files");
1248
return -ENOTSUP;
1249
}
1250
1251
- if (offset == s->attrs.filesize) {
1252
+ if (offset == s->attrs->size) {
1253
return 0;
1254
}
1255
1256
@@ -XXX,XX +XXX,XX @@ static void bdrv_ssh_init(void)
1257
{
1258
int r;
1259
1260
- r = libssh2_init(0);
1261
+ r = ssh_init();
1262
if (r != 0) {
1263
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
1264
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
1265
exit(EXIT_FAILURE);
1266
}
1267
1268
+#if TRACE_LIBSSH != 0
1269
+ ssh_set_log_level(TRACE_LIBSSH);
1270
+#endif
1271
+
1272
bdrv_register(&bdrv_ssh);
1273
}
1274
1275
diff --git a/.travis.yml b/.travis.yml
1276
index XXXXXXX..XXXXXXX 100644
1277
--- a/.travis.yml
1278
+++ b/.travis.yml
1279
@@ -XXX,XX +XXX,XX @@ addons:
1280
- libseccomp-dev
1281
- libspice-protocol-dev
1282
- libspice-server-dev
1283
- - libssh2-1-dev
1284
+ - libssh-dev
1285
- liburcu-dev
1286
- libusb-1.0-0-dev
1287
- libvte-2.91-dev
1288
@@ -XXX,XX +XXX,XX @@ matrix:
1289
- libseccomp-dev
1290
- libspice-protocol-dev
1291
- libspice-server-dev
1292
- - libssh2-1-dev
1293
+ - libssh-dev
1294
- liburcu-dev
1295
- libusb-1.0-0-dev
1296
- libvte-2.91-dev
1297
diff --git a/block/trace-events b/block/trace-events
1298
index XXXXXXX..XXXXXXX 100644
1299
--- a/block/trace-events
1300
+++ b/block/trace-events
1301
@@ -XXX,XX +XXX,XX @@ nbd_client_connect_success(const char *export_name) "export '%s'"
1302
# ssh.c
1303
ssh_restart_coroutine(void *co) "co=%p"
1304
ssh_flush(void) "fsync"
1305
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
1306
+ssh_check_host_key_knownhosts(void) "host key OK"
1307
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
1308
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
1309
ssh_co_yield_back(int sock) "s->sock=%d - back"
1310
ssh_getlength(int64_t length) "length=%" PRIi64
1311
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
1312
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1313
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
1314
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
1315
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
1316
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
1317
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1318
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
1319
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
1320
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
1321
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
1322
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
1323
+ssh_auth_methods(int methods) "auth methods=0x%x"
1324
+ssh_server_status(int status) "server status=%d"
1325
1326
# curl.c
1327
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
1328
@@ -XXX,XX +XXX,XX @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
1329
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
1330
1331
# ssh.c
1332
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
1333
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
1334
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
1335
index XXXXXXX..XXXXXXX 100644
1336
--- a/docs/qemu-block-drivers.texi
1337
+++ b/docs/qemu-block-drivers.texi
1338
@@ -XXX,XX +XXX,XX @@ print a warning when @code{fsync} is not supported:
1339
1340
warning: ssh server @code{ssh.example.com:22} does not support fsync
1341
1342
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
1343
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
1344
supported.
1345
1346
@node disk_images_nvme
1347
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
1348
index XXXXXXX..XXXXXXX 100644
1349
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
1350
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
1351
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1352
mxe-$TARGET-w64-mingw32.shared-curl \
1353
mxe-$TARGET-w64-mingw32.shared-glib \
1354
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1355
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1356
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1357
mxe-$TARGET-w64-mingw32.shared-lzo \
1358
mxe-$TARGET-w64-mingw32.shared-nettle \
1359
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
1360
index XXXXXXX..XXXXXXX 100644
1361
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
1362
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
1363
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1364
mxe-$TARGET-w64-mingw32.shared-curl \
1365
mxe-$TARGET-w64-mingw32.shared-glib \
1366
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1367
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1368
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1369
mxe-$TARGET-w64-mingw32.shared-lzo \
1370
mxe-$TARGET-w64-mingw32.shared-nettle \
1371
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
1372
index XXXXXXX..XXXXXXX 100644
1373
--- a/tests/docker/dockerfiles/fedora.docker
1374
+++ b/tests/docker/dockerfiles/fedora.docker
1375
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1376
libpng-devel \
1377
librbd-devel \
1378
libseccomp-devel \
1379
- libssh2-devel \
1380
+ libssh-devel \
1381
libubsan \
1382
libusbx-devel \
1383
libxml2-devel \
1384
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1385
mingw32-gtk3 \
1386
mingw32-libjpeg-turbo \
1387
mingw32-libpng \
1388
- mingw32-libssh2 \
1389
mingw32-libtasn1 \
1390
mingw32-nettle \
1391
mingw32-pixman \
1392
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1393
mingw64-gtk3 \
1394
mingw64-libjpeg-turbo \
1395
mingw64-libpng \
1396
- mingw64-libssh2 \
1397
mingw64-libtasn1 \
1398
mingw64-nettle \
1399
mingw64-pixman \
1400
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
1401
index XXXXXXX..XXXXXXX 100644
1402
--- a/tests/docker/dockerfiles/ubuntu.docker
1403
+++ b/tests/docker/dockerfiles/ubuntu.docker
1404
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1405
libsnappy-dev \
1406
libspice-protocol-dev \
1407
libspice-server-dev \
1408
- libssh2-1-dev \
1409
+ libssh-dev \
1410
libusb-1.0-0-dev \
1411
libusbredirhost-dev \
1412
libvdeplug-dev \
1413
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
1414
index XXXXXXX..XXXXXXX 100644
1415
--- a/tests/docker/dockerfiles/ubuntu1804.docker
1416
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
1417
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1418
libsnappy-dev \
1419
libspice-protocol-dev \
1420
libspice-server-dev \
1421
- libssh2-1-dev \
1422
+ libssh-dev \
1423
libusb-1.0-0-dev \
1424
libusbredirhost-dev \
1425
libvdeplug-dev \
1426
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
1427
index XXXXXXX..XXXXXXX 100755
1428
--- a/tests/qemu-iotests/207
1429
+++ b/tests/qemu-iotests/207
1430
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1431
1432
iotests.img_info_log(remote_path)
1433
1434
- md5_key = subprocess.check_output(
1435
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1436
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
1437
- shell=True).rstrip().decode('ascii')
1438
+ keys = subprocess.check_output(
1439
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1440
+ 'cut -d" " -f3',
1441
+ shell=True).rstrip().decode('ascii').split('\n')
1442
+
1443
+ # Mappings of base64 representations to digests
1444
+ md5_keys = {}
1445
+ sha1_keys = {}
1446
+
1447
+ for key in keys:
1448
+ md5_keys[key] = subprocess.check_output(
1449
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
1450
+ shell=True).rstrip().decode('ascii')
1451
+
1452
+ sha1_keys[key] = subprocess.check_output(
1453
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
1454
+ shell=True).rstrip().decode('ascii')
1455
1456
vm.launch()
1457
+
1458
+ # Find correct key first
1459
+ matching_key = None
1460
+ for key in keys:
1461
+ result = vm.qmp('blockdev-add',
1462
+ driver='ssh', node_name='node0', path=disk_path,
1463
+ server={
1464
+ 'host': '127.0.0.1',
1465
+ 'port': '22',
1466
+ }, host_key_check={
1467
+ 'mode': 'hash',
1468
+ 'type': 'md5',
1469
+ 'hash': md5_keys[key],
1470
+ })
1471
+
1472
+ if 'error' not in result:
1473
+ vm.qmp('blockdev-del', node_name='node0')
1474
+ matching_key = key
1475
+ break
1476
+
1477
+ if matching_key is None:
1478
+ vm.shutdown()
1479
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
1480
+
1481
blockdev_create(vm, { 'driver': 'ssh',
1482
'location': {
1483
'path': disk_path,
1484
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1485
'host-key-check': {
1486
'mode': 'hash',
1487
'type': 'md5',
1488
- 'hash': md5_key,
1489
+ 'hash': md5_keys[matching_key],
1490
}
1491
},
1492
'size': 8388608 })
1493
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1494
1495
iotests.img_info_log(remote_path)
1496
1497
- sha1_key = subprocess.check_output(
1498
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1499
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
1500
- shell=True).rstrip().decode('ascii')
1501
-
1502
vm.launch()
1503
blockdev_create(vm, { 'driver': 'ssh',
1504
'location': {
1505
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1506
'host-key-check': {
1507
'mode': 'hash',
1508
'type': 'sha1',
1509
- 'hash': sha1_key,
1510
+ 'hash': sha1_keys[matching_key],
1511
}
1512
},
1513
'size': 4194304 })
1514
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
1515
index XXXXXXX..XXXXXXX 100644
1516
--- a/tests/qemu-iotests/207.out
1517
+++ b/tests/qemu-iotests/207.out
1518
@@ -XXX,XX +XXX,XX @@ virtual size: 4 MiB (4194304 bytes)
1519
1520
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
1521
{"return": {}}
1522
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
1523
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
1524
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
1525
{"return": {}}
473
1526
474
--
1527
--
475
2.21.0
1528
2.21.0
476
1529
477
1530
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Add handlers supporting qiov_offset parameter:
4
bdrv_co_preadv_part
5
bdrv_co_pwritev_part
6
bdrv_co_pwritev_compressed_part
7
This is used to reduce the need to define local_qiovs and hd_qiovs in all
8
corners of block layer code. The following patches will increase usage
9
of this new API part by part.
10
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Message-id: 20190604161514.262241-5-vsementsov@virtuozzo.com
14
Message-Id: <20190604161514.262241-5-vsementsov@virtuozzo.com>
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
include/block/block_int.h | 15 ++++++
18
block/backup.c | 2 +-
19
block/io.c | 96 +++++++++++++++++++++++++++++++--------
20
qemu-img.c | 4 +-
21
4 files changed, 95 insertions(+), 22 deletions(-)
22
23
diff --git a/include/block/block_int.h b/include/block/block_int.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/include/block/block_int.h
26
+++ b/include/block/block_int.h
27
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
28
*/
29
int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
30
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
31
+ int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
32
+ uint64_t offset, uint64_t bytes,
33
+ QEMUIOVector *qiov, size_t qiov_offset, int flags);
34
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
35
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
36
/**
37
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
38
*/
39
int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
40
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
41
+ int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
42
+ uint64_t offset, uint64_t bytes,
43
+ QEMUIOVector *qiov, size_t qiov_offset, int flags);
44
45
/*
46
* Efficiently zero a region of the disk image. Typically an image format
47
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
48
49
int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
50
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
51
+ int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
52
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
53
+ size_t qiov_offset);
54
55
int (*bdrv_snapshot_create)(BlockDriverState *bs,
56
QEMUSnapshotInfo *sn_info);
57
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
58
const char *const *strong_runtime_opts;
59
};
60
61
+static inline bool block_driver_can_compress(BlockDriver *drv)
62
+{
63
+ return drv->bdrv_co_pwritev_compressed ||
64
+ drv->bdrv_co_pwritev_compressed_part;
65
+}
66
+
67
typedef struct BlockLimits {
68
/* Alignment requirement, in bytes, for offset/length of I/O
69
* requests. Must be a power of 2 less than INT_MAX; defaults to
70
diff --git a/block/backup.c b/block/backup.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/block/backup.c
73
+++ b/block/backup.c
74
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
75
return NULL;
76
}
77
78
- if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
79
+ if (compress && !block_driver_can_compress(target->drv)) {
80
error_setg(errp, "Compression is not supported for this drive %s",
81
bdrv_get_device_name(target));
82
return NULL;
83
diff --git a/block/io.c b/block/io.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/block/io.c
86
+++ b/block/io.c
87
@@ -XXX,XX +XXX,XX @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
88
89
/* Default alignment based on whether driver has byte interface */
90
bs->bl.request_alignment = (drv->bdrv_co_preadv ||
91
- drv->bdrv_aio_preadv) ? 1 : 512;
92
+ drv->bdrv_aio_preadv ||
93
+ drv->bdrv_co_preadv_part) ? 1 : 512;
94
95
/* Take some limits from the children as a default */
96
if (bs->file) {
97
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
98
99
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
100
uint64_t offset, uint64_t bytes,
101
- QEMUIOVector *qiov, int flags)
102
+ QEMUIOVector *qiov,
103
+ size_t qiov_offset, int flags)
104
{
105
BlockDriver *drv = bs->drv;
106
int64_t sector_num;
107
unsigned int nb_sectors;
108
+ QEMUIOVector local_qiov;
109
+ int ret;
110
111
assert(!(flags & ~BDRV_REQ_MASK));
112
assert(!(flags & BDRV_REQ_NO_FALLBACK));
113
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
114
return -ENOMEDIUM;
115
}
116
117
+ if (drv->bdrv_co_preadv_part) {
118
+ return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
119
+ flags);
120
+ }
121
+
122
+ if (qiov_offset > 0 || bytes != qiov->size) {
123
+ qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
124
+ qiov = &local_qiov;
125
+ }
126
+
127
if (drv->bdrv_co_preadv) {
128
- return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
129
+ ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
130
+ goto out;
131
}
132
133
if (drv->bdrv_aio_preadv) {
134
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
135
acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
136
bdrv_co_io_em_complete, &co);
137
if (acb == NULL) {
138
- return -EIO;
139
+ ret = -EIO;
140
+ goto out;
141
} else {
142
qemu_coroutine_yield();
143
- return co.ret;
144
+ ret = co.ret;
145
+ goto out;
146
}
147
}
148
149
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
150
assert(bytes <= BDRV_REQUEST_MAX_BYTES);
151
assert(drv->bdrv_co_readv);
152
153
- return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
154
+ ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
155
+
156
+out:
157
+ if (qiov == &local_qiov) {
158
+ qemu_iovec_destroy(&local_qiov);
159
+ }
160
+
161
+ return ret;
162
}
163
164
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
165
uint64_t offset, uint64_t bytes,
166
- QEMUIOVector *qiov, int flags)
167
+ QEMUIOVector *qiov,
168
+ size_t qiov_offset, int flags)
169
{
170
BlockDriver *drv = bs->drv;
171
int64_t sector_num;
172
unsigned int nb_sectors;
173
+ QEMUIOVector local_qiov;
174
int ret;
175
176
assert(!(flags & ~BDRV_REQ_MASK));
177
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
178
return -ENOMEDIUM;
179
}
180
181
+ if (drv->bdrv_co_pwritev_part) {
182
+ ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
183
+ flags & bs->supported_write_flags);
184
+ flags &= ~bs->supported_write_flags;
185
+ goto emulate_flags;
186
+ }
187
+
188
+ if (qiov_offset > 0 || bytes != qiov->size) {
189
+ qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
190
+ qiov = &local_qiov;
191
+ }
192
+
193
if (drv->bdrv_co_pwritev) {
194
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
195
flags & bs->supported_write_flags);
196
@@ -XXX,XX +XXX,XX @@ emulate_flags:
197
ret = bdrv_co_flush(bs);
198
}
199
200
+ if (qiov == &local_qiov) {
201
+ qemu_iovec_destroy(&local_qiov);
202
+ }
203
+
204
return ret;
205
}
206
207
static int coroutine_fn
208
bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
209
- uint64_t bytes, QEMUIOVector *qiov)
210
+ uint64_t bytes, QEMUIOVector *qiov,
211
+ size_t qiov_offset)
212
{
213
BlockDriver *drv = bs->drv;
214
+ QEMUIOVector local_qiov;
215
+ int ret;
216
217
if (!drv) {
218
return -ENOMEDIUM;
219
}
220
221
- if (!drv->bdrv_co_pwritev_compressed) {
222
+ if (!block_driver_can_compress(drv)) {
223
return -ENOTSUP;
224
}
225
226
- return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
227
+ if (drv->bdrv_co_pwritev_compressed_part) {
228
+ return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
229
+ qiov, qiov_offset);
230
+ }
231
+
232
+ if (qiov_offset == 0) {
233
+ return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
234
+ }
235
+
236
+ qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
237
+ ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov);
238
+ qemu_iovec_destroy(&local_qiov);
239
+
240
+ return ret;
241
}
242
243
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
244
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
245
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
246
247
ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
248
- &local_qiov, 0);
249
+ &local_qiov, 0, 0);
250
if (ret < 0) {
251
goto err;
252
}
253
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
254
* necessary to flush even in cache=writethrough mode.
255
*/
256
ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
257
- &local_qiov,
258
+ &local_qiov, 0,
259
BDRV_REQ_WRITE_UNCHANGED);
260
}
261
262
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
263
qemu_iovec_init(&local_qiov, qiov->niov);
264
qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
265
ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
266
- &local_qiov, 0);
267
+ &local_qiov, 0, 0);
268
qemu_iovec_destroy(&local_qiov);
269
if (ret < 0) {
270
goto err;
271
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
272
273
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
274
if (bytes <= max_bytes && bytes <= max_transfer) {
275
- ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
276
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0, 0);
277
goto out;
278
}
279
280
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
281
qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
282
283
ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
284
- num, &local_qiov, 0);
285
+ num, &local_qiov, 0, 0);
286
max_bytes -= num;
287
qemu_iovec_destroy(&local_qiov);
288
} else {
289
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
290
}
291
qemu_iovec_init_buf(&qiov, buf, num);
292
293
- ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
294
+ ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);
295
296
/* Keep bounce buffer around if it is big enough for all
297
* all future requests.
298
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
299
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
300
ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
301
} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
302
- ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
303
+ ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov, 0);
304
} else if (bytes <= max_transfer) {
305
bdrv_debug_event(bs, BLKDBG_PWRITEV);
306
- ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
307
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, 0, flags);
308
} else {
309
bdrv_debug_event(bs, BLKDBG_PWRITEV);
310
while (bytes_remaining) {
311
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
312
qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
313
314
ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
315
- num, &local_qiov, local_flags);
316
+ num, &local_qiov, 0, local_flags);
317
qemu_iovec_destroy(&local_qiov);
318
if (ret < 0) {
319
break;
320
diff --git a/qemu-img.c b/qemu-img.c
321
index XXXXXXX..XXXXXXX 100644
322
--- a/qemu-img.c
323
+++ b/qemu-img.c
324
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
325
const char *preallocation =
326
qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
327
328
- if (drv && !drv->bdrv_co_pwritev_compressed) {
329
+ if (drv && !block_driver_can_compress(drv)) {
330
error_report("Compression not supported for this file format");
331
ret = -1;
332
goto out;
333
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
334
}
335
out_bs = blk_bs(s.target);
336
337
- if (s.compressed && !out_bs->drv->bdrv_co_pwritev_compressed) {
338
+ if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
339
error_report("Compression not supported for this file format");
340
ret = -1;
341
goto out;
342
--
343
2.21.0
344
345
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Use and support the new API in bdrv_co_do_copy_on_readv. Note that in case
4
of allocated-in-top we need to shrink the read size to MIN(..) by hand, as
5
pre-patch this was actually done implicitly by qemu_iovec_concat (and
6
we used local_qiov.size).
7
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20190604161514.262241-6-vsementsov@virtuozzo.com
11
Message-Id: <20190604161514.262241-6-vsementsov@virtuozzo.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
block/io.c | 18 +++++++++---------
15
1 file changed, 9 insertions(+), 9 deletions(-)
16
17
diff --git a/block/io.c b/block/io.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/io.c
20
+++ b/block/io.c
21
@@ -XXX,XX +XXX,XX @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
22
23
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
24
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
25
- int flags)
26
+ size_t qiov_offset, int flags)
27
{
28
BlockDriverState *bs = child->bs;
29
30
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
31
void *bounce_buffer;
32
33
BlockDriver *drv = bs->drv;
34
- QEMUIOVector local_qiov;
35
int64_t cluster_offset;
36
int64_t cluster_bytes;
37
size_t skip_bytes;
38
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
39
assert(skip_bytes < pnum);
40
41
if (ret <= 0) {
42
+ QEMUIOVector local_qiov;
43
+
44
/* Must copy-on-read; use the bounce buffer */
45
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
46
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
47
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
48
}
49
50
if (!(flags & BDRV_REQ_PREFETCH)) {
51
- qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
52
+ qemu_iovec_from_buf(qiov, qiov_offset + progress,
53
+ bounce_buffer + skip_bytes,
54
pnum - skip_bytes);
55
}
56
} else if (!(flags & BDRV_REQ_PREFETCH)) {
57
/* Read directly into the destination */
58
- qemu_iovec_init(&local_qiov, qiov->niov);
59
- qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
60
- ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
61
- &local_qiov, 0, 0);
62
- qemu_iovec_destroy(&local_qiov);
63
+ ret = bdrv_driver_preadv(bs, offset + progress,
64
+ MIN(pnum - skip_bytes, bytes - progress),
65
+ qiov, qiov_offset + progress, 0);
66
if (ret < 0) {
67
goto err;
68
}
69
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
70
}
71
72
if (!ret || pnum != bytes) {
73
- ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, flags);
74
+ ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, 0, flags);
75
goto out;
76
} else if (flags & BDRV_REQ_PREFETCH) {
77
goto out;
78
--
79
2.21.0
80
81
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Allocate bounce_buffer only if it is really needed. Also, sub-optimize
4
allocation size (why not?).
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20190604161514.262241-7-vsementsov@virtuozzo.com
9
Message-Id: <20190604161514.262241-7-vsementsov@virtuozzo.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
block/io.c | 21 ++++++++++++---------
13
1 file changed, 12 insertions(+), 9 deletions(-)
14
15
diff --git a/block/io.c b/block/io.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/io.c
18
+++ b/block/io.c
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
20
* modifying the image file. This is critical for zero-copy guest I/O
21
* where anything might happen inside guest memory.
22
*/
23
- void *bounce_buffer;
24
+ void *bounce_buffer = NULL;
25
26
BlockDriver *drv = bs->drv;
27
int64_t cluster_offset;
28
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
29
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
30
cluster_offset, cluster_bytes);
31
32
- bounce_buffer = qemu_try_blockalign(bs,
33
- MIN(MIN(max_transfer, cluster_bytes),
34
- MAX_BOUNCE_BUFFER));
35
- if (bounce_buffer == NULL) {
36
- ret = -ENOMEM;
37
- goto err;
38
- }
39
-
40
while (cluster_bytes) {
41
int64_t pnum;
42
43
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
44
45
/* Must copy-on-read; use the bounce buffer */
46
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
47
+ if (!bounce_buffer) {
48
+ int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
49
+ int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
50
+ int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
51
+
52
+ bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
53
+ if (!bounce_buffer) {
54
+ ret = -ENOMEM;
55
+ goto err;
56
+ }
57
+ }
58
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
59
60
ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
61
--
62
2.21.0
63
64
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
Tests should place their files into the test directory. This includes
2
Unix sockets. 205 currently fails to do so, which prevents it from
3
being run concurrently.
2
4
3
Use and support the new API (qiov_offset) in bdrv_aligned_preadv.
5
Signed-off-by: Max Reitz <mreitz@redhat.com>
6
Message-id: 20190618210238.9524-1-mreitz@redhat.com
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
---
10
tests/qemu-iotests/205 | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
4
12
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
13
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
6
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
14
index XXXXXXX..XXXXXXX 100755
7
Message-id: 20190604161514.262241-8-vsementsov@virtuozzo.com
15
--- a/tests/qemu-iotests/205
8
Message-Id: <20190604161514.262241-8-vsementsov@virtuozzo.com>
16
+++ b/tests/qemu-iotests/205
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
@@ -XXX,XX +XXX,XX @@ import iotests
10
---
18
import time
11
block/io.c | 21 ++++++++-------------
19
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
12
1 file changed, 8 insertions(+), 13 deletions(-)
20
13
21
-nbd_sock = 'nbd_sock'
14
diff --git a/block/io.c b/block/io.c
22
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
15
index XXXXXXX..XXXXXXX 100644
23
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
16
--- a/block/io.c
24
disk = os.path.join(iotests.test_dir, 'disk')
17
+++ b/block/io.c
18
@@ -XXX,XX +XXX,XX @@ err:
19
*/
20
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
21
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
22
- int64_t align, QEMUIOVector *qiov, int flags)
23
+ int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
24
{
25
BlockDriverState *bs = child->bs;
26
int64_t total_bytes, max_bytes;
27
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
28
assert(is_power_of_2(align));
29
assert((offset & (align - 1)) == 0);
30
assert((bytes & (align - 1)) == 0);
31
- assert(!qiov || bytes == qiov->size);
32
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
33
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
34
align);
35
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
36
}
37
38
if (!ret || pnum != bytes) {
39
- ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, 0, flags);
40
+ ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
41
+ qiov, qiov_offset, flags);
42
goto out;
43
} else if (flags & BDRV_REQ_PREFETCH) {
44
goto out;
45
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
46
47
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
48
if (bytes <= max_bytes && bytes <= max_transfer) {
49
- ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0, 0);
50
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
51
goto out;
52
}
53
54
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
55
int num;
56
57
if (max_bytes) {
58
- QEMUIOVector local_qiov;
59
-
60
num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
61
assert(num);
62
- qemu_iovec_init(&local_qiov, qiov->niov);
63
- qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
64
65
ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
66
- num, &local_qiov, 0, 0);
67
+ num, qiov, bytes - bytes_remaining, 0);
68
max_bytes -= num;
69
- qemu_iovec_destroy(&local_qiov);
70
} else {
71
num = bytes_remaining;
72
ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
73
@@ -XXX,XX +XXX,XX @@ static int bdrv_padding_rmw_read(BdrvChild *child,
74
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
75
}
76
ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
77
- align, &local_qiov, 0);
78
+ align, &local_qiov, 0, 0);
79
if (ret < 0) {
80
return ret;
81
}
82
@@ -XXX,XX +XXX,XX @@ static int bdrv_padding_rmw_read(BdrvChild *child,
83
ret = bdrv_aligned_preadv(
84
child, req,
85
req->overlap_offset + req->overlap_bytes - align,
86
- align, align, &local_qiov, 0);
87
+ align, align, &local_qiov, 0, 0);
88
if (ret < 0) {
89
return ret;
90
}
91
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
92
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
93
ret = bdrv_aligned_preadv(child, &req, offset, bytes,
94
bs->bl.request_alignment,
95
- qiov, flags);
96
+ qiov, 0, flags);
97
tracked_request_end(&req);
98
bdrv_dec_in_flight(bs);
99
25
100
--
26
--
101
2.21.0
27
2.21.0
102
28
103
29
diff view generated by jsdifflib