1
The following changes since commit bfec359afba088aaacc7d316f43302f28c6e642a:
1
The following changes since commit 848a6caa88b9f082c89c9b41afa975761262981d:
2
2
3
Merge remote-tracking branch 'remotes/armbru/tags/pull-qdev-2017-04-21' into staging (2017-04-21 11:42:03 +0100)
3
Merge tag 'migration-20230602-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-06-02 17:33:29 -0700)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/hreitz/qemu.git tags/pull-block-2023-06-05
8
8
9
for you to fetch changes up to 1507631e438930bc07f776f303af127a9cdb4d41:
9
for you to fetch changes up to 42a2890a76f4783cd1c212f27856edcf2b5e8a75:
10
10
11
qemu-iotests: _cleanup_qemu must be called on exit (2017-04-21 08:32:44 -0400)
11
qcow2: add discard-no-unref option (2023-06-05 13:15:42 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches
14
15
15
Block patches for 2.10
16
- Fix padding of unaligned vectored requests to match the host alignment
17
for vectors with 1023 or 1024 buffers
18
- Refactor and fix bugs in parallels's image check functionality
19
- Add an option to the qcow2 driver to retain (qcow2-level) allocations
20
on discard requests from the guest (while still forwarding the discard
21
to the lower level and marking the range as zero)
16
22
17
----------------------------------------------------------------
23
----------------------------------------------------------------
24
Alexander Ivanov (12):
25
parallels: Out of image offset in BAT leads to image inflation
26
parallels: Fix high_off calculation in parallels_co_check()
27
parallels: Fix image_end_offset and data_end after out-of-image check
28
parallels: create parallels_set_bat_entry_helper() to assign BAT value
29
parallels: Use generic infrastructure for BAT writing in
30
parallels_co_check()
31
parallels: Move check of unclean image to a separate function
32
parallels: Move check of cluster outside image to a separate function
33
parallels: Fix statistics calculation
34
parallels: Move check of leaks to a separate function
35
parallels: Move statistic collection to a separate function
36
parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD
37
parallels: Incorrect condition in out-of-image check
18
38
19
Ashish Mittal (2):
39
Hanna Czenczek (4):
20
block/vxhs.c: Add support for a new block device type called "vxhs"
40
util/iov: Make qiov_slice() public
21
block/vxhs.c: Add qemu-iotests for new block device type "vxhs"
41
block: Collapse padded I/O vecs exceeding IOV_MAX
42
util/iov: Remove qemu_iovec_init_extended()
43
iotests/iov-padding: New test
22
44
23
Jeff Cody (10):
45
Jean-Louis Dupond (1):
24
qemu-iotests: exclude vxhs from image creation via protocol
46
qcow2: add discard-no-unref option
25
block: add bdrv_set_read_only() helper function
26
block: do not set BDS read_only if copy_on_read enabled
27
block: honor BDRV_O_ALLOW_RDWR when clearing bs->read_only
28
block: code movement
29
block: introduce bdrv_can_set_read_only()
30
block: use bdrv_can_set_read_only() during reopen
31
block/rbd - update variable names to more apt names
32
block/rbd: Add support for reopen()
33
qemu-iotests: _cleanup_qemu must be called on exit
34
47
35
block.c | 56 +++-
48
qapi/block-core.json | 12 ++
36
block/Makefile.objs | 2 +
49
block/qcow2.h | 3 +
37
block/bochs.c | 5 +-
50
include/qemu/iov.h | 8 +-
38
block/cloop.c | 5 +-
51
block/io.c | 166 ++++++++++++++++++--
39
block/dmg.c | 6 +-
52
block/parallels.c | 190 ++++++++++++++++-------
40
block/rbd.c | 65 +++--
53
block/qcow2-cluster.c | 32 +++-
41
block/trace-events | 17 ++
54
block/qcow2.c | 18 +++
42
block/vvfat.c | 19 +-
55
util/iov.c | 89 ++---------
43
block/vxhs.c | 575 +++++++++++++++++++++++++++++++++++++++
56
qemu-options.hx | 12 ++
44
configure | 39 +++
57
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++
45
include/block/block.h | 2 +
58
tests/qemu-iotests/tests/iov-padding.out | 59 +++++++
46
qapi/block-core.json | 23 +-
59
11 files changed, 523 insertions(+), 151 deletions(-)
47
tests/qemu-iotests/017 | 1 +
60
create mode 100755 tests/qemu-iotests/tests/iov-padding
48
tests/qemu-iotests/020 | 1 +
61
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
49
tests/qemu-iotests/028 | 1 +
50
tests/qemu-iotests/029 | 1 +
51
tests/qemu-iotests/073 | 1 +
52
tests/qemu-iotests/094 | 11 +-
53
tests/qemu-iotests/102 | 5 +-
54
tests/qemu-iotests/109 | 1 +
55
tests/qemu-iotests/114 | 1 +
56
tests/qemu-iotests/117 | 1 +
57
tests/qemu-iotests/130 | 2 +
58
tests/qemu-iotests/134 | 1 +
59
tests/qemu-iotests/140 | 1 +
60
tests/qemu-iotests/141 | 1 +
61
tests/qemu-iotests/143 | 1 +
62
tests/qemu-iotests/156 | 2 +
63
tests/qemu-iotests/158 | 1 +
64
tests/qemu-iotests/common | 6 +
65
tests/qemu-iotests/common.config | 13 +
66
tests/qemu-iotests/common.filter | 1 +
67
tests/qemu-iotests/common.rc | 19 ++
68
33 files changed, 844 insertions(+), 42 deletions(-)
69
create mode 100644 block/vxhs.c
70
62
71
--
63
--
72
2.9.3
64
2.40.1
73
74
diff view generated by jsdifflib
New patch
1
We want to inline qemu_iovec_init_extended() in block/io.c for padding
2
requests, and having access to qiov_slice() is useful for this. As a
3
public function, it is renamed to qemu_iovec_slice().
1
4
5
(We will need to count the number of I/O vector elements of a slice
6
there, and then later process this slice. Without qiov_slice(), we
7
would need to call qemu_iovec_subvec_niov(), and all further
8
IOV-processing functions may need to skip prefixing elements to
9
accommodate a qiov_offset. Because qemu_iovec_subvec_niov()
10
internally calls qiov_slice(), we can just have the block/io.c code call
11
qiov_slice() itself, thus get the number of elements, and also create an
12
iovec array with the superfluous prefixing elements stripped, so the
13
following processing functions no longer need to skip them.)
14
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
17
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
18
Message-Id: <20230411173418.19549-2-hreitz@redhat.com>
19
---
20
include/qemu/iov.h | 3 +++
21
util/iov.c | 14 +++++++-------
22
2 files changed, 10 insertions(+), 7 deletions(-)
23
24
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/qemu/iov.h
27
+++ b/include/qemu/iov.h
28
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended(
29
void *tail_buf, size_t tail_len);
30
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
31
size_t offset, size_t len);
32
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
33
+ size_t offset, size_t len,
34
+ size_t *head, size_t *tail, int *niov);
35
int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len);
36
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
37
void qemu_iovec_concat(QEMUIOVector *dst,
38
diff --git a/util/iov.c b/util/iov.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/util/iov.c
41
+++ b/util/iov.c
42
@@ -XXX,XX +XXX,XX @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
43
}
44
45
/*
46
- * qiov_slice
47
+ * qemu_iovec_slice
48
*
49
* Find subarray of iovec's, containing requested range. @head would
50
* be offset in first iov (returned by the function), @tail would be
51
* count of extra bytes in last iovec (returned iov + @niov - 1).
52
*/
53
-static struct iovec *qiov_slice(QEMUIOVector *qiov,
54
- size_t offset, size_t len,
55
- size_t *head, size_t *tail, int *niov)
56
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
57
+ size_t offset, size_t len,
58
+ size_t *head, size_t *tail, int *niov)
59
{
60
struct iovec *iov, *end_iov;
61
62
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
63
size_t head, tail;
64
int niov;
65
66
- qiov_slice(qiov, offset, len, &head, &tail, &niov);
67
+ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);
68
69
return niov;
70
}
71
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_init_extended(
72
}
73
74
if (mid_len) {
75
- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
76
- &mid_head, &mid_tail, &mid_niov);
77
+ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len,
78
+ &mid_head, &mid_tail, &mid_niov);
79
}
80
81
total_niov = !!head_len + mid_niov + !!tail_len;
82
--
83
2.40.1
diff view generated by jsdifflib
1
From: Ashish Mittal <ashmit602@gmail.com>
1
When processing vectored guest requests that are not aligned to the
2
2
storage request alignment, we pad them by adding head and/or tail
3
These changes use a vxhs test server that is a part of the following
3
buffers for a read-modify-write cycle.
4
repository:
4
5
https://github.com/VeritasHyperScale/libqnio.git
5
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
6
6
with this padding, the vector can exceed that limit. As of
7
Signed-off-by: Ashish Mittal <Ashish.Mittal@veritas.com>
7
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
9
Reviewed-by: Jeff Cody <jcody@redhat.com>
9
limit, instead returning an error to the guest.
10
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
11
Message-id: 1491277689-24949-3-git-send-email-Ashish.Mittal@veritas.com
11
To the guest, this appears as a random I/O error. We should not return
12
an I/O error to the guest when it issued a perfectly valid request.
13
14
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
15
longer than IOV_MAX, which generally seems to work (because the guest
16
assumes a smaller alignment than we really have, file-posix's
17
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
18
so emulate the request, so that the IOV_MAX does not matter). However,
19
that does not seem exactly great.
20
21
I see two ways to fix this problem:
22
1. We split such long requests into two requests.
23
2. We join some elements of the vector into new buffers to make it
24
shorter.
25
26
I am wary of (1), because it seems like it may have unintended side
27
effects.
28
29
(2) on the other hand seems relatively simple to implement, with
30
hopefully few side effects, so this patch does that.
31
32
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
33
is effectively replaced by the new function bdrv_create_padded_qiov(),
34
which not only wraps the request IOV with padding head/tail, but also
35
ensures that the resulting vector will not have more than IOV_MAX
36
elements. Putting that functionality into qemu_iovec_init_extended() is
37
infeasible because it requires allocating a bounce buffer; doing so
38
would require many more parameters (buffer alignment, how to initialize
39
the buffer, and out parameters like the buffer, its length, and the
40
original elements), which is not reasonable.
41
42
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
43
functionality into bdrv_create_padded_qiov() by using public
44
qemu_iovec_* functions, so that is what this patch does.
45
46
Because bdrv_pad_request() was the only "serious" user of
47
qemu_iovec_init_extended(), the next patch will remove the latter
48
function, so the functionality is not implemented twice.
49
50
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
51
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
52
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
53
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
12
---
54
---
13
tests/qemu-iotests/common | 6 ++++++
55
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
14
tests/qemu-iotests/common.config | 13 +++++++++++++
56
1 file changed, 151 insertions(+), 15 deletions(-)
15
tests/qemu-iotests/common.filter | 1 +
57
16
tests/qemu-iotests/common.rc | 19 +++++++++++++++++++
58
diff --git a/block/io.c b/block/io.c
17
4 files changed, 39 insertions(+)
18
19
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
20
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
21
--- a/tests/qemu-iotests/common
60
--- a/block/io.c
22
+++ b/tests/qemu-iotests/common
61
+++ b/block/io.c
23
@@ -XXX,XX +XXX,XX @@ check options
62
@@ -XXX,XX +XXX,XX @@ out:
24
-ssh test ssh
63
* @merge_reads is true for small requests,
25
-nfs test nfs
64
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
26
-luks test luks
65
* head and tail exist but @buf_len == align and @tail_buf == @buf.
27
+ -vxhs test vxhs
66
+ *
28
-xdiff graphical mode diff
67
+ * @write is true for write requests, false for read requests.
29
-nocache use O_DIRECT on backing file
68
+ *
30
-misalign misalign memory allocations
69
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
31
@@ -XXX,XX +XXX,XX @@ testlist options
70
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
32
xpand=false
71
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
33
;;
72
+ * I/O vector elements so for read requests, the data can be copied back after
34
73
+ * the read is done.
35
+ -vxhs)
74
*/
36
+ IMGPROTO=vxhs
75
typedef struct BdrvRequestPadding {
37
+ xpand=false
76
uint8_t *buf;
38
+ ;;
77
@@ -XXX,XX +XXX,XX @@ typedef struct BdrvRequestPadding {
39
+
78
size_t head;
40
-ssh)
79
size_t tail;
41
IMGPROTO=ssh
80
bool merge_reads;
42
xpand=false
81
+ bool write;
43
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
82
QEMUIOVector local_qiov;
44
index XXXXXXX..XXXXXXX 100644
83
+
45
--- a/tests/qemu-iotests/common.config
84
+ uint8_t *collapse_bounce_buf;
46
+++ b/tests/qemu-iotests/common.config
85
+ size_t collapse_len;
47
@@ -XXX,XX +XXX,XX @@ if [ -z "$QEMU_NBD_PROG" ]; then
86
+ QEMUIOVector pre_collapse_qiov;
48
export QEMU_NBD_PROG="`set_prog_path qemu-nbd`"
87
} BdrvRequestPadding;
49
fi
88
50
89
static bool bdrv_init_padding(BlockDriverState *bs,
51
+if [ -z "$QEMU_VXHS_PROG" ]; then
90
int64_t offset, int64_t bytes,
52
+ export QEMU_VXHS_PROG="`set_prog_path qnio_server`"
91
+ bool write,
53
+fi
92
BdrvRequestPadding *pad)
54
+
55
_qemu_wrapper()
56
{
93
{
57
(
94
int64_t align = bs->bl.request_alignment;
58
@@ -XXX,XX +XXX,XX @@ _qemu_nbd_wrapper()
95
@@ -XXX,XX +XXX,XX @@ static bool bdrv_init_padding(BlockDriverState *bs,
59
)
96
pad->tail_buf = pad->buf + pad->buf_len - align;
97
}
98
99
+ pad->write = write;
100
+
101
return true;
60
}
102
}
61
103
62
+_qemu_vxhs_wrapper()
104
@@ -XXX,XX +XXX,XX @@ zero_mem:
105
return 0;
106
}
107
108
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
109
+/**
110
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
111
+ */
112
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
113
{
114
+ if (pad->collapse_bounce_buf) {
115
+ if (!pad->write) {
116
+ /*
117
+ * If padding required elements in the vector to be collapsed into a
118
+ * bounce buffer, copy the bounce buffer content back
119
+ */
120
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
121
+ pad->collapse_bounce_buf, pad->collapse_len);
122
+ }
123
+ qemu_vfree(pad->collapse_bounce_buf);
124
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
125
+ }
126
if (pad->buf) {
127
qemu_vfree(pad->buf);
128
qemu_iovec_destroy(&pad->local_qiov);
129
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
130
memset(pad, 0, sizeof(*pad));
131
}
132
133
+/*
134
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
135
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
136
+ *
137
+ * To ensure this, when necessary, the first two or three elements of @iov are
138
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
139
+ * bounce buffer in pad->local_qiov.
140
+ *
141
+ * After performing a read request, the data from the bounce buffer must be
142
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
143
+ */
144
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
145
+ BdrvRequestPadding *pad,
146
+ struct iovec *iov, int niov,
147
+ size_t iov_offset, size_t bytes)
63
+{
148
+{
64
+ (
149
+ int padded_niov, surplus_count, collapse_count;
65
+ echo $BASHPID > "${TEST_DIR}/qemu-vxhs.pid"
150
+
66
+ exec "$QEMU_VXHS_PROG" $QEMU_VXHS_OPTIONS "$@"
151
+ /* Assert this invariant */
67
+ )
152
+ assert(niov <= IOV_MAX);
153
+
154
+ /*
155
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
156
+ * to the guest is not ideal, but there is little else we can do. At least
157
+ * this will practically never happen on 64-bit systems.
158
+ */
159
+ if (SIZE_MAX - pad->head < bytes ||
160
+ SIZE_MAX - pad->head - bytes < pad->tail)
161
+ {
162
+ return -EINVAL;
163
+ }
164
+
165
+ /* Length of the resulting IOV if we just concatenated everything */
166
+ padded_niov = !!pad->head + niov + !!pad->tail;
167
+
168
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
169
+
170
+ if (pad->head) {
171
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
172
+ }
173
+
174
+ /*
175
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
176
+ * Instead, merge the first two or three elements of @iov to reduce the
177
+ * number of vector elements as necessary.
178
+ */
179
+ if (padded_niov > IOV_MAX) {
180
+ /*
181
+ * Only head and tail can have led to the number of entries exceeding
182
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
183
+ * to reduce the number of elements by `surplus_count`, so we merge that
184
+ * many elements plus one into one element.
185
+ */
186
+ surplus_count = padded_niov - IOV_MAX;
187
+ assert(surplus_count <= !!pad->head + !!pad->tail);
188
+ collapse_count = surplus_count + 1;
189
+
190
+ /*
191
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
192
+ * advance `iov` (and associated variables) by those elements.
193
+ */
194
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
195
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
196
+ collapse_count, iov_offset, SIZE_MAX);
197
+ iov += collapse_count;
198
+ iov_offset = 0;
199
+ niov -= collapse_count;
200
+ bytes -= pad->pre_collapse_qiov.size;
201
+
202
+ /*
203
+ * Construct the bounce buffer to match the length of the to-collapse
204
+ * vector elements, and for write requests, initialize it with the data
205
+ * from those elements. Then add it to `pad->local_qiov`.
206
+ */
207
+ pad->collapse_len = pad->pre_collapse_qiov.size;
208
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
209
+ if (pad->write) {
210
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
211
+ pad->collapse_bounce_buf, pad->collapse_len);
212
+ }
213
+ qemu_iovec_add(&pad->local_qiov,
214
+ pad->collapse_bounce_buf, pad->collapse_len);
215
+ }
216
+
217
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
218
+
219
+ if (pad->tail) {
220
+ qemu_iovec_add(&pad->local_qiov,
221
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
222
+ }
223
+
224
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
225
+ return 0;
68
+}
226
+}
69
+
227
+
70
export QEMU=_qemu_wrapper
228
/*
71
export QEMU_IMG=_qemu_img_wrapper
229
* bdrv_pad_request
72
export QEMU_IO=_qemu_io_wrapper
230
*
73
export QEMU_NBD=_qemu_nbd_wrapper
231
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
74
+export QEMU_VXHS=_qemu_vxhs_wrapper
232
* read of padding, bdrv_padding_rmw_read() should be called separately if
75
233
* needed.
76
QEMU_IMG_EXTRA_ARGS=
234
*
77
if [ "$IMGOPTSSYNTAX" = "true" ]; then
235
+ * @write is true for write requests, false for read requests.
78
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
236
+ *
79
index XXXXXXX..XXXXXXX 100644
237
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
80
--- a/tests/qemu-iotests/common.filter
238
* - on function start they represent original request
81
+++ b/tests/qemu-iotests/common.filter
239
* - on failure or when padding is not needed they are unchanged
82
@@ -XXX,XX +XXX,XX @@ _filter_img_info()
240
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
83
-e "s#$TEST_DIR#TEST_DIR#g" \
241
static int bdrv_pad_request(BlockDriverState *bs,
84
-e "s#$IMGFMT#IMGFMT#g" \
242
QEMUIOVector **qiov, size_t *qiov_offset,
85
-e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \
243
int64_t *offset, int64_t *bytes,
86
+ -e 's#json.*vdisk-id.*vxhs"}}#TEST_DIR/t.IMGFMT#' \
244
+ bool write,
87
-e "/encrypted: yes/d" \
245
BdrvRequestPadding *pad, bool *padded,
88
-e "/cluster_size: [0-9]\\+/d" \
246
BdrvRequestFlags *flags)
89
-e "/table_size: [0-9]\\+/d" \
247
{
90
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
248
int ret;
91
index XXXXXXX..XXXXXXX 100644
249
+ struct iovec *sliced_iov;
92
--- a/tests/qemu-iotests/common.rc
250
+ int sliced_niov;
93
+++ b/tests/qemu-iotests/common.rc
251
+ size_t sliced_head, sliced_tail;
94
@@ -XXX,XX +XXX,XX @@ else
252
95
elif [ "$IMGPROTO" = "nfs" ]; then
253
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
96
TEST_DIR="nfs://127.0.0.1/$TEST_DIR"
254
97
TEST_IMG=$TEST_DIR/t.$IMGFMT
255
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
98
+ elif [ "$IMGPROTO" = "vxhs" ]; then
256
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
99
+ TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
257
if (padded) {
100
+ TEST_IMG="vxhs://127.0.0.1:9999/t.$IMGFMT"
258
*padded = false;
101
else
259
}
102
TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
260
return 0;
103
fi
261
}
104
@@ -XXX,XX +XXX,XX @@ _make_test_img()
262
105
eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE >/dev/null &"
263
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
106
sleep 1 # FIXME: qemu-nbd needs to be listening before we continue
264
- *qiov, *qiov_offset, *bytes,
107
fi
265
- pad->buf + pad->buf_len - pad->tail,
108
+
266
- pad->tail);
109
+ # Start QNIO server on image directory for vxhs protocol
267
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
110
+ if [ $IMGPROTO = "vxhs" ]; then
268
+ &sliced_head, &sliced_tail,
111
+ eval "$QEMU_VXHS -d $TEST_DIR > /dev/null &"
269
+ &sliced_niov);
112
+ sleep 1 # Wait for server to come up.
270
+
113
+ fi
271
+ /* Guaranteed by bdrv_check_qiov_request() */
272
+ assert(*bytes <= SIZE_MAX);
273
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
274
+ sliced_head, *bytes);
275
if (ret < 0) {
276
- bdrv_padding_destroy(pad);
277
+ bdrv_padding_finalize(pad);
278
return ret;
279
}
280
*bytes += pad->head + pad->tail;
281
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
282
flags |= BDRV_REQ_COPY_ON_READ;
283
}
284
285
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
286
- NULL, &flags);
287
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
288
+ &pad, NULL, &flags);
289
if (ret < 0) {
290
goto fail;
291
}
292
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
293
bs->bl.request_alignment,
294
qiov, qiov_offset, flags);
295
tracked_request_end(&req);
296
- bdrv_padding_destroy(&pad);
297
+ bdrv_padding_finalize(&pad);
298
299
fail:
300
bdrv_dec_in_flight(bs);
301
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
302
/* This flag doesn't make sense for padding or zero writes */
303
flags &= ~BDRV_REQ_REGISTERED_BUF;
304
305
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
306
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
307
if (padding) {
308
assert(!(flags & BDRV_REQ_NO_WAIT));
309
bdrv_make_request_serialising(req, align);
310
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
311
}
312
313
out:
314
- bdrv_padding_destroy(&pad);
315
+ bdrv_padding_finalize(&pad);
316
317
return ret;
114
}
318
}
115
319
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
116
_rm_test_img()
320
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
117
@@ -XXX,XX +XXX,XX @@ _cleanup_test_img()
321
* alignment only if there is no ZERO flag.
118
fi
322
*/
119
rm -f "$TEST_IMG_FILE"
323
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
120
;;
324
- &padded, &flags);
121
+ vxhs)
325
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
122
+ if [ -f "${TEST_DIR}/qemu-vxhs.pid" ]; then
326
+ &pad, &padded, &flags);
123
+ local QEMU_VXHS_PID
327
if (ret < 0) {
124
+ read QEMU_VXHS_PID < "${TEST_DIR}/qemu-vxhs.pid"
328
return ret;
125
+ kill ${QEMU_VXHS_PID} >/dev/null 2>&1
329
}
126
+ rm -f "${TEST_DIR}/qemu-vxhs.pid"
330
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
127
+ fi
331
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
128
+ rm -f "$TEST_IMG_FILE"
332
qiov, qiov_offset, flags);
129
+ ;;
333
130
+
334
- bdrv_padding_destroy(&pad);
131
file)
335
+ bdrv_padding_finalize(&pad);
132
_rm_test_img "$TEST_DIR/t.$IMGFMT"
336
133
_rm_test_img "$TEST_DIR/t.$IMGFMT.orig"
337
out:
338
tracked_request_end(&req);
134
--
339
--
135
2.9.3
340
2.40.1
136
137
diff view generated by jsdifflib
1
Move bdrv_is_read_only() up with its friends.
1
bdrv_pad_request() was the main user of qemu_iovec_init_extended().
2
HEAD^ has removed that use, so we can remove qemu_iovec_init_extended()
3
now.
2
4
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
The only remaining user is qemu_iovec_init_slice(), which can easily
4
Reviewed-by: John Snow <jsnow@redhat.com>
6
inline the small part it really needs.
5
Signed-off-by: Jeff Cody <jcody@redhat.com>
7
6
Message-id: 73b2399459760c32506f9407efb9dddb3a2789de.1491597120.git.jcody@redhat.com
8
Note that qemu_iovec_init_extended() offered a memcpy() optimization to
9
initialize the new I/O vector. qemu_iovec_concat_iov(), which is used
10
to replace its functionality, does not, but calls qemu_iovec_add() for
11
every single element. If we decide this optimization was important, we
12
will need to re-implement it in qemu_iovec_concat_iov(), which might
13
also benefit its pre-existing users.
14
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
17
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
18
Message-Id: <20230411173418.19549-4-hreitz@redhat.com>
7
---
19
---
8
block.c | 10 +++++-----
20
include/qemu/iov.h | 5 ---
9
1 file changed, 5 insertions(+), 5 deletions(-)
21
util/iov.c | 79 +++++++---------------------------------------
22
2 files changed, 11 insertions(+), 73 deletions(-)
10
23
11
diff --git a/block.c b/block.c
24
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
12
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
13
--- a/block.c
26
--- a/include/qemu/iov.h
14
+++ b/block.c
27
+++ b/include/qemu/iov.h
15
@@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size,
28
@@ -XXX,XX +XXX,XX @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
16
}
29
30
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
31
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
32
-int qemu_iovec_init_extended(
33
- QEMUIOVector *qiov,
34
- void *head_buf, size_t head_len,
35
- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
36
- void *tail_buf, size_t tail_len);
37
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
38
size_t offset, size_t len);
39
struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
40
diff --git a/util/iov.c b/util/iov.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/util/iov.c
43
+++ b/util/iov.c
44
@@ -XXX,XX +XXX,XX @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
45
return niov;
17
}
46
}
18
47
19
+bool bdrv_is_read_only(BlockDriverState *bs)
48
-/*
20
+{
49
- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
21
+ return bs->read_only;
50
- * and @tail_buf buffer into new qiov.
22
+}
51
- */
23
+
52
-int qemu_iovec_init_extended(
24
int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
53
- QEMUIOVector *qiov,
25
{
54
- void *head_buf, size_t head_len,
26
/* Do not set read_only if copy_on_read is enabled */
55
- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
27
@@ -XXX,XX +XXX,XX @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
56
- void *tail_buf, size_t tail_len)
28
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
29
}
30
31
-bool bdrv_is_read_only(BlockDriverState *bs)
32
-{
57
-{
33
- return bs->read_only;
58
- size_t mid_head, mid_tail;
59
- int total_niov, mid_niov = 0;
60
- struct iovec *p, *mid_iov = NULL;
61
-
62
- assert(mid_qiov->niov <= IOV_MAX);
63
-
64
- if (SIZE_MAX - head_len < mid_len ||
65
- SIZE_MAX - head_len - mid_len < tail_len)
66
- {
67
- return -EINVAL;
68
- }
69
-
70
- if (mid_len) {
71
- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len,
72
- &mid_head, &mid_tail, &mid_niov);
73
- }
74
-
75
- total_niov = !!head_len + mid_niov + !!tail_len;
76
- if (total_niov > IOV_MAX) {
77
- return -EINVAL;
78
- }
79
-
80
- if (total_niov == 1) {
81
- qemu_iovec_init_buf(qiov, NULL, 0);
82
- p = &qiov->local_iov;
83
- } else {
84
- qiov->niov = qiov->nalloc = total_niov;
85
- qiov->size = head_len + mid_len + tail_len;
86
- p = qiov->iov = g_new(struct iovec, qiov->niov);
87
- }
88
-
89
- if (head_len) {
90
- p->iov_base = head_buf;
91
- p->iov_len = head_len;
92
- p++;
93
- }
94
-
95
- assert(!mid_niov == !mid_len);
96
- if (mid_niov) {
97
- memcpy(p, mid_iov, mid_niov * sizeof(*p));
98
- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
99
- p[0].iov_len -= mid_head;
100
- p[mid_niov - 1].iov_len -= mid_tail;
101
- p += mid_niov;
102
- }
103
-
104
- if (tail_len) {
105
- p->iov_base = tail_buf;
106
- p->iov_len = tail_len;
107
- }
108
-
109
- return 0;
34
-}
110
-}
35
-
111
-
36
bool bdrv_is_sg(BlockDriverState *bs)
112
/*
113
* Check if the contents of subrange of qiov data is all zeroes.
114
*/
115
@@ -XXX,XX +XXX,XX @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
116
void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
117
size_t offset, size_t len)
37
{
118
{
38
return bs->sg;
119
- int ret;
120
+ struct iovec *slice_iov;
121
+ int slice_niov;
122
+ size_t slice_head, slice_tail;
123
124
assert(source->size >= len);
125
assert(source->size - len >= offset);
126
127
- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */
128
- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
129
- assert(ret == 0);
130
+ slice_iov = qemu_iovec_slice(source, offset, len,
131
+ &slice_head, &slice_tail, &slice_niov);
132
+ if (slice_niov == 1) {
133
+ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len);
134
+ } else {
135
+ qemu_iovec_init(qiov, slice_niov);
136
+ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len);
137
+ }
138
}
139
140
void qemu_iovec_destroy(QEMUIOVector *qiov)
39
--
141
--
40
2.9.3
142
2.40.1
41
42
diff view generated by jsdifflib
New patch
1
Test that even vectored IO requests with 1024 vector elements that are
2
not aligned to the device's request alignment will succeed.
1
3
4
Reviewed-by: Eric Blake <eblake@redhat.com>
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
6
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
7
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
8
---
9
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++
10
tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
11
2 files changed, 144 insertions(+)
12
create mode 100755 tests/qemu-iotests/tests/iov-padding
13
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
14
15
diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
16
new file mode 100755
17
index XXXXXXX..XXXXXXX
18
--- /dev/null
19
+++ b/tests/qemu-iotests/tests/iov-padding
20
@@ -XXX,XX +XXX,XX @@
21
+#!/usr/bin/env bash
22
+# group: rw quick
23
+#
24
+# Check the interaction of request padding (to fit alignment restrictions) with
25
+# vectored I/O from the guest
26
+#
27
+# Copyright Red Hat
28
+#
29
+# This program is free software; you can redistribute it and/or modify
30
+# it under the terms of the GNU General Public License as published by
31
+# the Free Software Foundation; either version 2 of the License, or
32
+# (at your option) any later version.
33
+#
34
+# This program is distributed in the hope that it will be useful,
35
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
36
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37
+# GNU General Public License for more details.
38
+#
39
+# You should have received a copy of the GNU General Public License
40
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
41
+#
42
+
43
+seq=$(basename $0)
44
+echo "QA output created by $seq"
45
+
46
+status=1    # failure is the default!
47
+
48
+_cleanup()
49
+{
50
+ _cleanup_test_img
51
+}
52
+trap "_cleanup; exit \$status" 0 1 2 3 15
53
+
54
+# get standard environment, filters and checks
55
+cd ..
56
+. ./common.rc
57
+. ./common.filter
58
+
59
+_supported_fmt raw
60
+_supported_proto file
61
+
62
+_make_test_img 1M
63
+
64
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
65
+
66
+# Four combinations:
67
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
68
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
69
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
70
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
71
+for start_offset in 4096 512; do
72
+ for last_element_length in 512 4096; do
73
+ length=$((1023 * 512 + $last_element_length))
74
+
75
+ echo
76
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
77
+
78
+ # Fill with data for testing
79
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
80
+
81
+ # 1023 512-byte buffers, and then one with length $last_element_length
82
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
83
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
84
+ -c "writev $cmd_params" \
85
+ --image-opts \
86
+ "$IMGSPEC" \
87
+ | _filter_qemu_io
88
+
89
+ # Read all patterns -- read the part we just wrote with writev twice,
90
+ # once "normally", and once with a readv, so we see that that works, too
91
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
92
+ -c "read -P 1 0 $start_offset" \
93
+ -c "read -P 2 $start_offset $length" \
94
+ -c "readv $cmd_params" \
95
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
96
+ --image-opts \
97
+ "$IMGSPEC" \
98
+ | _filter_qemu_io
99
+ done
100
+done
101
+
102
+# success, all done
103
+echo "*** done"
104
+rm -f $seq.full
105
+status=0
106
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
107
new file mode 100644
108
index XXXXXXX..XXXXXXX
109
--- /dev/null
110
+++ b/tests/qemu-iotests/tests/iov-padding.out
111
@@ -XXX,XX +XXX,XX @@
112
+QA output created by iov-padding
113
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
114
+
115
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
116
+wrote 1048576/1048576 bytes at offset 0
117
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
118
+wrote 524288/524288 bytes at offset 4096
119
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
120
+read 4096/4096 bytes at offset 0
121
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+read 524288/524288 bytes at offset 4096
123
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
124
+read 524288/524288 bytes at offset 4096
125
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
126
+read 520192/520192 bytes at offset 528384
127
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
128
+
129
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
130
+wrote 1048576/1048576 bytes at offset 0
131
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
132
+wrote 527872/527872 bytes at offset 4096
133
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
134
+read 4096/4096 bytes at offset 0
135
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
136
+read 527872/527872 bytes at offset 4096
137
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
138
+read 527872/527872 bytes at offset 4096
139
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
140
+read 516608/516608 bytes at offset 531968
141
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
142
+
143
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
144
+wrote 1048576/1048576 bytes at offset 0
145
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
146
+wrote 524288/524288 bytes at offset 512
147
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
148
+read 512/512 bytes at offset 0
149
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
150
+read 524288/524288 bytes at offset 512
151
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
152
+read 524288/524288 bytes at offset 512
153
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
154
+read 523776/523776 bytes at offset 524800
155
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
156
+
157
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
158
+wrote 1048576/1048576 bytes at offset 0
159
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
160
+wrote 527872/527872 bytes at offset 512
161
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
162
+read 512/512 bytes at offset 0
163
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
164
+read 527872/527872 bytes at offset 512
165
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
166
+read 527872/527872 bytes at offset 512
167
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
168
+read 520192/520192 bytes at offset 528384
169
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
170
+*** done
171
--
172
2.40.1
diff view generated by jsdifflib
1
A few block drivers will set the BDS read_only flag from their
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
.bdrv_open() function. This means the bs->read_only flag could
3
be set after we enable copy_on_read, as the BDRV_O_COPY_ON_READ
4
flag check occurs prior to the call to bdrv->bdrv_open().
5
2
6
This adds an error return to bdrv_set_read_only(), and an error will be
3
data_end field in BDRVParallelsState is set to the biggest offset present
7
returned if we try to set the BDS to read_only while copy_on_read is
4
in BAT. If this offset is outside of the image, any further write will
8
enabled.
5
create the cluster at this offset and/or the image will be truncated to
6
this offset on close. This is definitely not correct.
9
7
10
This patch also changes the behavior of vvfat. Before, vvfat could
8
Raise an error in parallels_open() if data_end points outside the image
11
override the drive 'readonly' flag with its own, internal 'rw' flag.
9
and it is not a check (let the check repair the image). Set data_end
10
to the end of the cluster with the last correct offset.
12
11
13
For instance, this -drive parameter would result in a writable image:
12
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
13
Message-Id: <20230424093147.197643-2-alexander.ivanov@virtuozzo.com>
14
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
15
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
16
---
17
block/parallels.c | 17 +++++++++++++++++
18
1 file changed, 17 insertions(+)
14
19
15
"-drive format=vvfat,dir=/tmp/vvfat,rw,if=virtio,readonly=on"
20
diff --git a/block/parallels.c b/block/parallels.c
16
17
This is not correct. Now, attempting to use the above -drive parameter
18
will result in an error (i.e., 'rw' is incompatible with 'readonly=on').
19
20
Signed-off-by: Jeff Cody <jcody@redhat.com>
21
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
22
Reviewed-by: John Snow <jsnow@redhat.com>
23
Message-id: 0c5b4c1cc2c651471b131f21376dfd5ea24d2196.1491597120.git.jcody@redhat.com
24
---
25
block.c | 10 +++++++++-
26
block/bochs.c | 5 ++++-
27
block/cloop.c | 5 ++++-
28
block/dmg.c | 6 +++++-
29
block/rbd.c | 11 ++++++++++-
30
block/vvfat.c | 19 +++++++++++++++----
31
include/block/block.h | 2 +-
32
7 files changed, 48 insertions(+), 10 deletions(-)
33
34
diff --git a/block.c b/block.c
35
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
36
--- a/block.c
22
--- a/block/parallels.c
37
+++ b/block.c
23
+++ b/block/parallels.c
38
@@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size,
24
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
25
BDRVParallelsState *s = bs->opaque;
26
ParallelsHeader ph;
27
int ret, size, i;
28
+ int64_t file_nb_sectors;
29
QemuOpts *opts = NULL;
30
Error *local_err = NULL;
31
char *buf;
32
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
33
return ret;
39
}
34
}
40
}
35
41
36
+ file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
42
-void bdrv_set_read_only(BlockDriverState *bs, bool read_only)
37
+ if (file_nb_sectors < 0) {
43
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
44
{
45
+ /* Do not set read_only if copy_on_read is enabled */
46
+ if (bs->copy_on_read && read_only) {
47
+ error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
48
+ bdrv_get_device_or_node_name(bs));
49
+ return -EINVAL;
38
+ return -EINVAL;
50
+ }
39
+ }
51
+
40
+
52
bs->read_only = read_only;
41
ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
53
+ return 0;
54
}
55
56
void bdrv_get_full_backing_filename_from_filename(const char *backed,
57
diff --git a/block/bochs.c b/block/bochs.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/block/bochs.c
60
+++ b/block/bochs.c
61
@@ -XXX,XX +XXX,XX @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
62
return -EINVAL;
63
}
64
65
- bdrv_set_read_only(bs, true); /* no write support yet */
66
+ ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
67
+ if (ret < 0) {
68
+ return ret;
69
+ }
70
71
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
72
if (ret < 0) {
42
if (ret < 0) {
73
diff --git a/block/cloop.c b/block/cloop.c
43
goto fail;
74
index XXXXXXX..XXXXXXX 100644
44
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
75
--- a/block/cloop.c
45
76
+++ b/block/cloop.c
46
for (i = 0; i < s->bat_size; i++) {
77
@@ -XXX,XX +XXX,XX @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
47
int64_t off = bat2sect(s, i);
78
return -EINVAL;
48
+ if (off >= file_nb_sectors) {
79
}
49
+ if (flags & BDRV_O_CHECK) {
80
50
+ continue;
81
- bdrv_set_read_only(bs, true);
82
+ ret = bdrv_set_read_only(bs, true, errp);
83
+ if (ret < 0) {
84
+ return ret;
85
+ }
86
87
/* read header */
88
ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
89
diff --git a/block/dmg.c b/block/dmg.c
90
index XXXXXXX..XXXXXXX 100644
91
--- a/block/dmg.c
92
+++ b/block/dmg.c
93
@@ -XXX,XX +XXX,XX @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
94
return -EINVAL;
95
}
96
97
+ ret = bdrv_set_read_only(bs, true, errp);
98
+ if (ret < 0) {
99
+ return ret;
100
+ }
101
+
102
block_module_load_one("dmg-bz2");
103
- bdrv_set_read_only(bs, true);
104
105
s->n_chunks = 0;
106
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
107
diff --git a/block/rbd.c b/block/rbd.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/block/rbd.c
110
+++ b/block/rbd.c
111
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
112
goto failed_shutdown;
113
}
114
115
+ /* rbd_open is always r/w */
116
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
117
if (r < 0) {
118
error_setg_errno(errp, -r, "error reading header from %s", s->name);
119
goto failed_open;
120
}
121
122
- bdrv_set_read_only(bs, (s->snap != NULL));
123
+ /* If we are using an rbd snapshot, we must be r/o, otherwise
124
+ * leave as-is */
125
+ if (s->snap != NULL) {
126
+ r = bdrv_set_read_only(bs, true, &local_err);
127
+ if (r < 0) {
128
+ error_propagate(errp, local_err);
129
+ goto failed_open;
130
+ }
131
+ }
132
133
qemu_opts_del(opts);
134
return 0;
135
diff --git a/block/vvfat.c b/block/vvfat.c
136
index XXXXXXX..XXXXXXX 100644
137
--- a/block/vvfat.c
138
+++ b/block/vvfat.c
139
@@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
140
141
s->current_cluster=0xffffffff;
142
143
- /* read only is the default for safety */
144
- bdrv_set_read_only(bs, true);
145
s->qcow = NULL;
146
s->qcow_filename = NULL;
147
s->fat2 = NULL;
148
@@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
149
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
150
151
if (qemu_opt_get_bool(opts, "rw", false)) {
152
- ret = enable_write_target(bs, errp);
153
+ if (!bdrv_is_read_only(bs)) {
154
+ ret = enable_write_target(bs, errp);
155
+ if (ret < 0) {
156
+ goto fail;
157
+ }
51
+ }
158
+ } else {
52
+ error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
159
+ ret = -EPERM;
53
+ "is larger than file size (%" PRIi64 ")",
160
+ error_setg(errp,
54
+ off << BDRV_SECTOR_BITS, i,
161
+ "Unable to set VVFAT to 'rw' when drive is read-only");
55
+ file_nb_sectors << BDRV_SECTOR_BITS);
56
+ ret = -EINVAL;
162
+ goto fail;
57
+ goto fail;
163
+ }
58
+ }
164
+ } else {
59
if (off >= s->data_end) {
165
+ /* read only is the default for safety */
60
s->data_end = off + s->tracks;
166
+ ret = bdrv_set_read_only(bs, true, &local_err);
167
if (ret < 0) {
168
+ error_propagate(errp, local_err);
169
goto fail;
170
}
61
}
171
- bdrv_set_read_only(bs, false);
172
}
173
174
bs->total_sectors = cyls * heads * secs;
175
diff --git a/include/block/block.h b/include/block/block.h
176
index XXXXXXX..XXXXXXX 100644
177
--- a/include/block/block.h
178
+++ b/include/block/block.h
179
@@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
180
int64_t sector_num, int nb_sectors, int *pnum);
181
182
bool bdrv_is_read_only(BlockDriverState *bs);
183
-void bdrv_set_read_only(BlockDriverState *bs, bool read_only);
184
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
185
bool bdrv_is_sg(BlockDriverState *bs);
186
bool bdrv_is_inserted(BlockDriverState *bs);
187
int bdrv_media_changed(BlockDriverState *bs);
188
--
62
--
189
2.9.3
63
2.40.1
190
191
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Don't let high_off be more than the file size even if we don't fix the
4
image.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
9
Message-Id: <20230424093147.197643-3-alexander.ivanov@virtuozzo.com>
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
13
block/parallels.c | 4 ++--
14
1 file changed, 2 insertions(+), 2 deletions(-)
15
16
diff --git a/block/parallels.c b/block/parallels.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/parallels.c
19
+++ b/block/parallels.c
20
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
21
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
22
res->corruptions++;
23
if (fix & BDRV_FIX_ERRORS) {
24
- prev_off = 0;
25
s->bat_bitmap[i] = 0;
26
res->corruptions_fixed++;
27
flush_bat = true;
28
- continue;
29
}
30
+ prev_off = 0;
31
+ continue;
32
}
33
34
res->bfi.allocated_clusters++;
35
--
36
2.40.1
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Set data_end to the end of the last cluster inside the image. In such a
4
way we can be sure that corrupted offsets in the BAT can't affect the
5
image size. If there are no allocated clusters set image_end_offset by
6
data_end.
7
8
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Denis V. Lunev <den@openvz.org>
10
Message-Id: <20230424093147.197643-4-alexander.ivanov@virtuozzo.com>
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
---
14
block/parallels.c | 8 +++++++-
15
1 file changed, 7 insertions(+), 1 deletion(-)
16
17
diff --git a/block/parallels.c b/block/parallels.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/parallels.c
20
+++ b/block/parallels.c
21
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
22
}
23
}
24
25
- res->image_end_offset = high_off + s->cluster_size;
26
+ if (high_off == 0) {
27
+ res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
28
+ } else {
29
+ res->image_end_offset = high_off + s->cluster_size;
30
+ s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
31
+ }
32
+
33
if (size > res->image_end_offset) {
34
int64_t count;
35
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
36
--
37
2.40.1
diff view generated by jsdifflib
1
We have a helper wrapper for checking for the BDS read_only flag,
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
add a helper wrapper to set the read_only flag as well.
3
2
4
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
3
This helper will be reused in next patches during parallels_co_check
5
Signed-off-by: Jeff Cody <jcody@redhat.com>
4
rework to simplify its code.
6
Reviewed-by: John Snow <jsnow@redhat.com>
5
7
Message-id: 9b18972d05f5fa2ac16c014f0af98d680553048d.1491597120.git.jcody@redhat.com
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
9
Message-Id: <20230424093147.197643-5-alexander.ivanov@virtuozzo.com>
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
8
---
12
---
9
block.c | 5 +++++
13
block/parallels.c | 11 ++++++++---
10
block/bochs.c | 2 +-
14
1 file changed, 8 insertions(+), 3 deletions(-)
11
block/cloop.c | 2 +-
12
block/dmg.c | 2 +-
13
block/rbd.c | 2 +-
14
block/vvfat.c | 4 ++--
15
include/block/block.h | 1 +
16
7 files changed, 12 insertions(+), 6 deletions(-)
17
15
18
diff --git a/block.c b/block.c
16
diff --git a/block/parallels.c b/block/parallels.c
19
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
20
--- a/block.c
18
--- a/block/parallels.c
21
+++ b/block.c
19
+++ b/block/parallels.c
22
@@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size,
20
@@ -XXX,XX +XXX,XX @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
23
}
21
return start_off;
24
}
22
}
25
23
26
+void bdrv_set_read_only(BlockDriverState *bs, bool read_only)
24
+static void parallels_set_bat_entry(BDRVParallelsState *s,
25
+ uint32_t index, uint32_t offset)
27
+{
26
+{
28
+ bs->read_only = read_only;
27
+ s->bat_bitmap[index] = cpu_to_le32(offset);
28
+ bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
29
+}
29
+}
30
+
30
+
31
void bdrv_get_full_backing_filename_from_filename(const char *backed,
31
static int64_t coroutine_fn GRAPH_RDLOCK
32
const char *backing,
32
allocate_clusters(BlockDriverState *bs, int64_t sector_num,
33
char *dest, size_t sz,
33
int nb_sectors, int *pnum)
34
diff --git a/block/bochs.c b/block/bochs.c
34
@@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
35
index XXXXXXX..XXXXXXX 100644
36
--- a/block/bochs.c
37
+++ b/block/bochs.c
38
@@ -XXX,XX +XXX,XX @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
39
return -EINVAL;
40
}
35
}
41
36
42
- bs->read_only = true; /* no write support yet */
37
for (i = 0; i < to_allocate; i++) {
43
+ bdrv_set_read_only(bs, true); /* no write support yet */
38
- s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
44
39
+ parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
45
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
40
s->data_end += s->tracks;
46
if (ret < 0) {
41
- bitmap_set(s->bat_dirty_bmap,
47
diff --git a/block/cloop.c b/block/cloop.c
42
- bat_entry_off(idx + i) / s->bat_dirty_block, 1);
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/cloop.c
50
+++ b/block/cloop.c
51
@@ -XXX,XX +XXX,XX @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
52
return -EINVAL;
53
}
43
}
54
44
55
- bs->read_only = true;
45
return bat2sect(s, idx) + sector_num % s->tracks;
56
+ bdrv_set_read_only(bs, true);
57
58
/* read header */
59
ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
60
diff --git a/block/dmg.c b/block/dmg.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/block/dmg.c
63
+++ b/block/dmg.c
64
@@ -XXX,XX +XXX,XX @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
65
}
66
67
block_module_load_one("dmg-bz2");
68
- bs->read_only = true;
69
+ bdrv_set_read_only(bs, true);
70
71
s->n_chunks = 0;
72
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
73
diff --git a/block/rbd.c b/block/rbd.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/block/rbd.c
76
+++ b/block/rbd.c
77
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
78
goto failed_open;
79
}
80
81
- bs->read_only = (s->snap != NULL);
82
+ bdrv_set_read_only(bs, (s->snap != NULL));
83
84
qemu_opts_del(opts);
85
return 0;
86
diff --git a/block/vvfat.c b/block/vvfat.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/block/vvfat.c
89
+++ b/block/vvfat.c
90
@@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
91
s->current_cluster=0xffffffff;
92
93
/* read only is the default for safety */
94
- bs->read_only = true;
95
+ bdrv_set_read_only(bs, true);
96
s->qcow = NULL;
97
s->qcow_filename = NULL;
98
s->fat2 = NULL;
99
@@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
100
if (ret < 0) {
101
goto fail;
102
}
103
- bs->read_only = false;
104
+ bdrv_set_read_only(bs, false);
105
}
106
107
bs->total_sectors = cyls * heads * secs;
108
diff --git a/include/block/block.h b/include/block/block.h
109
index XXXXXXX..XXXXXXX 100644
110
--- a/include/block/block.h
111
+++ b/include/block/block.h
112
@@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
113
int64_t sector_num, int nb_sectors, int *pnum);
114
115
bool bdrv_is_read_only(BlockDriverState *bs);
116
+void bdrv_set_read_only(BlockDriverState *bs, bool read_only);
117
bool bdrv_is_sg(BlockDriverState *bs);
118
bool bdrv_is_inserted(BlockDriverState *bs);
119
int bdrv_media_changed(BlockDriverState *bs);
120
--
46
--
121
2.9.3
47
2.40.1
122
123
diff view generated by jsdifflib
1
Update 'clientname' to be 'user', which tracks better with both
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
the QAPI and rados variable naming.
3
2
4
Update 'name' to be 'image_name', as it indicates the rbd image.
3
BAT is written in the context of conventional operations over the image
5
Naming it 'image' would have been ideal, but we are using that for
4
inside bdrv_co_flush() when it calls parallels_co_flush_to_os() callback.
6
the rados_image_t value returned by rbd_open().
5
Thus we should not modify BAT array directly, but call
6
parallels_set_bat_entry() helper and bdrv_co_flush() further on. After
7
that there is no need to manually write BAT and track its modification.
7
8
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
This makes the code more generic and allows splitting parallels_set_bat_entry()
9
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
for independent pieces.
10
Reviewed-by: John Snow <jsnow@redhat.com>
11
11
Message-id: b7ec1fb2e1cf36f9b6911631447a5b0422590b7d.1491597120.git.jcody@redhat.com
12
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
13
Reviewed-by: Denis V. Lunev <den@openvz.org>
14
Message-Id: <20230424093147.197643-6-alexander.ivanov@virtuozzo.com>
15
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
16
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
12
---
17
---
13
block/rbd.c | 33 +++++++++++++++++----------------
18
block/parallels.c | 23 ++++++++++-------------
14
1 file changed, 17 insertions(+), 16 deletions(-)
19
1 file changed, 10 insertions(+), 13 deletions(-)
15
20
16
diff --git a/block/rbd.c b/block/rbd.c
21
diff --git a/block/parallels.c b/block/parallels.c
17
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
18
--- a/block/rbd.c
23
--- a/block/parallels.c
19
+++ b/block/rbd.c
24
+++ b/block/parallels.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVRBDState {
25
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
21
rados_t cluster;
26
{
22
rados_ioctx_t io_ctx;
27
BDRVParallelsState *s = bs->opaque;
23
rbd_image_t image;
28
int64_t size, prev_off, high_off;
24
- char *name;
29
- int ret;
25
+ char *image_name;
30
+ int ret = 0;
26
char *snap;
31
uint32_t i;
27
} BDRVRBDState;
32
- bool flush_bat = false;
28
33
29
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
34
size = bdrv_getlength(bs->file->bs);
30
int64_t bytes = 0;
35
if (size < 0) {
31
int64_t objsize;
36
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
32
int obj_order = 0;
37
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
33
- const char *pool, *name, *conf, *clientname, *keypairs;
38
res->corruptions++;
34
+ const char *pool, *image_name, *conf, *user, *keypairs;
39
if (fix & BDRV_FIX_ERRORS) {
35
const char *secretid;
40
- s->bat_bitmap[i] = 0;
36
rados_t cluster;
41
+ parallels_set_bat_entry(s, i, 0);
37
rados_ioctx_t io_ctx;
42
res->corruptions_fixed++;
38
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
43
- flush_bat = true;
39
*/
44
}
40
pool = qdict_get_try_str(options, "pool");
45
prev_off = 0;
41
conf = qdict_get_try_str(options, "conf");
46
continue;
42
- clientname = qdict_get_try_str(options, "user");
47
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
43
- name = qdict_get_try_str(options, "image");
48
prev_off = off;
44
+ user = qdict_get_try_str(options, "user");
45
+ image_name = qdict_get_try_str(options, "image");
46
keypairs = qdict_get_try_str(options, "=keyvalue-pairs");
47
48
- ret = rados_create(&cluster, clientname);
49
+ ret = rados_create(&cluster, user);
50
if (ret < 0) {
51
error_setg_errno(errp, -ret, "error initializing");
52
goto exit;
53
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
54
goto shutdown;
55
}
49
}
56
50
57
- ret = rbd_create(io_ctx, name, bytes, &obj_order);
51
- ret = 0;
58
+ ret = rbd_create(io_ctx, image_name, bytes, &obj_order);
52
- if (flush_bat) {
59
if (ret < 0) {
53
- ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
60
error_setg_errno(errp, -ret, "error rbd create");
54
- if (ret < 0) {
61
}
55
- res->check_errors++;
62
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
56
- goto out;
63
Error **errp)
57
- }
64
{
58
- }
65
BDRVRBDState *s = bs->opaque;
59
-
66
- const char *pool, *snap, *conf, *clientname, *name, *keypairs;
60
if (high_off == 0) {
67
+ const char *pool, *snap, *conf, *user, *image_name, *keypairs;
61
res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
68
const char *secretid;
62
} else {
69
QemuOpts *opts;
63
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
70
Error *local_err = NULL;
64
71
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
65
out:
72
pool = qemu_opt_get(opts, "pool");
66
qemu_co_mutex_unlock(&s->lock);
73
conf = qemu_opt_get(opts, "conf");
67
+
74
snap = qemu_opt_get(opts, "snapshot");
68
+ if (ret == 0) {
75
- clientname = qemu_opt_get(opts, "user");
69
+ ret = bdrv_co_flush(bs);
76
- name = qemu_opt_get(opts, "image");
70
+ if (ret < 0) {
77
+ user = qemu_opt_get(opts, "user");
71
+ res->check_errors++;
78
+ image_name = qemu_opt_get(opts, "image");
72
+ }
79
keypairs = qemu_opt_get(opts, "=keyvalue-pairs");
73
+ }
80
74
+
81
- if (!pool || !name) {
75
return ret;
82
+ if (!pool || !image_name) {
83
error_setg(errp, "Parameters 'pool' and 'image' are required");
84
r = -EINVAL;
85
goto failed_opts;
86
}
87
88
- r = rados_create(&s->cluster, clientname);
89
+ r = rados_create(&s->cluster, user);
90
if (r < 0) {
91
error_setg_errno(errp, -r, "error initializing");
92
goto failed_opts;
93
}
94
95
s->snap = g_strdup(snap);
96
- s->name = g_strdup(name);
97
+ s->image_name = g_strdup(image_name);
98
99
/* try default location when conf=NULL, but ignore failure */
100
r = rados_conf_read_file(s->cluster, conf);
101
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
102
}
103
104
/* rbd_open is always r/w */
105
- r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
106
+ r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
107
if (r < 0) {
108
- error_setg_errno(errp, -r, "error reading header from %s", s->name);
109
+ error_setg_errno(errp, -r, "error reading header from %s",
110
+ s->image_name);
111
goto failed_open;
112
}
113
114
@@ -XXX,XX +XXX,XX @@ failed_open:
115
failed_shutdown:
116
rados_shutdown(s->cluster);
117
g_free(s->snap);
118
- g_free(s->name);
119
+ g_free(s->image_name);
120
failed_opts:
121
qemu_opts_del(opts);
122
g_free(mon_host);
123
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_close(BlockDriverState *bs)
124
rbd_close(s->image);
125
rados_ioctx_destroy(s->io_ctx);
126
g_free(s->snap);
127
- g_free(s->name);
128
+ g_free(s->image_name);
129
rados_shutdown(s->cluster);
130
}
76
}
131
77
132
--
78
--
133
2.9.3
79
2.40.1
134
135
diff view generated by jsdifflib
1
This adds support for reopen in rbd, for changing between r/w and r/o.
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
Note, that this is only a flag change, but we will block a change from
3
We will add more and more checks so we need a better code structure
4
r/o to r/w if we are using an RBD internal snapshot.
4
in parallels_co_check. Let each check be performed in a separate loop
5
in a separate helper.
5
6
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Signed-off-by: Jeff Cody <jcody@redhat.com>
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Reviewed-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
9
Message-id: d4e87539167ec6527d44c97b164eabcccf96e4f3.1491597120.git.jcody@redhat.com
10
Message-Id: <20230424093147.197643-7-alexander.ivanov@virtuozzo.com>
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
10
---
13
---
11
block/rbd.c | 21 +++++++++++++++++++++
14
block/parallels.c | 31 +++++++++++++++++++++----------
12
1 file changed, 21 insertions(+)
15
1 file changed, 21 insertions(+), 10 deletions(-)
13
16
14
diff --git a/block/rbd.c b/block/rbd.c
17
diff --git a/block/parallels.c b/block/parallels.c
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/block/rbd.c
19
--- a/block/parallels.c
17
+++ b/block/rbd.c
20
+++ b/block/parallels.c
18
@@ -XXX,XX +XXX,XX @@ failed_opts:
21
@@ -XXX,XX +XXX,XX @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
19
return r;
22
return ret;
20
}
23
}
21
24
25
+static void parallels_check_unclean(BlockDriverState *bs,
26
+ BdrvCheckResult *res,
27
+ BdrvCheckMode fix)
28
+{
29
+ BDRVParallelsState *s = bs->opaque;
22
+
30
+
23
+/* Since RBD is currently always opened R/W via the API,
31
+ if (!s->header_unclean) {
24
+ * we just need to check if we are using a snapshot or not, in
32
+ return;
25
+ * order to determine if we will allow it to be R/W */
26
+static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
27
+ BlockReopenQueue *queue, Error **errp)
28
+{
29
+ BDRVRBDState *s = state->bs->opaque;
30
+ int ret = 0;
31
+
32
+ if (s->snap && state->flags & BDRV_O_RDWR) {
33
+ error_setg(errp,
34
+ "Cannot change node '%s' to r/w when using RBD snapshot",
35
+ bdrv_get_device_or_node_name(state->bs));
36
+ ret = -EINVAL;
37
+ }
33
+ }
38
+
34
+
39
+ return ret;
35
+ fprintf(stderr, "%s image was not closed correctly\n",
36
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
37
+ res->corruptions++;
38
+ if (fix & BDRV_FIX_ERRORS) {
39
+ /* parallels_close will do the job right */
40
+ res->corruptions_fixed++;
41
+ s->header_unclean = false;
42
+ }
40
+}
43
+}
44
45
static int coroutine_fn GRAPH_RDLOCK
46
parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
47
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
48
}
49
50
qemu_co_mutex_lock(&s->lock);
51
- if (s->header_unclean) {
52
- fprintf(stderr, "%s image was not closed correctly\n",
53
- fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
54
- res->corruptions++;
55
- if (fix & BDRV_FIX_ERRORS) {
56
- /* parallels_close will do the job right */
57
- res->corruptions_fixed++;
58
- s->header_unclean = false;
59
- }
60
- }
41
+
61
+
42
static void qemu_rbd_close(BlockDriverState *bs)
62
+ parallels_check_unclean(bs, res, fix);
43
{
63
44
BDRVRBDState *s = bs->opaque;
64
res->bfi.total_clusters = s->bat_size;
45
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = {
65
res->bfi.compressed_clusters = 0; /* compression is not supported */
46
.bdrv_parse_filename = qemu_rbd_parse_filename,
47
.bdrv_file_open = qemu_rbd_open,
48
.bdrv_close = qemu_rbd_close,
49
+ .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
50
.bdrv_create = qemu_rbd_create,
51
.bdrv_has_zero_init = bdrv_has_zero_init_1,
52
.bdrv_get_info = qemu_rbd_getinfo,
53
--
66
--
54
2.9.3
67
2.40.1
55
56
diff view generated by jsdifflib
1
From: Ashish Mittal <ashmit602@gmail.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
2
3
Source code for the qnio library that this code loads can be downloaded from:
3
We will add more and more checks so we need a better code structure in
4
https://github.com/VeritasHyperScale/libqnio.git
4
parallels_co_check. Let each check be performed in a separate loop in a
5
separate helper.
5
6
6
Sample command line using JSON syntax:
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
./x86_64-softmmu/qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
-k en-us -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
9
Message-Id: <20230424093147.197643-8-alexander.ivanov@virtuozzo.com>
9
-msg timestamp=on
10
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
'json:{"driver":"vxhs","vdisk-id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
11
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
11
"server":{"host":"172.172.17.4","port":"9999"}}'
12
---
13
block/parallels.c | 75 +++++++++++++++++++++++++++++++----------------
14
1 file changed, 49 insertions(+), 26 deletions(-)
12
15
13
Sample command line using URI syntax:
16
diff --git a/block/parallels.c b/block/parallels.c
14
qemu-img convert -f raw -O raw -n
15
/var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
16
vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0
17
18
Sample command line using TLS credentials (run in secure mode):
19
./qemu-io --object
20
tls-creds-x509,id=tls0,dir=/etc/pki/qemu/vxhs,endpoint=client -c 'read
21
-v 66000 2.5k' 'json:{"server.host": "127.0.0.1", "server.port": "9999",
22
"vdisk-id": "/test.raw", "driver": "vxhs", "tls-creds":"tls0"}'
23
24
Signed-off-by: Ashish Mittal <Ashish.Mittal@veritas.com>
25
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
26
Reviewed-by: Jeff Cody <jcody@redhat.com>
27
Signed-off-by: Jeff Cody <jcody@redhat.com>
28
Message-id: 1491277689-24949-2-git-send-email-Ashish.Mittal@veritas.com
29
---
30
block/Makefile.objs | 2 +
31
block/trace-events | 17 ++
32
block/vxhs.c | 575 +++++++++++++++++++++++++++++++++++++++++++++++++++
33
configure | 39 ++++
34
qapi/block-core.json | 23 ++-
35
5 files changed, 654 insertions(+), 2 deletions(-)
36
create mode 100644 block/vxhs.c
37
38
diff --git a/block/Makefile.objs b/block/Makefile.objs
39
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
40
--- a/block/Makefile.objs
18
--- a/block/parallels.c
41
+++ b/block/Makefile.objs
19
+++ b/block/parallels.c
42
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
20
@@ -XXX,XX +XXX,XX @@ static void parallels_check_unclean(BlockDriverState *bs,
43
block-obj-$(CONFIG_CURL) += curl.o
21
}
44
block-obj-$(CONFIG_RBD) += rbd.o
22
}
45
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
23
46
+block-obj-$(CONFIG_VXHS) += vxhs.o
24
+static int coroutine_fn GRAPH_RDLOCK
47
block-obj-$(CONFIG_LIBSSH2) += ssh.o
25
+parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
48
block-obj-y += accounting.o dirty-bitmap.o
26
+ BdrvCheckMode fix)
49
block-obj-y += write-threshold.o
27
+{
50
@@ -XXX,XX +XXX,XX @@ rbd.o-cflags := $(RBD_CFLAGS)
28
+ BDRVParallelsState *s = bs->opaque;
51
rbd.o-libs := $(RBD_LIBS)
29
+ uint32_t i;
52
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
30
+ int64_t off, high_off, size;
53
gluster.o-libs := $(GLUSTERFS_LIBS)
54
+vxhs.o-libs := $(VXHS_LIBS)
55
ssh.o-cflags := $(LIBSSH2_CFLAGS)
56
ssh.o-libs := $(LIBSSH2_LIBS)
57
block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
58
diff --git a/block/trace-events b/block/trace-events
59
index XXXXXXX..XXXXXXX 100644
60
--- a/block/trace-events
61
+++ b/block/trace-events
62
@@ -XXX,XX +XXX,XX @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
63
qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
64
qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
65
qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
66
+
31
+
67
+# block/vxhs.c
32
+ size = bdrv_getlength(bs->file->bs);
68
+vxhs_iio_callback(int error) "ctx is NULL: error %d"
33
+ if (size < 0) {
69
+vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
34
+ res->check_errors++;
70
+vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
35
+ return size;
71
+vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
72
+vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d"
73
+vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
74
+vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu"
75
+vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
76
+vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
77
+vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s"
78
+vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
79
+vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s"
80
+vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d"
81
+vxhs_close(char *vdisk_guid) "Closing vdisk %s"
82
+vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s"
83
diff --git a/block/vxhs.c b/block/vxhs.c
84
new file mode 100644
85
index XXXXXXX..XXXXXXX
86
--- /dev/null
87
+++ b/block/vxhs.c
88
@@ -XXX,XX +XXX,XX @@
89
+/*
90
+ * QEMU Block driver for Veritas HyperScale (VxHS)
91
+ *
92
+ * Copyright (c) 2017 Veritas Technologies LLC.
93
+ *
94
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
95
+ * See the COPYING file in the top-level directory.
96
+ *
97
+ */
98
+
99
+#include "qemu/osdep.h"
100
+#include <qnio/qnio_api.h>
101
+#include <sys/param.h>
102
+#include "block/block_int.h"
103
+#include "qapi/qmp/qerror.h"
104
+#include "qapi/qmp/qdict.h"
105
+#include "qapi/qmp/qstring.h"
106
+#include "trace.h"
107
+#include "qemu/uri.h"
108
+#include "qapi/error.h"
109
+#include "qemu/uuid.h"
110
+#include "crypto/tlscredsx509.h"
111
+
112
+#define VXHS_OPT_FILENAME "filename"
113
+#define VXHS_OPT_VDISK_ID "vdisk-id"
114
+#define VXHS_OPT_SERVER "server"
115
+#define VXHS_OPT_HOST "host"
116
+#define VXHS_OPT_PORT "port"
117
+
118
+/* Only accessed under QEMU global mutex */
119
+static uint32_t vxhs_ref;
120
+
121
+typedef enum {
122
+ VDISK_AIO_READ,
123
+ VDISK_AIO_WRITE,
124
+} VDISKAIOCmd;
125
+
126
+/*
127
+ * HyperScale AIO callbacks structure
128
+ */
129
+typedef struct VXHSAIOCB {
130
+ BlockAIOCB common;
131
+ int err;
132
+} VXHSAIOCB;
133
+
134
+typedef struct VXHSvDiskHostsInfo {
135
+ void *dev_handle; /* Device handle */
136
+ char *host; /* Host name or IP */
137
+ int port; /* Host's port number */
138
+} VXHSvDiskHostsInfo;
139
+
140
+/*
141
+ * Structure per vDisk maintained for state
142
+ */
143
+typedef struct BDRVVXHSState {
144
+ VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
145
+ char *vdisk_guid;
146
+ char *tlscredsid; /* tlscredsid */
147
+} BDRVVXHSState;
148
+
149
+static void vxhs_complete_aio_bh(void *opaque)
150
+{
151
+ VXHSAIOCB *acb = opaque;
152
+ BlockCompletionFunc *cb = acb->common.cb;
153
+ void *cb_opaque = acb->common.opaque;
154
+ int ret = 0;
155
+
156
+ if (acb->err != 0) {
157
+ trace_vxhs_complete_aio(acb, acb->err);
158
+ ret = (-EIO);
159
+ }
36
+ }
160
+
37
+
161
+ qemu_aio_unref(acb);
38
+ high_off = 0;
162
+ cb(cb_opaque, ret);
39
+ for (i = 0; i < s->bat_size; i++) {
163
+}
40
+ off = bat2sect(s, i) << BDRV_SECTOR_BITS;
164
+
41
+ if (off > size) {
165
+/*
42
+ fprintf(stderr, "%s cluster %u is outside image\n",
166
+ * Called from a libqnio thread
43
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
167
+ */
44
+ res->corruptions++;
168
+static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error)
45
+ if (fix & BDRV_FIX_ERRORS) {
169
+{
46
+ parallels_set_bat_entry(s, i, 0);
170
+ VXHSAIOCB *acb = NULL;
47
+ res->corruptions_fixed++;
171
+
48
+ }
172
+ switch (opcode) {
49
+ continue;
173
+ case IRP_READ_REQUEST:
174
+ case IRP_WRITE_REQUEST:
175
+
176
+ /*
177
+ * ctx is VXHSAIOCB*
178
+ * ctx is NULL if error is QNIOERROR_CHANNEL_HUP
179
+ */
180
+ if (ctx) {
181
+ acb = ctx;
182
+ } else {
183
+ trace_vxhs_iio_callback(error);
184
+ goto out;
185
+ }
50
+ }
186
+
51
+ if (high_off < off) {
187
+ if (error) {
52
+ high_off = off;
188
+ if (!acb->err) {
189
+ acb->err = error;
190
+ }
191
+ trace_vxhs_iio_callback(error);
192
+ }
53
+ }
193
+
194
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
195
+ vxhs_complete_aio_bh, acb);
196
+ break;
197
+
198
+ default:
199
+ if (error == QNIOERROR_HUP) {
200
+ /*
201
+ * Channel failed, spontaneous notification,
202
+ * not in response to I/O
203
+ */
204
+ trace_vxhs_iio_callback_chnfail(error, errno);
205
+ } else {
206
+ trace_vxhs_iio_callback_unknwn(opcode, error);
207
+ }
208
+ break;
209
+ }
210
+out:
211
+ return;
212
+}
213
+
214
+static QemuOptsList runtime_opts = {
215
+ .name = "vxhs",
216
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
217
+ .desc = {
218
+ {
219
+ .name = VXHS_OPT_FILENAME,
220
+ .type = QEMU_OPT_STRING,
221
+ .help = "URI to the Veritas HyperScale image",
222
+ },
223
+ {
224
+ .name = VXHS_OPT_VDISK_ID,
225
+ .type = QEMU_OPT_STRING,
226
+ .help = "UUID of the VxHS vdisk",
227
+ },
228
+ {
229
+ .name = "tls-creds",
230
+ .type = QEMU_OPT_STRING,
231
+ .help = "ID of the TLS/SSL credentials to use",
232
+ },
233
+ { /* end of list */ }
234
+ },
235
+};
236
+
237
+static QemuOptsList runtime_tcp_opts = {
238
+ .name = "vxhs_tcp",
239
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
240
+ .desc = {
241
+ {
242
+ .name = VXHS_OPT_HOST,
243
+ .type = QEMU_OPT_STRING,
244
+ .help = "host address (ipv4 addresses)",
245
+ },
246
+ {
247
+ .name = VXHS_OPT_PORT,
248
+ .type = QEMU_OPT_NUMBER,
249
+ .help = "port number on which VxHSD is listening (default 9999)",
250
+ .def_value_str = "9999"
251
+ },
252
+ { /* end of list */ }
253
+ },
254
+};
255
+
256
+/*
257
+ * Parse incoming URI and populate *options with the host
258
+ * and device information
259
+ */
260
+static int vxhs_parse_uri(const char *filename, QDict *options)
261
+{
262
+ URI *uri = NULL;
263
+ char *port;
264
+ int ret = 0;
265
+
266
+ trace_vxhs_parse_uri_filename(filename);
267
+ uri = uri_parse(filename);
268
+ if (!uri || !uri->server || !uri->path) {
269
+ uri_free(uri);
270
+ return -EINVAL;
271
+ }
54
+ }
272
+
55
+
273
+ qdict_put(options, VXHS_OPT_SERVER".host", qstring_from_str(uri->server));
56
+ if (high_off == 0) {
274
+
57
+ res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
275
+ if (uri->port) {
58
+ } else {
276
+ port = g_strdup_printf("%d", uri->port);
59
+ res->image_end_offset = high_off + s->cluster_size;
277
+ qdict_put(options, VXHS_OPT_SERVER".port", qstring_from_str(port));
60
+ s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
278
+ g_free(port);
279
+ }
61
+ }
280
+
62
+
281
+ qdict_put(options, "vdisk-id", qstring_from_str(uri->path));
282
+
283
+ trace_vxhs_parse_uri_hostinfo(uri->server, uri->port);
284
+ uri_free(uri);
285
+
286
+ return ret;
287
+}
288
+
289
+static void vxhs_parse_filename(const char *filename, QDict *options,
290
+ Error **errp)
291
+{
292
+ if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
293
+ error_setg(errp, "vdisk-id/server and a file name may not be specified "
294
+ "at the same time");
295
+ return;
296
+ }
297
+
298
+ if (strstr(filename, "://")) {
299
+ int ret = vxhs_parse_uri(filename, options);
300
+ if (ret < 0) {
301
+ error_setg(errp, "Invalid URI. URI should be of the form "
302
+ " vxhs://<host_ip>:<port>/<vdisk-id>");
303
+ }
304
+ }
305
+}
306
+
307
+static int vxhs_init_and_ref(void)
308
+{
309
+ if (vxhs_ref++ == 0) {
310
+ if (iio_init(QNIO_VERSION, vxhs_iio_callback)) {
311
+ return -ENODEV;
312
+ }
313
+ }
314
+ return 0;
63
+ return 0;
315
+}
64
+}
316
+
65
+
317
+static void vxhs_unref(void)
66
static int coroutine_fn GRAPH_RDLOCK
318
+{
67
parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
319
+ if (--vxhs_ref == 0) {
68
BdrvCheckMode fix)
320
+ iio_fini();
69
{
321
+ }
70
BDRVParallelsState *s = bs->opaque;
322
+}
71
- int64_t size, prev_off, high_off;
323
+
72
- int ret = 0;
324
+static void vxhs_get_tls_creds(const char *id, char **cacert,
73
+ int64_t size, prev_off;
325
+ char **key, char **cert, Error **errp)
74
+ int ret;
326
+{
75
uint32_t i;
327
+ Object *obj;
76
328
+ QCryptoTLSCreds *creds;
77
size = bdrv_getlength(bs->file->bs);
329
+ QCryptoTLSCredsX509 *creds_x509;
78
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
330
+
79
331
+ obj = object_resolve_path_component(
80
parallels_check_unclean(bs, res, fix);
332
+ object_get_objects_root(), id);
81
333
+
82
+ ret = parallels_check_outside_image(bs, res, fix);
334
+ if (!obj) {
335
+ error_setg(errp, "No TLS credentials with id '%s'",
336
+ id);
337
+ return;
338
+ }
339
+
340
+ creds_x509 = (QCryptoTLSCredsX509 *)
341
+ object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509);
342
+
343
+ if (!creds_x509) {
344
+ error_setg(errp, "Object with id '%s' is not TLS credentials",
345
+ id);
346
+ return;
347
+ }
348
+
349
+ creds = &creds_x509->parent_obj;
350
+
351
+ if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
352
+ error_setg(errp,
353
+ "Expecting TLS credentials with a client endpoint");
354
+ return;
355
+ }
356
+
357
+ /*
358
+ * Get the cacert, client_cert and client_key file names.
359
+ */
360
+ if (!creds->dir) {
361
+ error_setg(errp, "TLS object missing 'dir' property value");
362
+ return;
363
+ }
364
+
365
+ *cacert = g_strdup_printf("%s/%s", creds->dir,
366
+ QCRYPTO_TLS_CREDS_X509_CA_CERT);
367
+ *cert = g_strdup_printf("%s/%s", creds->dir,
368
+ QCRYPTO_TLS_CREDS_X509_CLIENT_CERT);
369
+ *key = g_strdup_printf("%s/%s", creds->dir,
370
+ QCRYPTO_TLS_CREDS_X509_CLIENT_KEY);
371
+}
372
+
373
+static int vxhs_open(BlockDriverState *bs, QDict *options,
374
+ int bdrv_flags, Error **errp)
375
+{
376
+ BDRVVXHSState *s = bs->opaque;
377
+ void *dev_handlep;
378
+ QDict *backing_options = NULL;
379
+ QemuOpts *opts = NULL;
380
+ QemuOpts *tcp_opts = NULL;
381
+ char *of_vsa_addr = NULL;
382
+ Error *local_err = NULL;
383
+ const char *vdisk_id_opt;
384
+ const char *server_host_opt;
385
+ int ret = 0;
386
+ char *cacert = NULL;
387
+ char *client_key = NULL;
388
+ char *client_cert = NULL;
389
+
390
+ ret = vxhs_init_and_ref();
391
+ if (ret < 0) {
83
+ if (ret < 0) {
392
+ ret = -EINVAL;
393
+ goto out;
84
+ goto out;
394
+ }
85
+ }
395
+
86
+
396
+ /* Create opts info from runtime_opts and runtime_tcp_opts list */
87
res->bfi.total_clusters = s->bat_size;
397
+ opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
88
res->bfi.compressed_clusters = 0; /* compression is not supported */
398
+ tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
89
399
+
90
- high_off = 0;
400
+ qemu_opts_absorb_qdict(opts, options, &local_err);
91
prev_off = 0;
401
+ if (local_err) {
92
for (i = 0; i < s->bat_size; i++) {
402
+ ret = -EINVAL;
93
int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
403
+ goto out;
94
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
404
+ }
95
continue;
405
+
96
}
406
+ /* vdisk-id is the disk UUID */
97
407
+ vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
98
- /* cluster outside the image */
408
+ if (!vdisk_id_opt) {
99
- if (off > size) {
409
+ error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
100
- fprintf(stderr, "%s cluster %u is outside image\n",
410
+ ret = -EINVAL;
101
- fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
411
+ goto out;
102
- res->corruptions++;
412
+ }
103
- if (fix & BDRV_FIX_ERRORS) {
413
+
104
- parallels_set_bat_entry(s, i, 0);
414
+ /* vdisk-id may contain a leading '/' */
105
- res->corruptions_fixed++;
415
+ if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) {
106
- }
416
+ error_setg(&local_err, "vdisk-id cannot be more than %d characters",
107
- prev_off = 0;
417
+ UUID_FMT_LEN);
108
- continue;
418
+ ret = -EINVAL;
109
- }
419
+ goto out;
110
-
420
+ }
111
res->bfi.allocated_clusters++;
421
+
112
- if (off > high_off) {
422
+ s->vdisk_guid = g_strdup(vdisk_id_opt);
113
- high_off = off;
423
+ trace_vxhs_open_vdiskid(vdisk_id_opt);
114
- }
424
+
115
425
+ /* get the 'server.' arguments */
116
if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
426
+ qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER".");
117
res->bfi.fragmented_clusters++;
427
+
118
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
428
+ qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
119
prev_off = off;
429
+ if (local_err != NULL) {
120
}
430
+ ret = -EINVAL;
121
431
+ goto out;
122
- if (high_off == 0) {
432
+ }
123
- res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
433
+
124
- } else {
434
+ server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
125
- res->image_end_offset = high_off + s->cluster_size;
435
+ if (!server_host_opt) {
126
- s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
436
+ error_setg(&local_err, QERR_MISSING_PARAMETER,
127
- }
437
+ VXHS_OPT_SERVER"."VXHS_OPT_HOST);
128
-
438
+ ret = -EINVAL;
129
if (size > res->image_end_offset) {
439
+ goto out;
130
int64_t count;
440
+ }
131
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
441
+
442
+ if (strlen(server_host_opt) > MAXHOSTNAMELEN) {
443
+ error_setg(&local_err, "server.host cannot be more than %d characters",
444
+ MAXHOSTNAMELEN);
445
+ ret = -EINVAL;
446
+ goto out;
447
+ }
448
+
449
+ /* check if we got tls-creds via the --object argument */
450
+ s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
451
+ if (s->tlscredsid) {
452
+ vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key,
453
+ &client_cert, &local_err);
454
+ if (local_err != NULL) {
455
+ ret = -EINVAL;
456
+ goto out;
457
+ }
458
+ trace_vxhs_get_creds(cacert, client_key, client_cert);
459
+ }
460
+
461
+ s->vdisk_hostinfo.host = g_strdup(server_host_opt);
462
+ s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
463
+ VXHS_OPT_PORT),
464
+ NULL, 0);
465
+
466
+ trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host,
467
+ s->vdisk_hostinfo.port);
468
+
469
+ of_vsa_addr = g_strdup_printf("of://%s:%d",
470
+ s->vdisk_hostinfo.host,
471
+ s->vdisk_hostinfo.port);
472
+
473
+ /*
474
+ * Open qnio channel to storage agent if not opened before
475
+ */
476
+ dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0,
477
+ cacert, client_key, client_cert);
478
+ if (dev_handlep == NULL) {
479
+ trace_vxhs_open_iio_open(of_vsa_addr);
480
+ ret = -ENODEV;
481
+ goto out;
482
+ }
483
+ s->vdisk_hostinfo.dev_handle = dev_handlep;
484
+
485
+out:
486
+ g_free(of_vsa_addr);
487
+ QDECREF(backing_options);
488
+ qemu_opts_del(tcp_opts);
489
+ qemu_opts_del(opts);
490
+ g_free(cacert);
491
+ g_free(client_key);
492
+ g_free(client_cert);
493
+
494
+ if (ret < 0) {
495
+ vxhs_unref();
496
+ error_propagate(errp, local_err);
497
+ g_free(s->vdisk_hostinfo.host);
498
+ g_free(s->vdisk_guid);
499
+ g_free(s->tlscredsid);
500
+ s->vdisk_guid = NULL;
501
+ }
502
+
503
+ return ret;
504
+}
505
+
506
+static const AIOCBInfo vxhs_aiocb_info = {
507
+ .aiocb_size = sizeof(VXHSAIOCB)
508
+};
509
+
510
+/*
511
+ * This allocates QEMU-VXHS callback for each IO
512
+ * and is passed to QNIO. When QNIO completes the work,
513
+ * it will be passed back through the callback.
514
+ */
515
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
516
+ QEMUIOVector *qiov, int nb_sectors,
517
+ BlockCompletionFunc *cb, void *opaque,
518
+ VDISKAIOCmd iodir)
519
+{
520
+ VXHSAIOCB *acb = NULL;
521
+ BDRVVXHSState *s = bs->opaque;
522
+ size_t size;
523
+ uint64_t offset;
524
+ int iio_flags = 0;
525
+ int ret = 0;
526
+ void *dev_handle = s->vdisk_hostinfo.dev_handle;
527
+
528
+ offset = sector_num * BDRV_SECTOR_SIZE;
529
+ size = nb_sectors * BDRV_SECTOR_SIZE;
530
+ acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
531
+
532
+ /*
533
+ * Initialize VXHSAIOCB.
534
+ */
535
+ acb->err = 0;
536
+
537
+ iio_flags = IIO_FLAG_ASYNC;
538
+
539
+ switch (iodir) {
540
+ case VDISK_AIO_WRITE:
541
+ ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
542
+ offset, (uint64_t)size, iio_flags);
543
+ break;
544
+ case VDISK_AIO_READ:
545
+ ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
546
+ offset, (uint64_t)size, iio_flags);
547
+ break;
548
+ default:
549
+ trace_vxhs_aio_rw_invalid(iodir);
550
+ goto errout;
551
+ }
552
+
553
+ if (ret != 0) {
554
+ trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
555
+ acb, ret, errno);
556
+ goto errout;
557
+ }
558
+ return &acb->common;
559
+
560
+errout:
561
+ qemu_aio_unref(acb);
562
+ return NULL;
563
+}
564
+
565
+static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
566
+ int64_t sector_num, QEMUIOVector *qiov,
567
+ int nb_sectors,
568
+ BlockCompletionFunc *cb, void *opaque)
569
+{
570
+ return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
571
+ opaque, VDISK_AIO_READ);
572
+}
573
+
574
+static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
575
+ int64_t sector_num, QEMUIOVector *qiov,
576
+ int nb_sectors,
577
+ BlockCompletionFunc *cb, void *opaque)
578
+{
579
+ return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
580
+ cb, opaque, VDISK_AIO_WRITE);
581
+}
582
+
583
+static void vxhs_close(BlockDriverState *bs)
584
+{
585
+ BDRVVXHSState *s = bs->opaque;
586
+
587
+ trace_vxhs_close(s->vdisk_guid);
588
+
589
+ g_free(s->vdisk_guid);
590
+ s->vdisk_guid = NULL;
591
+
592
+ /*
593
+ * Close vDisk device
594
+ */
595
+ if (s->vdisk_hostinfo.dev_handle) {
596
+ iio_close(s->vdisk_hostinfo.dev_handle);
597
+ s->vdisk_hostinfo.dev_handle = NULL;
598
+ }
599
+
600
+ vxhs_unref();
601
+
602
+ /*
603
+ * Free the dynamically allocated host string etc
604
+ */
605
+ g_free(s->vdisk_hostinfo.host);
606
+ g_free(s->tlscredsid);
607
+ s->tlscredsid = NULL;
608
+ s->vdisk_hostinfo.host = NULL;
609
+ s->vdisk_hostinfo.port = 0;
610
+}
611
+
612
+static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
613
+{
614
+ int64_t vdisk_size = -1;
615
+ int ret = 0;
616
+ void *dev_handle = s->vdisk_hostinfo.dev_handle;
617
+
618
+ ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0);
619
+ if (ret < 0) {
620
+ trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
621
+ return -EIO;
622
+ }
623
+
624
+ trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
625
+ return vdisk_size;
626
+}
627
+
628
+/*
629
+ * Returns the size of vDisk in bytes. This is required
630
+ * by QEMU block upper block layer so that it is visible
631
+ * to guest.
632
+ */
633
+static int64_t vxhs_getlength(BlockDriverState *bs)
634
+{
635
+ BDRVVXHSState *s = bs->opaque;
636
+ int64_t vdisk_size;
637
+
638
+ vdisk_size = vxhs_get_vdisk_stat(s);
639
+ if (vdisk_size < 0) {
640
+ return -EIO;
641
+ }
642
+
643
+ return vdisk_size;
644
+}
645
+
646
+static BlockDriver bdrv_vxhs = {
647
+ .format_name = "vxhs",
648
+ .protocol_name = "vxhs",
649
+ .instance_size = sizeof(BDRVVXHSState),
650
+ .bdrv_file_open = vxhs_open,
651
+ .bdrv_parse_filename = vxhs_parse_filename,
652
+ .bdrv_close = vxhs_close,
653
+ .bdrv_getlength = vxhs_getlength,
654
+ .bdrv_aio_readv = vxhs_aio_readv,
655
+ .bdrv_aio_writev = vxhs_aio_writev,
656
+};
657
+
658
+static void bdrv_vxhs_init(void)
659
+{
660
+ bdrv_register(&bdrv_vxhs);
661
+}
662
+
663
+block_init(bdrv_vxhs_init);
664
diff --git a/configure b/configure
665
index XXXXXXX..XXXXXXX 100755
666
--- a/configure
667
+++ b/configure
668
@@ -XXX,XX +XXX,XX @@ numa=""
669
tcmalloc="no"
670
jemalloc="no"
671
replication="yes"
672
+vxhs=""
673
674
supported_cpu="no"
675
supported_os="no"
676
@@ -XXX,XX +XXX,XX @@ for opt do
677
;;
678
--enable-replication) replication="yes"
679
;;
680
+ --disable-vxhs) vxhs="no"
681
+ ;;
682
+ --enable-vxhs) vxhs="yes"
683
+ ;;
684
*)
685
echo "ERROR: unknown option $opt"
686
echo "Try '$0 --help' for more information"
687
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
688
xfsctl xfsctl support
689
qom-cast-debug cast debugging support
690
tools build qemu-io, qemu-nbd and qemu-image tools
691
+ vxhs Veritas HyperScale vDisk backend support
692
693
NOTE: The object files are built at the place where configure is launched
694
EOF
695
@@ -XXX,XX +XXX,XX @@ if compile_prog "" "" ; then
696
fi
697
698
##########################################
699
+# Veritas HyperScale block driver VxHS
700
+# Check if libvxhs is installed
701
+
702
+if test "$vxhs" != "no" ; then
703
+ cat > $TMPC <<EOF
704
+#include <stdint.h>
705
+#include <qnio/qnio_api.h>
706
+
707
+void *vxhs_callback;
708
+
709
+int main(void) {
710
+ iio_init(QNIO_VERSION, vxhs_callback);
711
+ return 0;
712
+}
713
+EOF
714
+ vxhs_libs="-lvxhs -lssl"
715
+ if compile_prog "" "$vxhs_libs" ; then
716
+ vxhs=yes
717
+ else
718
+ if test "$vxhs" = "yes" ; then
719
+ feature_not_found "vxhs block device" "Install libvxhs See github"
720
+ fi
721
+ vxhs=no
722
+ fi
723
+fi
724
+
725
+##########################################
726
# End of CC checks
727
# After here, no more $cc or $ld runs
728
729
@@ -XXX,XX +XXX,XX @@ echo "tcmalloc support $tcmalloc"
730
echo "jemalloc support $jemalloc"
731
echo "avx2 optimization $avx2_opt"
732
echo "replication support $replication"
733
+echo "VxHS block device $vxhs"
734
735
if test "$sdl_too_old" = "yes"; then
736
echo "-> Your SDL version is too old - please upgrade to have SDL support"
737
@@ -XXX,XX +XXX,XX @@ if test "$pthread_setname_np" = "yes" ; then
738
echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
739
fi
740
741
+if test "$vxhs" = "yes" ; then
742
+ echo "CONFIG_VXHS=y" >> $config_host_mak
743
+ echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
744
+fi
745
+
746
if test "$tcg_interpreter" = "yes"; then
747
QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
748
elif test "$ARCH" = "sparc64" ; then
749
diff --git a/qapi/block-core.json b/qapi/block-core.json
750
index XXXXXXX..XXXXXXX 100644
751
--- a/qapi/block-core.json
752
+++ b/qapi/block-core.json
753
@@ -XXX,XX +XXX,XX @@
754
#
755
# Drivers that are supported in block device operations.
756
#
757
+# @vxhs: Since 2.10
758
+#
759
# Since: 2.9
760
##
761
{ 'enum': 'BlockdevDriver',
762
@@ -XXX,XX +XXX,XX @@
763
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
764
'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed',
765
'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
766
- 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
767
+ 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
768
769
##
770
# @BlockdevOptionsFile:
771
@@ -XXX,XX +XXX,XX @@
772
'data': { '*offset': 'int', '*size': 'int' } }
773
774
##
775
+# @BlockdevOptionsVxHS:
776
+#
777
+# Driver specific block device options for VxHS
778
+#
779
+# @vdisk-id: UUID of VxHS volume
780
+# @server: vxhs server IP, port
781
+# @tls-creds: TLS credentials ID
782
+#
783
+# Since: 2.10
784
+##
785
+{ 'struct': 'BlockdevOptionsVxHS',
786
+ 'data': { 'vdisk-id': 'str',
787
+ 'server': 'InetSocketAddressBase',
788
+ '*tls-creds': 'str' } }
789
+
790
+##
791
# @BlockdevOptions:
792
#
793
# Options for creating a block device. Many options are available for all
794
@@ -XXX,XX +XXX,XX @@
795
'vhdx': 'BlockdevOptionsGenericFormat',
796
'vmdk': 'BlockdevOptionsGenericCOWFormat',
797
'vpc': 'BlockdevOptionsGenericFormat',
798
- 'vvfat': 'BlockdevOptionsVVFAT'
799
+ 'vvfat': 'BlockdevOptionsVVFAT',
800
+ 'vxhs': 'BlockdevOptionsVxHS'
801
} }
802
803
##
804
--
132
--
805
2.9.3
133
2.40.1
806
807
diff view generated by jsdifflib
New patch
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
1
2
3
Exclude out-of-image clusters from allocated and fragmented clusters
4
calculation.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Message-Id: <20230424093147.197643-9-alexander.ivanov@virtuozzo.com>
8
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
9
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
10
---
11
block/parallels.c | 6 +++++-
12
1 file changed, 5 insertions(+), 1 deletion(-)
13
14
diff --git a/block/parallels.c b/block/parallels.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/parallels.c
17
+++ b/block/parallels.c
18
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
19
prev_off = 0;
20
for (i = 0; i < s->bat_size; i++) {
21
int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
22
- if (off == 0) {
23
+ /*
24
+ * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
25
+ * fixed. Skip not allocated and out-of-image BAT entries.
26
+ */
27
+ if (off == 0 || off + s->cluster_size > res->image_end_offset) {
28
prev_off = 0;
29
continue;
30
}
31
--
32
2.40.1
diff view generated by jsdifflib
1
Introduce check function for setting read_only flags. Will return < 0 on
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
error, with appropriate Error value set. Does not alter any flags.
3
2
4
Signed-off-by: Jeff Cody <jcody@redhat.com>
3
We will add more and more checks so we need a better code structure
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
in parallels_co_check. Let each check be performed in a separate loop
6
Reviewed-by: John Snow <jsnow@redhat.com>
5
in a separate helper.
7
Message-id: e2bba34ac3bc76a0c42adc390413f358ae0566e8.1491597120.git.jcody@redhat.com
6
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
8
Message-Id: <20230424093147.197643-10-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
8
---
11
---
9
block.c | 14 +++++++++++++-
12
block/parallels.c | 74 ++++++++++++++++++++++++++++-------------------
10
include/block/block.h | 1 +
13
1 file changed, 45 insertions(+), 29 deletions(-)
11
2 files changed, 14 insertions(+), 1 deletion(-)
12
14
13
diff --git a/block.c b/block.c
15
diff --git a/block/parallels.c b/block/parallels.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/block.c
17
--- a/block/parallels.c
16
+++ b/block.c
18
+++ b/block/parallels.c
17
@@ -XXX,XX +XXX,XX @@ bool bdrv_is_read_only(BlockDriverState *bs)
19
@@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
18
return bs->read_only;
19
}
20
}
20
21
21
-int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
22
static int coroutine_fn GRAPH_RDLOCK
22
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
23
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
24
- BdrvCheckMode fix)
25
+parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
26
+ BdrvCheckMode fix)
23
{
27
{
24
/* Do not set read_only if copy_on_read is enabled */
28
BDRVParallelsState *s = bs->opaque;
25
if (bs->copy_on_read && read_only) {
29
- int64_t size, prev_off;
26
@@ -XXX,XX +XXX,XX @@ int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
30
+ int64_t size;
27
return -EPERM;
31
int ret;
32
- uint32_t i;
33
34
size = bdrv_getlength(bs->file->bs);
35
if (size < 0) {
36
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
37
return size;
28
}
38
}
29
39
40
+ if (size > res->image_end_offset) {
41
+ int64_t count;
42
+ count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
43
+ fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
44
+ fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
45
+ size - res->image_end_offset);
46
+ res->leaks += count;
47
+ if (fix & BDRV_FIX_LEAKS) {
48
+ Error *local_err = NULL;
49
+
50
+ /*
51
+ * In order to really repair the image, we must shrink it.
52
+ * That means we have to pass exact=true.
53
+ */
54
+ ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
55
+ PREALLOC_MODE_OFF, 0, &local_err);
56
+ if (ret < 0) {
57
+ error_report_err(local_err);
58
+ res->check_errors++;
59
+ return ret;
60
+ }
61
+ res->leaks_fixed += count;
62
+ }
63
+ }
64
+
30
+ return 0;
65
+ return 0;
31
+}
66
+}
32
+
67
+
33
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
68
+static int coroutine_fn GRAPH_RDLOCK
69
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
70
+ BdrvCheckMode fix)
34
+{
71
+{
35
+ int ret = 0;
72
+ BDRVParallelsState *s = bs->opaque;
73
+ int64_t prev_off;
74
+ int ret;
75
+ uint32_t i;
36
+
76
+
37
+ ret = bdrv_can_set_read_only(bs, read_only, errp);
77
qemu_co_mutex_lock(&s->lock);
78
79
parallels_check_unclean(bs, res, fix);
80
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
81
goto out;
82
}
83
84
+ ret = parallels_check_leak(bs, res, fix);
38
+ if (ret < 0) {
85
+ if (ret < 0) {
39
+ return ret;
86
+ goto out;
40
+ }
87
+ }
41
+
88
+
42
bs->read_only = read_only;
89
res->bfi.total_clusters = s->bat_size;
43
return 0;
90
res->bfi.compressed_clusters = 0; /* compression is not supported */
44
}
91
45
diff --git a/include/block/block.h b/include/block/block.h
92
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
46
index XXXXXXX..XXXXXXX 100644
93
prev_off = off;
47
--- a/include/block/block.h
94
}
48
+++ b/include/block/block.h
95
49
@@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
96
- if (size > res->image_end_offset) {
50
int64_t sector_num, int nb_sectors, int *pnum);
97
- int64_t count;
51
98
- count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
52
bool bdrv_is_read_only(BlockDriverState *bs);
99
- fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
53
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
100
- fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
54
int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
101
- size - res->image_end_offset);
55
bool bdrv_is_sg(BlockDriverState *bs);
102
- res->leaks += count;
56
bool bdrv_is_inserted(BlockDriverState *bs);
103
- if (fix & BDRV_FIX_LEAKS) {
104
- Error *local_err = NULL;
105
-
106
- /*
107
- * In order to really repair the image, we must shrink it.
108
- * That means we have to pass exact=true.
109
- */
110
- ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
111
- PREALLOC_MODE_OFF, 0, &local_err);
112
- if (ret < 0) {
113
- error_report_err(local_err);
114
- res->check_errors++;
115
- goto out;
116
- }
117
- res->leaks_fixed += count;
118
- }
119
- }
120
-
121
out:
122
qemu_co_mutex_unlock(&s->lock);
123
57
--
124
--
58
2.9.3
125
2.40.1
59
60
diff view generated by jsdifflib
1
For the tests that use the common.qemu functions for running a QEMU
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
process, _cleanup_qemu must be called in the exit function.
3
2
4
If it is not, and the qemu process aborts, then not all of the droppings
3
We will add more and more checks so we need a better code structure
5
are cleaned up (e.g. pidfile, fifos).
4
in parallels_co_check. Let each check be performed in a separate loop
5
in a separate helper.
6
6
7
This updates those tests that did not have a cleanup in qemu-iotests.
7
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
8
Reviewed-by: Denis V. Lunev <den@openvz.org>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
10
Message-Id: <20230424093147.197643-11-alexander.ivanov@virtuozzo.com>
11
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
12
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
13
---
14
block/parallels.c | 52 +++++++++++++++++++++++++++--------------------
15
1 file changed, 30 insertions(+), 22 deletions(-)
8
16
9
(I swapped spaces for tabs in test 102 as well)
17
diff --git a/block/parallels.c b/block/parallels.c
10
18
index XXXXXXX..XXXXXXX 100644
11
Reported-by: Eric Blake <eblake@redhat.com>
19
--- a/block/parallels.c
12
Reviewed-by: Eric Blake <eblake@redhat.com>
20
+++ b/block/parallels.c
13
Signed-off-by: Jeff Cody <jcody@redhat.com>
21
@@ -XXX,XX +XXX,XX @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
14
Message-id: d59c2f6ad6c1da8b9b3c7f357c94a7122ccfc55a.1492544096.git.jcody@redhat.com
22
return 0;
15
---
23
}
16
tests/qemu-iotests/028 | 1 +
24
17
tests/qemu-iotests/094 | 11 ++++++++---
25
-static int coroutine_fn GRAPH_RDLOCK
18
tests/qemu-iotests/102 | 5 +++--
26
-parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
19
tests/qemu-iotests/109 | 1 +
27
- BdrvCheckMode fix)
20
tests/qemu-iotests/117 | 1 +
28
+static void parallels_collect_statistics(BlockDriverState *bs,
21
tests/qemu-iotests/130 | 1 +
29
+ BdrvCheckResult *res,
22
tests/qemu-iotests/140 | 1 +
30
+ BdrvCheckMode fix)
23
tests/qemu-iotests/141 | 1 +
24
tests/qemu-iotests/143 | 1 +
25
tests/qemu-iotests/156 | 1 +
26
10 files changed, 19 insertions(+), 5 deletions(-)
27
28
diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028
29
index XXXXXXX..XXXXXXX 100755
30
--- a/tests/qemu-iotests/028
31
+++ b/tests/qemu-iotests/028
32
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
33
34
_cleanup()
35
{
31
{
36
+ _cleanup_qemu
32
BDRVParallelsState *s = bs->opaque;
37
rm -f "${TEST_IMG}.copy"
33
- int64_t prev_off;
38
_cleanup_test_img
34
- int ret;
39
}
35
+ int64_t off, prev_off;
40
diff --git a/tests/qemu-iotests/094 b/tests/qemu-iotests/094
36
uint32_t i;
41
index XXXXXXX..XXXXXXX 100755
37
42
--- a/tests/qemu-iotests/094
38
- qemu_co_mutex_lock(&s->lock);
43
+++ b/tests/qemu-iotests/094
39
-
44
@@ -XXX,XX +XXX,XX @@ echo "QA output created by $seq"
40
- parallels_check_unclean(bs, res, fix);
45
here="$PWD"
41
-
46
status=1    # failure is the default!
42
- ret = parallels_check_outside_image(bs, res, fix);
47
43
- if (ret < 0) {
48
-trap "exit \$status" 0 1 2 3 15
44
- goto out;
49
+_cleanup()
45
- }
50
+{
46
-
51
+ _cleanup_qemu
47
- ret = parallels_check_leak(bs, res, fix);
52
+ _cleanup_test_img
48
- if (ret < 0) {
53
+ rm -f "$TEST_DIR/source.$IMGFMT"
49
- goto out;
50
- }
51
-
52
res->bfi.total_clusters = s->bat_size;
53
res->bfi.compressed_clusters = 0; /* compression is not supported */
54
55
prev_off = 0;
56
for (i = 0; i < s->bat_size; i++) {
57
- int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
58
+ off = bat2sect(s, i) << BDRV_SECTOR_BITS;
59
/*
60
* If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
61
* fixed. Skip not allocated and out-of-image BAT entries.
62
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
63
continue;
64
}
65
66
- res->bfi.allocated_clusters++;
67
-
68
if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
69
res->bfi.fragmented_clusters++;
70
}
71
prev_off = off;
72
+ res->bfi.allocated_clusters++;
73
}
54
+}
74
+}
55
+
75
+
56
+trap "_cleanup; exit \$status" 0 1 2 3 15
76
+static int coroutine_fn GRAPH_RDLOCK
57
77
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
58
# get standard environment, filters and checks
78
+ BdrvCheckMode fix)
59
. ./common.rc
79
+{
60
@@ -XXX,XX +XXX,XX @@ _send_qemu_cmd $QEMU_HANDLE \
80
+ BDRVParallelsState *s = bs->opaque;
61
81
+ int ret;
62
wait=1 _cleanup_qemu
82
+
63
83
+ qemu_co_mutex_lock(&s->lock);
64
-_cleanup_test_img
84
+
65
-rm -f "$TEST_DIR/source.$IMGFMT"
85
+ parallels_check_unclean(bs, res, fix);
66
86
+
67
# success, all done
87
+ ret = parallels_check_outside_image(bs, res, fix);
68
echo '*** done'
88
+ if (ret < 0) {
69
diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102
89
+ goto out;
70
index XXXXXXX..XXXXXXX 100755
90
+ }
71
--- a/tests/qemu-iotests/102
91
+
72
+++ b/tests/qemu-iotests/102
92
+ ret = parallels_check_leak(bs, res, fix);
73
@@ -XXX,XX +XXX,XX @@ seq=$(basename $0)
93
+ if (ret < 0) {
74
echo "QA output created by $seq"
94
+ goto out;
75
95
+ }
76
here=$PWD
96
+
77
-status=1    # failure is the default!
97
+ parallels_collect_statistics(bs, res, fix);
78
+status=1 # failure is the default!
98
79
99
out:
80
_cleanup()
100
qemu_co_mutex_unlock(&s->lock);
81
{
82
-    _cleanup_test_img
83
+ _cleanup_qemu
84
+ _cleanup_test_img
85
}
86
trap "_cleanup; exit \$status" 0 1 2 3 15
87
88
diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109
89
index XXXXXXX..XXXXXXX 100755
90
--- a/tests/qemu-iotests/109
91
+++ b/tests/qemu-iotests/109
92
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
93
94
_cleanup()
95
{
96
+ _cleanup_qemu
97
rm -f $TEST_IMG.src
98
    _cleanup_test_img
99
}
100
diff --git a/tests/qemu-iotests/117 b/tests/qemu-iotests/117
101
index XXXXXXX..XXXXXXX 100755
102
--- a/tests/qemu-iotests/117
103
+++ b/tests/qemu-iotests/117
104
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
105
106
_cleanup()
107
{
108
+ _cleanup_qemu
109
    _cleanup_test_img
110
}
111
trap "_cleanup; exit \$status" 0 1 2 3 15
112
diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130
113
index XXXXXXX..XXXXXXX 100755
114
--- a/tests/qemu-iotests/130
115
+++ b/tests/qemu-iotests/130
116
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
117
118
_cleanup()
119
{
120
+ _cleanup_qemu
121
_cleanup_test_img
122
}
123
trap "_cleanup; exit \$status" 0 1 2 3 15
124
diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140
125
index XXXXXXX..XXXXXXX 100755
126
--- a/tests/qemu-iotests/140
127
+++ b/tests/qemu-iotests/140
128
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
129
130
_cleanup()
131
{
132
+ _cleanup_qemu
133
_cleanup_test_img
134
rm -f "$TEST_DIR/nbd"
135
}
136
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
137
index XXXXXXX..XXXXXXX 100755
138
--- a/tests/qemu-iotests/141
139
+++ b/tests/qemu-iotests/141
140
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
141
142
_cleanup()
143
{
144
+ _cleanup_qemu
145
_cleanup_test_img
146
rm -f "$TEST_DIR/{b,m,o}.$IMGFMT"
147
}
148
diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143
149
index XXXXXXX..XXXXXXX 100755
150
--- a/tests/qemu-iotests/143
151
+++ b/tests/qemu-iotests/143
152
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
153
154
_cleanup()
155
{
156
+ _cleanup_qemu
157
rm -f "$TEST_DIR/nbd"
158
}
159
trap "_cleanup; exit \$status" 0 1 2 3 15
160
diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156
161
index XXXXXXX..XXXXXXX 100755
162
--- a/tests/qemu-iotests/156
163
+++ b/tests/qemu-iotests/156
164
@@ -XXX,XX +XXX,XX @@ status=1    # failure is the default!
165
166
_cleanup()
167
{
168
+ _cleanup_qemu
169
rm -f "$TEST_IMG{,.target}{,.backing,.overlay}"
170
}
171
trap "_cleanup; exit \$status" 0 1 2 3 15
172
--
101
--
173
2.9.3
102
2.40.1
174
175
diff view generated by jsdifflib
1
Signed-off-by: Jeff Cody <jcody@redhat.com>
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2
3
Reviewed-by: John Snow <jsnow@redhat.com>
3
Replace the way we use the mutex in parallels_co_check() for simpler
4
Message-id: 00aed7ffdd7be4b9ed9ce1007d50028a72b34ebe.1491597120.git.jcody@redhat.com
4
and less error prone code.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Message-Id: <20230424093147.197643-12-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
5
---
11
---
6
block.c | 14 ++++++++------
12
block/parallels.c | 33 ++++++++++++++-------------------
7
1 file changed, 8 insertions(+), 6 deletions(-)
13
1 file changed, 14 insertions(+), 19 deletions(-)
8
14
9
diff --git a/block.c b/block.c
15
diff --git a/block/parallels.c b/block/parallels.c
10
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
11
--- a/block.c
17
--- a/block/parallels.c
12
+++ b/block.c
18
+++ b/block/parallels.c
13
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
19
@@ -XXX,XX +XXX,XX @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
14
BlockDriver *drv;
20
BDRVParallelsState *s = bs->opaque;
15
QemuOpts *opts;
21
int ret;
16
const char *value;
22
17
+ bool read_only;
23
- qemu_co_mutex_lock(&s->lock);
18
24
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
19
assert(reopen_state != NULL);
25
+ parallels_check_unclean(bs, res, fix);
20
assert(reopen_state->bs->drv != NULL);
26
21
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
27
- parallels_check_unclean(bs, res, fix);
22
qdict_put(reopen_state->options, "driver", qstring_from_str(value));
28
+ ret = parallels_check_outside_image(bs, res, fix);
29
+ if (ret < 0) {
30
+ return ret;
31
+ }
32
33
- ret = parallels_check_outside_image(bs, res, fix);
34
- if (ret < 0) {
35
- goto out;
36
- }
37
+ ret = parallels_check_leak(bs, res, fix);
38
+ if (ret < 0) {
39
+ return ret;
40
+ }
41
42
- ret = parallels_check_leak(bs, res, fix);
43
- if (ret < 0) {
44
- goto out;
45
+ parallels_collect_statistics(bs, res, fix);
23
}
46
}
24
47
25
- /* if we are to stay read-only, do not allow permission change
48
- parallels_collect_statistics(bs, res, fix);
26
- * to r/w */
49
-
27
- if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
50
-out:
28
- reopen_state->flags & BDRV_O_RDWR) {
51
- qemu_co_mutex_unlock(&s->lock);
29
- error_setg(errp, "Node '%s' is read only",
52
-
30
- bdrv_get_device_or_node_name(reopen_state->bs));
53
- if (ret == 0) {
31
+ /* If we are to stay read-only, do not allow permission change
54
- ret = bdrv_co_flush(bs);
32
+ * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
55
- if (ret < 0) {
33
+ * not set, or if the BDS still has copy_on_read enabled */
56
- res->check_errors++;
34
+ read_only = !(reopen_state->flags & BDRV_O_RDWR);
57
- }
35
+ ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err);
58
+ ret = bdrv_co_flush(bs);
36
+ if (local_err) {
59
+ if (ret < 0) {
37
+ error_propagate(errp, local_err);
60
+ res->check_errors++;
38
goto error;
39
}
61
}
40
62
63
return ret;
41
--
64
--
42
2.9.3
65
2.40.1
43
44
diff view generated by jsdifflib
1
The protocol VXHS does not support image creation. Some tests expect
1
From: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
2
to be able to create images through the protocol. Exclude VXHS from
3
these tests.
4
2
5
Signed-off-by: Jeff Cody <jcody@redhat.com>
3
All the offsets in the BAT must be lower than the file size.
4
Fix the condition so that the check is correct.
5
6
Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
7
Reviewed-by: Denis V. Lunev <den@openvz.org>
8
Message-Id: <20230424093147.197643-13-alexander.ivanov@virtuozzo.com>
9
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
10
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
6
---
11
---
7
tests/qemu-iotests/017 | 1 +
12
block/parallels.c | 2 +-
8
tests/qemu-iotests/020 | 1 +
13
1 file changed, 1 insertion(+), 1 deletion(-)
9
tests/qemu-iotests/029 | 1 +
10
tests/qemu-iotests/073 | 1 +
11
tests/qemu-iotests/114 | 1 +
12
tests/qemu-iotests/130 | 1 +
13
tests/qemu-iotests/134 | 1 +
14
tests/qemu-iotests/156 | 1 +
15
tests/qemu-iotests/158 | 1 +
16
9 files changed, 9 insertions(+)
17
14
18
diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017
15
diff --git a/block/parallels.c b/block/parallels.c
19
index XXXXXXX..XXXXXXX 100755
16
index XXXXXXX..XXXXXXX 100644
20
--- a/tests/qemu-iotests/017
17
--- a/block/parallels.c
21
+++ b/tests/qemu-iotests/017
18
+++ b/block/parallels.c
22
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
19
@@ -XXX,XX +XXX,XX @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
23
# Any format supporting backing files
20
high_off = 0;
24
_supported_fmt qcow qcow2 vmdk qed
21
for (i = 0; i < s->bat_size; i++) {
25
_supported_proto generic
22
off = bat2sect(s, i) << BDRV_SECTOR_BITS;
26
+_unsupported_proto vxhs
23
- if (off > size) {
27
_supported_os Linux
24
+ if (off + s->cluster_size > size) {
28
_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
25
fprintf(stderr, "%s cluster %u is outside image\n",
29
26
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
30
diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020
27
res->corruptions++;
31
index XXXXXXX..XXXXXXX 100755
32
--- a/tests/qemu-iotests/020
33
+++ b/tests/qemu-iotests/020
34
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
35
# Any format supporting backing files
36
_supported_fmt qcow qcow2 vmdk qed
37
_supported_proto generic
38
+_unsupported_proto vxhs
39
_supported_os Linux
40
_unsupported_imgopts "subformat=monolithicFlat" \
41
"subformat=twoGbMaxExtentFlat" \
42
diff --git a/tests/qemu-iotests/029 b/tests/qemu-iotests/029
43
index XXXXXXX..XXXXXXX 100755
44
--- a/tests/qemu-iotests/029
45
+++ b/tests/qemu-iotests/029
46
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
47
# Any format supporting intenal snapshots
48
_supported_fmt qcow2
49
_supported_proto generic
50
+_unsupported_proto vxhs
51
_supported_os Linux
52
# Internal snapshots are (currently) impossible with refcount_bits=1
53
_unsupported_imgopts 'refcount_bits=1[^0-9]'
54
diff --git a/tests/qemu-iotests/073 b/tests/qemu-iotests/073
55
index XXXXXXX..XXXXXXX 100755
56
--- a/tests/qemu-iotests/073
57
+++ b/tests/qemu-iotests/073
58
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
59
60
_supported_fmt qcow2
61
_supported_proto generic
62
+_unsupported_proto vxhs
63
_supported_os Linux
64
65
CLUSTER_SIZE=64k
66
diff --git a/tests/qemu-iotests/114 b/tests/qemu-iotests/114
67
index XXXXXXX..XXXXXXX 100755
68
--- a/tests/qemu-iotests/114
69
+++ b/tests/qemu-iotests/114
70
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
71
72
_supported_fmt qcow2
73
_supported_proto generic
74
+_unsupported_proto vxhs
75
_supported_os Linux
76
77
78
diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130
79
index XXXXXXX..XXXXXXX 100755
80
--- a/tests/qemu-iotests/130
81
+++ b/tests/qemu-iotests/130
82
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
83
84
_supported_fmt qcow2
85
_supported_proto generic
86
+_unsupported_proto vxhs
87
_supported_os Linux
88
89
qemu_comm_method="monitor"
90
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
91
index XXXXXXX..XXXXXXX 100755
92
--- a/tests/qemu-iotests/134
93
+++ b/tests/qemu-iotests/134
94
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
95
96
_supported_fmt qcow2
97
_supported_proto generic
98
+_unsupported_proto vxhs
99
_supported_os Linux
100
101
102
diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156
103
index XXXXXXX..XXXXXXX 100755
104
--- a/tests/qemu-iotests/156
105
+++ b/tests/qemu-iotests/156
106
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
107
108
_supported_fmt qcow2 qed
109
_supported_proto generic
110
+_unsupported_proto vxhs
111
_supported_os Linux
112
113
# Create source disk
114
diff --git a/tests/qemu-iotests/158 b/tests/qemu-iotests/158
115
index XXXXXXX..XXXXXXX 100755
116
--- a/tests/qemu-iotests/158
117
+++ b/tests/qemu-iotests/158
118
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
119
120
_supported_fmt qcow2
121
_supported_proto generic
122
+_unsupported_proto vxhs
123
_supported_os Linux
124
125
126
--
28
--
127
2.9.3
29
2.40.1
128
129
diff view generated by jsdifflib
1
The BDRV_O_ALLOW_RDWR flag allows / prohibits the changing of
1
From: Jean-Louis Dupond <jean-louis@dupond.be>
2
the BDS 'read_only' state, but there are a few places where it
2
3
is ignored. In the bdrv_set_read_only() helper, make sure to
3
When we for example have a sparse qcow2 image and discard: unmap is enabled,
4
honor the flag.
4
there can be a lot of fragmentation in the image after some time, especially on VMs
5
5
that do a lot of writes/deletes.
6
Signed-off-by: Jeff Cody <jcody@redhat.com>
6
This causes the qcow2 image to grow even over 110% of its virtual size,
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
because the free gaps in the image get too small to allocate new
8
Reviewed-by: John Snow <jsnow@redhat.com>
8
continuous clusters. So it allocates new space at the end of the image.
9
Message-id: be2e5fb2d285cbece2b6d06bed54a6f56520d251.1491597120.git.jcody@redhat.com
9
10
Disabling discard is not an option, as discard is needed to keep the
11
incremental backup size as low as possible. Without discard, the
12
incremental backups would become large, as qemu thinks it's just dirty
13
blocks but it doesn't know the blocks are unneeded.
14
So we need to avoid fragmentation but also 'empty' the unneeded blocks in
15
the image to have a small incremental backup.
16
17
In addition, we also want to send the discards further down the stack, so
18
the underlying blocks are still discarded.
19
20
Therefore, we introduce a new qcow2 option "discard-no-unref".
21
When setting this option to true, discards will no longer have the qcow2
22
driver relinquish cluster allocations. Other than that, the request is
23
handled as normal: All clusters in range are marked as zero, and, if
24
pass-discard-request is true, it is passed further down the stack.
25
The only difference is that the now-zero clusters are preallocated
26
instead of being unallocated.
27
This will avoid fragmentation on the qcow2 image.
28
29
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1621
30
Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
31
Message-Id: <20230605084523.34134-2-jean-louis@dupond.be>
32
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
33
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
10
---
34
---
11
block.c | 7 +++++++
35
qapi/block-core.json | 12 ++++++++++++
12
1 file changed, 7 insertions(+)
36
block/qcow2.h | 3 +++
13
37
block/qcow2-cluster.c | 32 ++++++++++++++++++++++++++++----
14
diff --git a/block.c b/block.c
38
block/qcow2.c | 18 ++++++++++++++++++
15
index XXXXXXX..XXXXXXX 100644
39
qemu-options.hx | 12 ++++++++++++
16
--- a/block.c
40
5 files changed, 73 insertions(+), 4 deletions(-)
17
+++ b/block.c
41
18
@@ -XXX,XX +XXX,XX @@ int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
42
diff --git a/qapi/block-core.json b/qapi/block-core.json
19
return -EINVAL;
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qapi/block-core.json
45
+++ b/qapi/block-core.json
46
@@ -XXX,XX +XXX,XX @@
47
# @pass-discard-other: whether discard requests for the data source
48
# should be issued on other occasions where a cluster gets freed
49
#
50
+# @discard-no-unref: when enabled, discards from the guest will not cause
51
+# cluster allocations to be relinquished. This prevents qcow2 fragmentation
52
+# that would be caused by such discards. Besides potential
53
+# performance degradation, such fragmentation can lead to increased
54
+# allocation of clusters past the end of the image file,
55
+# resulting in image files whose file length can grow much larger
56
+# than their guest disk size would suggest.
57
+# If image file length is of concern (e.g. when storing qcow2
58
+# images directly on block devices), you should consider enabling
59
+# this option. (since 8.1)
60
+#
61
# @overlap-check: which overlap checks to perform for writes to the
62
# image, defaults to 'cached' (since 2.2)
63
#
64
@@ -XXX,XX +XXX,XX @@
65
'*pass-discard-request': 'bool',
66
'*pass-discard-snapshot': 'bool',
67
'*pass-discard-other': 'bool',
68
+ '*discard-no-unref': 'bool',
69
'*overlap-check': 'Qcow2OverlapChecks',
70
'*cache-size': 'int',
71
'*l2-cache-size': 'int',
72
diff --git a/block/qcow2.h b/block/qcow2.h
73
index XXXXXXX..XXXXXXX 100644
74
--- a/block/qcow2.h
75
+++ b/block/qcow2.h
76
@@ -XXX,XX +XXX,XX @@
77
#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
78
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
79
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
80
+#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref"
81
#define QCOW2_OPT_OVERLAP "overlap-check"
82
#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
83
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
84
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State {
85
86
bool discard_passthrough[QCOW2_DISCARD_MAX];
87
88
+ bool discard_no_unref;
89
+
90
int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
91
bool signaled_corruption;
92
93
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/block/qcow2-cluster.c
96
+++ b/block/qcow2-cluster.c
97
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
98
uint64_t new_l2_bitmap = old_l2_bitmap;
99
QCow2ClusterType cluster_type =
100
qcow2_get_cluster_type(bs, old_l2_entry);
101
+ bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
102
+ !full_discard &&
103
+ (s->discard_no_unref &&
104
+ type == QCOW2_DISCARD_REQUEST);
105
106
/*
107
* If full_discard is true, the cluster should not read back as zeroes,
108
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
109
new_l2_entry = new_l2_bitmap = 0;
110
} else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
111
if (has_subclusters(s)) {
112
- new_l2_entry = 0;
113
+ if (keep_reference) {
114
+ new_l2_entry = old_l2_entry;
115
+ } else {
116
+ new_l2_entry = 0;
117
+ }
118
new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
119
} else {
120
- new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
121
+ if (s->qcow_version >= 3) {
122
+ if (keep_reference) {
123
+ new_l2_entry |= QCOW_OFLAG_ZERO;
124
+ } else {
125
+ new_l2_entry = QCOW_OFLAG_ZERO;
126
+ }
127
+ } else {
128
+ new_l2_entry = 0;
129
+ }
130
}
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
134
if (has_subclusters(s)) {
135
set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
136
}
137
- /* Then decrease the refcount */
138
- qcow2_free_any_cluster(bs, old_l2_entry, type);
139
+ if (!keep_reference) {
140
+ /* Then decrease the refcount */
141
+ qcow2_free_any_cluster(bs, old_l2_entry, type);
142
+ } else if (s->discard_passthrough[type] &&
143
+ (cluster_type == QCOW2_CLUSTER_NORMAL ||
144
+ cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
145
+ /* If we keep the reference, pass on the discard still */
146
+ bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
147
+ s->cluster_size);
148
+ }
20
}
149
}
21
150
22
+ /* Do not clear read_only if it is prohibited */
151
qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
23
+ if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
152
diff --git a/block/qcow2.c b/block/qcow2.c
24
+ error_setg(errp, "Node '%s' is read only",
153
index XXXXXXX..XXXXXXX 100644
25
+ bdrv_get_device_or_node_name(bs));
154
--- a/block/qcow2.c
26
+ return -EPERM;
155
+++ b/block/qcow2.c
156
@@ -XXX,XX +XXX,XX @@ static const char *const mutable_opts[] = {
157
QCOW2_OPT_DISCARD_REQUEST,
158
QCOW2_OPT_DISCARD_SNAPSHOT,
159
QCOW2_OPT_DISCARD_OTHER,
160
+ QCOW2_OPT_DISCARD_NO_UNREF,
161
QCOW2_OPT_OVERLAP,
162
QCOW2_OPT_OVERLAP_TEMPLATE,
163
QCOW2_OPT_OVERLAP_MAIN_HEADER,
164
@@ -XXX,XX +XXX,XX @@ static QemuOptsList qcow2_runtime_opts = {
165
.type = QEMU_OPT_BOOL,
166
.help = "Generate discard requests when other clusters are freed",
167
},
168
+ {
169
+ .name = QCOW2_OPT_DISCARD_NO_UNREF,
170
+ .type = QEMU_OPT_BOOL,
171
+ .help = "Do not unreference discarded clusters",
172
+ },
173
{
174
.name = QCOW2_OPT_OVERLAP,
175
.type = QEMU_OPT_STRING,
176
@@ -XXX,XX +XXX,XX @@ typedef struct Qcow2ReopenState {
177
bool use_lazy_refcounts;
178
int overlap_check;
179
bool discard_passthrough[QCOW2_DISCARD_MAX];
180
+ bool discard_no_unref;
181
uint64_t cache_clean_interval;
182
QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
183
} Qcow2ReopenState;
184
@@ -XXX,XX +XXX,XX @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
185
r->discard_passthrough[QCOW2_DISCARD_OTHER] =
186
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
187
188
+ r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
189
+ false);
190
+ if (r->discard_no_unref && s->qcow_version < 3) {
191
+ error_setg(errp,
192
+ "discard-no-unref is only supported since qcow2 version 3");
193
+ ret = -EINVAL;
194
+ goto fail;
27
+ }
195
+ }
28
+
196
+
29
bs->read_only = read_only;
197
switch (s->crypt_method_header) {
30
return 0;
198
case QCOW_CRYPT_NONE:
31
}
199
if (encryptfmt) {
200
@@ -XXX,XX +XXX,XX @@ static void qcow2_update_options_commit(BlockDriverState *bs,
201
s->discard_passthrough[i] = r->discard_passthrough[i];
202
}
203
204
+ s->discard_no_unref = r->discard_no_unref;
205
+
206
if (s->cache_clean_interval != r->cache_clean_interval) {
207
cache_clean_timer_del(bs);
208
s->cache_clean_interval = r->cache_clean_interval;
209
diff --git a/qemu-options.hx b/qemu-options.hx
210
index XXXXXXX..XXXXXXX 100644
211
--- a/qemu-options.hx
212
+++ b/qemu-options.hx
213
@@ -XXX,XX +XXX,XX @@ SRST
214
issued on other occasions where a cluster gets freed
215
(on/off; default: off)
216
217
+ ``discard-no-unref``
218
+ When enabled, discards from the guest will not cause cluster
219
+ allocations to be relinquished. This prevents qcow2 fragmentation
220
+ that would be caused by such discards. Besides potential
221
+ performance degradation, such fragmentation can lead to increased
222
+ allocation of clusters past the end of the image file,
223
+ resulting in image files whose file length can grow much larger
224
+ than their guest disk size would suggest.
225
+ If image file length is of concern (e.g. when storing qcow2
226
+ images directly on block devices), you should consider enabling
227
+ this option.
228
+
229
``overlap-check``
230
Which overlap checks to perform for writes to the image
231
(none/constant/cached/all; default: cached). For details or
32
--
232
--
33
2.9.3
233
2.40.1
34
35
diff view generated by jsdifflib