1
The following changes since commit c25e8bba1f546ea72744ccfab77f8a9e8a323be8:
1
The following changes since commit 98b2e3c9ab3abfe476a2b02f8f51813edb90e72d:
2
2
3
Merge remote-tracking branch 'remotes/otubo/tags/pull-seccomp-20180601' into staging (2018-06-01 13:11:30 +0100)
3
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging (2019-10-08 16:08:35 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/stefanha/qemu.git tags/block-pull-request
7
https://github.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 21891a5a3011608845b5d7f1f9cce60cdc2bcc62:
9
for you to fetch changes up to 69de48445a0d6169f1e2a6c5bfab994e1c810e33:
10
10
11
main-loop: drop spin_counter (2018-06-01 16:01:29 +0100)
11
test-bdrv-drain: fix iothread_join() hang (2019-10-14 09:48:01 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Pull request
15
15
16
* Copy offloading for qemu-img convert (iSCSI, raw, and qcow2)
17
18
If the underlying storage supports copy offloading, qemu-img convert will
19
use it instead of performing reads and writes. This avoids data transfers
20
and thus frees up storage bandwidth for other purposes. SCSI EXTENDED COPY
21
and Linux copy_file_range(2) are used to implement this optimization.
22
23
* Drop spurious "WARNING: I/O thread spun for 1000 iterations" warning
24
25
----------------------------------------------------------------
16
----------------------------------------------------------------
26
17
27
Fam Zheng (10):
18
Stefan Hajnoczi (1):
28
block: Introduce API for copy offloading
19
test-bdrv-drain: fix iothread_join() hang
29
raw: Check byte range uniformly
30
raw: Implement copy offloading
31
qcow2: Implement copy offloading
32
file-posix: Implement bdrv_co_copy_range
33
iscsi: Query and save device designator when opening
34
iscsi: Create and use iscsi_co_wait_for_task
35
iscsi: Implement copy offloading
36
block-backend: Add blk_co_copy_range
37
qemu-img: Convert with copy offloading
38
20
39
Stefan Hajnoczi (1):
21
tests/iothread.c | 10 ++++++++--
40
main-loop: drop spin_counter
22
1 file changed, 8 insertions(+), 2 deletions(-)
41
42
configure | 17 ++
43
include/block/block.h | 32 ++++
44
include/block/block_int.h | 38 ++++
45
include/block/raw-aio.h | 10 +-
46
include/scsi/constants.h | 4 +
47
include/sysemu/block-backend.h | 4 +
48
block/block-backend.c | 18 ++
49
block/file-posix.c | 98 +++++++++-
50
block/io.c | 97 ++++++++++
51
block/iscsi.c | 314 +++++++++++++++++++++++++++----
52
block/qcow2.c | 229 +++++++++++++++++++---
53
block/raw-format.c | 96 +++++++---
54
qemu-img.c | 50 ++++-
55
util/main-loop.c | 25 ---
56
tests/qemu-iotests/common.filter | 1 -
57
15 files changed, 908 insertions(+), 125 deletions(-)
58
23
59
--
24
--
60
2.17.1
25
2.21.0
61
26
62
27
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
Introduce the bdrv_co_copy_range() API for copy offloading. Block
4
drivers implementing this API support efficient copy operations that
5
avoid reading each block from the source device and writing it to the
6
destination devices. Examples of copy offload primitives are SCSI
7
EXTENDED COPY and Linux copy_file_range(2).
8
9
Signed-off-by: Fam Zheng <famz@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-id: 20180601092648.24614-2-famz@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
include/block/block.h | 32 +++++++++++++
15
include/block/block_int.h | 38 +++++++++++++++
16
block/io.c | 97 +++++++++++++++++++++++++++++++++++++++
17
3 files changed, 167 insertions(+)
18
19
diff --git a/include/block/block.h b/include/block/block.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/include/block/block.h
22
+++ b/include/block/block.h
23
@@ -XXX,XX +XXX,XX @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
24
*/
25
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
26
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
27
+
28
+/**
29
+ *
30
+ * bdrv_co_copy_range:
31
+ *
32
+ * Do offloaded copy between two children. If the operation is not implemented
33
+ * by the driver, or if the backend storage doesn't support it, a negative
34
+ * error code will be returned.
35
+ *
36
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
37
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
38
+ * calling copy_file_range(2)) after the first error, thus it should fall back
39
+ * to a read+write path in the caller level.
40
+ *
41
+ * @src: Source child to copy data from
42
+ * @src_offset: offset in @src image to read data
43
+ * @dst: Destination child to copy data to
44
+ * @dst_offset: offset in @dst image to write data
45
+ * @bytes: number of bytes to copy
46
+ * @flags: request flags. Must be one of:
47
+ * 0 - actually read data from src;
48
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
49
+ * write on @dst as if bdrv_co_pwrite_zeroes is
50
+ * called. Used to simplify caller code, or
51
+ * during BlockDriver.bdrv_co_copy_range_from()
52
+ * recursion.
53
+ *
54
+ * Returns: 0 if succeeded; negative error code if failed.
55
+ **/
56
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
57
+ BdrvChild *dst, uint64_t dst_offset,
58
+ uint64_t bytes, BdrvRequestFlags flags);
59
#endif
60
diff --git a/include/block/block_int.h b/include/block/block_int.h
61
index XXXXXXX..XXXXXXX 100644
62
--- a/include/block/block_int.h
63
+++ b/include/block/block_int.h
64
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
65
int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
66
int64_t offset, int bytes);
67
68
+ /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
69
+ * and invoke bdrv_co_copy_range_from(child, ...), or invoke
70
+ * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
71
+ *
72
+ * See the comment of bdrv_co_copy_range for the parameter and return value
73
+ * semantics.
74
+ */
75
+ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
76
+ BdrvChild *src,
77
+ uint64_t offset,
78
+ BdrvChild *dst,
79
+ uint64_t dst_offset,
80
+ uint64_t bytes,
81
+ BdrvRequestFlags flags);
82
+
83
+ /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
84
+ * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
85
+ * operation if @bs is the leaf and @src has the same BlockDriver. Return
86
+ * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
87
+ *
88
+ * See the comment of bdrv_co_copy_range for the parameter and return value
89
+ * semantics.
90
+ */
91
+ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
92
+ BdrvChild *src,
93
+ uint64_t src_offset,
94
+ BdrvChild *dst,
95
+ uint64_t dst_offset,
96
+ uint64_t bytes,
97
+ BdrvRequestFlags flags);
98
+
99
/*
100
* Building block for bdrv_block_status[_above] and
101
* bdrv_is_allocated[_above]. The driver should answer only
102
@@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs);
103
104
void blockdev_close_all_bdrv_states(void);
105
106
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
107
+ BdrvChild *dst, uint64_t dst_offset,
108
+ uint64_t bytes, BdrvRequestFlags flags);
109
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
110
+ BdrvChild *dst, uint64_t dst_offset,
111
+ uint64_t bytes, BdrvRequestFlags flags);
112
+
113
#endif /* BLOCK_INT_H */
114
diff --git a/block/io.c b/block/io.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/block/io.c
117
+++ b/block/io.c
118
@@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
119
bdrv_unregister_buf(child->bs, host);
120
}
121
}
122
+
123
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
124
+ uint64_t src_offset,
125
+ BdrvChild *dst,
126
+ uint64_t dst_offset,
127
+ uint64_t bytes,
128
+ BdrvRequestFlags flags,
129
+ bool recurse_src)
130
+{
131
+ int ret;
132
+
133
+ if (!src || !dst || !src->bs || !dst->bs) {
134
+ return -ENOMEDIUM;
135
+ }
136
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
137
+ if (ret) {
138
+ return ret;
139
+ }
140
+
141
+ ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
142
+ if (ret) {
143
+ return ret;
144
+ }
145
+ if (flags & BDRV_REQ_ZERO_WRITE) {
146
+ return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
147
+ }
148
+
149
+ if (!src->bs->drv->bdrv_co_copy_range_from
150
+ || !dst->bs->drv->bdrv_co_copy_range_to
151
+ || src->bs->encrypted || dst->bs->encrypted) {
152
+ return -ENOTSUP;
153
+ }
154
+ if (recurse_src) {
155
+ return src->bs->drv->bdrv_co_copy_range_from(src->bs,
156
+ src, src_offset,
157
+ dst, dst_offset,
158
+ bytes, flags);
159
+ } else {
160
+ return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
161
+ src, src_offset,
162
+ dst, dst_offset,
163
+ bytes, flags);
164
+ }
165
+}
166
+
167
+/* Copy range from @src to @dst.
168
+ *
169
+ * See the comment of bdrv_co_copy_range for the parameter and return value
170
+ * semantics. */
171
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
172
+ BdrvChild *dst, uint64_t dst_offset,
173
+ uint64_t bytes, BdrvRequestFlags flags)
174
+{
175
+ return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
176
+ bytes, flags, true);
177
+}
178
+
179
+/* Copy range from @src to @dst.
180
+ *
181
+ * See the comment of bdrv_co_copy_range for the parameter and return value
182
+ * semantics. */
183
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
184
+ BdrvChild *dst, uint64_t dst_offset,
185
+ uint64_t bytes, BdrvRequestFlags flags)
186
+{
187
+ return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
188
+ bytes, flags, false);
189
+}
190
+
191
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
192
+ BdrvChild *dst, uint64_t dst_offset,
193
+ uint64_t bytes, BdrvRequestFlags flags)
194
+{
195
+ BdrvTrackedRequest src_req, dst_req;
196
+ BlockDriverState *src_bs = src->bs;
197
+ BlockDriverState *dst_bs = dst->bs;
198
+ int ret;
199
+
200
+ bdrv_inc_in_flight(src_bs);
201
+ bdrv_inc_in_flight(dst_bs);
202
+ tracked_request_begin(&src_req, src_bs, src_offset,
203
+ bytes, BDRV_TRACKED_READ);
204
+ tracked_request_begin(&dst_req, dst_bs, dst_offset,
205
+ bytes, BDRV_TRACKED_WRITE);
206
+
207
+ wait_serialising_requests(&src_req);
208
+ wait_serialising_requests(&dst_req);
209
+ ret = bdrv_co_copy_range_from(src, src_offset,
210
+ dst, dst_offset,
211
+ bytes, flags);
212
+
213
+ tracked_request_end(&src_req);
214
+ tracked_request_end(&dst_req);
215
+ bdrv_dec_in_flight(src_bs);
216
+ bdrv_dec_in_flight(dst_bs);
217
+ return ret;
218
+}
219
--
220
2.17.1
221
222
diff view generated by jsdifflib
1
From: Fam Zheng <famz@redhat.com>
1
tests/test-bdrv-drain can hang in tests/iothread.c:iothread_run():
2
2
3
We don't verify the request range against s->size in the I/O callbacks
3
while (!atomic_read(&iothread->stopping)) {
4
except for raw_co_pwritev. This is inconsistent (especially for
4
aio_poll(iothread->ctx, true);
5
raw_co_pwrite_zeroes and raw_co_pdiscard), so fix them and, at the same time,
5
}
6
make the helper reusable by the coming new callbacks.
7
6
8
Note that in most cases the block layer already verifies the request
7
The iothread_join() function works as follows:
9
byte range against our reported image length, before invoking the driver
10
callbacks. The exception is during image creation, after
11
blk_set_allow_write_beyond_eof(blk, true) is called. But in that case,
12
the requests are not directly from the user or guest. So there is no
13
visible behavior change in adding the check code.
14
8
15
The int64_t -> uint64_t inconsistency, as shown by the type casting, is
9
void iothread_join(IOThread *iothread)
16
pre-existing due to the interface.
10
{
11
iothread->stopping = true;
12
aio_notify(iothread->ctx);
13
qemu_thread_join(&iothread->thread);
17
14
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
If iothread_run() checks iothread->stopping before the iothread_join()
19
Reviewed-by: Eric Blake <eblake@redhat.com>
16
thread sets stopping to true, then aio_notify() may be optimized away
20
Signed-off-by: Fam Zheng <famz@redhat.com>
17
and iothread_run() hangs forever in aio_poll().
21
Message-id: 20180601092648.24614-3-famz@redhat.com
18
19
The correct way to change iothread->stopping is from a BH that executes
20
within iothread_run(). This ensures that iothread->stopping is checked
21
after we set it to true.
22
23
This was already fixed for ./iothread.c (note this is a different source
24
file!) by commit 2362a28ea11c145e1a13ae79342d76dc118a72a6 ("iothread:
25
fix iothread_stop() race condition"), but not for tests/iothread.c.
26
27
Fixes: 0c330a734b51c177ab8488932ac3b0c4d63a718a
28
("aio: introduce aio_co_schedule and aio_co_wake")
29
Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
30
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
31
Message-Id: <20191003100103.331-1-stefanha@redhat.com>
22
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
23
---
33
---
24
block/raw-format.c | 64 ++++++++++++++++++++++++++++------------------
34
tests/iothread.c | 10 ++++++++--
25
1 file changed, 39 insertions(+), 25 deletions(-)
35
1 file changed, 8 insertions(+), 2 deletions(-)
26
36
27
diff --git a/block/raw-format.c b/block/raw-format.c
37
diff --git a/tests/iothread.c b/tests/iothread.c
28
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
29
--- a/block/raw-format.c
39
--- a/tests/iothread.c
30
+++ b/block/raw-format.c
40
+++ b/tests/iothread.c
31
@@ -XXX,XX +XXX,XX @@ static void raw_reopen_abort(BDRVReopenState *state)
41
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
32
state->opaque = NULL;
42
return NULL;
33
}
43
}
34
44
35
+/* Check and adjust the offset, against 'offset' and 'size' options. */
45
-void iothread_join(IOThread *iothread)
36
+static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
46
+static void iothread_stop_bh(void *opaque)
37
+ uint64_t bytes, bool is_write)
47
{
38
+{
48
+ IOThread *iothread = opaque;
39
+ BDRVRawState *s = bs->opaque;
40
+
49
+
41
+ if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
50
iothread->stopping = true;
42
+ /* There's not enough space for the write, or the read request is
51
- aio_notify(iothread->ctx);
43
+ * out-of-range. Don't read/write anything to prevent leaking out of
44
+ * the size specified in options. */
45
+ return is_write ? -ENOSPC : -EINVAL;;
46
+ }
47
+
48
+ if (*offset > INT64_MAX - s->offset) {
49
+ return -EINVAL;
50
+ }
51
+ *offset += s->offset;
52
+
53
+ return 0;
54
+}
52
+}
55
+
53
+
56
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
54
+void iothread_join(IOThread *iothread)
57
uint64_t bytes, QEMUIOVector *qiov,
55
+{
58
int flags)
56
+ aio_bh_schedule_oneshot(iothread->ctx, iothread_stop_bh, iothread);
59
{
57
qemu_thread_join(&iothread->thread);
60
- BDRVRawState *s = bs->opaque;
58
qemu_cond_destroy(&iothread->init_done_cond);
61
+ int ret;
59
qemu_mutex_destroy(&iothread->init_done_lock);
62
63
- if (offset > UINT64_MAX - s->offset) {
64
- return -EINVAL;
65
+ ret = raw_adjust_offset(bs, &offset, bytes, false);
66
+ if (ret) {
67
+ return ret;
68
}
69
- offset += s->offset;
70
71
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
72
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
73
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
74
uint64_t bytes, QEMUIOVector *qiov,
75
int flags)
76
{
77
- BDRVRawState *s = bs->opaque;
78
void *buf = NULL;
79
BlockDriver *drv;
80
QEMUIOVector local_qiov;
81
int ret;
82
83
- if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
84
- /* There's not enough space for the data. Don't write anything and just
85
- * fail to prevent leaking out of the size specified in options. */
86
- return -ENOSPC;
87
- }
88
-
89
- if (offset > UINT64_MAX - s->offset) {
90
- ret = -EINVAL;
91
- goto fail;
92
- }
93
-
94
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
95
/* Handling partial writes would be a pain - so we just
96
* require that guests have 512-byte request alignment if
97
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
98
qiov = &local_qiov;
99
}
100
101
- offset += s->offset;
102
+ ret = raw_adjust_offset(bs, &offset, bytes, true);
103
+ if (ret) {
104
+ goto fail;
105
+ }
106
107
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
108
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
109
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
110
int64_t offset, int bytes,
111
BdrvRequestFlags flags)
112
{
113
- BDRVRawState *s = bs->opaque;
114
- if (offset > UINT64_MAX - s->offset) {
115
- return -EINVAL;
116
+ int ret;
117
+
118
+ ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
119
+ if (ret) {
120
+ return ret;
121
}
122
- offset += s->offset;
123
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
124
}
125
126
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
127
int64_t offset, int bytes)
128
{
129
- BDRVRawState *s = bs->opaque;
130
- if (offset > UINT64_MAX - s->offset) {
131
- return -EINVAL;
132
+ int ret;
133
+
134
+ ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
135
+ if (ret) {
136
+ return ret;
137
}
138
- offset += s->offset;
139
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
140
}
141
142
--
60
--
143
2.17.1
61
2.21.0
144
62
145
63
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
Just pass down to ->file.
4
5
Signed-off-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20180601092648.24614-4-famz@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/raw-format.c | 32 ++++++++++++++++++++++++++++++++
11
1 file changed, 32 insertions(+)
12
13
diff --git a/block/raw-format.c b/block/raw-format.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block/raw-format.c
16
+++ b/block/raw-format.c
17
@@ -XXX,XX +XXX,XX @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
18
return bdrv_probe_geometry(bs->file->bs, geo);
19
}
20
21
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
22
+ BdrvChild *src, uint64_t src_offset,
23
+ BdrvChild *dst, uint64_t dst_offset,
24
+ uint64_t bytes, BdrvRequestFlags flags)
25
+{
26
+ int ret;
27
+
28
+ ret = raw_adjust_offset(bs, &src_offset, bytes, false);
29
+ if (ret) {
30
+ return ret;
31
+ }
32
+ return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
33
+ bytes, flags);
34
+}
35
+
36
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
37
+ BdrvChild *src, uint64_t src_offset,
38
+ BdrvChild *dst, uint64_t dst_offset,
39
+ uint64_t bytes, BdrvRequestFlags flags)
40
+{
41
+ int ret;
42
+
43
+ ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
44
+ if (ret) {
45
+ return ret;
46
+ }
47
+ return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
48
+ flags);
49
+}
50
+
51
BlockDriver bdrv_raw = {
52
.format_name = "raw",
53
.instance_size = sizeof(BDRVRawState),
54
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = {
55
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
56
.bdrv_co_pdiscard = &raw_co_pdiscard,
57
.bdrv_co_block_status = &raw_co_block_status,
58
+ .bdrv_co_copy_range_from = &raw_co_copy_range_from,
59
+ .bdrv_co_copy_range_to = &raw_co_copy_range_to,
60
.bdrv_truncate = &raw_truncate,
61
.bdrv_getlength = &raw_getlength,
62
.has_variable_length = true,
63
--
64
2.17.1
65
66
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
The two callbacks are implemented quite similarly to the read/write
4
functions: bdrv_co_copy_range_from maps for read and calls into bs->file
5
or bs->backing depending on the allocation status; bdrv_co_copy_range_to
6
maps for write and calls into bs->file.
7
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Fam Zheng <famz@redhat.com>
10
Message-id: 20180601092648.24614-5-famz@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/qcow2.c | 229 +++++++++++++++++++++++++++++++++++++++++++-------
14
1 file changed, 199 insertions(+), 30 deletions(-)
15
16
diff --git a/block/qcow2.c b/block/qcow2.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.c
19
+++ b/block/qcow2.c
20
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
21
return status;
22
}
23
24
+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
25
+ QCowL2Meta **pl2meta,
26
+ bool link_l2)
27
+{
28
+ int ret = 0;
29
+ QCowL2Meta *l2meta = *pl2meta;
30
+
31
+ while (l2meta != NULL) {
32
+ QCowL2Meta *next;
33
+
34
+ if (!ret && link_l2) {
35
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
36
+ if (ret) {
37
+ goto out;
38
+ }
39
+ }
40
+
41
+ /* Take the request off the list of running requests */
42
+ if (l2meta->nb_clusters != 0) {
43
+ QLIST_REMOVE(l2meta, next_in_flight);
44
+ }
45
+
46
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
47
+
48
+ next = l2meta->next;
49
+ g_free(l2meta);
50
+ l2meta = next;
51
+ }
52
+out:
53
+ *pl2meta = l2meta;
54
+ return ret;
55
+}
56
+
57
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
58
uint64_t bytes, QEMUIOVector *qiov,
59
int flags)
60
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
61
}
62
}
63
64
- while (l2meta != NULL) {
65
- QCowL2Meta *next;
66
-
67
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
68
- if (ret < 0) {
69
- goto fail;
70
- }
71
-
72
- /* Take the request off the list of running requests */
73
- if (l2meta->nb_clusters != 0) {
74
- QLIST_REMOVE(l2meta, next_in_flight);
75
- }
76
-
77
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
78
-
79
- next = l2meta->next;
80
- g_free(l2meta);
81
- l2meta = next;
82
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
83
+ if (ret) {
84
+ goto fail;
85
}
86
87
bytes -= cur_bytes;
88
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
89
ret = 0;
90
91
fail:
92
- while (l2meta != NULL) {
93
- QCowL2Meta *next;
94
-
95
- if (l2meta->nb_clusters != 0) {
96
- QLIST_REMOVE(l2meta, next_in_flight);
97
- }
98
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
99
-
100
- next = l2meta->next;
101
- g_free(l2meta);
102
- l2meta = next;
103
- }
104
+ qcow2_handle_l2meta(bs, &l2meta, false);
105
106
qemu_co_mutex_unlock(&s->lock);
107
108
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
109
return ret;
110
}
111
112
+static int coroutine_fn
113
+qcow2_co_copy_range_from(BlockDriverState *bs,
114
+ BdrvChild *src, uint64_t src_offset,
115
+ BdrvChild *dst, uint64_t dst_offset,
116
+ uint64_t bytes, BdrvRequestFlags flags)
117
+{
118
+ BDRVQcow2State *s = bs->opaque;
119
+ int ret;
120
+ unsigned int cur_bytes; /* number of bytes in current iteration */
121
+ BdrvChild *child = NULL;
122
+ BdrvRequestFlags cur_flags;
123
+
124
+ assert(!bs->encrypted);
125
+ qemu_co_mutex_lock(&s->lock);
126
+
127
+ while (bytes != 0) {
128
+ uint64_t copy_offset = 0;
129
+ /* prepare next request */
130
+ cur_bytes = MIN(bytes, INT_MAX);
131
+ cur_flags = flags;
132
+
133
+ ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
134
+ if (ret < 0) {
135
+ goto out;
136
+ }
137
+
138
+ switch (ret) {
139
+ case QCOW2_CLUSTER_UNALLOCATED:
140
+ if (bs->backing && bs->backing->bs) {
141
+ int64_t backing_length = bdrv_getlength(bs->backing->bs);
142
+ if (src_offset >= backing_length) {
143
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
144
+ } else {
145
+ child = bs->backing;
146
+ cur_bytes = MIN(cur_bytes, backing_length - src_offset);
147
+ copy_offset = src_offset;
148
+ }
149
+ } else {
150
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
151
+ }
152
+ break;
153
+
154
+ case QCOW2_CLUSTER_ZERO_PLAIN:
155
+ case QCOW2_CLUSTER_ZERO_ALLOC:
156
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
157
+ break;
158
+
159
+ case QCOW2_CLUSTER_COMPRESSED:
160
+ ret = -ENOTSUP;
161
+ goto out;
162
+ break;
163
+
164
+ case QCOW2_CLUSTER_NORMAL:
165
+ child = bs->file;
166
+ copy_offset += offset_into_cluster(s, src_offset);
167
+ if ((copy_offset & 511) != 0) {
168
+ ret = -EIO;
169
+ goto out;
170
+ }
171
+ break;
172
+
173
+ default:
174
+ abort();
175
+ }
176
+ qemu_co_mutex_unlock(&s->lock);
177
+ ret = bdrv_co_copy_range_from(child,
178
+ copy_offset,
179
+ dst, dst_offset,
180
+ cur_bytes, cur_flags);
181
+ qemu_co_mutex_lock(&s->lock);
182
+ if (ret < 0) {
183
+ goto out;
184
+ }
185
+
186
+ bytes -= cur_bytes;
187
+ src_offset += cur_bytes;
188
+ dst_offset += cur_bytes;
189
+ }
190
+ ret = 0;
191
+
192
+out:
193
+ qemu_co_mutex_unlock(&s->lock);
194
+ return ret;
195
+}
196
+
197
+static int coroutine_fn
198
+qcow2_co_copy_range_to(BlockDriverState *bs,
199
+ BdrvChild *src, uint64_t src_offset,
200
+ BdrvChild *dst, uint64_t dst_offset,
201
+ uint64_t bytes, BdrvRequestFlags flags)
202
+{
203
+ BDRVQcow2State *s = bs->opaque;
204
+ int offset_in_cluster;
205
+ int ret;
206
+ unsigned int cur_bytes; /* number of bytes in current iteration */
207
+ uint64_t cluster_offset;
208
+ uint8_t *cluster_data = NULL;
209
+ QCowL2Meta *l2meta = NULL;
210
+
211
+ assert(!bs->encrypted);
212
+ s->cluster_cache_offset = -1; /* disable compressed cache */
213
+
214
+ qemu_co_mutex_lock(&s->lock);
215
+
216
+ while (bytes != 0) {
217
+
218
+ l2meta = NULL;
219
+
220
+ offset_in_cluster = offset_into_cluster(s, dst_offset);
221
+ cur_bytes = MIN(bytes, INT_MAX);
222
+
223
+ /* TODO:
224
+ * If src->bs == dst->bs, we could simply copy by incrementing
225
+ * the refcnt, without copying user data.
226
+ * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
227
+ ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
228
+ &cluster_offset, &l2meta);
229
+ if (ret < 0) {
230
+ goto fail;
231
+ }
232
+
233
+ assert((cluster_offset & 511) == 0);
234
+
235
+ ret = qcow2_pre_write_overlap_check(bs, 0,
236
+ cluster_offset + offset_in_cluster, cur_bytes);
237
+ if (ret < 0) {
238
+ goto fail;
239
+ }
240
+
241
+ qemu_co_mutex_unlock(&s->lock);
242
+ ret = bdrv_co_copy_range_to(src, src_offset,
243
+ bs->file,
244
+ cluster_offset + offset_in_cluster,
245
+ cur_bytes, flags);
246
+ qemu_co_mutex_lock(&s->lock);
247
+ if (ret < 0) {
248
+ goto fail;
249
+ }
250
+
251
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
252
+ if (ret) {
253
+ goto fail;
254
+ }
255
+
256
+ bytes -= cur_bytes;
257
+ dst_offset += cur_bytes;
258
+ }
259
+ ret = 0;
260
+
261
+fail:
262
+ qcow2_handle_l2meta(bs, &l2meta, false);
263
+
264
+ qemu_co_mutex_unlock(&s->lock);
265
+
266
+ qemu_vfree(cluster_data);
267
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
268
+
269
+ return ret;
270
+}
271
+
272
static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
273
PreallocMode prealloc, Error **errp)
274
{
275
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
276
277
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
278
.bdrv_co_pdiscard = qcow2_co_pdiscard,
279
+ .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
280
+ .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
281
.bdrv_truncate = qcow2_truncate,
282
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
283
.bdrv_make_empty = qcow2_make_empty,
284
--
285
2.17.1
286
287
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
With copy_file_range(2), we can implement the bdrv_co_copy_range
4
semantics.
5
6
Signed-off-by: Fam Zheng <famz@redhat.com>
7
Message-id: 20180601092648.24614-6-famz@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
configure | 17 +++++++
11
include/block/raw-aio.h | 10 ++++-
12
block/file-posix.c | 98 +++++++++++++++++++++++++++++++++++++++--
13
3 files changed, 120 insertions(+), 5 deletions(-)
14
15
diff --git a/configure b/configure
16
index XXXXXXX..XXXXXXX 100755
17
--- a/configure
18
+++ b/configure
19
@@ -XXX,XX +XXX,XX @@ if test "$fortify_source" != "no"; then
20
fi
21
fi
22
23
+###############################################
24
+# Check if copy_file_range is provided by glibc
25
+have_copy_file_range=no
26
+cat > $TMPC << EOF
27
+#include <unistd.h>
28
+int main(void) {
29
+ copy_file_range(0, NULL, 0, NULL, 0, 0);
30
+ return 0;
31
+}
32
+EOF
33
+if compile_prog "" "" ; then
34
+ have_copy_file_range=yes
35
+fi
36
+
37
##########################################
38
# check if struct fsxattr is available via linux/fs.h
39
40
@@ -XXX,XX +XXX,XX @@ fi
41
if test "$have_fsxattr" = "yes" ; then
42
echo "HAVE_FSXATTR=y" >> $config_host_mak
43
fi
44
+if test "$have_copy_file_range" = "yes" ; then
45
+ echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak
46
+fi
47
if test "$vte" = "yes" ; then
48
echo "CONFIG_VTE=y" >> $config_host_mak
49
echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
50
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/include/block/raw-aio.h
53
+++ b/include/block/raw-aio.h
54
@@ -XXX,XX +XXX,XX @@
55
#define QEMU_AIO_FLUSH 0x0008
56
#define QEMU_AIO_DISCARD 0x0010
57
#define QEMU_AIO_WRITE_ZEROES 0x0020
58
+#define QEMU_AIO_COPY_RANGE 0x0040
59
#define QEMU_AIO_TYPE_MASK \
60
- (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
61
- QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
62
+ (QEMU_AIO_READ | \
63
+ QEMU_AIO_WRITE | \
64
+ QEMU_AIO_IOCTL | \
65
+ QEMU_AIO_FLUSH | \
66
+ QEMU_AIO_DISCARD | \
67
+ QEMU_AIO_WRITE_ZEROES | \
68
+ QEMU_AIO_COPY_RANGE)
69
70
/* AIO flags */
71
#define QEMU_AIO_MISALIGNED 0x1000
72
diff --git a/block/file-posix.c b/block/file-posix.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/block/file-posix.c
75
+++ b/block/file-posix.c
76
@@ -XXX,XX +XXX,XX @@
77
#ifdef __linux__
78
#include <sys/ioctl.h>
79
#include <sys/param.h>
80
+#include <sys/syscall.h>
81
#include <linux/cdrom.h>
82
#include <linux/fd.h>
83
#include <linux/fs.h>
84
@@ -XXX,XX +XXX,XX @@ typedef struct RawPosixAIOData {
85
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
86
off_t aio_offset;
87
int aio_type;
88
+ int aio_fd2;
89
+ off_t aio_offset2;
90
} RawPosixAIOData;
91
92
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
93
@@ -XXX,XX +XXX,XX @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
94
return -ENOTSUP;
95
}
96
97
+#ifndef HAVE_COPY_FILE_RANGE
98
+static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
99
+ off_t *out_off, size_t len, unsigned int flags)
100
+{
101
+#ifdef __NR_copy_file_range
102
+ return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
103
+ out_off, len, flags);
104
+#else
105
+ errno = ENOSYS;
106
+ return -1;
107
+#endif
108
+}
109
+#endif
110
+
111
+static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
112
+{
113
+ uint64_t bytes = aiocb->aio_nbytes;
114
+ off_t in_off = aiocb->aio_offset;
115
+ off_t out_off = aiocb->aio_offset2;
116
+
117
+ while (bytes) {
118
+ ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off,
119
+ aiocb->aio_fd2, &out_off,
120
+ bytes, 0);
121
+ if (ret == -EINTR) {
122
+ continue;
123
+ }
124
+ if (ret < 0) {
125
+ if (errno == ENOSYS) {
126
+ return -ENOTSUP;
127
+ } else {
128
+ return -errno;
129
+ }
130
+ }
131
+ if (!ret) {
132
+ /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */
133
+ return -ENOTSUP;
134
+ }
135
+ bytes -= ret;
136
+ }
137
+ return 0;
138
+}
139
+
140
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
141
{
142
int ret = -EOPNOTSUPP;
143
@@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg)
144
case QEMU_AIO_WRITE_ZEROES:
145
ret = handle_aiocb_write_zeroes(aiocb);
146
break;
147
+ case QEMU_AIO_COPY_RANGE:
148
+ ret = handle_aiocb_copy_range(aiocb);
149
+ break;
150
default:
151
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
152
ret = -EINVAL;
153
@@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg)
154
return ret;
155
}
156
157
-static int paio_submit_co(BlockDriverState *bs, int fd,
158
- int64_t offset, QEMUIOVector *qiov,
159
- int bytes, int type)
160
+static int paio_submit_co_full(BlockDriverState *bs, int fd,
161
+ int64_t offset, int fd2, int64_t offset2,
162
+ QEMUIOVector *qiov,
163
+ int bytes, int type)
164
{
165
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
166
ThreadPool *pool;
167
@@ -XXX,XX +XXX,XX @@ static int paio_submit_co(BlockDriverState *bs, int fd,
168
acb->bs = bs;
169
acb->aio_type = type;
170
acb->aio_fildes = fd;
171
+ acb->aio_fd2 = fd2;
172
+ acb->aio_offset2 = offset2;
173
174
acb->aio_nbytes = bytes;
175
acb->aio_offset = offset;
176
@@ -XXX,XX +XXX,XX @@ static int paio_submit_co(BlockDriverState *bs, int fd,
177
return thread_pool_submit_co(pool, aio_worker, acb);
178
}
179
180
+static inline int paio_submit_co(BlockDriverState *bs, int fd,
181
+ int64_t offset, QEMUIOVector *qiov,
182
+ int bytes, int type)
183
+{
184
+ return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
185
+}
186
+
187
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
188
int64_t offset, QEMUIOVector *qiov, int bytes,
189
BlockCompletionFunc *cb, void *opaque, int type)
190
@@ -XXX,XX +XXX,XX @@ static void raw_abort_perm_update(BlockDriverState *bs)
191
raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
192
}
193
194
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
195
+ BdrvChild *src, uint64_t src_offset,
196
+ BdrvChild *dst, uint64_t dst_offset,
197
+ uint64_t bytes, BdrvRequestFlags flags)
198
+{
199
+ return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags);
200
+}
201
+
202
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
202
+ BdrvChild *src, uint64_t src_offset,
203
+ BdrvChild *dst, uint64_t dst_offset,
204
+ uint64_t bytes, BdrvRequestFlags flags)
205
+{
206
+ BDRVRawState *s = bs->opaque;
207
+ BDRVRawState *src_s;
208
+
209
+ assert(dst->bs == bs);
210
+ if (src->bs->drv->bdrv_co_copy_range_to != raw_co_copy_range_to) {
211
+ return -ENOTSUP;
212
+ }
213
+
214
+ src_s = src->bs->opaque;
215
+ if (fd_open(src->bs) < 0 || fd_open(dst->bs) < 0) { /* both ends' fds are used below */
216
+ return -EIO;
217
+ }
218
+ return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset,
219
+ NULL, bytes, QEMU_AIO_COPY_RANGE);
220
+}
222
+
223
BlockDriver bdrv_file = {
224
.format_name = "file",
225
.protocol_name = "file",
226
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = {
227
.bdrv_co_pwritev = raw_co_pwritev,
228
.bdrv_aio_flush = raw_aio_flush,
229
.bdrv_aio_pdiscard = raw_aio_pdiscard,
230
+ .bdrv_co_copy_range_from = raw_co_copy_range_from,
231
+ .bdrv_co_copy_range_to = raw_co_copy_range_to,
232
.bdrv_refresh_limits = raw_refresh_limits,
233
.bdrv_io_plug = raw_aio_plug,
234
.bdrv_io_unplug = raw_aio_unplug,
235
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = {
236
.bdrv_co_pwritev = raw_co_pwritev,
237
.bdrv_aio_flush    = raw_aio_flush,
238
.bdrv_aio_pdiscard = hdev_aio_pdiscard,
239
+ .bdrv_co_copy_range_from = raw_co_copy_range_from,
240
+ .bdrv_co_copy_range_to = raw_co_copy_range_to,
241
.bdrv_refresh_limits = raw_refresh_limits,
242
.bdrv_io_plug = raw_aio_plug,
243
.bdrv_io_unplug = raw_aio_unplug,
244
--
245
2.17.1
246
247
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
The device designator data returned in INQUIRY command will be useful to
4
fill in source/target fields during copy offloading. Do this when
5
connecting to the target and save the data for later use.
6
7
Signed-off-by: Fam Zheng <famz@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 20180601092648.24614-7-famz@redhat.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
block/iscsi.c | 41 +++++++++++++++++++++++++++++++++++++++++
13
1 file changed, 41 insertions(+)
14
15
diff --git a/block/iscsi.c b/block/iscsi.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/iscsi.c
18
+++ b/block/iscsi.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct IscsiLun {
20
QemuMutex mutex;
21
struct scsi_inquiry_logical_block_provisioning lbp;
22
struct scsi_inquiry_block_limits bl;
23
+ struct scsi_inquiry_device_designator *dd;
24
unsigned char *zeroblock;
25
/* The allocmap tracks which clusters (pages) on the iSCSI target are
26
* allocated and which are not. In case a target returns zeros for
27
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
28
},
29
};
30
31
+static void iscsi_save_designator(IscsiLun *lun,
32
+ struct scsi_inquiry_device_identification *inq_di)
33
+{
34
+ struct scsi_inquiry_device_designator *desig, *copy = NULL;
35
+
36
+ for (desig = inq_di->designators; desig; desig = desig->next) {
37
+ if (desig->association ||
38
+ desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
39
+ continue;
40
+ }
41
+ /* NAA works better than T10 vendor ID based designator. */
42
+ if (!copy || copy->designator_type < desig->designator_type) {
43
+ copy = desig;
44
+ }
45
+ }
46
+ if (copy) {
47
+ lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
48
+ *lun->dd = *copy;
49
+ lun->dd->next = NULL;
50
+ lun->dd->designator = g_malloc(copy->designator_length);
51
+ memcpy(lun->dd->designator, copy->designator, copy->designator_length);
52
+ }
53
+}
54
+
55
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
56
Error **errp)
57
{
58
@@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
59
struct scsi_task *inq_task;
60
struct scsi_inquiry_logical_block_provisioning *inq_lbp;
61
struct scsi_inquiry_block_limits *inq_bl;
62
+ struct scsi_inquiry_device_identification *inq_di;
63
switch (inq_vpd->pages[i]) {
64
case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
65
inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
66
@@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
67
sizeof(struct scsi_inquiry_block_limits));
68
scsi_free_scsi_task(inq_task);
69
break;
70
+ case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
71
+ inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
72
+ SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
73
+ (void **) &inq_di, errp);
74
+ if (inq_task == NULL) {
75
+ ret = -EINVAL;
76
+ goto out;
77
+ }
78
+ iscsi_save_designator(iscsilun, inq_di);
79
+ scsi_free_scsi_task(inq_task);
80
+ break;
81
default:
82
break;
83
}
84
@@ -XXX,XX +XXX,XX @@ static void iscsi_close(BlockDriverState *bs)
85
iscsi_logout_sync(iscsi);
86
}
87
iscsi_destroy_context(iscsi);
88
+ if (iscsilun->dd) {
89
+ g_free(iscsilun->dd->designator);
90
+ g_free(iscsilun->dd);
91
+ }
92
g_free(iscsilun->zeroblock);
93
iscsi_allocmap_free(iscsilun);
94
qemu_mutex_destroy(&iscsilun->mutex);
95
--
96
2.17.1
97
98
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
This loop is repeated a growing number of times. Make a helper.
4
5
Signed-off-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Message-id: 20180601092648.24614-8-famz@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
block/iscsi.c | 54 ++++++++++++++++-----------------------------------
12
1 file changed, 17 insertions(+), 37 deletions(-)
13
14
diff --git a/block/iscsi.c b/block/iscsi.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/iscsi.c
17
+++ b/block/iscsi.c
18
@@ -XXX,XX +XXX,XX @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
19
offset / iscsilun->cluster_size) == size);
20
}
21
22
+static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
23
+ IscsiLun *iscsilun)
24
+{
25
+ while (!iTask->complete) {
26
+ iscsi_set_events(iscsilun);
27
+ qemu_mutex_unlock(&iscsilun->mutex);
28
+ qemu_coroutine_yield();
29
+ qemu_mutex_lock(&iscsilun->mutex);
30
+ }
31
+}
32
+
33
static int coroutine_fn
34
iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
35
QEMUIOVector *iov, int flags)
36
@@ -XXX,XX +XXX,XX @@ retry:
37
scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
38
iov->niov);
39
#endif
40
- while (!iTask.complete) {
41
- iscsi_set_events(iscsilun);
42
- qemu_mutex_unlock(&iscsilun->mutex);
43
- qemu_coroutine_yield();
44
- qemu_mutex_lock(&iscsilun->mutex);
45
- }
46
+ iscsi_co_wait_for_task(&iTask, iscsilun);
47
48
if (iTask.task != NULL) {
49
scsi_free_scsi_task(iTask.task);
50
@@ -XXX,XX +XXX,XX @@ retry:
51
ret = -ENOMEM;
52
goto out_unlock;
53
}
54
-
55
- while (!iTask.complete) {
56
- iscsi_set_events(iscsilun);
57
- qemu_mutex_unlock(&iscsilun->mutex);
58
- qemu_coroutine_yield();
59
- qemu_mutex_lock(&iscsilun->mutex);
60
- }
61
+ iscsi_co_wait_for_task(&iTask, iscsilun);
62
63
if (iTask.do_retry) {
64
if (iTask.task != NULL) {
65
@@ -XXX,XX +XXX,XX @@ retry:
66
#if LIBISCSI_API_VERSION < (20160603)
67
scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
68
#endif
69
- while (!iTask.complete) {
70
- iscsi_set_events(iscsilun);
71
- qemu_mutex_unlock(&iscsilun->mutex);
72
- qemu_coroutine_yield();
73
- qemu_mutex_lock(&iscsilun->mutex);
74
- }
75
76
+ iscsi_co_wait_for_task(&iTask, iscsilun);
77
if (iTask.task != NULL) {
78
scsi_free_scsi_task(iTask.task);
79
iTask.task = NULL;
80
@@ -XXX,XX +XXX,XX @@ retry:
81
return -ENOMEM;
82
}
83
84
- while (!iTask.complete) {
85
- iscsi_set_events(iscsilun);
86
- qemu_mutex_unlock(&iscsilun->mutex);
87
- qemu_coroutine_yield();
88
- qemu_mutex_lock(&iscsilun->mutex);
89
- }
90
+ iscsi_co_wait_for_task(&iTask, iscsilun);
91
92
if (iTask.task != NULL) {
93
scsi_free_scsi_task(iTask.task);
94
@@ -XXX,XX +XXX,XX @@ retry:
95
goto out_unlock;
96
}
97
98
- while (!iTask.complete) {
99
- iscsi_set_events(iscsilun);
100
- qemu_mutex_unlock(&iscsilun->mutex);
101
- qemu_coroutine_yield();
102
- qemu_mutex_lock(&iscsilun->mutex);
103
- }
104
+ iscsi_co_wait_for_task(&iTask, iscsilun);
105
106
if (iTask.task != NULL) {
107
scsi_free_scsi_task(iTask.task);
108
@@ -XXX,XX +XXX,XX @@ retry:
109
return -ENOMEM;
110
}
111
112
- while (!iTask.complete) {
113
- iscsi_set_events(iscsilun);
114
- qemu_mutex_unlock(&iscsilun->mutex);
115
- qemu_coroutine_yield();
116
- qemu_mutex_lock(&iscsilun->mutex);
117
- }
118
+ iscsi_co_wait_for_task(&iTask, iscsilun);
119
120
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
121
iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
122
--
123
2.17.1
124
125
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
Issue EXTENDED COPY (LID1) command to implement the copy_range API.
4
5
The parameter data construction code is modified from libiscsi's
6
iscsi-dd.c.
7
8
Signed-off-by: Fam Zheng <famz@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20180601092648.24614-9-famz@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
include/scsi/constants.h | 4 +
14
block/iscsi.c | 219 +++++++++++++++++++++++++++++++++++++++
15
2 files changed, 223 insertions(+)
16
17
diff --git a/include/scsi/constants.h b/include/scsi/constants.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/scsi/constants.h
20
+++ b/include/scsi/constants.h
21
@@ -XXX,XX +XXX,XX @@
22
#define MMC_PROFILE_HDDVD_RW_DL 0x005A
23
#define MMC_PROFILE_INVALID 0xFFFF
24
25
+#define XCOPY_DESC_OFFSET 16
26
+#define IDENT_DESCR_TGT_DESCR_SIZE 32
27
+#define XCOPY_BLK2BLK_SEG_DESC_SIZE 28
28
+
29
#endif
30
diff --git a/block/iscsi.c b/block/iscsi.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/block/iscsi.c
33
+++ b/block/iscsi.c
34
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
35
iscsi_allocmap_invalidate(iscsilun);
36
}
37
38
+static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
39
+ BdrvChild *src,
40
+ uint64_t src_offset,
41
+ BdrvChild *dst,
42
+ uint64_t dst_offset,
43
+ uint64_t bytes,
44
+ BdrvRequestFlags flags)
45
+{
46
+ return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags);
47
+}
48
+
49
+static struct scsi_task *iscsi_xcopy_task(int param_len)
50
+{
51
+ struct scsi_task *task;
52
+
53
+ task = g_new0(struct scsi_task, 1);
54
+
55
+ task->cdb[0] = EXTENDED_COPY;
56
+ task->cdb[10] = (param_len >> 24) & 0xFF;
57
+ task->cdb[11] = (param_len >> 16) & 0xFF;
58
+ task->cdb[12] = (param_len >> 8) & 0xFF;
59
+ task->cdb[13] = param_len & 0xFF;
60
+ task->cdb_size = 16;
61
+ task->xfer_dir = SCSI_XFER_WRITE;
62
+ task->expxferlen = param_len;
63
+
64
+ return task;
65
+}
66
+
67
+static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
68
+{
69
+ struct scsi_inquiry_device_designator *dd = lun->dd;
70
+ /* Fills a 32-byte descriptor: designator at desc[8..27], block size at desc[29..31]. */
71
+ memset(desc, 0, 32);
72
+ desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
73
+ desc[4] = dd->code_set;
74
+ desc[5] = (dd->designator_type & 0xF)
75
+ | ((dd->association & 3) << 4);
76
+ desc[7] = dd->designator_length;
77
+ memcpy(desc + 8, dd->designator, dd->designator_length < 20 ? dd->designator_length : 20);
78
+
79
+ desc[28] = 0;
80
+ desc[29] = (lun->block_size >> 16) & 0xFF;
81
+ desc[30] = (lun->block_size >> 8) & 0xFF;
82
+ desc[31] = lun->block_size & 0xFF;
83
+}
84
+
85
+static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
86
+ int dst_index)
87
+{
88
+ hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
89
+ hdr[1] = ((dc << 1) | cat) & 0xFF;
90
+ hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
91
+ /* don't account for the first 4 bytes in descriptor header*/
92
+ hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF;
93
+ hdr[4] = (src_index >> 8) & 0xFF;
94
+ hdr[5] = src_index & 0xFF;
95
+ hdr[6] = (dst_index >> 8) & 0xFF;
96
+ hdr[7] = dst_index & 0xFF;
97
+}
98
+
99
+static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
100
+ int src_index, int dst_index, int num_blks,
101
+ uint64_t src_lba, uint64_t dst_lba)
102
+{
103
+ iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
104
+
105
+ /* The caller should verify the request size */
106
+ assert(num_blks < 65536);
107
+ desc[10] = (num_blks >> 8) & 0xFF;
108
+ desc[11] = num_blks & 0xFF;
109
+ desc[12] = (src_lba >> 56) & 0xFF;
110
+ desc[13] = (src_lba >> 48) & 0xFF;
111
+ desc[14] = (src_lba >> 40) & 0xFF;
112
+ desc[15] = (src_lba >> 32) & 0xFF;
113
+ desc[16] = (src_lba >> 24) & 0xFF;
114
+ desc[17] = (src_lba >> 16) & 0xFF;
115
+ desc[18] = (src_lba >> 8) & 0xFF;
116
+ desc[19] = src_lba & 0xFF;
117
+ desc[20] = (dst_lba >> 56) & 0xFF;
118
+ desc[21] = (dst_lba >> 48) & 0xFF;
119
+ desc[22] = (dst_lba >> 40) & 0xFF;
120
+ desc[23] = (dst_lba >> 32) & 0xFF;
121
+ desc[24] = (dst_lba >> 24) & 0xFF;
122
+ desc[25] = (dst_lba >> 16) & 0xFF;
123
+ desc[26] = (dst_lba >> 8) & 0xFF;
124
+ desc[27] = dst_lba & 0xFF;
125
+}
126
+
127
+static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str,
128
+ int list_id_usage, int prio,
129
+ int tgt_desc_len,
130
+ int seg_desc_len, int inline_data_len)
131
+{
132
+ buf[0] = list_id;
133
+ buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
134
+ buf[2] = (tgt_desc_len >> 8) & 0xFF;
135
+ buf[3] = tgt_desc_len & 0xFF;
136
+ buf[8] = (seg_desc_len >> 24) & 0xFF;
137
+ buf[9] = (seg_desc_len >> 16) & 0xFF;
138
+ buf[10] = (seg_desc_len >> 8) & 0xFF;
139
+ buf[11] = seg_desc_len & 0xFF;
140
+ buf[12] = (inline_data_len >> 24) & 0xFF;
141
+ buf[13] = (inline_data_len >> 16) & 0xFF;
142
+ buf[14] = (inline_data_len >> 8) & 0xFF;
143
+ buf[15] = inline_data_len & 0xFF;
144
+}
145
+
146
+static void iscsi_xcopy_data(struct iscsi_data *data,
147
+ IscsiLun *src, int64_t src_lba,
148
+ IscsiLun *dst, int64_t dst_lba,
149
+ uint16_t num_blocks)
150
+{
151
+ uint8_t *buf;
152
+ const int src_offset = XCOPY_DESC_OFFSET;
153
+ const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
154
+ const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;
155
+
156
+ data->size = XCOPY_DESC_OFFSET +
157
+ IDENT_DESCR_TGT_DESCR_SIZE * 2 +
158
+ XCOPY_BLK2BLK_SEG_DESC_SIZE;
159
+ data->data = g_malloc0(data->size);
160
+ buf = data->data;
161
+
162
+ /* Initialise the parameter list header */
163
+ iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
164
+ 0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
165
+ XCOPY_BLK2BLK_SEG_DESC_SIZE,
166
+ 0);
167
+
168
+ /* Initialise CSCD list with one src + one dst descriptor */
169
+ iscsi_populate_target_desc(&buf[src_offset], src);
170
+ iscsi_populate_target_desc(&buf[dst_offset], dst);
171
+
172
+ /* Initialise one segment descriptor */
173
+ iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks,
174
+ src_lba, dst_lba);
175
+}
176
+
177
+static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
178
+ BdrvChild *src,
179
+ uint64_t src_offset,
180
+ BdrvChild *dst,
181
+ uint64_t dst_offset,
182
+ uint64_t bytes,
183
+ BdrvRequestFlags flags)
184
+{
185
+ IscsiLun *dst_lun = dst->bs->opaque;
186
+ IscsiLun *src_lun;
187
+ struct IscsiTask iscsi_task;
188
+ struct iscsi_data data;
189
+ int r = 0;
190
+ int block_size;
191
+
192
+ if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
193
+ return -ENOTSUP;
194
+ }
195
+ src_lun = src->bs->opaque;
196
+
197
+ if (!src_lun->dd || !dst_lun->dd) {
198
+ return -ENOTSUP;
199
+ }
200
+ if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
201
+ return -ENOTSUP;
202
+ }
203
+ if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
204
+ return -ENOTSUP;
205
+ }
206
+ if (dst_lun->block_size != src_lun->block_size ||
207
+ !dst_lun->block_size) {
208
+ return -ENOTSUP;
209
+ }
210
+
211
+ block_size = dst_lun->block_size;
212
+ if (bytes / block_size > 65535) {
213
+ return -ENOTSUP;
214
+ }
215
+
216
+ iscsi_xcopy_data(&data,
217
+ src_lun, src_offset / block_size,
218
+ dst_lun, dst_offset / block_size,
219
+ bytes / block_size);
220
+
221
+ iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
222
+
223
+ qemu_mutex_lock(&dst_lun->mutex);
224
+ iscsi_task.task = iscsi_xcopy_task(data.size);
225
+retry:
226
+ if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
227
+ iscsi_task.task, iscsi_co_generic_cb,
228
+ &data,
229
+ &iscsi_task) != 0) {
230
+ r = -EIO;
231
+ goto out_unlock;
232
+ }
233
+
234
+ iscsi_co_wait_for_task(&iscsi_task, dst_lun);
235
+
236
+ if (iscsi_task.do_retry) {
237
+ iscsi_task.complete = 0;
238
+ goto retry;
239
+ }
240
+
241
+ if (iscsi_task.status != SCSI_STATUS_GOOD) {
242
+ r = iscsi_task.err_code;
243
+ goto out_unlock;
244
+ }
245
+
246
+out_unlock:
247
+ g_free(iscsi_task.task);
248
+ qemu_mutex_unlock(&dst_lun->mutex);
249
+ g_free(iscsi_task.err_str);
250
+ return r;
251
+}
252
+
253
static QemuOptsList iscsi_create_opts = {
254
.name = "iscsi-create-opts",
255
.head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
256
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = {
257
258
.bdrv_co_block_status = iscsi_co_block_status,
259
.bdrv_co_pdiscard = iscsi_co_pdiscard,
260
+ .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
261
+ .bdrv_co_copy_range_to = iscsi_co_copy_range_to,
262
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
263
.bdrv_co_readv = iscsi_co_readv,
264
.bdrv_co_writev = iscsi_co_writev,
265
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = {
266
267
.bdrv_co_block_status = iscsi_co_block_status,
268
.bdrv_co_pdiscard = iscsi_co_pdiscard,
269
+ .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
270
+ .bdrv_co_copy_range_to = iscsi_co_copy_range_to,
271
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
272
.bdrv_co_readv = iscsi_co_readv,
273
.bdrv_co_writev = iscsi_co_writev,
274
--
275
2.17.1
276
277
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
It's a BlockBackend wrapper of the BDS interface.
4
5
Signed-off-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20180601092648.24614-10-famz@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
include/sysemu/block-backend.h | 4 ++++
11
block/block-backend.c | 18 ++++++++++++++++++
12
2 files changed, 22 insertions(+)
13
14
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/sysemu/block-backend.h
17
+++ b/include/sysemu/block-backend.h
18
@@ -XXX,XX +XXX,XX @@ void blk_set_force_allow_inactivate(BlockBackend *blk);
19
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
20
void blk_unregister_buf(BlockBackend *blk, void *host);
21
22
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
23
+ BlockBackend *blk_out, int64_t off_out,
24
+ int bytes, BdrvRequestFlags flags);
25
+
26
#endif
27
diff --git a/block/block-backend.c b/block/block-backend.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/block/block-backend.c
30
+++ b/block/block-backend.c
31
@@ -XXX,XX +XXX,XX @@ void blk_unregister_buf(BlockBackend *blk, void *host)
32
{
33
bdrv_unregister_buf(blk_bs(blk), host);
34
}
35
+
36
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
37
+ BlockBackend *blk_out, int64_t off_out,
38
+ int bytes, BdrvRequestFlags flags)
39
+{
40
+ int r;
41
+ r = blk_check_byte_request(blk_in, off_in, bytes);
42
+ if (r) {
43
+ return r;
44
+ }
45
+ r = blk_check_byte_request(blk_out, off_out, bytes);
46
+ if (r) {
47
+ return r;
48
+ }
49
+ return bdrv_co_copy_range(blk_in->root, off_in,
50
+ blk_out->root, off_out,
51
+ bytes, flags);
52
+}
53
--
54
2.17.1
55
56
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
The new blk_co_copy_range interface offers a more efficient way in the
4
case of network based storage. Make use of it to allow faster convert
5
operation.
6
7
Since copy offloading cannot do zero detection ('-S') and compression
8
(-c), only try it when these options are not used.
9
10
Signed-off-by: Fam Zheng <famz@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-id: 20180601092648.24614-11-famz@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
---
15
qemu-img.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
16
1 file changed, 48 insertions(+), 2 deletions(-)
17
18
diff --git a/qemu-img.c b/qemu-img.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/qemu-img.c
21
+++ b/qemu-img.c
22
@@ -XXX,XX +XXX,XX @@ typedef struct ImgConvertState {
23
bool compressed;
24
bool target_has_backing;
25
bool wr_in_order;
26
+ bool copy_range;
27
int min_sparse;
28
size_t cluster_sectors;
29
size_t buf_sectors;
30
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
31
return 0;
32
}
33
34
+static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
35
+ int nb_sectors)
36
+{
37
+ int n, ret;
38
+
39
+ while (nb_sectors > 0) {
40
+ BlockBackend *blk;
41
+ int src_cur;
42
+ int64_t bs_sectors, src_cur_offset;
43
+ int64_t offset;
44
+
45
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
46
+ offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
47
+ blk = s->src[src_cur];
48
+ bs_sectors = s->src_sectors[src_cur];
49
+
50
+ n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
51
+
52
+ ret = blk_co_copy_range(blk, offset, s->target,
53
+ sector_num << BDRV_SECTOR_BITS,
54
+ n << BDRV_SECTOR_BITS, 0);
55
+ if (ret < 0) {
56
+ return ret;
57
+ }
58
+
59
+ sector_num += n;
60
+ nb_sectors -= n;
61
+ }
62
+ return 0;
63
+}
64
+
65
static void coroutine_fn convert_co_do_copy(void *opaque)
66
{
67
ImgConvertState *s = opaque;
68
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque)
69
int n;
70
int64_t sector_num;
71
enum ImgConvertBlockStatus status;
72
+ bool copy_range;
73
74
qemu_co_mutex_lock(&s->lock);
75
if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
76
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque)
77
s->allocated_sectors, 0);
78
}
79
80
- if (status == BLK_DATA) {
81
+retry:
82
+ copy_range = s->copy_range && s->status == BLK_DATA;
83
+ if (status == BLK_DATA && !copy_range) {
84
ret = convert_co_read(s, sector_num, n, buf);
85
if (ret < 0) {
86
error_report("error while reading sector %" PRId64
87
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque)
88
}
89
90
if (s->ret == -EINPROGRESS) {
91
- ret = convert_co_write(s, sector_num, n, buf, status);
92
+ if (copy_range) {
93
+ ret = convert_co_copy_range(s, sector_num, n);
94
+ if (ret) {
95
+ s->copy_range = false;
96
+ goto retry;
97
+ }
98
+ } else {
99
+ ret = convert_co_write(s, sector_num, n, buf, status);
100
+ }
101
if (ret < 0) {
102
error_report("error while writing sector %" PRId64
103
": %s", sector_num, strerror(-ret));
104
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
105
ImgConvertState s = (ImgConvertState) {
106
/* Need at least 4k of zeros for sparse detection */
107
.min_sparse = 8,
108
+ .copy_range = true,
109
.buf_sectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
110
.wr_in_order = true,
111
.num_coroutines = 8,
112
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
113
break;
114
case 'c':
115
s.compressed = true;
116
+ s.copy_range = false;
117
break;
118
case 'o':
119
if (!is_valid_option_list(optarg)) {
120
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
121
}
122
123
s.min_sparse = sval / BDRV_SECTOR_SIZE;
124
+ s.copy_range = false;
125
break;
126
}
127
case 'p':
128
--
129
2.17.1
130
131
diff view generated by jsdifflib
Deleted patch
1
Commit d759c951f3287fad04210a52f2dc93f94cf58c7f ("replay: push
2
replay_mutex_lock up the call tree") removed the !timeout lock
3
optimization in the main loop.
4
1
5
The idea of the optimization was to avoid ping-pongs between threads by
6
keeping the Big QEMU Lock held across non-blocking (!timeout) main loop
7
iterations.
8
9
A warning is printed when the main loop spins without releasing BQL for
10
long periods of time. These warnings were supposed to aid debugging but
11
in practice they just alarm users. They are considered noise because
12
the cause of spinning is not shown and is hard to find.
13
14
Now that the lock optimization has been removed, there is no danger of
15
hogging the BQL. Drop the spin counter and the infamous warning.
16
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
Reviewed-by: Jeff Cody <jcody@redhat.com>
19
---
20
util/main-loop.c | 25 -------------------------
21
tests/qemu-iotests/common.filter | 1 -
22
2 files changed, 26 deletions(-)
23
24
diff --git a/util/main-loop.c b/util/main-loop.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/util/main-loop.c
27
+++ b/util/main-loop.c
28
@@ -XXX,XX +XXX,XX @@ static int os_host_main_loop_wait(int64_t timeout)
29
{
30
GMainContext *context = g_main_context_default();
31
int ret;
32
- static int spin_counter;
33
34
g_main_context_acquire(context);
35
36
glib_pollfds_fill(&timeout);
37
38
- /* If the I/O thread is very busy or we are incorrectly busy waiting in
39
- * the I/O thread, this can lead to starvation of the BQL such that the
40
- * VCPU threads never run. To make sure we can detect the later case,
41
- * print a message to the screen. If we run into this condition, create
42
- * a fake timeout in order to give the VCPU threads a chance to run.
43
- */
44
- if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
45
- static bool notified;
46
-
47
- if (!notified && !qtest_enabled() && !qtest_driver()) {
48
- warn_report("I/O thread spun for %d iterations",
49
- MAX_MAIN_LOOP_SPIN);
50
- notified = true;
51
- }
52
-
53
- timeout = SCALE_MS;
54
- }
55
-
56
-
57
- if (timeout) {
58
- spin_counter = 0;
59
- } else {
60
- spin_counter++;
61
- }
62
qemu_mutex_unlock_iothread();
63
replay_mutex_unlock();
64
65
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
66
index XXXXXXX..XXXXXXX 100644
67
--- a/tests/qemu-iotests/common.filter
68
+++ b/tests/qemu-iotests/common.filter
69
@@ -XXX,XX +XXX,XX @@ _filter_qemu()
70
{
71
sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
72
-e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
73
- -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \
74
-e $'s#\r##' # QEMU monitor uses \r\n line endings
75
}
76
77
--
78
2.17.1
79
80
diff view generated by jsdifflib