The following changes since commit c25e8bba1f546ea72744ccfab77f8a9e8a323be8:

  Merge remote-tracking branch 'remotes/otubo/tags/pull-seccomp-20180601' into staging (2018-06-01 13:11:30 +0100)

are available in the Git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 21891a5a3011608845b5d7f1f9cce60cdc2bcc62:

  main-loop: drop spin_counter (2018-06-01 16:01:29 +0100)

----------------------------------------------------------------
Pull request

* Copy offloading for qemu-img convert (iSCSI, raw, and qcow2)

  If the underlying storage supports copy offloading, qemu-img convert will
  use it instead of performing reads and writes. This avoids data transfers
  and thus frees up storage bandwidth for other purposes. SCSI EXTENDED COPY
  and Linux copy_file_range(2) are used to implement this optimization.

* Drop spurious "WARNING: I/O thread spun for 1000 iterations" warning

----------------------------------------------------------------

Fam Zheng (10):
  block: Introduce API for copy offloading
  raw: Check byte range uniformly
  raw: Implement copy offloading
  qcow2: Implement copy offloading
  file-posix: Implement bdrv_co_copy_range
  iscsi: Query and save device designator when opening
  iscsi: Create and use iscsi_co_wait_for_task
  iscsi: Implement copy offloading
  block-backend: Add blk_co_copy_range
  qemu-img: Convert with copy offloading

Stefan Hajnoczi (1):
  main-loop: drop spin_counter

 configure                        |  17 ++
 include/block/block.h            |  32 ++++
 include/block/block_int.h        |  38 ++++
 include/block/raw-aio.h          |  10 +-
 include/scsi/constants.h         |   4 +
 include/sysemu/block-backend.h   |   4 +
 block/block-backend.c            |  18 ++
 block/file-posix.c               |  98 +++++++++-
 block/io.c                       |  97 ++++++++++
 block/iscsi.c                    | 314 +++++++++++++++++++++++++++----
 block/qcow2.c                    | 229 +++++++++++++++++++---
 block/raw-format.c               |  96 +++++++---
 qemu-img.c                       |  50 ++++-
 util/main-loop.c                 |  25 ---
 tests/qemu-iotests/common.filter |   1 -
 15 files changed, 908 insertions(+), 125 deletions(-)

--
2.17.1

The following changes since commit f6b06fcceef465de0cf2514c9f76fe0192896781:

  Merge remote-tracking branch 'remotes/kraxel/tags/ui-20190121-pull-request' into staging (2019-01-23 17:57:47 +0000)

are available in the Git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 8595685986152334b1ec28c78cb0e5e855d56b54:

  qemu-coroutine-sleep: drop CoSleepCB (2019-01-24 10:05:16 +0000)

----------------------------------------------------------------
Pull request

Changelog: No user-visible changes.

----------------------------------------------------------------

Stefan Hajnoczi (2):
  throttle-groups: fix restart coroutine iothread race
  iotests: add 238 for throttling tgm unregister iothread segfault

Vladimir Sementsov-Ogievskiy (1):
  qemu-coroutine-sleep: drop CoSleepCB

 include/block/throttle-groups.h |  5 +++++
 block/throttle-groups.c         |  9 +++++++
 util/qemu-coroutine-sleep.c     | 27 +++++++------------
 tests/qemu-iotests/238          | 47 +++++++++++++++++++++++++++++
 tests/qemu-iotests/238.out      |  6 +++++
 tests/qemu-iotests/group        |  1 +
 6 files changed, 78 insertions(+), 17 deletions(-)
 create mode 100755 tests/qemu-iotests/238
 create mode 100644 tests/qemu-iotests/238.out

--
2.20.1
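As background for the copy offloading item above, here is a minimal stand-alone sketch of the Linux copy_file_range(2) primitive named in the cover letter. It is illustrative only and not part of this series; the file arguments, error handling and chunking are assumptions:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* Copy a whole file without bouncing data through user space.
     * The kernel (and, on capable storage, the device itself) performs
     * the copy, which is what "copy offloading" refers to above. */
    int main(int argc, char **argv)
    {
        if (argc != 3) {
            fprintf(stderr, "usage: %s SRC DST\n", argv[0]);
            return 1;
        }

        int in = open(argv[1], O_RDONLY);
        int out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
        struct stat st;
        if (in < 0 || out < 0 || fstat(in, &st) < 0) {
            perror("open/fstat");
            return 1;
        }

        off_t remaining = st.st_size;
        while (remaining > 0) {
            /* The kernel may copy fewer bytes than requested per call. */
            ssize_t n = copy_file_range(in, NULL, out, NULL, remaining, 0);
            if (n < 0) {
                perror("copy_file_range"); /* a real caller would fall back to read+write */
                return 1;
            }
            if (n == 0) {
                break; /* no progress, e.g. unexpected EOF */
            }
            remaining -= n;
        }

        close(in);
        close(out);
        return 0;
    }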
Deleted patch
From: Fam Zheng <famz@redhat.com>

Introduce the bdrv_co_copy_range() API for copy offloading. Block
drivers implementing this API support efficient copy operations that
avoid reading each block from the source device and writing it to the
destination devices. Examples of copy offload primitives are SCSI
EXTENDED COPY and Linux copy_file_range(2).

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20180601092648.24614-2-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/block.h     | 32 +++++++++++++
 include/block/block_int.h | 38 +++++++++++++++
 block/io.c                | 97 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+)
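As an illustration of the calling convention this API expects (try the offloaded path once and, on any error, fall back to an ordinary read+write loop, as the doc comment in the diff below explains), here is a hedged sketch. It only builds inside a QEMU source tree; the helper name and the 1 MiB chunk size are invented, and this is not code from the series:

    #include "qemu/osdep.h"
    #include "block/block_int.h"

    /* Illustrative caller: offload if possible, otherwise bounce-buffer copy. */
    static int coroutine_fn copy_with_offload_or_fallback(BdrvChild *src,
                                                          BdrvChild *dst,
                                                          uint64_t offset,
                                                          uint64_t bytes)
    {
        int ret = bdrv_co_copy_range(src, offset, dst, offset, bytes, 0);
        if (ret >= 0) {
            return ret;             /* the storage performed the copy for us */
        }

        /* Any error means "stop trying to offload"; do a buffered copy. */
        uint64_t chunk = 1024 * 1024;
        uint8_t *buf = qemu_blockalign(src->bs, chunk);
        QEMUIOVector qiov;

        while (bytes > 0) {
            uint64_t n = MIN(bytes, chunk);

            qemu_iovec_init(&qiov, 1);
            qemu_iovec_add(&qiov, buf, n);
            ret = bdrv_co_preadv(src, offset, n, &qiov, 0);
            if (ret >= 0) {
                ret = bdrv_co_pwritev(dst, offset, n, &qiov, 0);
            }
            qemu_iovec_destroy(&qiov);
            if (ret < 0) {
                break;
            }
            offset += n;
            bytes -= n;
            ret = 0;
        }

        qemu_vfree(buf);
        return ret;
    }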
diff --git a/include/block/block.h b/include/block/block.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/include/block/block.h
22
+++ b/include/block/block.h
23
@@ -XXX,XX +XXX,XX @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
24
*/
25
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
26
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
27
+
28
+/**
29
+ *
30
+ * bdrv_co_copy_range:
31
+ *
32
+ * Do offloaded copy between two children. If the operation is not implemented
33
+ * by the driver, or if the backend storage doesn't support it, a negative
34
+ * error code will be returned.
35
+ *
36
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
37
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
38
+ * calling copy_file_range(2)) after the first error, thus it should fall back
39
+ * to a read+write path in the caller level.
40
+ *
41
+ * @src: Source child to copy data from
42
+ * @src_offset: offset in @src image to read data
43
+ * @dst: Destination child to copy data to
44
+ * @dst_offset: offset in @dst image to write data
45
+ * @bytes: number of bytes to copy
46
+ * @flags: request flags. Must be one of:
47
+ * 0 - actually read data from src;
48
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
49
+ * write on @dst as if bdrv_co_pwrite_zeroes is
50
+ * called. Used to simplify caller code, or
51
+ * during BlockDriver.bdrv_co_copy_range_from()
52
+ * recursion.
53
+ *
54
+ * Returns: 0 if succeeded; negative error code if failed.
55
+ **/
56
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
57
+ BdrvChild *dst, uint64_t dst_offset,
58
+ uint64_t bytes, BdrvRequestFlags flags);
59
#endif
60
diff --git a/include/block/block_int.h b/include/block/block_int.h
61
index XXXXXXX..XXXXXXX 100644
62
--- a/include/block/block_int.h
63
+++ b/include/block/block_int.h
64
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
65
int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
66
int64_t offset, int bytes);
67
68
+ /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
69
+ * and invoke bdrv_co_copy_range_from(child, ...), or invoke
70
+ * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
71
+ *
72
+ * See the comment of bdrv_co_copy_range for the parameter and return value
73
+ * semantics.
74
+ */
75
+ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
76
+ BdrvChild *src,
77
+ uint64_t offset,
78
+ BdrvChild *dst,
79
+ uint64_t dst_offset,
80
+ uint64_t bytes,
81
+ BdrvRequestFlags flags);
82
+
83
+ /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
84
+ * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
85
+ * operation if @bs is the leaf and @src has the same BlockDriver. Return
86
+ * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
87
+ *
88
+ * See the comment of bdrv_co_copy_range for the parameter and return value
89
+ * semantics.
90
+ */
91
+ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
92
+ BdrvChild *src,
93
+ uint64_t src_offset,
94
+ BdrvChild *dst,
95
+ uint64_t dst_offset,
96
+ uint64_t bytes,
97
+ BdrvRequestFlags flags);
98
+
99
/*
100
* Building block for bdrv_block_status[_above] and
101
* bdrv_is_allocated[_above]. The driver should answer only
102
@@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs);
103
104
void blockdev_close_all_bdrv_states(void);
105
106
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
107
+ BdrvChild *dst, uint64_t dst_offset,
108
+ uint64_t bytes, BdrvRequestFlags flags);
109
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
110
+ BdrvChild *dst, uint64_t dst_offset,
111
+ uint64_t bytes, BdrvRequestFlags flags);
112
+
113
#endif /* BLOCK_INT_H */
114
diff --git a/block/io.c b/block/io.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/block/io.c
117
+++ b/block/io.c
118
@@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
119
bdrv_unregister_buf(child->bs, host);
120
}
121
}
122
+
123
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
124
+ uint64_t src_offset,
125
+ BdrvChild *dst,
126
+ uint64_t dst_offset,
127
+ uint64_t bytes,
128
+ BdrvRequestFlags flags,
129
+ bool recurse_src)
130
+{
131
+ int ret;
132
+
133
+ if (!src || !dst || !src->bs || !dst->bs) {
134
+ return -ENOMEDIUM;
135
+ }
136
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
137
+ if (ret) {
138
+ return ret;
139
+ }
140
+
141
+ ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
142
+ if (ret) {
143
+ return ret;
144
+ }
145
+ if (flags & BDRV_REQ_ZERO_WRITE) {
146
+ return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
147
+ }
148
+
149
+ if (!src->bs->drv->bdrv_co_copy_range_from
150
+ || !dst->bs->drv->bdrv_co_copy_range_to
151
+ || src->bs->encrypted || dst->bs->encrypted) {
152
+ return -ENOTSUP;
153
+ }
154
+ if (recurse_src) {
155
+ return src->bs->drv->bdrv_co_copy_range_from(src->bs,
156
+ src, src_offset,
157
+ dst, dst_offset,
158
+ bytes, flags);
159
+ } else {
160
+ return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
161
+ src, src_offset,
162
+ dst, dst_offset,
163
+ bytes, flags);
164
+ }
165
+}
166
+
167
+/* Copy range from @src to @dst.
168
+ *
169
+ * See the comment of bdrv_co_copy_range for the parameter and return value
170
+ * semantics. */
171
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
172
+ BdrvChild *dst, uint64_t dst_offset,
173
+ uint64_t bytes, BdrvRequestFlags flags)
174
+{
175
+ return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
176
+ bytes, flags, true);
177
+}
178
+
179
+/* Copy range from @src to @dst.
180
+ *
181
+ * See the comment of bdrv_co_copy_range for the parameter and return value
182
+ * semantics. */
183
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
184
+ BdrvChild *dst, uint64_t dst_offset,
185
+ uint64_t bytes, BdrvRequestFlags flags)
186
+{
187
+ return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
188
+ bytes, flags, false);
189
+}
190
+
191
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
192
+ BdrvChild *dst, uint64_t dst_offset,
193
+ uint64_t bytes, BdrvRequestFlags flags)
194
+{
195
+ BdrvTrackedRequest src_req, dst_req;
196
+ BlockDriverState *src_bs = src->bs;
197
+ BlockDriverState *dst_bs = dst->bs;
198
+ int ret;
199
+
200
+ bdrv_inc_in_flight(src_bs);
201
+ bdrv_inc_in_flight(dst_bs);
202
+ tracked_request_begin(&src_req, src_bs, src_offset,
203
+ bytes, BDRV_TRACKED_READ);
204
+ tracked_request_begin(&dst_req, dst_bs, dst_offset,
205
+ bytes, BDRV_TRACKED_WRITE);
206
+
207
+ wait_serialising_requests(&src_req);
208
+ wait_serialising_requests(&dst_req);
209
+ ret = bdrv_co_copy_range_from(src, src_offset,
210
+ dst, dst_offset,
211
+ bytes, flags);
212
+
213
+ tracked_request_end(&src_req);
214
+ tracked_request_end(&dst_req);
215
+ bdrv_dec_in_flight(src_bs);
216
+ bdrv_dec_in_flight(dst_bs);
217
+ return ret;
218
+}
--
2.17.1
Commit d759c951f3287fad04210a52f2dc93f94cf58c7f ("replay: push
replay_mutex_lock up the call tree") removed the !timeout lock
optimization in the main loop.

The idea of the optimization was to avoid ping-pongs between threads by
keeping the Big QEMU Lock held across non-blocking (!timeout) main loop
iterations.

A warning is printed when the main loop spins without releasing BQL for
long periods of time. These warnings were supposed to aid debugging but
in practice they just alarm users. They are considered noise because
the cause of spinning is not shown and is hard to find.

Now that the lock optimization has been removed, there is no danger of
hogging the BQL. Drop the spin counter and the infamous warning.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
---
 util/main-loop.c                 | 25 -------------------------
 tests/qemu-iotests/common.filter |  1 -
 2 files changed, 26 deletions(-)

The following QMP command leads to a crash when iothreads are used:

  { 'execute': 'device_del', 'arguments': {'id': 'data'} }

The backtrace involves the queue restart coroutine where
tgm->throttle_state is a NULL pointer because
throttle_group_unregister_tgm() has already been called:

  (gdb) bt full
  #0  0x00005585a7a3b378 in qemu_mutex_lock_impl (mutex=0xffffffffffffffd0, file=0x5585a7bb3d54 "block/throttle-groups.c", line=412) at util/qemu-thread-posix.c:64
          err = <optimized out>
          __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl"
          __func__ = "qemu_mutex_lock_impl"
  #1  0x00005585a79be074 in throttle_group_restart_queue_entry (opaque=0x5585a9de4eb0) at block/throttle-groups.c:412
          _f = <optimized out>
          data = 0x5585a9de4eb0
          tgm = 0x5585a9079440
          ts = 0x0
          tg = 0xffffffffffffff98
          is_write = false
          empty_queue = 255

This coroutine should not execute in the iothread after the throttle
group member has been unregistered!

The root cause is that the device_del code path schedules the restart
coroutine in the iothread while holding the AioContext lock. Therefore
the iothread cannot execute the coroutine until after device_del
releases the lock - by this time it's too late.

This patch adds a reference count to ThrottleGroupMember so we can
synchronously wait for restart coroutines to complete. Once they are
done it is safe to unregister the ThrottleGroupMember.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20190114133257.30299-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/throttle-groups.h | 5 +++++
 block/throttle-groups.c         | 9 +++++++++
 2 files changed, 14 insertions(+)
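The synchronisation pattern the throttle-groups fix introduces (bump a counter before scheduling the coroutine, decrement it when the coroutine finishes, wait for it to reach zero before tearing down shared state) can be modelled outside QEMU. The stand-alone sketch below uses plain pthreads in place of QEMU's atomics, aio_wait_kick() and AIO_WAIT_WHILE(); every name in it is invented for illustration:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint restart_pending;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t done = PTHREAD_COND_INITIALIZER;

    static void *restart_queue_entry(void *opaque)
    {
        /* ... process the queued requests ... */
        atomic_fetch_sub(&restart_pending, 1);
        pthread_mutex_lock(&lock);
        pthread_cond_broadcast(&done);   /* plays the role of aio_wait_kick() */
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    static void schedule_restart(pthread_t *tid)
    {
        atomic_fetch_add(&restart_pending, 1);   /* before the work is scheduled */
        pthread_create(tid, NULL, restart_queue_entry, NULL);
    }

    static void unregister_member(void)
    {
        /* Equivalent of AIO_WAIT_WHILE(): do not free shared state while
         * scheduled work may still dereference it. */
        pthread_mutex_lock(&lock);
        while (atomic_load(&restart_pending) > 0) {
            pthread_cond_wait(&done, &lock);
        }
        pthread_mutex_unlock(&lock);
        /* ... now it is safe to unregister/free the member ... */
    }

    int main(void)
    {
        pthread_t tid;
        schedule_restart(&tid);
        unregister_member();
        pthread_join(tid, NULL);
        printf("safe to unregister\n");
        return 0;
    }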
diff --git a/util/main-loop.c b/util/main-loop.c
44
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
25
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
26
--- a/util/main-loop.c
46
--- a/include/block/throttle-groups.h
27
+++ b/util/main-loop.c
47
+++ b/include/block/throttle-groups.h
28
@@ -XXX,XX +XXX,XX @@ static int os_host_main_loop_wait(int64_t timeout)
48
@@ -XXX,XX +XXX,XX @@ typedef struct ThrottleGroupMember {
29
{
49
*/
30
GMainContext *context = g_main_context_default();
50
unsigned int io_limits_disabled;
31
int ret;
51
32
- static int spin_counter;
52
+ /* Number of pending throttle_group_restart_queue_entry() coroutines.
33
53
+ * Accessed with atomic operations.
34
g_main_context_acquire(context);
54
+ */
35
55
+ unsigned int restart_pending;
36
glib_pollfds_fill(&timeout);
56
+
37
57
/* The following fields are protected by the ThrottleGroup lock.
38
- /* If the I/O thread is very busy or we are incorrectly busy waiting in
58
* See the ThrottleGroup documentation for details.
39
- * the I/O thread, this can lead to starvation of the BQL such that the
59
* throttle_state tells us if I/O limits are configured. */
40
- * VCPU threads never run. To make sure we can detect the later case,
60
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
41
- * print a message to the screen. If we run into this condition, create
42
- * a fake timeout in order to give the VCPU threads a chance to run.
43
- */
44
- if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
45
- static bool notified;
46
-
47
- if (!notified && !qtest_enabled() && !qtest_driver()) {
48
- warn_report("I/O thread spun for %d iterations",
49
- MAX_MAIN_LOOP_SPIN);
50
- notified = true;
51
- }
52
-
53
- timeout = SCALE_MS;
54
- }
55
-
56
-
57
- if (timeout) {
58
- spin_counter = 0;
59
- } else {
60
- spin_counter++;
61
- }
62
qemu_mutex_unlock_iothread();
63
replay_mutex_unlock();
64
65
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
66
index XXXXXXX..XXXXXXX 100644
61
index XXXXXXX..XXXXXXX 100644
67
--- a/tests/qemu-iotests/common.filter
62
--- a/block/throttle-groups.c
68
+++ b/tests/qemu-iotests/common.filter
63
+++ b/block/throttle-groups.c
69
@@ -XXX,XX +XXX,XX @@ _filter_qemu()
64
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
70
{
65
}
71
sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
66
72
-e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
67
g_free(data);
73
- -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \
68
+
74
-e $'s#\r##' # QEMU monitor uses \r\n line endings
69
+ atomic_dec(&tgm->restart_pending);
70
+ aio_wait_kick();
75
}
71
}
76
72
73
static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write)
74
@@ -XXX,XX +XXX,XX @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write
75
* be no timer pending on this tgm at this point */
76
assert(!timer_pending(tgm->throttle_timers.timers[is_write]));
77
78
+ atomic_inc(&tgm->restart_pending);
79
+
80
co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd);
81
aio_co_enter(tgm->aio_context, co);
82
}
83
@@ -XXX,XX +XXX,XX @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
84
85
tgm->throttle_state = ts;
86
tgm->aio_context = ctx;
87
+ atomic_set(&tgm->restart_pending, 0);
88
89
qemu_mutex_lock(&tg->lock);
90
/* If the ThrottleGroup is new set this ThrottleGroupMember as the token */
91
@@ -XXX,XX +XXX,XX @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
92
return;
93
}
94
95
+ /* Wait for throttle_group_restart_queue_entry() coroutines to finish */
96
+ AIO_WAIT_WHILE(tgm->aio_context, atomic_read(&tgm->restart_pending) > 0);
97
+
98
qemu_mutex_lock(&tg->lock);
99
for (i = 0; i < 2; i++) {
100
assert(tgm->pending_reqs[i] == 0);
--
2.20.1
From: Fam Zheng <famz@redhat.com>

The new blk_co_copy_range interface offers a more efficient way in the
case of network based storage. Make use of it to allow faster convert
operation.

Since copy offloading cannot do zero detection ('-S') and compression
(-c), only try it when these options are not used.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20180601092648.24614-11-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qemu-img.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

Hot-unplug a scsi-hd using an iothread. The previous patch fixes a
segfault in this scenario.

This patch adds a regression test.

Suggested-by: Alberto Garcia <berto@igalia.com>
Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-id: 20190114133257.30299-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/qemu-iotests/238     | 47 ++++++++++++++++++++++++++++++
 tests/qemu-iotests/238.out |  6 +++++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 54 insertions(+)
 create mode 100755 tests/qemu-iotests/238
 create mode 100644 tests/qemu-iotests/238.out
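The qemu-img change in the diff below follows a "try offloading until it fails, then stay on the buffered path" strategy (the retry/goto logic around s->copy_range). A toy model of that control flow, not qemu-img code and with all names invented:

    #include <stdbool.h>
    #include <stdio.h>

    static bool use_copy_range = true;

    /* Fake backends: offloading "fails" on chunk 2 to show the switch-over. */
    static int copy_chunk_offloaded(int chunk) { return chunk == 2 ? -1 : 0; }
    static int copy_chunk_buffered(int chunk)  { (void)chunk; return 0; }

    static int copy_chunk(int chunk)
    {
    retry:
        if (use_copy_range) {
            if (copy_chunk_offloaded(chunk) < 0) {
                use_copy_range = false;   /* never try offloading again */
                goto retry;
            }
            return 0;
        }
        return copy_chunk_buffered(chunk);
    }

    int main(void)
    {
        for (int i = 0; i < 4; i++) {
            printf("chunk %d: %s\n", i, copy_chunk(i) == 0 ? "ok" : "error");
        }
        return 0;
    }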
diff --git a/qemu-img.c b/qemu-img.c
20
diff --git a/tests/qemu-iotests/238 b/tests/qemu-iotests/238
21
new file mode 100755
22
index XXXXXXX..XXXXXXX
23
--- /dev/null
24
+++ b/tests/qemu-iotests/238
25
@@ -XXX,XX +XXX,XX @@
26
+#!/usr/bin/env python
27
+#
28
+# Regression test for throttle group member unregister segfault with iothread
29
+#
30
+# Copyright (c) 2019 Red Hat, Inc.
31
+#
32
+# This program is free software; you can redistribute it and/or modify
33
+# it under the terms of the GNU General Public License as published by
34
+# the Free Software Foundation; either version 2 of the License, or
35
+# (at your option) any later version.
36
+#
37
+# This program is distributed in the hope that it will be useful,
38
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
39
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
40
+# GNU General Public License for more details.
41
+#
42
+# You should have received a copy of the GNU General Public License
43
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
44
+#
45
+
46
+import sys
47
+import os
48
+import iotests
49
+from iotests import log
50
+
51
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
52
+
53
+from qemu import QEMUMachine
54
+
55
+if iotests.qemu_default_machine == 's390-ccw-virtio':
56
+ virtio_scsi_device = 'virtio-scsi-ccw'
57
+else:
58
+ virtio_scsi_device = 'virtio-scsi-pci'
59
+
60
+vm = QEMUMachine(iotests.qemu_prog)
61
+vm.add_args('-machine', 'accel=kvm')
62
+vm.launch()
63
+
64
+log(vm.qmp('blockdev-add', node_name='hd0', driver='null-co'))
65
+log(vm.qmp('object-add', qom_type='iothread', id='iothread0'))
66
+log(vm.qmp('device_add', id='scsi0', driver=virtio_scsi_device, iothread='iothread0'))
67
+log(vm.qmp('device_add', id='scsi-hd0', driver='scsi-hd', drive='hd0'))
68
+log(vm.qmp('block_set_io_throttle', id='scsi-hd0', bps=0, bps_rd=0, bps_wr=0,
69
+ iops=1000, iops_rd=0, iops_wr=0, conv_keys=False))
70
+log(vm.qmp('device_del', id='scsi-hd0'))
71
+
72
+vm.shutdown()
73
diff --git a/tests/qemu-iotests/238.out b/tests/qemu-iotests/238.out
74
new file mode 100644
75
index XXXXXXX..XXXXXXX
76
--- /dev/null
77
+++ b/tests/qemu-iotests/238.out
78
@@ -XXX,XX +XXX,XX @@
79
+{"return": {}}
80
+{"return": {}}
81
+{"return": {}}
82
+{"return": {}}
83
+{"return": {}}
84
+{"return": {}}
85
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
19
index XXXXXXX..XXXXXXX 100644
86
index XXXXXXX..XXXXXXX 100644
20
--- a/qemu-img.c
87
--- a/tests/qemu-iotests/group
21
+++ b/qemu-img.c
88
+++ b/tests/qemu-iotests/group
22
@@ -XXX,XX +XXX,XX @@ typedef struct ImgConvertState {
89
@@ -XXX,XX +XXX,XX @@
23
bool compressed;
90
234 auto quick migration
24
bool target_has_backing;
91
235 auto quick
25
bool wr_in_order;
92
236 auto quick
26
+ bool copy_range;
93
+238 auto quick
27
int min_sparse;
28
size_t cluster_sectors;
29
size_t buf_sectors;
30
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
31
return 0;
32
}
33
34
+static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
35
+ int nb_sectors)
36
+{
37
+ int n, ret;
38
+
39
+ while (nb_sectors > 0) {
40
+ BlockBackend *blk;
41
+ int src_cur;
42
+ int64_t bs_sectors, src_cur_offset;
43
+ int64_t offset;
44
+
45
+ convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
46
+ offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
47
+ blk = s->src[src_cur];
48
+ bs_sectors = s->src_sectors[src_cur];
49
+
50
+ n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
51
+
52
+ ret = blk_co_copy_range(blk, offset, s->target,
53
+ sector_num << BDRV_SECTOR_BITS,
54
+ n << BDRV_SECTOR_BITS, 0);
55
+ if (ret < 0) {
56
+ return ret;
57
+ }
58
+
59
+ sector_num += n;
60
+ nb_sectors -= n;
61
+ }
62
+ return 0;
63
+}
64
+
65
static void coroutine_fn convert_co_do_copy(void *opaque)
66
{
67
ImgConvertState *s = opaque;
68
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque)
69
int n;
70
int64_t sector_num;
71
enum ImgConvertBlockStatus status;
72
+ bool copy_range;
73
74
qemu_co_mutex_lock(&s->lock);
75
if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
76
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque)
77
s->allocated_sectors, 0);
78
}
79
80
- if (status == BLK_DATA) {
81
+retry:
82
+ copy_range = s->copy_range && s->status == BLK_DATA;
83
+ if (status == BLK_DATA && !copy_range) {
84
ret = convert_co_read(s, sector_num, n, buf);
85
if (ret < 0) {
86
error_report("error while reading sector %" PRId64
87
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque)
88
}
89
90
if (s->ret == -EINPROGRESS) {
91
- ret = convert_co_write(s, sector_num, n, buf, status);
92
+ if (copy_range) {
93
+ ret = convert_co_copy_range(s, sector_num, n);
94
+ if (ret) {
95
+ s->copy_range = false;
96
+ goto retry;
97
+ }
98
+ } else {
99
+ ret = convert_co_write(s, sector_num, n, buf, status);
100
+ }
101
if (ret < 0) {
102
error_report("error while writing sector %" PRId64
103
": %s", sector_num, strerror(-ret));
104
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
105
ImgConvertState s = (ImgConvertState) {
106
/* Need at least 4k of zeros for sparse detection */
107
.min_sparse = 8,
108
+ .copy_range = true,
109
.buf_sectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
110
.wr_in_order = true,
111
.num_coroutines = 8,
112
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
113
break;
114
case 'c':
115
s.compressed = true;
116
+ s.copy_range = false;
117
break;
118
case 'o':
119
if (!is_valid_option_list(optarg)) {
120
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
121
}
122
123
s.min_sparse = sval / BDRV_SECTOR_SIZE;
124
+ s.copy_range = false;
125
break;
126
}
127
case 'p':
--
2.20.1
From: Fam Zheng <famz@redhat.com>

We don't verify the request range against s->size in the I/O callbacks
except for raw_co_pwritev. This is inconsistent (especially for
raw_co_pwrite_zeroes and raw_co_pdiscard), so fix them; in the meanwhile,
make the helper reusable by the coming new callbacks.

Note that in most cases the block layer already verifies the request
byte range against our reported image length, before invoking the driver
callbacks. The exception is during image creation, after
blk_set_allow_write_beyond_eof(blk, true) is called. But in that case,
the requests are not directly from the user or guest. So there is no
visible behavior change in adding the check code.

The int64_t -> uint64_t inconsistency, as shown by the type casting, is
pre-existing due to the interface.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180601092648.24614-3-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/raw-format.c | 64 ++++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 25 deletions(-)

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Drop the CoSleepCB structure. It's actually unused.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20190122143113.20331-1-vsementsov@virtuozzo.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/qemu-coroutine-sleep.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)
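The raw_adjust_offset() helper added in the diff below performs two checks: the request must fit within the size given in the driver options, and adding the configured base offset must not overflow a signed 64-bit offset. A stand-alone illustration of just that arithmetic, with assumed values and not the driver code itself:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* base = "offset" option, size = "size" option of the raw driver. */
    static int adjust_offset(uint64_t *offset, uint64_t bytes,
                             uint64_t base, uint64_t size, int is_write)
    {
        if (*offset > size || bytes > size - *offset) {
            /* request does not fit in the exposed window */
            return is_write ? -ENOSPC : -EINVAL;
        }
        if (*offset > INT64_MAX - base) {
            return -EINVAL;   /* adding the base would overflow */
        }
        *offset += base;
        return 0;
    }

    int main(void)
    {
        uint64_t off = 4096;
        int ret = adjust_offset(&off, 512, 1024 * 1024, 64 * 1024 * 1024, 1);
        printf("ret=%d adjusted offset=%llu\n", ret, (unsigned long long)off);
        return 0;
    }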
diff --git a/block/raw-format.c b/block/raw-format.c
12
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
28
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
29
--- a/block/raw-format.c
14
--- a/util/qemu-coroutine-sleep.c
30
+++ b/block/raw-format.c
15
+++ b/util/qemu-coroutine-sleep.c
31
@@ -XXX,XX +XXX,XX @@ static void raw_reopen_abort(BDRVReopenState *state)
16
@@ -XXX,XX +XXX,XX @@
32
state->opaque = NULL;
17
#include "qemu/timer.h"
18
#include "block/aio.h"
19
20
-typedef struct CoSleepCB {
21
- QEMUTimer *ts;
22
- Coroutine *co;
23
-} CoSleepCB;
24
-
25
static void co_sleep_cb(void *opaque)
26
{
27
- CoSleepCB *sleep_cb = opaque;
28
+ Coroutine *co = opaque;
29
30
/* Write of schedule protected by barrier write in aio_co_schedule */
31
- atomic_set(&sleep_cb->co->scheduled, NULL);
32
- aio_co_wake(sleep_cb->co);
33
+ atomic_set(&co->scheduled, NULL);
34
+ aio_co_wake(co);
33
}
35
}
34
36
35
+/* Check and adjust the offset, against 'offset' and 'size' options. */
37
void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
36
+static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
37
+ uint64_t bytes, bool is_write)
38
+{
39
+ BDRVRawState *s = bs->opaque;
40
+
41
+ if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
42
+ /* There's not enough space for the write, or the read request is
43
+ * out-of-range. Don't read/write anything to prevent leaking out of
44
+ * the size specified in options. */
45
+ return is_write ? -ENOSPC : -EINVAL;;
46
+ }
47
+
48
+ if (*offset > INT64_MAX - s->offset) {
49
+ return -EINVAL;
50
+ }
51
+ *offset += s->offset;
52
+
53
+ return 0;
54
+}
55
+
56
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
57
uint64_t bytes, QEMUIOVector *qiov,
58
int flags)
59
{
38
{
60
- BDRVRawState *s = bs->opaque;
39
AioContext *ctx = qemu_get_current_aio_context();
61
+ int ret;
40
- CoSleepCB sleep_cb = {
62
41
- .co = qemu_coroutine_self(),
63
- if (offset > UINT64_MAX - s->offset) {
42
- };
64
- return -EINVAL;
43
+ QEMUTimer *ts;
65
+ ret = raw_adjust_offset(bs, &offset, bytes, false);
44
+ Coroutine *co = qemu_coroutine_self();
66
+ if (ret) {
45
67
+ return ret;
46
- const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
47
- __func__);
48
+ const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, __func__);
49
if (scheduled) {
50
fprintf(stderr,
51
"%s: Co-routine was already scheduled in '%s'\n",
52
__func__, scheduled);
53
abort();
68
}
54
}
69
- offset += s->offset;
55
- sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
70
56
- timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
71
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
57
+ ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, co);
72
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
58
+ timer_mod(ts, qemu_clock_get_ns(type) + ns);
73
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
59
qemu_coroutine_yield();
74
uint64_t bytes, QEMUIOVector *qiov,
60
- timer_del(sleep_cb.ts);
75
int flags)
61
- timer_free(sleep_cb.ts);
76
{
62
+ timer_del(ts);
77
- BDRVRawState *s = bs->opaque;
63
+ timer_free(ts);
78
void *buf = NULL;
79
BlockDriver *drv;
80
QEMUIOVector local_qiov;
81
int ret;
82
83
- if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
84
- /* There's not enough space for the data. Don't write anything and just
85
- * fail to prevent leaking out of the size specified in options. */
86
- return -ENOSPC;
87
- }
88
-
89
- if (offset > UINT64_MAX - s->offset) {
90
- ret = -EINVAL;
91
- goto fail;
92
- }
93
-
94
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
95
/* Handling partial writes would be a pain - so we just
96
* require that guests have 512-byte request alignment if
97
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
98
qiov = &local_qiov;
99
}
100
101
- offset += s->offset;
102
+ ret = raw_adjust_offset(bs, &offset, bytes, true);
103
+ if (ret) {
104
+ goto fail;
105
+ }
106
107
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
108
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
109
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
110
int64_t offset, int bytes,
111
BdrvRequestFlags flags)
112
{
113
- BDRVRawState *s = bs->opaque;
114
- if (offset > UINT64_MAX - s->offset) {
115
- return -EINVAL;
116
+ int ret;
117
+
118
+ ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
119
+ if (ret) {
120
+ return ret;
121
}
122
- offset += s->offset;
123
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
124
}
64
}
125
126
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
127
int64_t offset, int bytes)
128
{
129
- BDRVRawState *s = bs->opaque;
130
- if (offset > UINT64_MAX - s->offset) {
131
- return -EINVAL;
132
+ int ret;
133
+
134
+ ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
135
+ if (ret) {
136
+ return ret;
137
}
138
- offset += s->offset;
139
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
140
}
141
142
--
65
--
143
2.17.1
66
2.20.1
144
67
145
68
Deleted patch
From: Fam Zheng <famz@redhat.com>

Just pass down to ->file.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20180601092648.24614-4-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/raw-format.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
diff --git a/block/raw-format.c b/block/raw-format.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block/raw-format.c
16
+++ b/block/raw-format.c
17
@@ -XXX,XX +XXX,XX @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
18
return bdrv_probe_geometry(bs->file->bs, geo);
19
}
20
21
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
22
+ BdrvChild *src, uint64_t src_offset,
23
+ BdrvChild *dst, uint64_t dst_offset,
24
+ uint64_t bytes, BdrvRequestFlags flags)
25
+{
26
+ int ret;
27
+
28
+ ret = raw_adjust_offset(bs, &src_offset, bytes, false);
29
+ if (ret) {
30
+ return ret;
31
+ }
32
+ return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
33
+ bytes, flags);
34
+}
35
+
36
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
37
+ BdrvChild *src, uint64_t src_offset,
38
+ BdrvChild *dst, uint64_t dst_offset,
39
+ uint64_t bytes, BdrvRequestFlags flags)
40
+{
41
+ int ret;
42
+
43
+ ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
44
+ if (ret) {
45
+ return ret;
46
+ }
47
+ return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
48
+ flags);
49
+}
50
+
51
BlockDriver bdrv_raw = {
52
.format_name = "raw",
53
.instance_size = sizeof(BDRVRawState),
54
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = {
55
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
56
.bdrv_co_pdiscard = &raw_co_pdiscard,
57
.bdrv_co_block_status = &raw_co_block_status,
58
+ .bdrv_co_copy_range_from = &raw_co_copy_range_from,
59
+ .bdrv_co_copy_range_to = &raw_co_copy_range_to,
60
.bdrv_truncate = &raw_truncate,
61
.bdrv_getlength = &raw_getlength,
62
.has_variable_length = true,
63
--
2.17.1
Deleted patch
From: Fam Zheng <famz@redhat.com>

The two callbacks are implemented quite similarly to the read/write
functions: bdrv_co_copy_range_from maps for read and calls into bs->file
or bs->backing depending on the allocation status; bdrv_co_copy_range_to
maps for write and calls into bs->file.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180601092648.24614-5-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qcow2.c | 229 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 199 insertions(+), 30 deletions(-)
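The copy_range_from callback in the diff below routes each cluster according to its allocation status. A toy model of that routing decision follows; the comments mirror the real QCOW2_CLUSTER_* cases, but the enum, helper and values here are invented for illustration:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef enum {
        CLUSTER_UNALLOCATED,
        CLUSTER_ZERO,
        CLUSTER_COMPRESSED,
        CLUSTER_NORMAL,
    } ClusterType;

    static const char *route_copy(ClusterType t, bool has_backing, int *ret)
    {
        *ret = 0;
        switch (t) {
        case CLUSTER_UNALLOCATED:
            /* data lives in the backing file, or reads back as zeros */
            return has_backing ? "copy from bs->backing"
                               : "zero-write on destination";
        case CLUSTER_ZERO:
            return "zero-write on destination (BDRV_REQ_ZERO_WRITE)";
        case CLUSTER_COMPRESSED:
            *ret = -ENOTSUP;   /* caller falls back to read+write */
            return "unsupported";
        case CLUSTER_NORMAL:
            return "copy from bs->file at the mapped cluster offset";
        }
        *ret = -EIO;
        return "unreachable";
    }

    int main(void)
    {
        int ret;
        printf("%s\n", route_copy(CLUSTER_NORMAL, false, &ret));
        printf("%s (ret=%d)\n", route_copy(CLUSTER_COMPRESSED, false, &ret), ret);
        return 0;
    }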
diff --git a/block/qcow2.c b/block/qcow2.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.c
19
+++ b/block/qcow2.c
20
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
21
return status;
22
}
23
24
+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
25
+ QCowL2Meta **pl2meta,
26
+ bool link_l2)
27
+{
28
+ int ret = 0;
29
+ QCowL2Meta *l2meta = *pl2meta;
30
+
31
+ while (l2meta != NULL) {
32
+ QCowL2Meta *next;
33
+
34
+ if (!ret && link_l2) {
35
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
36
+ if (ret) {
37
+ goto out;
38
+ }
39
+ }
40
+
41
+ /* Take the request off the list of running requests */
42
+ if (l2meta->nb_clusters != 0) {
43
+ QLIST_REMOVE(l2meta, next_in_flight);
44
+ }
45
+
46
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
47
+
48
+ next = l2meta->next;
49
+ g_free(l2meta);
50
+ l2meta = next;
51
+ }
52
+out:
53
+ *pl2meta = l2meta;
54
+ return ret;
55
+}
56
+
57
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
58
uint64_t bytes, QEMUIOVector *qiov,
59
int flags)
60
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
61
}
62
}
63
64
- while (l2meta != NULL) {
65
- QCowL2Meta *next;
66
-
67
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
68
- if (ret < 0) {
69
- goto fail;
70
- }
71
-
72
- /* Take the request off the list of running requests */
73
- if (l2meta->nb_clusters != 0) {
74
- QLIST_REMOVE(l2meta, next_in_flight);
75
- }
76
-
77
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
78
-
79
- next = l2meta->next;
80
- g_free(l2meta);
81
- l2meta = next;
82
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
83
+ if (ret) {
84
+ goto fail;
85
}
86
87
bytes -= cur_bytes;
88
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
89
ret = 0;
90
91
fail:
92
- while (l2meta != NULL) {
93
- QCowL2Meta *next;
94
-
95
- if (l2meta->nb_clusters != 0) {
96
- QLIST_REMOVE(l2meta, next_in_flight);
97
- }
98
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
99
-
100
- next = l2meta->next;
101
- g_free(l2meta);
102
- l2meta = next;
103
- }
104
+ qcow2_handle_l2meta(bs, &l2meta, false);
105
106
qemu_co_mutex_unlock(&s->lock);
107
108
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
109
return ret;
110
}
111
112
+static int coroutine_fn
113
+qcow2_co_copy_range_from(BlockDriverState *bs,
114
+ BdrvChild *src, uint64_t src_offset,
115
+ BdrvChild *dst, uint64_t dst_offset,
116
+ uint64_t bytes, BdrvRequestFlags flags)
117
+{
118
+ BDRVQcow2State *s = bs->opaque;
119
+ int ret;
120
+ unsigned int cur_bytes; /* number of bytes in current iteration */
121
+ BdrvChild *child = NULL;
122
+ BdrvRequestFlags cur_flags;
123
+
124
+ assert(!bs->encrypted);
125
+ qemu_co_mutex_lock(&s->lock);
126
+
127
+ while (bytes != 0) {
128
+ uint64_t copy_offset = 0;
129
+ /* prepare next request */
130
+ cur_bytes = MIN(bytes, INT_MAX);
131
+ cur_flags = flags;
132
+
133
+ ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
134
+ if (ret < 0) {
135
+ goto out;
136
+ }
137
+
138
+ switch (ret) {
139
+ case QCOW2_CLUSTER_UNALLOCATED:
140
+ if (bs->backing && bs->backing->bs) {
141
+ int64_t backing_length = bdrv_getlength(bs->backing->bs);
142
+ if (src_offset >= backing_length) {
143
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
144
+ } else {
145
+ child = bs->backing;
146
+ cur_bytes = MIN(cur_bytes, backing_length - src_offset);
147
+ copy_offset = src_offset;
148
+ }
149
+ } else {
150
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
151
+ }
152
+ break;
153
+
154
+ case QCOW2_CLUSTER_ZERO_PLAIN:
155
+ case QCOW2_CLUSTER_ZERO_ALLOC:
156
+ cur_flags |= BDRV_REQ_ZERO_WRITE;
157
+ break;
158
+
159
+ case QCOW2_CLUSTER_COMPRESSED:
160
+ ret = -ENOTSUP;
161
+ goto out;
162
+ break;
163
+
164
+ case QCOW2_CLUSTER_NORMAL:
165
+ child = bs->file;
166
+ copy_offset += offset_into_cluster(s, src_offset);
167
+ if ((copy_offset & 511) != 0) {
168
+ ret = -EIO;
169
+ goto out;
170
+ }
171
+ break;
172
+
173
+ default:
174
+ abort();
175
+ }
176
+ qemu_co_mutex_unlock(&s->lock);
177
+ ret = bdrv_co_copy_range_from(child,
178
+ copy_offset,
179
+ dst, dst_offset,
180
+ cur_bytes, cur_flags);
181
+ qemu_co_mutex_lock(&s->lock);
182
+ if (ret < 0) {
183
+ goto out;
184
+ }
185
+
186
+ bytes -= cur_bytes;
187
+ src_offset += cur_bytes;
188
+ dst_offset += cur_bytes;
189
+ }
190
+ ret = 0;
191
+
192
+out:
193
+ qemu_co_mutex_unlock(&s->lock);
194
+ return ret;
195
+}
196
+
197
+static int coroutine_fn
198
+qcow2_co_copy_range_to(BlockDriverState *bs,
199
+ BdrvChild *src, uint64_t src_offset,
200
+ BdrvChild *dst, uint64_t dst_offset,
201
+ uint64_t bytes, BdrvRequestFlags flags)
202
+{
203
+ BDRVQcow2State *s = bs->opaque;
204
+ int offset_in_cluster;
205
+ int ret;
206
+ unsigned int cur_bytes; /* number of sectors in current iteration */
207
+ uint64_t cluster_offset;
208
+ uint8_t *cluster_data = NULL;
209
+ QCowL2Meta *l2meta = NULL;
210
+
211
+ assert(!bs->encrypted);
212
+ s->cluster_cache_offset = -1; /* disable compressed cache */
213
+
214
+ qemu_co_mutex_lock(&s->lock);
215
+
216
+ while (bytes != 0) {
217
+
218
+ l2meta = NULL;
219
+
220
+ offset_in_cluster = offset_into_cluster(s, dst_offset);
221
+ cur_bytes = MIN(bytes, INT_MAX);
222
+
223
+ /* TODO:
224
+ * If src->bs == dst->bs, we could simply copy by incrementing
225
+ * the refcnt, without copying user data.
226
+ * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
227
+ ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
228
+ &cluster_offset, &l2meta);
229
+ if (ret < 0) {
230
+ goto fail;
231
+ }
232
+
233
+ assert((cluster_offset & 511) == 0);
234
+
235
+ ret = qcow2_pre_write_overlap_check(bs, 0,
236
+ cluster_offset + offset_in_cluster, cur_bytes);
237
+ if (ret < 0) {
238
+ goto fail;
239
+ }
240
+
241
+ qemu_co_mutex_unlock(&s->lock);
242
+ ret = bdrv_co_copy_range_to(src, src_offset,
243
+ bs->file,
244
+ cluster_offset + offset_in_cluster,
245
+ cur_bytes, flags);
246
+ qemu_co_mutex_lock(&s->lock);
247
+ if (ret < 0) {
248
+ goto fail;
249
+ }
250
+
251
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
252
+ if (ret) {
253
+ goto fail;
254
+ }
255
+
256
+ bytes -= cur_bytes;
257
+ dst_offset += cur_bytes;
258
+ }
259
+ ret = 0;
260
+
261
+fail:
262
+ qcow2_handle_l2meta(bs, &l2meta, false);
263
+
264
+ qemu_co_mutex_unlock(&s->lock);
265
+
266
+ qemu_vfree(cluster_data);
267
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
268
+
269
+ return ret;
270
+}
271
+
272
static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
273
PreallocMode prealloc, Error **errp)
274
{
275
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
276
277
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
278
.bdrv_co_pdiscard = qcow2_co_pdiscard,
279
+ .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
280
+ .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
281
.bdrv_truncate = qcow2_truncate,
282
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
283
.bdrv_make_empty = qcow2_make_empty,
284
--
2.17.1
Deleted patch
1
From: Fam Zheng <famz@redhat.com>

With copy_file_range(2), we can implement the bdrv_co_copy_range
semantics.

Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180601092648.24614-6-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 configure               | 17 +++++++
 include/block/raw-aio.h | 10 ++++-
 block/file-posix.c      | 98 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 120 insertions(+), 5 deletions(-)
diff --git a/configure b/configure
16
index XXXXXXX..XXXXXXX 100755
17
--- a/configure
18
+++ b/configure
19
@@ -XXX,XX +XXX,XX @@ if test "$fortify_source" != "no"; then
20
fi
21
fi
22
23
+###############################################
24
+# Check if copy_file_range is provided by glibc
25
+have_copy_file_range=no
26
+cat > $TMPC << EOF
27
+#include <unistd.h>
28
+int main(void) {
29
+ copy_file_range(0, NULL, 0, NULL, 0, 0);
30
+ return 0;
31
+}
32
+EOF
33
+if compile_prog "" "" ; then
34
+ have_copy_file_range=yes
35
+fi
36
+
37
##########################################
38
# check if struct fsxattr is available via linux/fs.h
39
40
@@ -XXX,XX +XXX,XX @@ fi
41
if test "$have_fsxattr" = "yes" ; then
42
echo "HAVE_FSXATTR=y" >> $config_host_mak
43
fi
44
+if test "$have_copy_file_range" = "yes" ; then
45
+ echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak
46
+fi
47
if test "$vte" = "yes" ; then
48
echo "CONFIG_VTE=y" >> $config_host_mak
49
echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
50
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/include/block/raw-aio.h
53
+++ b/include/block/raw-aio.h
54
@@ -XXX,XX +XXX,XX @@
55
#define QEMU_AIO_FLUSH 0x0008
56
#define QEMU_AIO_DISCARD 0x0010
57
#define QEMU_AIO_WRITE_ZEROES 0x0020
58
+#define QEMU_AIO_COPY_RANGE 0x0040
59
#define QEMU_AIO_TYPE_MASK \
60
- (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
61
- QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
62
+ (QEMU_AIO_READ | \
63
+ QEMU_AIO_WRITE | \
64
+ QEMU_AIO_IOCTL | \
65
+ QEMU_AIO_FLUSH | \
66
+ QEMU_AIO_DISCARD | \
67
+ QEMU_AIO_WRITE_ZEROES | \
68
+ QEMU_AIO_COPY_RANGE)
69
70
/* AIO flags */
71
#define QEMU_AIO_MISALIGNED 0x1000
72
diff --git a/block/file-posix.c b/block/file-posix.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/block/file-posix.c
75
+++ b/block/file-posix.c
76
@@ -XXX,XX +XXX,XX @@
77
#ifdef __linux__
78
#include <sys/ioctl.h>
79
#include <sys/param.h>
80
+#include <sys/syscall.h>
81
#include <linux/cdrom.h>
82
#include <linux/fd.h>
83
#include <linux/fs.h>
84
@@ -XXX,XX +XXX,XX @@ typedef struct RawPosixAIOData {
85
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
86
off_t aio_offset;
87
int aio_type;
88
+ int aio_fd2;
89
+ off_t aio_offset2;
90
} RawPosixAIOData;
91
92
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
93
@@ -XXX,XX +XXX,XX @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
94
return -ENOTSUP;
95
}
96
97
+#ifndef HAVE_COPY_FILE_RANGE
98
+static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
99
+ off_t *out_off, size_t len, unsigned int flags)
100
+{
101
+#ifdef __NR_copy_file_range
102
+ return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
103
+ out_off, len, flags);
104
+#else
105
+ errno = ENOSYS;
106
+ return -1;
107
+#endif
108
+}
109
+#endif
110
+
111
+static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
112
+{
113
+ uint64_t bytes = aiocb->aio_nbytes;
114
+ off_t in_off = aiocb->aio_offset;
115
+ off_t out_off = aiocb->aio_offset2;
116
+
117
+ while (bytes) {
118
+ ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off,
119
+ aiocb->aio_fd2, &out_off,
120
+ bytes, 0);
121
+ if (ret == -EINTR) {
122
+ continue;
123
+ }
124
+ if (ret < 0) {
125
+ if (errno == ENOSYS) {
126
+ return -ENOTSUP;
127
+ } else {
128
+ return -errno;
129
+ }
130
+ }
131
+ if (!ret) {
132
+ /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */
133
+ return -ENOTSUP;
134
+ }
135
+ bytes -= ret;
136
+ }
137
+ return 0;
138
+}
139
+
140
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
141
{
142
int ret = -EOPNOTSUPP;
143
@@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg)
144
case QEMU_AIO_WRITE_ZEROES:
145
ret = handle_aiocb_write_zeroes(aiocb);
146
break;
147
+ case QEMU_AIO_COPY_RANGE:
148
+ ret = handle_aiocb_copy_range(aiocb);
149
+ break;
150
default:
151
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
152
ret = -EINVAL;
153
@@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg)
154
return ret;
155
}
156
157
-static int paio_submit_co(BlockDriverState *bs, int fd,
158
- int64_t offset, QEMUIOVector *qiov,
159
- int bytes, int type)
160
+static int paio_submit_co_full(BlockDriverState *bs, int fd,
161
+ int64_t offset, int fd2, int64_t offset2,
162
+ QEMUIOVector *qiov,
163
+ int bytes, int type)
164
{
165
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
166
ThreadPool *pool;
167
@@ -XXX,XX +XXX,XX @@ static int paio_submit_co(BlockDriverState *bs, int fd,
168
acb->bs = bs;
169
acb->aio_type = type;
170
acb->aio_fildes = fd;
171
+ acb->aio_fd2 = fd2;
172
+ acb->aio_offset2 = offset2;
173
174
acb->aio_nbytes = bytes;
175
acb->aio_offset = offset;
176
@@ -XXX,XX +XXX,XX @@ static int paio_submit_co(BlockDriverState *bs, int fd,
177
return thread_pool_submit_co(pool, aio_worker, acb);
178
}
179
180
+static inline int paio_submit_co(BlockDriverState *bs, int fd,
181
+ int64_t offset, QEMUIOVector *qiov,
182
+ int bytes, int type)
183
+{
184
+ return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
185
+}
186
+
187
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
188
int64_t offset, QEMUIOVector *qiov, int bytes,
189
BlockCompletionFunc *cb, void *opaque, int type)
190
@@ -XXX,XX +XXX,XX @@ static void raw_abort_perm_update(BlockDriverState *bs)
191
raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
192
}
193
194
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
195
+ BdrvChild *src, uint64_t src_offset,
196
+ BdrvChild *dst, uint64_t dst_offset,
197
+ uint64_t bytes, BdrvRequestFlags flags)
198
+{
199
+ return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags);
200
+}
201
+
202
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
203
+ BdrvChild *src, uint64_t src_offset,
204
+ BdrvChild *dst, uint64_t dst_offset,
205
+ uint64_t bytes, BdrvRequestFlags flags)
206
+{
207
+ BDRVRawState *s = bs->opaque;
208
+ BDRVRawState *src_s;
209
+
210
+ assert(dst->bs == bs);
211
+ if (src->bs->drv->bdrv_co_copy_range_to != raw_co_copy_range_to) {
212
+ return -ENOTSUP;
213
+ }
214
+
215
+ src_s = src->bs->opaque;
216
+ if (fd_open(bs) < 0 || fd_open(bs) < 0) {
217
+ return -EIO;
218
+ }
219
+ return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset,
220
+ NULL, bytes, QEMU_AIO_COPY_RANGE);
221
+}
222
+
223
BlockDriver bdrv_file = {
224
.format_name = "file",
225
.protocol_name = "file",
226
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = {
227
.bdrv_co_pwritev = raw_co_pwritev,
228
.bdrv_aio_flush = raw_aio_flush,
229
.bdrv_aio_pdiscard = raw_aio_pdiscard,
230
+ .bdrv_co_copy_range_from = raw_co_copy_range_from,
231
+ .bdrv_co_copy_range_to = raw_co_copy_range_to,
232
.bdrv_refresh_limits = raw_refresh_limits,
233
.bdrv_io_plug = raw_aio_plug,
234
.bdrv_io_unplug = raw_aio_unplug,
235
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = {
236
.bdrv_co_pwritev = raw_co_pwritev,
237
.bdrv_aio_flush    = raw_aio_flush,
238
.bdrv_aio_pdiscard = hdev_aio_pdiscard,
239
+ .bdrv_co_copy_range_from = raw_co_copy_range_from,
240
+ .bdrv_co_copy_range_to = raw_co_copy_range_to,
241
.bdrv_refresh_limits = raw_refresh_limits,
242
.bdrv_io_plug = raw_aio_plug,
243
.bdrv_io_unplug = raw_aio_unplug,
244
--
2.17.1
Deleted patch
1
From: Fam Zheng <famz@redhat.com>

The device designator data returned by the INQUIRY command will be
useful to fill in source/target fields during copy offloading. Do this
when connecting to the target and save the data for later use.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20180601092648.24614-7-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/iscsi.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
diff --git a/block/iscsi.c b/block/iscsi.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/iscsi.c
18
+++ b/block/iscsi.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct IscsiLun {
20
QemuMutex mutex;
21
struct scsi_inquiry_logical_block_provisioning lbp;
22
struct scsi_inquiry_block_limits bl;
23
+ struct scsi_inquiry_device_designator *dd;
24
unsigned char *zeroblock;
25
/* The allocmap tracks which clusters (pages) on the iSCSI target are
26
* allocated and which are not. In case a target returns zeros for
27
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
28
},
29
};
30
31
+static void iscsi_save_designator(IscsiLun *lun,
32
+ struct scsi_inquiry_device_identification *inq_di)
33
+{
34
+ struct scsi_inquiry_device_designator *desig, *copy = NULL;
35
+
36
+ for (desig = inq_di->designators; desig; desig = desig->next) {
37
+ if (desig->association ||
38
+ desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
39
+ continue;
40
+ }
41
+ /* NAA works better than T10 vendor ID based designator. */
42
+ if (!copy || copy->designator_type < desig->designator_type) {
43
+ copy = desig;
44
+ }
45
+ }
46
+ if (copy) {
47
+ lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
48
+ *lun->dd = *copy;
49
+ lun->dd->next = NULL;
50
+ lun->dd->designator = g_malloc(copy->designator_length);
51
+ memcpy(lun->dd->designator, copy->designator, copy->designator_length);
52
+ }
53
+}
54
+
55
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
56
Error **errp)
57
{
58
@@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
59
struct scsi_task *inq_task;
60
struct scsi_inquiry_logical_block_provisioning *inq_lbp;
61
struct scsi_inquiry_block_limits *inq_bl;
62
+ struct scsi_inquiry_device_identification *inq_di;
63
switch (inq_vpd->pages[i]) {
64
case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
65
inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
66
@@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
67
sizeof(struct scsi_inquiry_block_limits));
68
scsi_free_scsi_task(inq_task);
69
break;
70
+ case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
71
+ inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
72
+ SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
73
+ (void **) &inq_di, errp);
74
+ if (inq_task == NULL) {
75
+ ret = -EINVAL;
76
+ goto out;
77
+ }
78
+ iscsi_save_designator(iscsilun, inq_di);
79
+ scsi_free_scsi_task(inq_task);
80
+ break;
81
default:
82
break;
83
}
84
@@ -XXX,XX +XXX,XX @@ static void iscsi_close(BlockDriverState *bs)
85
iscsi_logout_sync(iscsi);
86
}
87
iscsi_destroy_context(iscsi);
88
+ if (iscsilun->dd) {
89
+ g_free(iscsilun->dd->designator);
90
+ g_free(iscsilun->dd);
91
+ }
92
g_free(iscsilun->zeroblock);
93
iscsi_allocmap_free(iscsilun);
94
qemu_mutex_destroy(&iscsilun->mutex);
95
--
2.17.1
Deleted patch
1
From: Fam Zheng <famz@redhat.com>

This loop is repeated a growing number of times. Make a helper.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20180601092648.24614-8-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/iscsi.c | 54 ++++++++++++++++-----------------------------
 1 file changed, 17 insertions(+), 37 deletions(-)
diff --git a/block/iscsi.c b/block/iscsi.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/iscsi.c
17
+++ b/block/iscsi.c
18
@@ -XXX,XX +XXX,XX @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
19
offset / iscsilun->cluster_size) == size);
20
}
21
22
+static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
23
+ IscsiLun *iscsilun)
24
+{
25
+ while (!iTask->complete) {
26
+ iscsi_set_events(iscsilun);
27
+ qemu_mutex_unlock(&iscsilun->mutex);
28
+ qemu_coroutine_yield();
29
+ qemu_mutex_lock(&iscsilun->mutex);
30
+ }
31
+}
32
+
33
static int coroutine_fn
34
iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
35
QEMUIOVector *iov, int flags)
36
@@ -XXX,XX +XXX,XX @@ retry:
37
scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
38
iov->niov);
39
#endif
40
- while (!iTask.complete) {
41
- iscsi_set_events(iscsilun);
42
- qemu_mutex_unlock(&iscsilun->mutex);
43
- qemu_coroutine_yield();
44
- qemu_mutex_lock(&iscsilun->mutex);
45
- }
46
+ iscsi_co_wait_for_task(&iTask, iscsilun);
47
48
if (iTask.task != NULL) {
49
scsi_free_scsi_task(iTask.task);
50
@@ -XXX,XX +XXX,XX @@ retry:
51
ret = -ENOMEM;
52
goto out_unlock;
53
}
54
-
55
- while (!iTask.complete) {
56
- iscsi_set_events(iscsilun);
57
- qemu_mutex_unlock(&iscsilun->mutex);
58
- qemu_coroutine_yield();
59
- qemu_mutex_lock(&iscsilun->mutex);
60
- }
61
+ iscsi_co_wait_for_task(&iTask, iscsilun);
62
63
if (iTask.do_retry) {
64
if (iTask.task != NULL) {
65
@@ -XXX,XX +XXX,XX @@ retry:
66
#if LIBISCSI_API_VERSION < (20160603)
67
scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
68
#endif
69
- while (!iTask.complete) {
70
- iscsi_set_events(iscsilun);
71
- qemu_mutex_unlock(&iscsilun->mutex);
72
- qemu_coroutine_yield();
73
- qemu_mutex_lock(&iscsilun->mutex);
74
- }
75
76
+ iscsi_co_wait_for_task(&iTask, iscsilun);
77
if (iTask.task != NULL) {
78
scsi_free_scsi_task(iTask.task);
79
iTask.task = NULL;
80
@@ -XXX,XX +XXX,XX @@ retry:
81
return -ENOMEM;
82
}
83
84
- while (!iTask.complete) {
85
- iscsi_set_events(iscsilun);
86
- qemu_mutex_unlock(&iscsilun->mutex);
87
- qemu_coroutine_yield();
88
- qemu_mutex_lock(&iscsilun->mutex);
89
- }
90
+ iscsi_co_wait_for_task(&iTask, iscsilun);
91
92
if (iTask.task != NULL) {
93
scsi_free_scsi_task(iTask.task);
94
@@ -XXX,XX +XXX,XX @@ retry:
95
goto out_unlock;
96
}
97
98
- while (!iTask.complete) {
99
- iscsi_set_events(iscsilun);
100
- qemu_mutex_unlock(&iscsilun->mutex);
101
- qemu_coroutine_yield();
102
- qemu_mutex_lock(&iscsilun->mutex);
103
- }
104
+ iscsi_co_wait_for_task(&iTask, iscsilun);
105
106
if (iTask.task != NULL) {
107
scsi_free_scsi_task(iTask.task);
108
@@ -XXX,XX +XXX,XX @@ retry:
109
return -ENOMEM;
110
}
111
112
- while (!iTask.complete) {
113
- iscsi_set_events(iscsilun);
114
- qemu_mutex_unlock(&iscsilun->mutex);
115
- qemu_coroutine_yield();
116
- qemu_mutex_lock(&iscsilun->mutex);
117
- }
118
+ iscsi_co_wait_for_task(&iTask, iscsilun);
119
120
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
121
iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
122
--
123
2.17.1
124
125
diff view generated by jsdifflib
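
A usage sketch, not code from this series: with the helper in place, a new
request path in block/iscsi.c only needs the submit/wait/retry skeleton
below. iscsi_do_something_async() is a hypothetical placeholder for any
libiscsi command submission made under iscsilun->mutex; the other names
(IscsiTask, iscsi_co_init_iscsitask, iscsi_co_generic_cb,
iscsi_co_wait_for_task) are the ones used in the patches in this series.

    /* Minimal sketch of a call site built on iscsi_co_wait_for_task(). */
    static int coroutine_fn iscsi_co_do_something(IscsiLun *iscsilun)
    {
        struct IscsiTask iTask;

        iscsi_co_init_iscsitask(iscsilun, &iTask);
        qemu_mutex_lock(&iscsilun->mutex);
    retry:
        /* iscsi_do_something_async() is hypothetical, for illustration only. */
        if (iscsi_do_something_async(iscsilun, iscsi_co_generic_cb, &iTask) != 0) {
            qemu_mutex_unlock(&iscsilun->mutex);
            return -EIO;
        }

        /* Drops iscsilun->mutex around the coroutine yield, re-takes it after. */
        iscsi_co_wait_for_task(&iTask, iscsilun);

        if (iTask.do_retry) {
            iTask.complete = 0;
            goto retry;
        }
        qemu_mutex_unlock(&iscsilun->mutex);
        g_free(iTask.err_str);

        return iTask.status == SCSI_STATUS_GOOD ? 0 : iTask.err_code;
    }
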
Deleted patch
From: Fam Zheng <famz@redhat.com>

Issue the EXTENDED COPY (LID1) command to implement the copy_range API.

The parameter data construction code is modified from libiscsi's
iscsi-dd.c.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20180601092648.24614-9-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/scsi/constants.h |   4 +
 block/iscsi.c            | 219 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 223 insertions(+)

diff --git a/include/scsi/constants.h b/include/scsi/constants.h
index XXXXXXX..XXXXXXX 100644
--- a/include/scsi/constants.h
+++ b/include/scsi/constants.h
@@ -XXX,XX +XXX,XX @@
 #define MMC_PROFILE_HDDVD_RW_DL 0x005A
 #define MMC_PROFILE_INVALID 0xFFFF
 
+#define XCOPY_DESC_OFFSET 16
+#define IDENT_DESCR_TGT_DESCR_SIZE 32
+#define XCOPY_BLK2BLK_SEG_DESC_SIZE 28
+
 #endif
diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
     iscsi_allocmap_invalidate(iscsilun);
 }
 
+static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
+                                                 BdrvChild *src,
+                                                 uint64_t src_offset,
+                                                 BdrvChild *dst,
+                                                 uint64_t dst_offset,
+                                                 uint64_t bytes,
+                                                 BdrvRequestFlags flags)
+{
+    return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags);
+}
+
+static struct scsi_task *iscsi_xcopy_task(int param_len)
+{
+    struct scsi_task *task;
+
+    task = g_new0(struct scsi_task, 1);
+
+    task->cdb[0] = EXTENDED_COPY;
+    task->cdb[10] = (param_len >> 24) & 0xFF;
+    task->cdb[11] = (param_len >> 16) & 0xFF;
+    task->cdb[12] = (param_len >> 8) & 0xFF;
+    task->cdb[13] = param_len & 0xFF;
+    task->cdb_size = 16;
+    task->xfer_dir = SCSI_XFER_WRITE;
+    task->expxferlen = param_len;
+
+    return task;
+}
+
+static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
+{
+    struct scsi_inquiry_device_designator *dd = lun->dd;
+
+    memset(desc, 0, 32);
+    desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
+    desc[4] = dd->code_set;
+    desc[5] = (dd->designator_type & 0xF)
+              | ((dd->association & 3) << 4);
+    desc[7] = dd->designator_length;
+    memcpy(desc + 8, dd->designator, dd->designator_length);
+
+    desc[28] = 0;
+    desc[29] = (lun->block_size >> 16) & 0xFF;
+    desc[30] = (lun->block_size >> 8) & 0xFF;
+    desc[31] = lun->block_size & 0xFF;
+}
+
+static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
+                                 int dst_index)
+{
+    hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
+    hdr[1] = ((dc << 1) | cat) & 0xFF;
+    hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
+    /* don't account for the first 4 bytes in descriptor header*/
+    hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF;
+    hdr[4] = (src_index >> 8) & 0xFF;
+    hdr[5] = src_index & 0xFF;
+    hdr[6] = (dst_index >> 8) & 0xFF;
+    hdr[7] = dst_index & 0xFF;
+}
+
+static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
+                                      int src_index, int dst_index, int num_blks,
+                                      uint64_t src_lba, uint64_t dst_lba)
+{
+    iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
+
+    /* The caller should verify the request size */
+    assert(num_blks < 65536);
+    desc[10] = (num_blks >> 8) & 0xFF;
+    desc[11] = num_blks & 0xFF;
+    desc[12] = (src_lba >> 56) & 0xFF;
+    desc[13] = (src_lba >> 48) & 0xFF;
+    desc[14] = (src_lba >> 40) & 0xFF;
+    desc[15] = (src_lba >> 32) & 0xFF;
+    desc[16] = (src_lba >> 24) & 0xFF;
+    desc[17] = (src_lba >> 16) & 0xFF;
+    desc[18] = (src_lba >> 8) & 0xFF;
+    desc[19] = src_lba & 0xFF;
+    desc[20] = (dst_lba >> 56) & 0xFF;
+    desc[21] = (dst_lba >> 48) & 0xFF;
+    desc[22] = (dst_lba >> 40) & 0xFF;
+    desc[23] = (dst_lba >> 32) & 0xFF;
+    desc[24] = (dst_lba >> 24) & 0xFF;
+    desc[25] = (dst_lba >> 16) & 0xFF;
+    desc[26] = (dst_lba >> 8) & 0xFF;
+    desc[27] = dst_lba & 0xFF;
+}
+
+static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str,
+                                        int list_id_usage, int prio,
+                                        int tgt_desc_len,
+                                        int seg_desc_len, int inline_data_len)
+{
+    buf[0] = list_id;
+    buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
+    buf[2] = (tgt_desc_len >> 8) & 0xFF;
+    buf[3] = tgt_desc_len & 0xFF;
+    buf[8] = (seg_desc_len >> 24) & 0xFF;
+    buf[9] = (seg_desc_len >> 16) & 0xFF;
+    buf[10] = (seg_desc_len >> 8) & 0xFF;
+    buf[11] = seg_desc_len & 0xFF;
+    buf[12] = (inline_data_len >> 24) & 0xFF;
+    buf[13] = (inline_data_len >> 16) & 0xFF;
+    buf[14] = (inline_data_len >> 8) & 0xFF;
+    buf[15] = inline_data_len & 0xFF;
+}
+
+static void iscsi_xcopy_data(struct iscsi_data *data,
+                             IscsiLun *src, int64_t src_lba,
+                             IscsiLun *dst, int64_t dst_lba,
+                             uint16_t num_blocks)
+{
+    uint8_t *buf;
+    const int src_offset = XCOPY_DESC_OFFSET;
+    const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
+    const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;
+
+    data->size = XCOPY_DESC_OFFSET +
+                 IDENT_DESCR_TGT_DESCR_SIZE * 2 +
+                 XCOPY_BLK2BLK_SEG_DESC_SIZE;
+    data->data = g_malloc0(data->size);
+    buf = data->data;
+
+    /* Initialise the parameter list header */
+    iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
+                                0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
+                                XCOPY_BLK2BLK_SEG_DESC_SIZE,
+                                0);
+
+    /* Initialise CSCD list with one src + one dst descriptor */
+    iscsi_populate_target_desc(&buf[src_offset], src);
+    iscsi_populate_target_desc(&buf[dst_offset], dst);
+
+    /* Initialise one segment descriptor */
+    iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks,
+                              src_lba, dst_lba);
+}
+
+static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
+                                               BdrvChild *src,
+                                               uint64_t src_offset,
+                                               BdrvChild *dst,
+                                               uint64_t dst_offset,
+                                               uint64_t bytes,
+                                               BdrvRequestFlags flags)
+{
+    IscsiLun *dst_lun = dst->bs->opaque;
+    IscsiLun *src_lun;
+    struct IscsiTask iscsi_task;
+    struct iscsi_data data;
+    int r = 0;
+    int block_size;
+
+    if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
+        return -ENOTSUP;
+    }
+    src_lun = src->bs->opaque;
+
+    if (!src_lun->dd || !dst_lun->dd) {
+        return -ENOTSUP;
+    }
+    if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
+        return -ENOTSUP;
+    }
+    if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
+        return -ENOTSUP;
+    }
+    if (dst_lun->block_size != src_lun->block_size ||
+        !dst_lun->block_size) {
+        return -ENOTSUP;
+    }
+
+    block_size = dst_lun->block_size;
+    if (bytes / block_size > 65535) {
+        return -ENOTSUP;
+    }
+
+    iscsi_xcopy_data(&data,
+                     src_lun, src_offset / block_size,
+                     dst_lun, dst_offset / block_size,
+                     bytes / block_size);
+
+    iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
+
+    qemu_mutex_lock(&dst_lun->mutex);
+    iscsi_task.task = iscsi_xcopy_task(data.size);
+retry:
+    if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
+                                 iscsi_task.task, iscsi_co_generic_cb,
+                                 &data,
+                                 &iscsi_task) != 0) {
+        r = -EIO;
+        goto out_unlock;
+    }
+
+    iscsi_co_wait_for_task(&iscsi_task, dst_lun);
+
+    if (iscsi_task.do_retry) {
+        iscsi_task.complete = 0;
+        goto retry;
+    }
+
+    if (iscsi_task.status != SCSI_STATUS_GOOD) {
+        r = iscsi_task.err_code;
+        goto out_unlock;
+    }
+
+out_unlock:
+    g_free(iscsi_task.task);
+    qemu_mutex_unlock(&dst_lun->mutex);
+    g_free(iscsi_task.err_str);
+    return r;
+}
+
 static QemuOptsList iscsi_create_opts = {
     .name = "iscsi-create-opts",
     .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = {
 
     .bdrv_co_block_status = iscsi_co_block_status,
     .bdrv_co_pdiscard = iscsi_co_pdiscard,
+    .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
+    .bdrv_co_copy_range_to = iscsi_co_copy_range_to,
     .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
     .bdrv_co_readv = iscsi_co_readv,
     .bdrv_co_writev = iscsi_co_writev,
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = {
 
     .bdrv_co_block_status = iscsi_co_block_status,
     .bdrv_co_pdiscard = iscsi_co_pdiscard,
+    .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
+    .bdrv_co_copy_range_to = iscsi_co_copy_range_to,
     .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
     .bdrv_co_readv = iscsi_co_readv,
     .bdrv_co_writev = iscsi_co_writev,
--
2.17.1
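
The parameter list that iscsi_xcopy_data() assembles is easier to follow
with the offsets written out. The sketch below is not part of the series;
it only restates the constants added to include/scsi/constants.h above and
the arithmetic they imply.

    #include <assert.h>

    /* Mirrors the constants added in this patch; standalone illustration only. */
    #define XCOPY_DESC_OFFSET           16  /* parameter list header            */
    #define IDENT_DESCR_TGT_DESCR_SIZE  32  /* one identification CSCD descr.   */
    #define XCOPY_BLK2BLK_SEG_DESC_SIZE 28  /* block-to-block segment descr.    */

    int main(void)
    {
        /* Same layout as iscsi_xcopy_data(): header, src CSCD, dst CSCD, segment. */
        int src_offset = XCOPY_DESC_OFFSET;                              /* 16  */
        int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE; /* 48  */
        int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;        /* 80  */
        int total_size = seg_offset + XCOPY_BLK2BLK_SEG_DESC_SIZE;       /* 108 */

        assert(total_size == 108);

        /* num_blks is a 16-bit field in the segment descriptor, which is why
         * iscsi_co_copy_range_to() refuses requests above 65535 blocks, i.e.
         * just under 32 MiB per call with 512-byte blocks. */
        return 0;
    }
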
Deleted patch
From: Fam Zheng <famz@redhat.com>

It's a BlockBackend wrapper of the BDS interface.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20180601092648.24614-10-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/sysemu/block-backend.h |  4 ++++
 block/block-backend.c          | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -XXX,XX +XXX,XX @@ void blk_set_force_allow_inactivate(BlockBackend *blk);
 void blk_register_buf(BlockBackend *blk, void *host, size_t size);
 void blk_unregister_buf(BlockBackend *blk, void *host);
 
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+                                   BlockBackend *blk_out, int64_t off_out,
+                                   int bytes, BdrvRequestFlags flags);
+
 #endif
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void blk_unregister_buf(BlockBackend *blk, void *host)
 {
     bdrv_unregister_buf(blk_bs(blk), host);
 }
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+                                   BlockBackend *blk_out, int64_t off_out,
+                                   int bytes, BdrvRequestFlags flags)
+{
+    int r;
+    r = blk_check_byte_request(blk_in, off_in, bytes);
+    if (r) {
+        return r;
+    }
+    r = blk_check_byte_request(blk_out, off_out, bytes);
+    if (r) {
+        return r;
+    }
+    return bdrv_co_copy_range(blk_in->root, off_in,
+                              blk_out->root, off_out,
+                              bytes, flags);
+}
--
2.17.1
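
A typical caller of this wrapper is the qemu-img convert conversion loop
later in the series: try copy offloading first and fall back to ordinary
reads and writes when either end reports -ENOTSUP. The sketch below is
illustrative only, not code from the series; convert_do_read_write() is a
hypothetical stand-in for the existing bounce-buffer path.

    /* Illustrative only: attempt an offloaded copy, fall back on -ENOTSUP. */
    static int coroutine_fn convert_co_copy_range(BlockBackend *src, int64_t src_off,
                                                  BlockBackend *dst, int64_t dst_off,
                                                  int bytes)
    {
        int ret = blk_co_copy_range(src, src_off, dst, dst_off, bytes, 0);

        if (ret == -ENOTSUP) {
            /* Offloading not possible here: do a regular read + write instead. */
            ret = convert_do_read_write(src, src_off, dst, dst_off, bytes);
        }
        return ret;
    }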