1 | The following changes since commit c25e8bba1f546ea72744ccfab77f8a9e8a323be8: | 1 | The following changes since commit f6b06fcceef465de0cf2514c9f76fe0192896781: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/otubo/tags/pull-seccomp-20180601' into staging (2018-06-01 13:11:30 +0100) | 3 | Merge remote-tracking branch 'remotes/kraxel/tags/ui-20190121-pull-request' into staging (2019-01-23 17:57:47 +0000) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/stefanha/qemu.git tags/block-pull-request | 7 | git://github.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 21891a5a3011608845b5d7f1f9cce60cdc2bcc62: | 9 | for you to fetch changes up to 8595685986152334b1ec28c78cb0e5e855d56b54: |
10 | 10 | ||
11 | main-loop: drop spin_counter (2018-06-01 16:01:29 +0100) | 11 | qemu-coroutine-sleep: drop CoSleepCB (2019-01-24 10:05:16 +0000) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | * Copy offloading for qemu-img convert (iSCSI, raw, and qcow2) | 16 | Changelog: No user-visible changes. |
17 | |||
18 | If the underlying storage supports copy offloading, qemu-img convert will | ||
19 | use it instead of performing reads and writes. This avoids data transfers | ||
20 | and thus frees up storage bandwidth for other purposes. SCSI EXTENDED COPY | ||
21 | and Linux copy_file_range(2) are used to implement this optimization. | ||
22 | |||
23 | * Drop spurious "WARNING: I\/O thread spun for 1000 iterations" warning | ||
24 | 17 | ||
25 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
26 | 19 | ||
27 | Fam Zheng (10): | 20 | Stefan Hajnoczi (2): |
28 | block: Introduce API for copy offloading | 21 | throttle-groups: fix restart coroutine iothread race |
29 | raw: Check byte range uniformly | 22 | iotests: add 238 for throttling tgm unregister iothread segfault |
30 | raw: Implement copy offloading | ||
31 | qcow2: Implement copy offloading | ||
32 | file-posix: Implement bdrv_co_copy_range | ||
33 | iscsi: Query and save device designator when opening | ||
34 | iscsi: Create and use iscsi_co_wait_for_task | ||
35 | iscsi: Implement copy offloading | ||
36 | block-backend: Add blk_co_copy_range | ||
37 | qemu-img: Convert with copy offloading | ||
38 | 23 | ||
39 | Stefan Hajnoczi (1): | 24 | Vladimir Sementsov-Ogievskiy (1): |
40 | main-loop: drop spin_counter | 25 | qemu-coroutine-sleep: drop CoSleepCB |
41 | 26 | ||
42 | configure | 17 ++ | 27 | include/block/throttle-groups.h | 5 ++++ |
43 | include/block/block.h | 32 ++++ | 28 | block/throttle-groups.c | 9 +++++++ |
44 | include/block/block_int.h | 38 ++++ | 29 | util/qemu-coroutine-sleep.c | 27 +++++++------------ |
45 | include/block/raw-aio.h | 10 +- | 30 | tests/qemu-iotests/238 | 47 +++++++++++++++++++++++++++++++++ |
46 | include/scsi/constants.h | 4 + | 31 | tests/qemu-iotests/238.out | 6 +++++ |
47 | include/sysemu/block-backend.h | 4 + | 32 | tests/qemu-iotests/group | 1 + |
48 | block/block-backend.c | 18 ++ | 33 | 6 files changed, 78 insertions(+), 17 deletions(-) |
49 | block/file-posix.c | 98 +++++++++- | 34 | create mode 100755 tests/qemu-iotests/238 |
50 | block/io.c | 97 ++++++++++ | 35 | create mode 100644 tests/qemu-iotests/238.out |
51 | block/iscsi.c | 314 +++++++++++++++++++++++++++---- | ||
52 | block/qcow2.c | 229 +++++++++++++++++++--- | ||
53 | block/raw-format.c | 96 +++++++--- | ||
54 | qemu-img.c | 50 ++++- | ||
55 | util/main-loop.c | 25 --- | ||
56 | tests/qemu-iotests/common.filter | 1 - | ||
57 | 15 files changed, 908 insertions(+), 125 deletions(-) | ||
58 | 36 | ||
59 | -- | 37 | -- |
60 | 2.17.1 | 38 | 2.20.1 |
61 | 39 | ||
62 | 40 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | Introduce the bdrv_co_copy_range() API for copy offloading. Block | ||
4 | drivers implementing this API support efficient copy operations that | ||
5 | avoid reading each block from the source device and writing it to the | ||
6 | destination devices. Examples of copy offload primitives are SCSI | ||
7 | EXTENDED COPY and Linux copy_file_range(2). | ||
8 | |||
9 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Message-id: 20180601092648.24614-2-famz@redhat.com | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | --- | ||
14 | include/block/block.h | 32 +++++++++++++ | ||
15 | include/block/block_int.h | 38 +++++++++++++++ | ||
16 | block/io.c | 97 +++++++++++++++++++++++++++++++++++++++ | ||
17 | 3 files changed, 167 insertions(+) | ||
18 | |||
19 | diff --git a/include/block/block.h b/include/block/block.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/include/block/block.h | ||
22 | +++ b/include/block/block.h | ||
23 | @@ -XXX,XX +XXX,XX @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, | ||
24 | */ | ||
25 | void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); | ||
26 | void bdrv_unregister_buf(BlockDriverState *bs, void *host); | ||
27 | + | ||
28 | +/** | ||
29 | + * | ||
30 | + * bdrv_co_copy_range: | ||
31 | + * | ||
32 | + * Do offloaded copy between two children. If the operation is not implemented | ||
33 | + * by the driver, or if the backend storage doesn't support it, a negative | ||
34 | + * error code will be returned. | ||
35 | + * | ||
36 | + * Note: block layer doesn't emulate or fallback to a bounce buffer approach | ||
37 | + * because usually the caller shouldn't attempt offloaded copy any more (e.g. | ||
38 | + * calling copy_file_range(2)) after the first error, thus it should fall back | ||
39 | + * to a read+write path in the caller level. | ||
40 | + * | ||
41 | + * @src: Source child to copy data from | ||
42 | + * @src_offset: offset in @src image to read data | ||
43 | + * @dst: Destination child to copy data to | ||
44 | + * @dst_offset: offset in @dst image to write data | ||
45 | + * @bytes: number of bytes to copy | ||
46 | + * @flags: request flags. Must be one of: | ||
47 | + * 0 - actually read data from src; | ||
48 | + * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero | ||
49 | + * write on @dst as if bdrv_co_pwrite_zeroes is | ||
50 | + * called. Used to simplify caller code, or | ||
51 | + * during BlockDriver.bdrv_co_copy_range_from() | ||
52 | + * recursion. | ||
53 | + * | ||
54 | + * Returns: 0 if succeeded; negative error code if failed. | ||
55 | + **/ | ||
56 | +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, | ||
57 | + BdrvChild *dst, uint64_t dst_offset, | ||
58 | + uint64_t bytes, BdrvRequestFlags flags); | ||
59 | #endif | ||
60 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/include/block/block_int.h | ||
63 | +++ b/include/block/block_int.h | ||
64 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
65 | int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, | ||
66 | int64_t offset, int bytes); | ||
67 | |||
68 | + /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, | ||
69 | + * and invoke bdrv_co_copy_range_from(child, ...), or invoke | ||
70 | + * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. | ||
71 | + * | ||
72 | + * See the comment of bdrv_co_copy_range for the parameter and return value | ||
73 | + * semantics. | ||
74 | + */ | ||
75 | + int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, | ||
76 | + BdrvChild *src, | ||
77 | + uint64_t offset, | ||
78 | + BdrvChild *dst, | ||
79 | + uint64_t dst_offset, | ||
80 | + uint64_t bytes, | ||
81 | + BdrvRequestFlags flags); | ||
82 | + | ||
83 | + /* Map [offset, offset + nbytes) range onto a child of bs to copy data to, | ||
84 | + * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy | ||
85 | + * operation if @bs is the leaf and @src has the same BlockDriver. Return | ||
86 | + * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. | ||
87 | + * | ||
88 | + * See the comment of bdrv_co_copy_range for the parameter and return value | ||
89 | + * semantics. | ||
90 | + */ | ||
91 | + int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, | ||
92 | + BdrvChild *src, | ||
93 | + uint64_t src_offset, | ||
94 | + BdrvChild *dst, | ||
95 | + uint64_t dst_offset, | ||
96 | + uint64_t bytes, | ||
97 | + BdrvRequestFlags flags); | ||
98 | + | ||
99 | /* | ||
100 | * Building block for bdrv_block_status[_above] and | ||
101 | * bdrv_is_allocated[_above]. The driver should answer only | ||
102 | @@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs); | ||
103 | |||
104 | void blockdev_close_all_bdrv_states(void); | ||
105 | |||
106 | +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, | ||
107 | + BdrvChild *dst, uint64_t dst_offset, | ||
108 | + uint64_t bytes, BdrvRequestFlags flags); | ||
109 | +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, | ||
110 | + BdrvChild *dst, uint64_t dst_offset, | ||
111 | + uint64_t bytes, BdrvRequestFlags flags); | ||
112 | + | ||
113 | #endif /* BLOCK_INT_H */ | ||
114 | diff --git a/block/io.c b/block/io.c | ||
115 | index XXXXXXX..XXXXXXX 100644 | ||
116 | --- a/block/io.c | ||
117 | +++ b/block/io.c | ||
118 | @@ -XXX,XX +XXX,XX @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host) | ||
119 | bdrv_unregister_buf(child->bs, host); | ||
120 | } | ||
121 | } | ||
122 | + | ||
123 | +static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, | ||
124 | + uint64_t src_offset, | ||
125 | + BdrvChild *dst, | ||
126 | + uint64_t dst_offset, | ||
127 | + uint64_t bytes, | ||
128 | + BdrvRequestFlags flags, | ||
129 | + bool recurse_src) | ||
130 | +{ | ||
131 | + int ret; | ||
132 | + | ||
133 | + if (!src || !dst || !src->bs || !dst->bs) { | ||
134 | + return -ENOMEDIUM; | ||
135 | + } | ||
136 | + ret = bdrv_check_byte_request(src->bs, src_offset, bytes); | ||
137 | + if (ret) { | ||
138 | + return ret; | ||
139 | + } | ||
140 | + | ||
141 | + ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes); | ||
142 | + if (ret) { | ||
143 | + return ret; | ||
144 | + } | ||
145 | + if (flags & BDRV_REQ_ZERO_WRITE) { | ||
146 | + return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags); | ||
147 | + } | ||
148 | + | ||
149 | + if (!src->bs->drv->bdrv_co_copy_range_from | ||
150 | + || !dst->bs->drv->bdrv_co_copy_range_to | ||
151 | + || src->bs->encrypted || dst->bs->encrypted) { | ||
152 | + return -ENOTSUP; | ||
153 | + } | ||
154 | + if (recurse_src) { | ||
155 | + return src->bs->drv->bdrv_co_copy_range_from(src->bs, | ||
156 | + src, src_offset, | ||
157 | + dst, dst_offset, | ||
158 | + bytes, flags); | ||
159 | + } else { | ||
160 | + return dst->bs->drv->bdrv_co_copy_range_to(dst->bs, | ||
161 | + src, src_offset, | ||
162 | + dst, dst_offset, | ||
163 | + bytes, flags); | ||
164 | + } | ||
165 | +} | ||
166 | + | ||
167 | +/* Copy range from @src to @dst. | ||
168 | + * | ||
169 | + * See the comment of bdrv_co_copy_range for the parameter and return value | ||
170 | + * semantics. */ | ||
171 | +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, | ||
172 | + BdrvChild *dst, uint64_t dst_offset, | ||
173 | + uint64_t bytes, BdrvRequestFlags flags) | ||
174 | +{ | ||
175 | + return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, | ||
176 | + bytes, flags, true); | ||
177 | +} | ||
178 | + | ||
179 | +/* Copy range from @src to @dst. | ||
180 | + * | ||
181 | + * See the comment of bdrv_co_copy_range for the parameter and return value | ||
182 | + * semantics. */ | ||
183 | +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, | ||
184 | + BdrvChild *dst, uint64_t dst_offset, | ||
185 | + uint64_t bytes, BdrvRequestFlags flags) | ||
186 | +{ | ||
187 | + return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, | ||
188 | + bytes, flags, false); | ||
189 | +} | ||
190 | + | ||
191 | +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, | ||
192 | + BdrvChild *dst, uint64_t dst_offset, | ||
193 | + uint64_t bytes, BdrvRequestFlags flags) | ||
194 | +{ | ||
195 | + BdrvTrackedRequest src_req, dst_req; | ||
196 | + BlockDriverState *src_bs = src->bs; | ||
197 | + BlockDriverState *dst_bs = dst->bs; | ||
198 | + int ret; | ||
199 | + | ||
200 | + bdrv_inc_in_flight(src_bs); | ||
201 | + bdrv_inc_in_flight(dst_bs); | ||
202 | + tracked_request_begin(&src_req, src_bs, src_offset, | ||
203 | + bytes, BDRV_TRACKED_READ); | ||
204 | + tracked_request_begin(&dst_req, dst_bs, dst_offset, | ||
205 | + bytes, BDRV_TRACKED_WRITE); | ||
206 | + | ||
207 | + wait_serialising_requests(&src_req); | ||
208 | + wait_serialising_requests(&dst_req); | ||
209 | + ret = bdrv_co_copy_range_from(src, src_offset, | ||
210 | + dst, dst_offset, | ||
211 | + bytes, flags); | ||
212 | + | ||
213 | + tracked_request_end(&src_req); | ||
214 | + tracked_request_end(&dst_req); | ||
215 | + bdrv_dec_in_flight(src_bs); | ||
216 | + bdrv_dec_in_flight(dst_bs); | ||
217 | + return ret; | ||
218 | +} | ||
219 | -- | ||
220 | 2.17.1 | ||
221 | |||
222 | diff view generated by jsdifflib |
1 | Commit d759c951f3287fad04210a52f2dc93f94cf58c7f ("replay: push | 1 | The following QMP command leads to a crash when iothreads are used: |
---|---|---|---|
2 | replay_mutex_lock up the call tree") removed the !timeout lock | ||
3 | optimization in the main loop. | ||
4 | 2 | ||
5 | The idea of the optimization was to avoid ping-pongs between threads by | 3 | { 'execute': 'device_del', 'arguments': {'id': 'data'} } |
6 | keeping the Big QEMU Lock held across non-blocking (!timeout) main loop | ||
7 | iterations. | ||
8 | 4 | ||
9 | A warning is printed when the main loop spins without releasing BQL for | 5 | The backtrace involves the queue restart coroutine where |
10 | long periods of time. These warnings were supposed to aid debugging but | 6 | tgm->throttle_state is a NULL pointer because |
11 | in practice they just alarm users. They are considered noise because | 7 | throttle_group_unregister_tgm() has already been called: |
12 | the cause of spinning is not shown and is hard to find. | ||
13 | 8 | ||
14 | Now that the lock optimization has been removed, there is no danger of | 9 | (gdb) bt full |
15 | hogging the BQL. Drop the spin counter and the infamous warning. | 10 | #0 0x00005585a7a3b378 in qemu_mutex_lock_impl (mutex=0xffffffffffffffd0, file=0x5585a7bb3d54 "block/throttle-groups.c", line=412) at util/qemu-thread-posix.c:64 |
11 | err = <optimized out> | ||
12 | __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl" | ||
13 | __func__ = "qemu_mutex_lock_impl" | ||
14 | #1 0x00005585a79be074 in throttle_group_restart_queue_entry (opaque=0x5585a9de4eb0) at block/throttle-groups.c:412 | ||
15 | _f = <optimized out> | ||
16 | data = 0x5585a9de4eb0 | ||
17 | tgm = 0x5585a9079440 | ||
18 | ts = 0x0 | ||
19 | tg = 0xffffffffffffff98 | ||
20 | is_write = false | ||
21 | empty_queue = 255 | ||
22 | |||
23 | This coroutine should not execute in the iothread after the throttle | ||
24 | group member has been unregistered! | ||
25 | |||
26 | The root cause is that the device_del code path schedules the restart | ||
27 | coroutine in the iothread while holding the AioContext lock. Therefore | ||
28 | the iothread cannot execute the coroutine until after device_del | ||
29 | releases the lock - by this time it's too late. | ||
30 | |||
31 | This patch adds a reference count to ThrottleGroupMember so we can | ||
32 | synchronously wait for restart coroutines to complete. Once they are | ||
33 | done it is safe to unregister the ThrottleGroupMember. | ||
16 | 34 | ||
17 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 35 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
18 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 36 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
37 | Message-id: 20190114133257.30299-2-stefanha@redhat.com | ||
38 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | --- | 39 | --- |
20 | util/main-loop.c | 25 ------------------------- | 40 | include/block/throttle-groups.h | 5 +++++ |
21 | tests/qemu-iotests/common.filter | 1 - | 41 | block/throttle-groups.c | 9 +++++++++ |
22 | 2 files changed, 26 deletions(-) | 42 | 2 files changed, 14 insertions(+) |
23 | 43 | ||
24 | diff --git a/util/main-loop.c b/util/main-loop.c | 44 | diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h |
25 | index XXXXXXX..XXXXXXX 100644 | 45 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/util/main-loop.c | 46 | --- a/include/block/throttle-groups.h |
27 | +++ b/util/main-loop.c | 47 | +++ b/include/block/throttle-groups.h |
28 | @@ -XXX,XX +XXX,XX @@ static int os_host_main_loop_wait(int64_t timeout) | 48 | @@ -XXX,XX +XXX,XX @@ typedef struct ThrottleGroupMember { |
29 | { | 49 | */ |
30 | GMainContext *context = g_main_context_default(); | 50 | unsigned int io_limits_disabled; |
31 | int ret; | 51 | |
32 | - static int spin_counter; | 52 | + /* Number of pending throttle_group_restart_queue_entry() coroutines. |
33 | 53 | + * Accessed with atomic operations. | |
34 | g_main_context_acquire(context); | 54 | + */ |
35 | 55 | + unsigned int restart_pending; | |
36 | glib_pollfds_fill(&timeout); | 56 | + |
37 | 57 | /* The following fields are protected by the ThrottleGroup lock. | |
38 | - /* If the I/O thread is very busy or we are incorrectly busy waiting in | 58 | * See the ThrottleGroup documentation for details. |
39 | - * the I/O thread, this can lead to starvation of the BQL such that the | 59 | * throttle_state tells us if I/O limits are configured. */ |
40 | - * VCPU threads never run. To make sure we can detect the later case, | 60 | diff --git a/block/throttle-groups.c b/block/throttle-groups.c |
41 | - * print a message to the screen. If we run into this condition, create | ||
42 | - * a fake timeout in order to give the VCPU threads a chance to run. | ||
43 | - */ | ||
44 | - if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) { | ||
45 | - static bool notified; | ||
46 | - | ||
47 | - if (!notified && !qtest_enabled() && !qtest_driver()) { | ||
48 | - warn_report("I/O thread spun for %d iterations", | ||
49 | - MAX_MAIN_LOOP_SPIN); | ||
50 | - notified = true; | ||
51 | - } | ||
52 | - | ||
53 | - timeout = SCALE_MS; | ||
54 | - } | ||
55 | - | ||
56 | - | ||
57 | - if (timeout) { | ||
58 | - spin_counter = 0; | ||
59 | - } else { | ||
60 | - spin_counter++; | ||
61 | - } | ||
62 | qemu_mutex_unlock_iothread(); | ||
63 | replay_mutex_unlock(); | ||
64 | |||
65 | diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter | ||
66 | index XXXXXXX..XXXXXXX 100644 | 61 | index XXXXXXX..XXXXXXX 100644 |
67 | --- a/tests/qemu-iotests/common.filter | 62 | --- a/block/throttle-groups.c |
68 | +++ b/tests/qemu-iotests/common.filter | 63 | +++ b/block/throttle-groups.c |
69 | @@ -XXX,XX +XXX,XX @@ _filter_qemu() | 64 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) |
70 | { | 65 | } |
71 | sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ | 66 | |
72 | -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ | 67 | g_free(data); |
73 | - -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \ | 68 | + |
74 | -e $'s#\r##' # QEMU monitor uses \r\n line endings | 69 | + atomic_dec(&tgm->restart_pending); |
70 | + aio_wait_kick(); | ||
75 | } | 71 | } |
76 | 72 | ||
73 | static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) | ||
74 | @@ -XXX,XX +XXX,XX @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write | ||
75 | * be no timer pending on this tgm at this point */ | ||
76 | assert(!timer_pending(tgm->throttle_timers.timers[is_write])); | ||
77 | |||
78 | + atomic_inc(&tgm->restart_pending); | ||
79 | + | ||
80 | co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); | ||
81 | aio_co_enter(tgm->aio_context, co); | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm, | ||
84 | |||
85 | tgm->throttle_state = ts; | ||
86 | tgm->aio_context = ctx; | ||
87 | + atomic_set(&tgm->restart_pending, 0); | ||
88 | |||
89 | qemu_mutex_lock(&tg->lock); | ||
90 | /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ | ||
91 | @@ -XXX,XX +XXX,XX @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) | ||
92 | return; | ||
93 | } | ||
94 | |||
95 | + /* Wait for throttle_group_restart_queue_entry() coroutines to finish */ | ||
96 | + AIO_WAIT_WHILE(tgm->aio_context, atomic_read(&tgm->restart_pending) > 0); | ||
97 | + | ||
98 | qemu_mutex_lock(&tg->lock); | ||
99 | for (i = 0; i < 2; i++) { | ||
100 | assert(tgm->pending_reqs[i] == 0); | ||
77 | -- | 101 | -- |
78 | 2.17.1 | 102 | 2.20.1 |
79 | 103 | ||
80 | 104 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | Hot-unplug a scsi-hd using an iothread. The previous patch fixes a |
---|---|---|---|
2 | segfault in this scenario. | ||
2 | 3 | ||
3 | The new blk_co_copy_range interface offers a more efficient way in the | 4 | This patch adds a regression test. |
4 | case of network based storage. Make use of it to allow faster convert | ||
5 | operation. | ||
6 | 5 | ||
7 | Since copy offloading cannot do zero detection ('-S') and compression | 6 | Suggested-by: Alberto Garcia <berto@igalia.com> |
8 | (-c), only try it when these options are not used. | 7 | Suggested-by: Kevin Wolf <kwolf@redhat.com> |
9 | 8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | |
10 | Signed-off-by: Fam Zheng <famz@redhat.com> | 9 | Reviewed-by: Alberto Garcia <berto@igalia.com> |
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Message-id: 20190114133257.30299-3-stefanha@redhat.com |
12 | Message-id: 20180601092648.24614-11-famz@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
14 | --- | 12 | --- |
15 | qemu-img.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- | 13 | tests/qemu-iotests/238 | 47 ++++++++++++++++++++++++++++++++++++++ |
16 | 1 file changed, 48 insertions(+), 2 deletions(-) | 14 | tests/qemu-iotests/238.out | 6 +++++ |
15 | tests/qemu-iotests/group | 1 + | ||
16 | 3 files changed, 54 insertions(+) | ||
17 | create mode 100755 tests/qemu-iotests/238 | ||
18 | create mode 100644 tests/qemu-iotests/238.out | ||
17 | 19 | ||
18 | diff --git a/qemu-img.c b/qemu-img.c | 20 | diff --git a/tests/qemu-iotests/238 b/tests/qemu-iotests/238 |
21 | new file mode 100755 | ||
22 | index XXXXXXX..XXXXXXX | ||
23 | --- /dev/null | ||
24 | +++ b/tests/qemu-iotests/238 | ||
25 | @@ -XXX,XX +XXX,XX @@ | ||
26 | +#!/usr/bin/env python | ||
27 | +# | ||
28 | +# Regression test for throttle group member unregister segfault with iothread | ||
29 | +# | ||
30 | +# Copyright (c) 2019 Red Hat, Inc. | ||
31 | +# | ||
32 | +# This program is free software; you can redistribute it and/or modify | ||
33 | +# it under the terms of the GNU General Public License as published by | ||
34 | +# the Free Software Foundation; either version 2 of the License, or | ||
35 | +# (at your option) any later version. | ||
36 | +# | ||
37 | +# This program is distributed in the hope that it will be useful, | ||
38 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
39 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
40 | +# GNU General Public License for more details. | ||
41 | +# | ||
42 | +# You should have received a copy of the GNU General Public License | ||
43 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
44 | +# | ||
45 | + | ||
46 | +import sys | ||
47 | +import os | ||
48 | +import iotests | ||
49 | +from iotests import log | ||
50 | + | ||
51 | +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) | ||
52 | + | ||
53 | +from qemu import QEMUMachine | ||
54 | + | ||
55 | +if iotests.qemu_default_machine == 's390-ccw-virtio': | ||
56 | + virtio_scsi_device = 'virtio-scsi-ccw' | ||
57 | +else: | ||
58 | + virtio_scsi_device = 'virtio-scsi-pci' | ||
59 | + | ||
60 | +vm = QEMUMachine(iotests.qemu_prog) | ||
61 | +vm.add_args('-machine', 'accel=kvm') | ||
62 | +vm.launch() | ||
63 | + | ||
64 | +log(vm.qmp('blockdev-add', node_name='hd0', driver='null-co')) | ||
65 | +log(vm.qmp('object-add', qom_type='iothread', id='iothread0')) | ||
66 | +log(vm.qmp('device_add', id='scsi0', driver=virtio_scsi_device, iothread='iothread0')) | ||
67 | +log(vm.qmp('device_add', id='scsi-hd0', driver='scsi-hd', drive='hd0')) | ||
68 | +log(vm.qmp('block_set_io_throttle', id='scsi-hd0', bps=0, bps_rd=0, bps_wr=0, | ||
69 | + iops=1000, iops_rd=0, iops_wr=0, conv_keys=False)) | ||
70 | +log(vm.qmp('device_del', id='scsi-hd0')) | ||
71 | + | ||
72 | +vm.shutdown() | ||
73 | diff --git a/tests/qemu-iotests/238.out b/tests/qemu-iotests/238.out | ||
74 | new file mode 100644 | ||
75 | index XXXXXXX..XXXXXXX | ||
76 | --- /dev/null | ||
77 | +++ b/tests/qemu-iotests/238.out | ||
78 | @@ -XXX,XX +XXX,XX @@ | ||
79 | +{"return": {}} | ||
80 | +{"return": {}} | ||
81 | +{"return": {}} | ||
82 | +{"return": {}} | ||
83 | +{"return": {}} | ||
84 | +{"return": {}} | ||
85 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | ||
19 | index XXXXXXX..XXXXXXX 100644 | 86 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/qemu-img.c | 87 | --- a/tests/qemu-iotests/group |
21 | +++ b/qemu-img.c | 88 | +++ b/tests/qemu-iotests/group |
22 | @@ -XXX,XX +XXX,XX @@ typedef struct ImgConvertState { | 89 | @@ -XXX,XX +XXX,XX @@ |
23 | bool compressed; | 90 | 234 auto quick migration |
24 | bool target_has_backing; | 91 | 235 auto quick |
25 | bool wr_in_order; | 92 | 236 auto quick |
26 | + bool copy_range; | 93 | +238 auto quick |
27 | int min_sparse; | ||
28 | size_t cluster_sectors; | ||
29 | size_t buf_sectors; | ||
30 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | +static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num, | ||
35 | + int nb_sectors) | ||
36 | +{ | ||
37 | + int n, ret; | ||
38 | + | ||
39 | + while (nb_sectors > 0) { | ||
40 | + BlockBackend *blk; | ||
41 | + int src_cur; | ||
42 | + int64_t bs_sectors, src_cur_offset; | ||
43 | + int64_t offset; | ||
44 | + | ||
45 | + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); | ||
46 | + offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS; | ||
47 | + blk = s->src[src_cur]; | ||
48 | + bs_sectors = s->src_sectors[src_cur]; | ||
49 | + | ||
50 | + n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); | ||
51 | + | ||
52 | + ret = blk_co_copy_range(blk, offset, s->target, | ||
53 | + sector_num << BDRV_SECTOR_BITS, | ||
54 | + n << BDRV_SECTOR_BITS, 0); | ||
55 | + if (ret < 0) { | ||
56 | + return ret; | ||
57 | + } | ||
58 | + | ||
59 | + sector_num += n; | ||
60 | + nb_sectors -= n; | ||
61 | + } | ||
62 | + return 0; | ||
63 | +} | ||
64 | + | ||
65 | static void coroutine_fn convert_co_do_copy(void *opaque) | ||
66 | { | ||
67 | ImgConvertState *s = opaque; | ||
68 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque) | ||
69 | int n; | ||
70 | int64_t sector_num; | ||
71 | enum ImgConvertBlockStatus status; | ||
72 | + bool copy_range; | ||
73 | |||
74 | qemu_co_mutex_lock(&s->lock); | ||
75 | if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { | ||
76 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque) | ||
77 | s->allocated_sectors, 0); | ||
78 | } | ||
79 | |||
80 | - if (status == BLK_DATA) { | ||
81 | +retry: | ||
82 | + copy_range = s->copy_range && s->status == BLK_DATA; | ||
83 | + if (status == BLK_DATA && !copy_range) { | ||
84 | ret = convert_co_read(s, sector_num, n, buf); | ||
85 | if (ret < 0) { | ||
86 | error_report("error while reading sector %" PRId64 | ||
87 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn convert_co_do_copy(void *opaque) | ||
88 | } | ||
89 | |||
90 | if (s->ret == -EINPROGRESS) { | ||
91 | - ret = convert_co_write(s, sector_num, n, buf, status); | ||
92 | + if (copy_range) { | ||
93 | + ret = convert_co_copy_range(s, sector_num, n); | ||
94 | + if (ret) { | ||
95 | + s->copy_range = false; | ||
96 | + goto retry; | ||
97 | + } | ||
98 | + } else { | ||
99 | + ret = convert_co_write(s, sector_num, n, buf, status); | ||
100 | + } | ||
101 | if (ret < 0) { | ||
102 | error_report("error while writing sector %" PRId64 | ||
103 | ": %s", sector_num, strerror(-ret)); | ||
104 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | ||
105 | ImgConvertState s = (ImgConvertState) { | ||
106 | /* Need at least 4k of zeros for sparse detection */ | ||
107 | .min_sparse = 8, | ||
108 | + .copy_range = true, | ||
109 | .buf_sectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE, | ||
110 | .wr_in_order = true, | ||
111 | .num_coroutines = 8, | ||
112 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | ||
113 | break; | ||
114 | case 'c': | ||
115 | s.compressed = true; | ||
116 | + s.copy_range = false; | ||
117 | break; | ||
118 | case 'o': | ||
119 | if (!is_valid_option_list(optarg)) { | ||
120 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | ||
121 | } | ||
122 | |||
123 | s.min_sparse = sval / BDRV_SECTOR_SIZE; | ||
124 | + s.copy_range = false; | ||
125 | break; | ||
126 | } | ||
127 | case 'p': | ||
128 | -- | 94 | -- |
129 | 2.17.1 | 95 | 2.20.1 |
130 | 96 | ||
131 | 97 | diff view generated by jsdifflib |
1 | From: Fam Zheng <famz@redhat.com> | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
---|---|---|---|
2 | 2 | ||
3 | We don't verify the request range against s->size in the I/O callbacks | 3 | Drop CoSleepCB structure. It's actually unused. |
4 | except for raw_co_pwritev. This is inconsistent (especially for | ||
5 | raw_co_pwrite_zeroes and raw_co_pdiscard), so fix them, in the meanwhile | ||
6 | make the helper reusable by the coming new callbacks. | ||
7 | 4 | ||
8 | Note that in most cases the block layer already verifies the request | 5 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> |
9 | byte range against our reported image length, before invoking the driver | 6 | Message-id: 20190122143113.20331-1-vsementsov@virtuozzo.com |
10 | callbacks. The exception is during image creating, after | ||
11 | blk_set_allow_write_beyond_eof(blk, true) is called. But in that case, | ||
12 | the requests are not directly from the user or guest. So there is no | ||
13 | visible behavior change in adding the check code. | ||
14 | |||
15 | The int64_t -> uint64_t inconsistency, as shown by the type casting, is | ||
16 | pre-existing due to the interface. | ||
17 | |||
18 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
20 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
21 | Message-id: 20180601092648.24614-3-famz@redhat.com | ||
22 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
23 | --- | 8 | --- |
24 | block/raw-format.c | 64 ++++++++++++++++++++++++++++------------------ | 9 | util/qemu-coroutine-sleep.c | 27 ++++++++++----------------- |
25 | 1 file changed, 39 insertions(+), 25 deletions(-) | 10 | 1 file changed, 10 insertions(+), 17 deletions(-) |
26 | 11 | ||
27 | diff --git a/block/raw-format.c b/block/raw-format.c | 12 | diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c |
28 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/block/raw-format.c | 14 | --- a/util/qemu-coroutine-sleep.c |
30 | +++ b/block/raw-format.c | 15 | +++ b/util/qemu-coroutine-sleep.c |
31 | @@ -XXX,XX +XXX,XX @@ static void raw_reopen_abort(BDRVReopenState *state) | 16 | @@ -XXX,XX +XXX,XX @@ |
32 | state->opaque = NULL; | 17 | #include "qemu/timer.h" |
18 | #include "block/aio.h" | ||
19 | |||
20 | -typedef struct CoSleepCB { | ||
21 | - QEMUTimer *ts; | ||
22 | - Coroutine *co; | ||
23 | -} CoSleepCB; | ||
24 | - | ||
25 | static void co_sleep_cb(void *opaque) | ||
26 | { | ||
27 | - CoSleepCB *sleep_cb = opaque; | ||
28 | + Coroutine *co = opaque; | ||
29 | |||
30 | /* Write of schedule protected by barrier write in aio_co_schedule */ | ||
31 | - atomic_set(&sleep_cb->co->scheduled, NULL); | ||
32 | - aio_co_wake(sleep_cb->co); | ||
33 | + atomic_set(&co->scheduled, NULL); | ||
34 | + aio_co_wake(co); | ||
33 | } | 35 | } |
34 | 36 | ||
35 | +/* Check and adjust the offset, against 'offset' and 'size' options. */ | 37 | void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) |
36 | +static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, | ||
37 | + uint64_t bytes, bool is_write) | ||
38 | +{ | ||
39 | + BDRVRawState *s = bs->opaque; | ||
40 | + | ||
41 | + if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) { | ||
42 | + /* There's not enough space for the write, or the read request is | ||
43 | + * out-of-range. Don't read/write anything to prevent leaking out of | ||
44 | + * the size specified in options. */ | ||
45 | + return is_write ? -ENOSPC : -EINVAL;; | ||
46 | + } | ||
47 | + | ||
48 | + if (*offset > INT64_MAX - s->offset) { | ||
49 | + return -EINVAL; | ||
50 | + } | ||
51 | + *offset += s->offset; | ||
52 | + | ||
53 | + return 0; | ||
54 | +} | ||
55 | + | ||
56 | static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
57 | uint64_t bytes, QEMUIOVector *qiov, | ||
58 | int flags) | ||
59 | { | 38 | { |
60 | - BDRVRawState *s = bs->opaque; | 39 | AioContext *ctx = qemu_get_current_aio_context(); |
61 | + int ret; | 40 | - CoSleepCB sleep_cb = { |
62 | 41 | - .co = qemu_coroutine_self(), | |
63 | - if (offset > UINT64_MAX - s->offset) { | 42 | - }; |
64 | - return -EINVAL; | 43 | + QEMUTimer *ts; |
65 | + ret = raw_adjust_offset(bs, &offset, bytes, false); | 44 | + Coroutine *co = qemu_coroutine_self(); |
66 | + if (ret) { | 45 | |
67 | + return ret; | 46 | - const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL, |
47 | - __func__); | ||
48 | + const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, __func__); | ||
49 | if (scheduled) { | ||
50 | fprintf(stderr, | ||
51 | "%s: Co-routine was already scheduled in '%s'\n", | ||
52 | __func__, scheduled); | ||
53 | abort(); | ||
68 | } | 54 | } |
69 | - offset += s->offset; | 55 | - sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); |
70 | 56 | - timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); | |
71 | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | 57 | + ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, co); |
72 | return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); | 58 | + timer_mod(ts, qemu_clock_get_ns(type) + ns); |
73 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, | 59 | qemu_coroutine_yield(); |
74 | uint64_t bytes, QEMUIOVector *qiov, | 60 | - timer_del(sleep_cb.ts); |
75 | int flags) | 61 | - timer_free(sleep_cb.ts); |
76 | { | 62 | + timer_del(ts); |
77 | - BDRVRawState *s = bs->opaque; | 63 | + timer_free(ts); |
78 | void *buf = NULL; | ||
79 | BlockDriver *drv; | ||
80 | QEMUIOVector local_qiov; | ||
81 | int ret; | ||
82 | |||
83 | - if (s->has_size && (offset > s->size || bytes > (s->size - offset))) { | ||
84 | - /* There's not enough space for the data. Don't write anything and just | ||
85 | - * fail to prevent leaking out of the size specified in options. */ | ||
86 | - return -ENOSPC; | ||
87 | - } | ||
88 | - | ||
89 | - if (offset > UINT64_MAX - s->offset) { | ||
90 | - ret = -EINVAL; | ||
91 | - goto fail; | ||
92 | - } | ||
93 | - | ||
94 | if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { | ||
95 | /* Handling partial writes would be a pain - so we just | ||
96 | * require that guests have 512-byte request alignment if | ||
97 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
98 | qiov = &local_qiov; | ||
99 | } | ||
100 | |||
101 | - offset += s->offset; | ||
102 | + ret = raw_adjust_offset(bs, &offset, bytes, true); | ||
103 | + if (ret) { | ||
104 | + goto fail; | ||
105 | + } | ||
106 | |||
107 | BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | ||
108 | ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); | ||
109 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, | ||
110 | int64_t offset, int bytes, | ||
111 | BdrvRequestFlags flags) | ||
112 | { | ||
113 | - BDRVRawState *s = bs->opaque; | ||
114 | - if (offset > UINT64_MAX - s->offset) { | ||
115 | - return -EINVAL; | ||
116 | + int ret; | ||
117 | + | ||
118 | + ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); | ||
119 | + if (ret) { | ||
120 | + return ret; | ||
121 | } | ||
122 | - offset += s->offset; | ||
123 | return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); | ||
124 | } | 64 | } |
125 | |||
126 | static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, | ||
127 | int64_t offset, int bytes) | ||
128 | { | ||
129 | - BDRVRawState *s = bs->opaque; | ||
130 | - if (offset > UINT64_MAX - s->offset) { | ||
131 | - return -EINVAL; | ||
132 | + int ret; | ||
133 | + | ||
134 | + ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); | ||
135 | + if (ret) { | ||
136 | + return ret; | ||
137 | } | ||
138 | - offset += s->offset; | ||
139 | return bdrv_co_pdiscard(bs->file->bs, offset, bytes); | ||
140 | } | ||
141 | |||
142 | -- | 65 | -- |
143 | 2.17.1 | 66 | 2.20.1 |
144 | 67 | ||
145 | 68 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | Just pass down to ->file. | ||
4 | |||
5 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20180601092648.24614-4-famz@redhat.com | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | block/raw-format.c | 32 ++++++++++++++++++++++++++++++++ | ||
11 | 1 file changed, 32 insertions(+) | ||
12 | |||
13 | diff --git a/block/raw-format.c b/block/raw-format.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/block/raw-format.c | ||
16 | +++ b/block/raw-format.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) | ||
18 | return bdrv_probe_geometry(bs->file->bs, geo); | ||
19 | } | ||
20 | |||
21 | +static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, | ||
22 | + BdrvChild *src, uint64_t src_offset, | ||
23 | + BdrvChild *dst, uint64_t dst_offset, | ||
24 | + uint64_t bytes, BdrvRequestFlags flags) | ||
25 | +{ | ||
26 | + int ret; | ||
27 | + | ||
28 | + ret = raw_adjust_offset(bs, &src_offset, bytes, false); | ||
29 | + if (ret) { | ||
30 | + return ret; | ||
31 | + } | ||
32 | + return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset, | ||
33 | + bytes, flags); | ||
34 | +} | ||
35 | + | ||
36 | +static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, | ||
37 | + BdrvChild *src, uint64_t src_offset, | ||
38 | + BdrvChild *dst, uint64_t dst_offset, | ||
39 | + uint64_t bytes, BdrvRequestFlags flags) | ||
40 | +{ | ||
41 | + int ret; | ||
42 | + | ||
43 | + ret = raw_adjust_offset(bs, &dst_offset, bytes, true); | ||
44 | + if (ret) { | ||
45 | + return ret; | ||
46 | + } | ||
47 | + return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes, | ||
48 | + flags); | ||
49 | +} | ||
50 | + | ||
51 | BlockDriver bdrv_raw = { | ||
52 | .format_name = "raw", | ||
53 | .instance_size = sizeof(BDRVRawState), | ||
54 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = { | ||
55 | .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, | ||
56 | .bdrv_co_pdiscard = &raw_co_pdiscard, | ||
57 | .bdrv_co_block_status = &raw_co_block_status, | ||
58 | + .bdrv_co_copy_range_from = &raw_co_copy_range_from, | ||
59 | + .bdrv_co_copy_range_to = &raw_co_copy_range_to, | ||
60 | .bdrv_truncate = &raw_truncate, | ||
61 | .bdrv_getlength = &raw_getlength, | ||
62 | .has_variable_length = true, | ||
63 | -- | ||
64 | 2.17.1 | ||
65 | |||
66 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | The two callbacks are implemented quite similarly to the read/write | ||
4 | functions: bdrv_co_copy_range_from maps for read and calls into bs->file | ||
5 | or bs->backing depending on the allocation status; bdrv_co_copy_range_to | ||
6 | maps for write and calls into bs->file. | ||
7 | |||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
10 | Message-id: 20180601092648.24614-5-famz@redhat.com | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | --- | ||
13 | block/qcow2.c | 229 +++++++++++++++++++++++++++++++++++++++++++------- | ||
14 | 1 file changed, 199 insertions(+), 30 deletions(-) | ||
15 | |||
16 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/block/qcow2.c | ||
19 | +++ b/block/qcow2.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, | ||
21 | return status; | ||
22 | } | ||
23 | |||
24 | +static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, | ||
25 | + QCowL2Meta **pl2meta, | ||
26 | + bool link_l2) | ||
27 | +{ | ||
28 | + int ret = 0; | ||
29 | + QCowL2Meta *l2meta = *pl2meta; | ||
30 | + | ||
31 | + while (l2meta != NULL) { | ||
32 | + QCowL2Meta *next; | ||
33 | + | ||
34 | + if (!ret && link_l2) { | ||
35 | + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); | ||
36 | + if (ret) { | ||
37 | + goto out; | ||
38 | + } | ||
39 | + } | ||
40 | + | ||
41 | + /* Take the request off the list of running requests */ | ||
42 | + if (l2meta->nb_clusters != 0) { | ||
43 | + QLIST_REMOVE(l2meta, next_in_flight); | ||
44 | + } | ||
45 | + | ||
46 | + qemu_co_queue_restart_all(&l2meta->dependent_requests); | ||
47 | + | ||
48 | + next = l2meta->next; | ||
49 | + g_free(l2meta); | ||
50 | + l2meta = next; | ||
51 | + } | ||
52 | +out: | ||
53 | + *pl2meta = l2meta; | ||
54 | + return ret; | ||
55 | +} | ||
56 | + | ||
57 | static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
58 | uint64_t bytes, QEMUIOVector *qiov, | ||
59 | int flags) | ||
60 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
61 | } | ||
62 | } | ||
63 | |||
64 | - while (l2meta != NULL) { | ||
65 | - QCowL2Meta *next; | ||
66 | - | ||
67 | - ret = qcow2_alloc_cluster_link_l2(bs, l2meta); | ||
68 | - if (ret < 0) { | ||
69 | - goto fail; | ||
70 | - } | ||
71 | - | ||
72 | - /* Take the request off the list of running requests */ | ||
73 | - if (l2meta->nb_clusters != 0) { | ||
74 | - QLIST_REMOVE(l2meta, next_in_flight); | ||
75 | - } | ||
76 | - | ||
77 | - qemu_co_queue_restart_all(&l2meta->dependent_requests); | ||
78 | - | ||
79 | - next = l2meta->next; | ||
80 | - g_free(l2meta); | ||
81 | - l2meta = next; | ||
82 | + ret = qcow2_handle_l2meta(bs, &l2meta, true); | ||
83 | + if (ret) { | ||
84 | + goto fail; | ||
85 | } | ||
86 | |||
87 | bytes -= cur_bytes; | ||
88 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
89 | ret = 0; | ||
90 | |||
91 | fail: | ||
92 | - while (l2meta != NULL) { | ||
93 | - QCowL2Meta *next; | ||
94 | - | ||
95 | - if (l2meta->nb_clusters != 0) { | ||
96 | - QLIST_REMOVE(l2meta, next_in_flight); | ||
97 | - } | ||
98 | - qemu_co_queue_restart_all(&l2meta->dependent_requests); | ||
99 | - | ||
100 | - next = l2meta->next; | ||
101 | - g_free(l2meta); | ||
102 | - l2meta = next; | ||
103 | - } | ||
104 | + qcow2_handle_l2meta(bs, &l2meta, false); | ||
105 | |||
106 | qemu_co_mutex_unlock(&s->lock); | ||
107 | |||
108 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, | ||
109 | return ret; | ||
110 | } | ||
111 | |||
112 | +static int coroutine_fn | ||
113 | +qcow2_co_copy_range_from(BlockDriverState *bs, | ||
114 | + BdrvChild *src, uint64_t src_offset, | ||
115 | + BdrvChild *dst, uint64_t dst_offset, | ||
116 | + uint64_t bytes, BdrvRequestFlags flags) | ||
117 | +{ | ||
118 | + BDRVQcow2State *s = bs->opaque; | ||
119 | + int ret; | ||
120 | + unsigned int cur_bytes; /* number of bytes in current iteration */ | ||
121 | + BdrvChild *child = NULL; | ||
122 | + BdrvRequestFlags cur_flags; | ||
123 | + | ||
124 | + assert(!bs->encrypted); | ||
125 | + qemu_co_mutex_lock(&s->lock); | ||
126 | + | ||
127 | + while (bytes != 0) { | ||
128 | + uint64_t copy_offset = 0; | ||
129 | + /* prepare next request */ | ||
130 | + cur_bytes = MIN(bytes, INT_MAX); | ||
131 | + cur_flags = flags; | ||
132 | + | ||
133 | + ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); | ||
134 | + if (ret < 0) { | ||
135 | + goto out; | ||
136 | + } | ||
137 | + | ||
138 | + switch (ret) { | ||
139 | + case QCOW2_CLUSTER_UNALLOCATED: | ||
140 | + if (bs->backing && bs->backing->bs) { | ||
141 | + int64_t backing_length = bdrv_getlength(bs->backing->bs); | ||
142 | + if (src_offset >= backing_length) { | ||
143 | + cur_flags |= BDRV_REQ_ZERO_WRITE; | ||
144 | + } else { | ||
145 | + child = bs->backing; | ||
146 | + cur_bytes = MIN(cur_bytes, backing_length - src_offset); | ||
147 | + copy_offset = src_offset; | ||
148 | + } | ||
149 | + } else { | ||
150 | + cur_flags |= BDRV_REQ_ZERO_WRITE; | ||
151 | + } | ||
152 | + break; | ||
153 | + | ||
154 | + case QCOW2_CLUSTER_ZERO_PLAIN: | ||
155 | + case QCOW2_CLUSTER_ZERO_ALLOC: | ||
156 | + cur_flags |= BDRV_REQ_ZERO_WRITE; | ||
157 | + break; | ||
158 | + | ||
159 | + case QCOW2_CLUSTER_COMPRESSED: | ||
160 | + ret = -ENOTSUP; | ||
161 | + goto out; | ||
162 | + break; | ||
163 | + | ||
164 | + case QCOW2_CLUSTER_NORMAL: | ||
165 | + child = bs->file; | ||
166 | + copy_offset += offset_into_cluster(s, src_offset); | ||
167 | + if ((copy_offset & 511) != 0) { | ||
168 | + ret = -EIO; | ||
169 | + goto out; | ||
170 | + } | ||
171 | + break; | ||
172 | + | ||
173 | + default: | ||
174 | + abort(); | ||
175 | + } | ||
176 | + qemu_co_mutex_unlock(&s->lock); | ||
177 | + ret = bdrv_co_copy_range_from(child, | ||
178 | + copy_offset, | ||
179 | + dst, dst_offset, | ||
180 | + cur_bytes, cur_flags); | ||
181 | + qemu_co_mutex_lock(&s->lock); | ||
182 | + if (ret < 0) { | ||
183 | + goto out; | ||
184 | + } | ||
185 | + | ||
186 | + bytes -= cur_bytes; | ||
187 | + src_offset += cur_bytes; | ||
188 | + dst_offset += cur_bytes; | ||
189 | + } | ||
190 | + ret = 0; | ||
191 | + | ||
192 | +out: | ||
193 | + qemu_co_mutex_unlock(&s->lock); | ||
194 | + return ret; | ||
195 | +} | ||
196 | + | ||
197 | +static int coroutine_fn | ||
198 | +qcow2_co_copy_range_to(BlockDriverState *bs, | ||
199 | + BdrvChild *src, uint64_t src_offset, | ||
200 | + BdrvChild *dst, uint64_t dst_offset, | ||
201 | + uint64_t bytes, BdrvRequestFlags flags) | ||
202 | +{ | ||
203 | + BDRVQcow2State *s = bs->opaque; | ||
204 | + int offset_in_cluster; | ||
205 | + int ret; | ||
206 | + unsigned int cur_bytes; /* number of sectors in current iteration */ | ||
207 | + uint64_t cluster_offset; | ||
208 | + uint8_t *cluster_data = NULL; | ||
209 | + QCowL2Meta *l2meta = NULL; | ||
210 | + | ||
211 | + assert(!bs->encrypted); | ||
212 | + s->cluster_cache_offset = -1; /* disable compressed cache */ | ||
213 | + | ||
214 | + qemu_co_mutex_lock(&s->lock); | ||
215 | + | ||
216 | + while (bytes != 0) { | ||
217 | + | ||
218 | + l2meta = NULL; | ||
219 | + | ||
220 | + offset_in_cluster = offset_into_cluster(s, dst_offset); | ||
221 | + cur_bytes = MIN(bytes, INT_MAX); | ||
222 | + | ||
223 | + /* TODO: | ||
224 | + * If src->bs == dst->bs, we could simply copy by incrementing | ||
225 | + * the refcnt, without copying user data. | ||
226 | + * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */ | ||
227 | + ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes, | ||
228 | + &cluster_offset, &l2meta); | ||
229 | + if (ret < 0) { | ||
230 | + goto fail; | ||
231 | + } | ||
232 | + | ||
233 | + assert((cluster_offset & 511) == 0); | ||
234 | + | ||
235 | + ret = qcow2_pre_write_overlap_check(bs, 0, | ||
236 | + cluster_offset + offset_in_cluster, cur_bytes); | ||
237 | + if (ret < 0) { | ||
238 | + goto fail; | ||
239 | + } | ||
240 | + | ||
241 | + qemu_co_mutex_unlock(&s->lock); | ||
242 | + ret = bdrv_co_copy_range_to(src, src_offset, | ||
243 | + bs->file, | ||
244 | + cluster_offset + offset_in_cluster, | ||
245 | + cur_bytes, flags); | ||
246 | + qemu_co_mutex_lock(&s->lock); | ||
247 | + if (ret < 0) { | ||
248 | + goto fail; | ||
249 | + } | ||
250 | + | ||
251 | + ret = qcow2_handle_l2meta(bs, &l2meta, true); | ||
252 | + if (ret) { | ||
253 | + goto fail; | ||
254 | + } | ||
255 | + | ||
256 | + bytes -= cur_bytes; | ||
257 | + dst_offset += cur_bytes; | ||
258 | + } | ||
259 | + ret = 0; | ||
260 | + | ||
261 | +fail: | ||
262 | + qcow2_handle_l2meta(bs, &l2meta, false); | ||
263 | + | ||
264 | + qemu_co_mutex_unlock(&s->lock); | ||
265 | + | ||
266 | + qemu_vfree(cluster_data); | ||
267 | + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); | ||
268 | + | ||
269 | + return ret; | ||
270 | +} | ||
271 | + | ||
272 | static int qcow2_truncate(BlockDriverState *bs, int64_t offset, | ||
273 | PreallocMode prealloc, Error **errp) | ||
274 | { | ||
275 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | ||
276 | |||
277 | .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, | ||
278 | .bdrv_co_pdiscard = qcow2_co_pdiscard, | ||
279 | + .bdrv_co_copy_range_from = qcow2_co_copy_range_from, | ||
280 | + .bdrv_co_copy_range_to = qcow2_co_copy_range_to, | ||
281 | .bdrv_truncate = qcow2_truncate, | ||
282 | .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, | ||
283 | .bdrv_make_empty = qcow2_make_empty, | ||
284 | -- | ||
285 | 2.17.1 | ||
286 | |||
287 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | With copy_file_range(2), we can implement the bdrv_co_copy_range | ||
4 | semantics. | ||
5 | |||
6 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
7 | Message-id: 20180601092648.24614-6-famz@redhat.com | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | configure | 17 +++++++ | ||
11 | include/block/raw-aio.h | 10 ++++- | ||
12 | block/file-posix.c | 98 +++++++++++++++++++++++++++++++++++++++-- | ||
13 | 3 files changed, 120 insertions(+), 5 deletions(-) | ||
14 | |||
15 | diff --git a/configure b/configure | ||
16 | index XXXXXXX..XXXXXXX 100755 | ||
17 | --- a/configure | ||
18 | +++ b/configure | ||
19 | @@ -XXX,XX +XXX,XX @@ if test "$fortify_source" != "no"; then | ||
20 | fi | ||
21 | fi | ||
22 | |||
23 | +############################################### | ||
24 | +# Check if copy_file_range is provided by glibc | ||
25 | +have_copy_file_range=no | ||
26 | +cat > $TMPC << EOF | ||
27 | +#include <unistd.h> | ||
28 | +int main(void) { | ||
29 | + copy_file_range(0, NULL, 0, NULL, 0, 0); | ||
30 | + return 0; | ||
31 | +} | ||
32 | +EOF | ||
33 | +if compile_prog "" "" ; then | ||
34 | + have_copy_file_range=yes | ||
35 | +fi | ||
36 | + | ||
37 | ########################################## | ||
38 | # check if struct fsxattr is available via linux/fs.h | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ fi | ||
41 | if test "$have_fsxattr" = "yes" ; then | ||
42 | echo "HAVE_FSXATTR=y" >> $config_host_mak | ||
43 | fi | ||
44 | +if test "$have_copy_file_range" = "yes" ; then | ||
45 | + echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak | ||
46 | +fi | ||
47 | if test "$vte" = "yes" ; then | ||
48 | echo "CONFIG_VTE=y" >> $config_host_mak | ||
49 | echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak | ||
50 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/include/block/raw-aio.h | ||
53 | +++ b/include/block/raw-aio.h | ||
54 | @@ -XXX,XX +XXX,XX @@ | ||
55 | #define QEMU_AIO_FLUSH 0x0008 | ||
56 | #define QEMU_AIO_DISCARD 0x0010 | ||
57 | #define QEMU_AIO_WRITE_ZEROES 0x0020 | ||
58 | +#define QEMU_AIO_COPY_RANGE 0x0040 | ||
59 | #define QEMU_AIO_TYPE_MASK \ | ||
60 | - (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \ | ||
61 | - QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES) | ||
62 | + (QEMU_AIO_READ | \ | ||
63 | + QEMU_AIO_WRITE | \ | ||
64 | + QEMU_AIO_IOCTL | \ | ||
65 | + QEMU_AIO_FLUSH | \ | ||
66 | + QEMU_AIO_DISCARD | \ | ||
67 | + QEMU_AIO_WRITE_ZEROES | \ | ||
68 | + QEMU_AIO_COPY_RANGE) | ||
69 | |||
70 | /* AIO flags */ | ||
71 | #define QEMU_AIO_MISALIGNED 0x1000 | ||
72 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/block/file-posix.c | ||
75 | +++ b/block/file-posix.c | ||
76 | @@ -XXX,XX +XXX,XX @@ | ||
77 | #ifdef __linux__ | ||
78 | #include <sys/ioctl.h> | ||
79 | #include <sys/param.h> | ||
80 | +#include <sys/syscall.h> | ||
81 | #include <linux/cdrom.h> | ||
82 | #include <linux/fd.h> | ||
83 | #include <linux/fs.h> | ||
84 | @@ -XXX,XX +XXX,XX @@ typedef struct RawPosixAIOData { | ||
85 | #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ | ||
86 | off_t aio_offset; | ||
87 | int aio_type; | ||
88 | + int aio_fd2; | ||
89 | + off_t aio_offset2; | ||
90 | } RawPosixAIOData; | ||
91 | |||
92 | #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) | ||
93 | @@ -XXX,XX +XXX,XX @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) | ||
94 | return -ENOTSUP; | ||
95 | } | ||
96 | |||
97 | +#ifndef HAVE_COPY_FILE_RANGE | ||
98 | +static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, | ||
99 | + off_t *out_off, size_t len, unsigned int flags) | ||
100 | +{ | ||
101 | +#ifdef __NR_copy_file_range | ||
102 | + return syscall(__NR_copy_file_range, in_fd, in_off, out_fd, | ||
103 | + out_off, len, flags); | ||
104 | +#else | ||
105 | + errno = ENOSYS; | ||
106 | + return -1; | ||
107 | +#endif | ||
108 | +} | ||
109 | +#endif | ||
110 | + | ||
111 | +static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) | ||
112 | +{ | ||
113 | + uint64_t bytes = aiocb->aio_nbytes; | ||
114 | + off_t in_off = aiocb->aio_offset; | ||
115 | + off_t out_off = aiocb->aio_offset2; | ||
116 | + | ||
117 | + while (bytes) { | ||
118 | + ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, | ||
119 | + aiocb->aio_fd2, &out_off, | ||
120 | + bytes, 0); | ||
121 | + if (ret == -EINTR) { | ||
122 | + continue; | ||
123 | + } | ||
124 | + if (ret < 0) { | ||
125 | + if (errno == ENOSYS) { | ||
126 | + return -ENOTSUP; | ||
127 | + } else { | ||
128 | + return -errno; | ||
129 | + } | ||
130 | + } | ||
131 | + if (!ret) { | ||
132 | + /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */ | ||
133 | + return -ENOTSUP; | ||
134 | + } | ||
135 | + bytes -= ret; | ||
136 | + } | ||
137 | + return 0; | ||
138 | +} | ||
139 | + | ||
140 | static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) | ||
141 | { | ||
142 | int ret = -EOPNOTSUPP; | ||
143 | @@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg) | ||
144 | case QEMU_AIO_WRITE_ZEROES: | ||
145 | ret = handle_aiocb_write_zeroes(aiocb); | ||
146 | break; | ||
147 | + case QEMU_AIO_COPY_RANGE: | ||
148 | + ret = handle_aiocb_copy_range(aiocb); | ||
149 | + break; | ||
150 | default: | ||
151 | fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); | ||
152 | ret = -EINVAL; | ||
153 | @@ -XXX,XX +XXX,XX @@ static int aio_worker(void *arg) | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | -static int paio_submit_co(BlockDriverState *bs, int fd, | ||
158 | - int64_t offset, QEMUIOVector *qiov, | ||
159 | - int bytes, int type) | ||
160 | +static int paio_submit_co_full(BlockDriverState *bs, int fd, | ||
161 | + int64_t offset, int fd2, int64_t offset2, | ||
162 | + QEMUIOVector *qiov, | ||
163 | + int bytes, int type) | ||
164 | { | ||
165 | RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); | ||
166 | ThreadPool *pool; | ||
167 | @@ -XXX,XX +XXX,XX @@ static int paio_submit_co(BlockDriverState *bs, int fd, | ||
168 | acb->bs = bs; | ||
169 | acb->aio_type = type; | ||
170 | acb->aio_fildes = fd; | ||
171 | + acb->aio_fd2 = fd2; | ||
172 | + acb->aio_offset2 = offset2; | ||
173 | |||
174 | acb->aio_nbytes = bytes; | ||
175 | acb->aio_offset = offset; | ||
176 | @@ -XXX,XX +XXX,XX @@ static int paio_submit_co(BlockDriverState *bs, int fd, | ||
177 | return thread_pool_submit_co(pool, aio_worker, acb); | ||
178 | } | ||
179 | |||
180 | +static inline int paio_submit_co(BlockDriverState *bs, int fd, | ||
181 | + int64_t offset, QEMUIOVector *qiov, | ||
182 | + int bytes, int type) | ||
183 | +{ | ||
184 | + return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type); | ||
185 | +} | ||
186 | + | ||
187 | static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, | ||
188 | int64_t offset, QEMUIOVector *qiov, int bytes, | ||
189 | BlockCompletionFunc *cb, void *opaque, int type) | ||
190 | @@ -XXX,XX +XXX,XX @@ static void raw_abort_perm_update(BlockDriverState *bs) | ||
191 | raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); | ||
192 | } | ||
193 | |||
194 | +static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, | ||
195 | + BdrvChild *src, uint64_t src_offset, | ||
196 | + BdrvChild *dst, uint64_t dst_offset, | ||
197 | + uint64_t bytes, BdrvRequestFlags flags) | ||
198 | +{ | ||
199 | + return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags); | ||
200 | +} | ||
201 | + | ||
202 | +static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, | ||
203 | + BdrvChild *src, uint64_t src_offset, | ||
204 | + BdrvChild *dst, uint64_t dst_offset, | ||
205 | + uint64_t bytes, BdrvRequestFlags flags) | ||
206 | +{ | ||
207 | + BDRVRawState *s = bs->opaque; | ||
208 | + BDRVRawState *src_s; | ||
209 | + | ||
210 | + assert(dst->bs == bs); | ||
211 | + if (src->bs->drv->bdrv_co_copy_range_to != raw_co_copy_range_to) { | ||
212 | + return -ENOTSUP; | ||
213 | + } | ||
214 | + | ||
215 | + src_s = src->bs->opaque; | ||
216 | + if (fd_open(bs) < 0 || fd_open(bs) < 0) { | ||
217 | + return -EIO; | ||
218 | + } | ||
219 | + return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset, | ||
220 | + NULL, bytes, QEMU_AIO_COPY_RANGE); | ||
221 | +} | ||
222 | + | ||
223 | BlockDriver bdrv_file = { | ||
224 | .format_name = "file", | ||
225 | .protocol_name = "file", | ||
226 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = { | ||
227 | .bdrv_co_pwritev = raw_co_pwritev, | ||
228 | .bdrv_aio_flush = raw_aio_flush, | ||
229 | .bdrv_aio_pdiscard = raw_aio_pdiscard, | ||
230 | + .bdrv_co_copy_range_from = raw_co_copy_range_from, | ||
231 | + .bdrv_co_copy_range_to = raw_co_copy_range_to, | ||
232 | .bdrv_refresh_limits = raw_refresh_limits, | ||
233 | .bdrv_io_plug = raw_aio_plug, | ||
234 | .bdrv_io_unplug = raw_aio_unplug, | ||
235 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = { | ||
236 | .bdrv_co_pwritev = raw_co_pwritev, | ||
237 | .bdrv_aio_flush = raw_aio_flush, | ||
238 | .bdrv_aio_pdiscard = hdev_aio_pdiscard, | ||
239 | + .bdrv_co_copy_range_from = raw_co_copy_range_from, | ||
240 | + .bdrv_co_copy_range_to = raw_co_copy_range_to, | ||
241 | .bdrv_refresh_limits = raw_refresh_limits, | ||
242 | .bdrv_io_plug = raw_aio_plug, | ||
243 | .bdrv_io_unplug = raw_aio_unplug, | ||
244 | -- | ||
245 | 2.17.1 | ||
246 | |||
247 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | The device designator data returned in INQUIRY command will be useful to | ||
4 | fill in source/target fields during copy offloading. Do this when | ||
5 | connecting to the target and save the data for later use. | ||
6 | |||
7 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Message-id: 20180601092648.24614-7-famz@redhat.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | block/iscsi.c | 41 +++++++++++++++++++++++++++++++++++++++++ | ||
13 | 1 file changed, 41 insertions(+) | ||
14 | |||
15 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block/iscsi.c | ||
18 | +++ b/block/iscsi.c | ||
19 | @@ -XXX,XX +XXX,XX @@ typedef struct IscsiLun { | ||
20 | QemuMutex mutex; | ||
21 | struct scsi_inquiry_logical_block_provisioning lbp; | ||
22 | struct scsi_inquiry_block_limits bl; | ||
23 | + struct scsi_inquiry_device_designator *dd; | ||
24 | unsigned char *zeroblock; | ||
25 | /* The allocmap tracks which clusters (pages) on the iSCSI target are | ||
26 | * allocated and which are not. In case a target returns zeros for | ||
27 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = { | ||
28 | }, | ||
29 | }; | ||
30 | |||
31 | +static void iscsi_save_designator(IscsiLun *lun, | ||
32 | + struct scsi_inquiry_device_identification *inq_di) | ||
33 | +{ | ||
34 | + struct scsi_inquiry_device_designator *desig, *copy = NULL; | ||
35 | + | ||
36 | + for (desig = inq_di->designators; desig; desig = desig->next) { | ||
37 | + if (desig->association || | ||
38 | + desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) { | ||
39 | + continue; | ||
40 | + } | ||
41 | + /* NAA works better than T10 vendor ID based designator. */ | ||
42 | + if (!copy || copy->designator_type < desig->designator_type) { | ||
43 | + copy = desig; | ||
44 | + } | ||
45 | + } | ||
46 | + if (copy) { | ||
47 | + lun->dd = g_new(struct scsi_inquiry_device_designator, 1); | ||
48 | + *lun->dd = *copy; | ||
49 | + lun->dd->next = NULL; | ||
50 | + lun->dd->designator = g_malloc(copy->designator_length); | ||
51 | + memcpy(lun->dd->designator, copy->designator, copy->designator_length); | ||
52 | + } | ||
53 | +} | ||
54 | + | ||
55 | static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, | ||
56 | Error **errp) | ||
57 | { | ||
58 | @@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, | ||
59 | struct scsi_task *inq_task; | ||
60 | struct scsi_inquiry_logical_block_provisioning *inq_lbp; | ||
61 | struct scsi_inquiry_block_limits *inq_bl; | ||
62 | + struct scsi_inquiry_device_identification *inq_di; | ||
63 | switch (inq_vpd->pages[i]) { | ||
64 | case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING: | ||
65 | inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, | ||
66 | @@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, | ||
67 | sizeof(struct scsi_inquiry_block_limits)); | ||
68 | scsi_free_scsi_task(inq_task); | ||
69 | break; | ||
70 | + case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION: | ||
71 | + inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, | ||
72 | + SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION, | ||
73 | + (void **) &inq_di, errp); | ||
74 | + if (inq_task == NULL) { | ||
75 | + ret = -EINVAL; | ||
76 | + goto out; | ||
77 | + } | ||
78 | + iscsi_save_designator(iscsilun, inq_di); | ||
79 | + scsi_free_scsi_task(inq_task); | ||
80 | + break; | ||
81 | default: | ||
82 | break; | ||
83 | } | ||
84 | @@ -XXX,XX +XXX,XX @@ static void iscsi_close(BlockDriverState *bs) | ||
85 | iscsi_logout_sync(iscsi); | ||
86 | } | ||
87 | iscsi_destroy_context(iscsi); | ||
88 | + if (iscsilun->dd) { | ||
89 | + g_free(iscsilun->dd->designator); | ||
90 | + g_free(iscsilun->dd); | ||
91 | + } | ||
92 | g_free(iscsilun->zeroblock); | ||
93 | iscsi_allocmap_free(iscsilun); | ||
94 | qemu_mutex_destroy(&iscsilun->mutex); | ||
95 | -- | ||
96 | 2.17.1 | ||
97 | |||
98 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | This loop is repeated a growing number times. Make a helper. | ||
4 | |||
5 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
8 | Message-id: 20180601092648.24614-8-famz@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | block/iscsi.c | 54 ++++++++++++++++----------------------------------- | ||
12 | 1 file changed, 17 insertions(+), 37 deletions(-) | ||
13 | |||
14 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/iscsi.c | ||
17 | +++ b/block/iscsi.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, | ||
19 | offset / iscsilun->cluster_size) == size); | ||
20 | } | ||
21 | |||
22 | +static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask, | ||
23 | + IscsiLun *iscsilun) | ||
24 | +{ | ||
25 | + while (!iTask->complete) { | ||
26 | + iscsi_set_events(iscsilun); | ||
27 | + qemu_mutex_unlock(&iscsilun->mutex); | ||
28 | + qemu_coroutine_yield(); | ||
29 | + qemu_mutex_lock(&iscsilun->mutex); | ||
30 | + } | ||
31 | +} | ||
32 | + | ||
33 | static int coroutine_fn | ||
34 | iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, | ||
35 | QEMUIOVector *iov, int flags) | ||
36 | @@ -XXX,XX +XXX,XX @@ retry: | ||
37 | scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, | ||
38 | iov->niov); | ||
39 | #endif | ||
40 | - while (!iTask.complete) { | ||
41 | - iscsi_set_events(iscsilun); | ||
42 | - qemu_mutex_unlock(&iscsilun->mutex); | ||
43 | - qemu_coroutine_yield(); | ||
44 | - qemu_mutex_lock(&iscsilun->mutex); | ||
45 | - } | ||
46 | + iscsi_co_wait_for_task(&iTask, iscsilun); | ||
47 | |||
48 | if (iTask.task != NULL) { | ||
49 | scsi_free_scsi_task(iTask.task); | ||
50 | @@ -XXX,XX +XXX,XX @@ retry: | ||
51 | ret = -ENOMEM; | ||
52 | goto out_unlock; | ||
53 | } | ||
54 | - | ||
55 | - while (!iTask.complete) { | ||
56 | - iscsi_set_events(iscsilun); | ||
57 | - qemu_mutex_unlock(&iscsilun->mutex); | ||
58 | - qemu_coroutine_yield(); | ||
59 | - qemu_mutex_lock(&iscsilun->mutex); | ||
60 | - } | ||
61 | + iscsi_co_wait_for_task(&iTask, iscsilun); | ||
62 | |||
63 | if (iTask.do_retry) { | ||
64 | if (iTask.task != NULL) { | ||
65 | @@ -XXX,XX +XXX,XX @@ retry: | ||
66 | #if LIBISCSI_API_VERSION < (20160603) | ||
67 | scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); | ||
68 | #endif | ||
69 | - while (!iTask.complete) { | ||
70 | - iscsi_set_events(iscsilun); | ||
71 | - qemu_mutex_unlock(&iscsilun->mutex); | ||
72 | - qemu_coroutine_yield(); | ||
73 | - qemu_mutex_lock(&iscsilun->mutex); | ||
74 | - } | ||
75 | |||
76 | + iscsi_co_wait_for_task(&iTask, iscsilun); | ||
77 | if (iTask.task != NULL) { | ||
78 | scsi_free_scsi_task(iTask.task); | ||
79 | iTask.task = NULL; | ||
80 | @@ -XXX,XX +XXX,XX @@ retry: | ||
81 | return -ENOMEM; | ||
82 | } | ||
83 | |||
84 | - while (!iTask.complete) { | ||
85 | - iscsi_set_events(iscsilun); | ||
86 | - qemu_mutex_unlock(&iscsilun->mutex); | ||
87 | - qemu_coroutine_yield(); | ||
88 | - qemu_mutex_lock(&iscsilun->mutex); | ||
89 | - } | ||
90 | + iscsi_co_wait_for_task(&iTask, iscsilun); | ||
91 | |||
92 | if (iTask.task != NULL) { | ||
93 | scsi_free_scsi_task(iTask.task); | ||
94 | @@ -XXX,XX +XXX,XX @@ retry: | ||
95 | goto out_unlock; | ||
96 | } | ||
97 | |||
98 | - while (!iTask.complete) { | ||
99 | - iscsi_set_events(iscsilun); | ||
100 | - qemu_mutex_unlock(&iscsilun->mutex); | ||
101 | - qemu_coroutine_yield(); | ||
102 | - qemu_mutex_lock(&iscsilun->mutex); | ||
103 | - } | ||
104 | + iscsi_co_wait_for_task(&iTask, iscsilun); | ||
105 | |||
106 | if (iTask.task != NULL) { | ||
107 | scsi_free_scsi_task(iTask.task); | ||
108 | @@ -XXX,XX +XXX,XX @@ retry: | ||
109 | return -ENOMEM; | ||
110 | } | ||
111 | |||
112 | - while (!iTask.complete) { | ||
113 | - iscsi_set_events(iscsilun); | ||
114 | - qemu_mutex_unlock(&iscsilun->mutex); | ||
115 | - qemu_coroutine_yield(); | ||
116 | - qemu_mutex_lock(&iscsilun->mutex); | ||
117 | - } | ||
118 | + iscsi_co_wait_for_task(&iTask, iscsilun); | ||
119 | |||
120 | if (iTask.status == SCSI_STATUS_CHECK_CONDITION && | ||
121 | iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST && | ||
122 | -- | ||
123 | 2.17.1 | ||
124 | |||
125 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | Issue EXTENDED COPY (LID1) command to implement the copy_range API. | ||
4 | |||
5 | The parameter data construction code is modified from libiscsi's | ||
6 | iscsi-dd.c. | ||
7 | |||
8 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Message-id: 20180601092648.24614-9-famz@redhat.com | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | --- | ||
13 | include/scsi/constants.h | 4 + | ||
14 | block/iscsi.c | 219 +++++++++++++++++++++++++++++++++++++++ | ||
15 | 2 files changed, 223 insertions(+) | ||
16 | |||
17 | diff --git a/include/scsi/constants.h b/include/scsi/constants.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/scsi/constants.h | ||
20 | +++ b/include/scsi/constants.h | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | #define MMC_PROFILE_HDDVD_RW_DL 0x005A | ||
23 | #define MMC_PROFILE_INVALID 0xFFFF | ||
24 | |||
25 | +#define XCOPY_DESC_OFFSET 16 | ||
26 | +#define IDENT_DESCR_TGT_DESCR_SIZE 32 | ||
27 | +#define XCOPY_BLK2BLK_SEG_DESC_SIZE 28 | ||
28 | + | ||
29 | #endif | ||
30 | diff --git a/block/iscsi.c b/block/iscsi.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/block/iscsi.c | ||
33 | +++ b/block/iscsi.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs, | ||
35 | iscsi_allocmap_invalidate(iscsilun); | ||
36 | } | ||
37 | |||
38 | +static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs, | ||
39 | + BdrvChild *src, | ||
40 | + uint64_t src_offset, | ||
41 | + BdrvChild *dst, | ||
42 | + uint64_t dst_offset, | ||
43 | + uint64_t bytes, | ||
44 | + BdrvRequestFlags flags) | ||
45 | +{ | ||
46 | + return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags); | ||
47 | +} | ||
48 | + | ||
49 | +static struct scsi_task *iscsi_xcopy_task(int param_len) | ||
50 | +{ | ||
51 | + struct scsi_task *task; | ||
52 | + | ||
53 | + task = g_new0(struct scsi_task, 1); | ||
54 | + | ||
55 | + task->cdb[0] = EXTENDED_COPY; | ||
56 | + task->cdb[10] = (param_len >> 24) & 0xFF; | ||
57 | + task->cdb[11] = (param_len >> 16) & 0xFF; | ||
58 | + task->cdb[12] = (param_len >> 8) & 0xFF; | ||
59 | + task->cdb[13] = param_len & 0xFF; | ||
60 | + task->cdb_size = 16; | ||
61 | + task->xfer_dir = SCSI_XFER_WRITE; | ||
62 | + task->expxferlen = param_len; | ||
63 | + | ||
64 | + return task; | ||
65 | +} | ||
66 | + | ||
67 | +static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun) | ||
68 | +{ | ||
69 | + struct scsi_inquiry_device_designator *dd = lun->dd; | ||
70 | + | ||
71 | + memset(desc, 0, 32); | ||
72 | + desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */ | ||
73 | + desc[4] = dd->code_set; | ||
74 | + desc[5] = (dd->designator_type & 0xF) | ||
75 | + | ((dd->association & 3) << 4); | ||
76 | + desc[7] = dd->designator_length; | ||
77 | + memcpy(desc + 8, dd->designator, dd->designator_length); | ||
78 | + | ||
79 | + desc[28] = 0; | ||
80 | + desc[29] = (lun->block_size >> 16) & 0xFF; | ||
81 | + desc[30] = (lun->block_size >> 8) & 0xFF; | ||
82 | + desc[31] = lun->block_size & 0xFF; | ||
83 | +} | ||
84 | + | ||
85 | +static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index, | ||
86 | + int dst_index) | ||
87 | +{ | ||
88 | + hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */ | ||
89 | + hdr[1] = ((dc << 1) | cat) & 0xFF; | ||
90 | + hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF; | ||
91 | + /* don't account for the first 4 bytes in descriptor header*/ | ||
92 | + hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF; | ||
93 | + hdr[4] = (src_index >> 8) & 0xFF; | ||
94 | + hdr[5] = src_index & 0xFF; | ||
95 | + hdr[6] = (dst_index >> 8) & 0xFF; | ||
96 | + hdr[7] = dst_index & 0xFF; | ||
97 | +} | ||
98 | + | ||
99 | +static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat, | ||
100 | + int src_index, int dst_index, int num_blks, | ||
101 | + uint64_t src_lba, uint64_t dst_lba) | ||
102 | +{ | ||
103 | + iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index); | ||
104 | + | ||
105 | + /* The caller should verify the request size */ | ||
106 | + assert(num_blks < 65536); | ||
107 | + desc[10] = (num_blks >> 8) & 0xFF; | ||
108 | + desc[11] = num_blks & 0xFF; | ||
109 | + desc[12] = (src_lba >> 56) & 0xFF; | ||
110 | + desc[13] = (src_lba >> 48) & 0xFF; | ||
111 | + desc[14] = (src_lba >> 40) & 0xFF; | ||
112 | + desc[15] = (src_lba >> 32) & 0xFF; | ||
113 | + desc[16] = (src_lba >> 24) & 0xFF; | ||
114 | + desc[17] = (src_lba >> 16) & 0xFF; | ||
115 | + desc[18] = (src_lba >> 8) & 0xFF; | ||
116 | + desc[19] = src_lba & 0xFF; | ||
117 | + desc[20] = (dst_lba >> 56) & 0xFF; | ||
118 | + desc[21] = (dst_lba >> 48) & 0xFF; | ||
119 | + desc[22] = (dst_lba >> 40) & 0xFF; | ||
120 | + desc[23] = (dst_lba >> 32) & 0xFF; | ||
121 | + desc[24] = (dst_lba >> 24) & 0xFF; | ||
122 | + desc[25] = (dst_lba >> 16) & 0xFF; | ||
123 | + desc[26] = (dst_lba >> 8) & 0xFF; | ||
124 | + desc[27] = dst_lba & 0xFF; | ||
125 | +} | ||
126 | + | ||
127 | +static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str, | ||
128 | + int list_id_usage, int prio, | ||
129 | + int tgt_desc_len, | ||
130 | + int seg_desc_len, int inline_data_len) | ||
131 | +{ | ||
132 | + buf[0] = list_id; | ||
133 | + buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7); | ||
134 | + buf[2] = (tgt_desc_len >> 8) & 0xFF; | ||
135 | + buf[3] = tgt_desc_len & 0xFF; | ||
136 | + buf[8] = (seg_desc_len >> 24) & 0xFF; | ||
137 | + buf[9] = (seg_desc_len >> 16) & 0xFF; | ||
138 | + buf[10] = (seg_desc_len >> 8) & 0xFF; | ||
139 | + buf[11] = seg_desc_len & 0xFF; | ||
140 | + buf[12] = (inline_data_len >> 24) & 0xFF; | ||
141 | + buf[13] = (inline_data_len >> 16) & 0xFF; | ||
142 | + buf[14] = (inline_data_len >> 8) & 0xFF; | ||
143 | + buf[15] = inline_data_len & 0xFF; | ||
144 | +} | ||
145 | + | ||
146 | +static void iscsi_xcopy_data(struct iscsi_data *data, | ||
147 | + IscsiLun *src, int64_t src_lba, | ||
148 | + IscsiLun *dst, int64_t dst_lba, | ||
149 | + uint16_t num_blocks) | ||
150 | +{ | ||
151 | + uint8_t *buf; | ||
152 | + const int src_offset = XCOPY_DESC_OFFSET; | ||
153 | + const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE; | ||
154 | + const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE; | ||
155 | + | ||
156 | + data->size = XCOPY_DESC_OFFSET + | ||
157 | + IDENT_DESCR_TGT_DESCR_SIZE * 2 + | ||
158 | + XCOPY_BLK2BLK_SEG_DESC_SIZE; | ||
159 | + data->data = g_malloc0(data->size); | ||
160 | + buf = data->data; | ||
161 | + | ||
162 | + /* Initialise the parameter list header */ | ||
163 | + iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */, | ||
164 | + 0, 2 * IDENT_DESCR_TGT_DESCR_SIZE, | ||
165 | + XCOPY_BLK2BLK_SEG_DESC_SIZE, | ||
166 | + 0); | ||
167 | + | ||
168 | + /* Initialise CSCD list with one src + one dst descriptor */ | ||
169 | + iscsi_populate_target_desc(&buf[src_offset], src); | ||
170 | + iscsi_populate_target_desc(&buf[dst_offset], dst); | ||
171 | + | ||
172 | + /* Initialise one segment descriptor */ | ||
173 | + iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks, | ||
174 | + src_lba, dst_lba); | ||
175 | +} | ||
176 | + | ||
177 | +static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs, | ||
178 | + BdrvChild *src, | ||
179 | + uint64_t src_offset, | ||
180 | + BdrvChild *dst, | ||
181 | + uint64_t dst_offset, | ||
182 | + uint64_t bytes, | ||
183 | + BdrvRequestFlags flags) | ||
184 | +{ | ||
185 | + IscsiLun *dst_lun = dst->bs->opaque; | ||
186 | + IscsiLun *src_lun; | ||
187 | + struct IscsiTask iscsi_task; | ||
188 | + struct iscsi_data data; | ||
189 | + int r = 0; | ||
190 | + int block_size; | ||
191 | + | ||
192 | + if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) { | ||
193 | + return -ENOTSUP; | ||
194 | + } | ||
195 | + src_lun = src->bs->opaque; | ||
196 | + | ||
197 | + if (!src_lun->dd || !dst_lun->dd) { | ||
198 | + return -ENOTSUP; | ||
199 | + } | ||
200 | + if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) { | ||
201 | + return -ENOTSUP; | ||
202 | + } | ||
203 | + if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) { | ||
204 | + return -ENOTSUP; | ||
205 | + } | ||
206 | + if (dst_lun->block_size != src_lun->block_size || | ||
207 | + !dst_lun->block_size) { | ||
208 | + return -ENOTSUP; | ||
209 | + } | ||
210 | + | ||
211 | + block_size = dst_lun->block_size; | ||
212 | + if (bytes / block_size > 65535) { | ||
213 | + return -ENOTSUP; | ||
214 | + } | ||
215 | + | ||
216 | + iscsi_xcopy_data(&data, | ||
217 | + src_lun, src_offset / block_size, | ||
218 | + dst_lun, dst_offset / block_size, | ||
219 | + bytes / block_size); | ||
220 | + | ||
221 | + iscsi_co_init_iscsitask(dst_lun, &iscsi_task); | ||
222 | + | ||
223 | + qemu_mutex_lock(&dst_lun->mutex); | ||
224 | + iscsi_task.task = iscsi_xcopy_task(data.size); | ||
225 | +retry: | ||
226 | + if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun, | ||
227 | + iscsi_task.task, iscsi_co_generic_cb, | ||
228 | + &data, | ||
229 | + &iscsi_task) != 0) { | ||
230 | + r = -EIO; | ||
231 | + goto out_unlock; | ||
232 | + } | ||
233 | + | ||
234 | + iscsi_co_wait_for_task(&iscsi_task, dst_lun); | ||
235 | + | ||
236 | + if (iscsi_task.do_retry) { | ||
237 | + iscsi_task.complete = 0; | ||
238 | + goto retry; | ||
239 | + } | ||
240 | + | ||
241 | + if (iscsi_task.status != SCSI_STATUS_GOOD) { | ||
242 | + r = iscsi_task.err_code; | ||
243 | + goto out_unlock; | ||
244 | + } | ||
245 | + | ||
246 | +out_unlock: | ||
247 | + g_free(iscsi_task.task); | ||
248 | + qemu_mutex_unlock(&dst_lun->mutex); | ||
249 | + g_free(iscsi_task.err_str); | ||
250 | + return r; | ||
251 | +} | ||
252 | + | ||
253 | static QemuOptsList iscsi_create_opts = { | ||
254 | .name = "iscsi-create-opts", | ||
255 | .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), | ||
256 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = { | ||
257 | |||
258 | .bdrv_co_block_status = iscsi_co_block_status, | ||
259 | .bdrv_co_pdiscard = iscsi_co_pdiscard, | ||
260 | + .bdrv_co_copy_range_from = iscsi_co_copy_range_from, | ||
261 | + .bdrv_co_copy_range_to = iscsi_co_copy_range_to, | ||
262 | .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, | ||
263 | .bdrv_co_readv = iscsi_co_readv, | ||
264 | .bdrv_co_writev = iscsi_co_writev, | ||
265 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = { | ||
266 | |||
267 | .bdrv_co_block_status = iscsi_co_block_status, | ||
268 | .bdrv_co_pdiscard = iscsi_co_pdiscard, | ||
269 | + .bdrv_co_copy_range_from = iscsi_co_copy_range_from, | ||
270 | + .bdrv_co_copy_range_to = iscsi_co_copy_range_to, | ||
271 | .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, | ||
272 | .bdrv_co_readv = iscsi_co_readv, | ||
273 | .bdrv_co_writev = iscsi_co_writev, | ||
274 | -- | ||
275 | 2.17.1 | ||
276 | |||
277 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Fam Zheng <famz@redhat.com> | ||
2 | 1 | ||
3 | It's a BlockBackend wrapper of the BDS interface. | ||
4 | |||
5 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20180601092648.24614-10-famz@redhat.com | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | include/sysemu/block-backend.h | 4 ++++ | ||
11 | block/block-backend.c | 18 ++++++++++++++++++ | ||
12 | 2 files changed, 22 insertions(+) | ||
13 | |||
14 | diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/include/sysemu/block-backend.h | ||
17 | +++ b/include/sysemu/block-backend.h | ||
18 | @@ -XXX,XX +XXX,XX @@ void blk_set_force_allow_inactivate(BlockBackend *blk); | ||
19 | void blk_register_buf(BlockBackend *blk, void *host, size_t size); | ||
20 | void blk_unregister_buf(BlockBackend *blk, void *host); | ||
21 | |||
22 | +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, | ||
23 | + BlockBackend *blk_out, int64_t off_out, | ||
24 | + int bytes, BdrvRequestFlags flags); | ||
25 | + | ||
26 | #endif | ||
27 | diff --git a/block/block-backend.c b/block/block-backend.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/block/block-backend.c | ||
30 | +++ b/block/block-backend.c | ||
31 | @@ -XXX,XX +XXX,XX @@ void blk_unregister_buf(BlockBackend *blk, void *host) | ||
32 | { | ||
33 | bdrv_unregister_buf(blk_bs(blk), host); | ||
34 | } | ||
35 | + | ||
36 | +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, | ||
37 | + BlockBackend *blk_out, int64_t off_out, | ||
38 | + int bytes, BdrvRequestFlags flags) | ||
39 | +{ | ||
40 | + int r; | ||
41 | + r = blk_check_byte_request(blk_in, off_in, bytes); | ||
42 | + if (r) { | ||
43 | + return r; | ||
44 | + } | ||
45 | + r = blk_check_byte_request(blk_out, off_out, bytes); | ||
46 | + if (r) { | ||
47 | + return r; | ||
48 | + } | ||
49 | + return bdrv_co_copy_range(blk_in->root, off_in, | ||
50 | + blk_out->root, off_out, | ||
51 | + bytes, flags); | ||
52 | +} | ||
53 | -- | ||
54 | 2.17.1 | ||
55 | |||
56 | diff view generated by jsdifflib |