The following changes since commit 4c8c1cc544dbd5e2564868e61c5037258e393832:

  Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging (2017-06-22 19:01:58 +0100)

are available in the git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 1512008812410ca4054506a7c44343088abdd977:

  Merge remote-tracking branch 'mreitz/tags/pull-block-2017-06-23' into queue-block (2017-06-23 14:09:12 +0200)

----------------------------------------------------------------
Block layer patches

----------------------------------------------------------------
Alberto Garcia (9):
      throttle: Update throttle-groups.c documentation
      qcow2: Remove unused Error variable in do_perform_cow()
      qcow2: Use unsigned int for both members of Qcow2COWRegion
      qcow2: Make perform_cow() call do_perform_cow() twice
      qcow2: Split do_perform_cow() into _read(), _encrypt() and _write()
      qcow2: Allow reading both COW regions with only one request
      qcow2: Pass a QEMUIOVector to do_perform_cow_{read,write}()
      qcow2: Merge the writing of the COW regions with the guest data
      qcow2: Use offset_into_cluster() and offset_to_l2_index()

Kevin Wolf (37):
      commit: Fix completion with extra reference
      qemu-iotests: Allow starting new qemu after cleanup
      qemu-iotests: Test exiting qemu with running job
      doc: Document generic -blockdev options
      doc: Document driver-specific -blockdev options
      qed: Use bottom half to resume waiting requests
      qed: Make qed_read_table() synchronous
      qed: Remove callback from qed_read_table()
      qed: Remove callback from qed_read_l2_table()
      qed: Remove callback from qed_find_cluster()
      qed: Make qed_read_backing_file() synchronous
      qed: Make qed_copy_from_backing_file() synchronous
      qed: Remove callback from qed_copy_from_backing_file()
      qed: Make qed_write_header() synchronous
      qed: Remove callback from qed_write_header()
      qed: Make qed_write_table() synchronous
      qed: Remove GenericCB
      qed: Remove callback from qed_write_table()
      qed: Make qed_aio_read_data() synchronous
      qed: Make qed_aio_write_main() synchronous
      qed: Inline qed_commit_l2_update()
      qed: Add return value to qed_aio_write_l1_update()
      qed: Add return value to qed_aio_write_l2_update()
      qed: Add return value to qed_aio_write_main()
      qed: Add return value to qed_aio_write_cow()
      qed: Add return value to qed_aio_write_inplace/alloc()
      qed: Add return value to qed_aio_read/write_data()
      qed: Remove ret argument from qed_aio_next_io()
      qed: Remove recursion in qed_aio_next_io()
      qed: Implement .bdrv_co_readv/writev
      qed: Use CoQueue for serialising allocations
      qed: Simplify request handling
      qed: Use a coroutine for need_check_timer
      qed: Add coroutine_fn to I/O path functions
      qed: Use bdrv_co_* for coroutine_fns
      block: Remove bdrv_aio_readv/writev/flush()
      Merge remote-tracking branch 'mreitz/tags/pull-block-2017-06-23' into queue-block

Manos Pitsidianakis (1):
      block: change variable names in BlockDriverState

Max Reitz (3):
      blkdebug: Catch bs->exact_filename overflow
      blkverify: Catch bs->exact_filename overflow
      block: Do not strcmp() with NULL uri->scheme

Stefan Hajnoczi (10):
      block: count bdrv_co_rw_vmstate() requests
      block: use BDRV_POLL_WHILE() in bdrv_rw_vmstate()
      migration: avoid recursive AioContext locking in save_vmstate()
      migration: use bdrv_drain_all_begin/end() instead bdrv_drain_all()
      virtio-pci: use ioeventfd even when KVM is disabled
      migration: hold AioContext lock for loadvm qemu_fclose()
      qemu-iotests: 068: extract _qemu() function
      qemu-iotests: 068: use -drive/-device instead of -hda
      qemu-iotests: 068: test iothread mode
      qemu-img: don't shadow opts variable in img_dd()

Stephen Bates (1):
      nvme: Add support for Read Data and Write Data in CMBs.

sochin.jiang (1):
      fix: avoid an infinite loop or a dangling pointer problem in img_commit

 block/Makefile.objs            |   2 +-
 block/blkdebug.c               |  46 +--
 block/blkreplay.c              |   8 +-
 block/blkverify.c              |  12 +-
 block/block-backend.c          |  22 +-
 block/commit.c                 |   7 +
 block/file-posix.c             |  34 +-
 block/io.c                     | 240 ++-----------
 block/iscsi.c                  |  20 +-
 block/mirror.c                 |   8 +-
 block/nbd-client.c             |   8 +-
 block/nbd-client.h             |   4 +-
 block/nbd.c                    |   6 +-
 block/nfs.c                    |   2 +-
 block/qcow2-cluster.c          | 201 ++++++++---
 block/qcow2.c                  |  94 +++--
 block/qcow2.h                  |  11 +-
 block/qed-cluster.c            | 124 +++----
 block/qed-gencb.c              |  33 --
 block/qed-table.c              | 261 +++++---------
 block/qed.c                    | 779 ++++++++++++++++-------------------------
 block/qed.h                    |  54 +--
 block/raw-format.c             |   8 +-
 block/rbd.c                    |   4 +-
 block/sheepdog.c               |  12 +-
 block/ssh.c                    |   2 +-
 block/throttle-groups.c        |   2 +-
 block/trace-events             |   3 -
 blockjob.c                     |   4 +-
 hw/block/nvme.c                |  83 +++--
 hw/block/nvme.h                |   1 +
 hw/virtio/virtio-pci.c         |   2 +-
 include/block/block.h          |  16 +-
 include/block/block_int.h      |   6 +-
 include/block/blockjob.h       |  18 +
 include/sysemu/block-backend.h |  20 +-
 migration/savevm.c             |  32 +-
 qemu-img.c                     |  29 +-
 qemu-io-cmds.c                 |  46 +--
 qemu-options.hx                | 221 ++++++++++--
 tests/qemu-iotests/068         |  37 +-
 tests/qemu-iotests/068.out     |  11 +-
 tests/qemu-iotests/185         | 206 +++++++++++
 tests/qemu-iotests/185.out     |  59 ++++
 tests/qemu-iotests/common.qemu |   3 +
 tests/qemu-iotests/group       |   1 +
 46 files changed, 1477 insertions(+), 1325 deletions(-)
 delete mode 100644 block/qed-gencb.c
 create mode 100755 tests/qemu-iotests/185
 create mode 100644 tests/qemu-iotests/185.out

The following changes since commit ac5f7bf8e208cd7893dbb1a9520559e569a4677c:

  Merge tag 'migration-20230424-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-04-24 15:00:39 +0100)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 8c1e8fb2e7fc2cbeb57703e143965a4cd3ad301a:

  block/monitor: Fix crash when executing HMP commit (2023-04-25 15:11:57 +0200)

----------------------------------------------------------------
Block layer patches

- Protect BlockBackend.queued_requests with its own lock
- Switch to AIO_WAIT_WHILE_UNLOCKED() where possible
- AioContext removal: LinuxAioState/LuringState/ThreadPool
- Add more coroutine_fn annotations, use bdrv/blk_co_*
- Fix crash when executing HMP commit

----------------------------------------------------------------
Emanuele Giuseppe Esposito (4):
      linux-aio: use LinuxAioState from the running thread
      io_uring: use LuringState from the running thread
      thread-pool: use ThreadPool from the running thread
      thread-pool: avoid passing the pool parameter every time

Paolo Bonzini (9):
      vvfat: mark various functions as coroutine_fn
      blkdebug: add missing coroutine_fn annotation
      mirror: make mirror_flush a coroutine_fn, do not use co_wrappers
      nbd: mark more coroutine_fns, do not use co_wrappers
      9pfs: mark more coroutine_fns
      qemu-pr-helper: mark more coroutine_fns
      tests: mark more coroutine_fns
      qcow2: mark various functions as coroutine_fn and GRAPH_RDLOCK
      vmdk: make vmdk_is_cid_valid a coroutine_fn

Stefan Hajnoczi (10):
      block: make BlockBackend->quiesce_counter atomic
      block: make BlockBackend->disable_request_queuing atomic
      block: protect BlockBackend->queued_requests with a lock
      block: don't acquire AioContext lock in bdrv_drain_all()
      block: convert blk_exp_close_all_type() to AIO_WAIT_WHILE_UNLOCKED()
      block: convert bdrv_graph_wrlock() to AIO_WAIT_WHILE_UNLOCKED()
      block: convert bdrv_drain_all_begin() to AIO_WAIT_WHILE_UNLOCKED()
      hmp: convert handle_hmp_command() to AIO_WAIT_WHILE_UNLOCKED()
      monitor: convert monitor_cleanup() to AIO_WAIT_WHILE_UNLOCKED()
      block: add missing coroutine_fn to bdrv_sum_allocated_file_size()

Wang Liang (1):
      block/monitor: Fix crash when executing HMP commit

Wilfred Mallawa (1):
      include/block: fixup typos

 block/qcow2.h                     | 15 +++++-----
 hw/9pfs/9p.h                      |  4 +--
 include/block/aio-wait.h          |  2 +-
 include/block/aio.h               |  8 ------
 include/block/block_int-common.h  |  2 +-
 include/block/raw-aio.h           | 33 +++++++++++++++-------
 include/block/thread-pool.h       | 15 ++++++----
 include/sysemu/block-backend-io.h |  5 ++++
 backends/tpm/tpm_backend.c        |  4 +--
 block.c                           |  2 +-
 block/blkdebug.c                  |  4 +--
 block/block-backend.c             | 45 ++++++++++++++++++------------
 block/export/export.c             |  2 +-
 block/file-posix.c                | 45 ++++++++++++------------------
 block/file-win32.c                |  4 +--
 block/graph-lock.c                |  2 +-
 block/io.c                        |  2 +-
 block/io_uring.c                  | 23 ++++++++++------
 block/linux-aio.c                 | 29 ++++++++++++--------
 block/mirror.c                    |  4 +--
 block/monitor/block-hmp-cmds.c    | 10 ++++---
 block/qcow2-bitmap.c              |  2 +-
 block/qcow2-cluster.c             | 21 ++++++++------
 block/qcow2-refcount.c            |  8 +++---
 block/qcow2-snapshot.c            | 25 +++++++++--------
 block/qcow2-threads.c             |  3 +-
 block/qcow2.c                     | 27 +++++++++---------
 block/vmdk.c                      |  2 +-
 block/vvfat.c                     | 58 ++++++++++++++++++++-------------------
 hw/9pfs/codir.c                   |  6 ++--
 hw/9pfs/coth.c                    |  3 +-
 hw/ppc/spapr_nvdimm.c             |  6 ++--
 hw/virtio/virtio-pmem.c           |  3 +-
 monitor/hmp.c                     |  2 +-
 monitor/monitor.c                 |  4 +--
 nbd/server.c                      | 48 ++++++++++++++++----------------
 scsi/pr-manager.c                 |  3 +-
 scsi/qemu-pr-helper.c             | 25 ++++++++---------
 tests/unit/test-thread-pool.c     | 14 ++++------
 util/thread-pool.c                | 25 ++++++++---------
 40 files changed, 283 insertions(+), 262 deletions(-)
Deleted patch
commit_complete() can't assume that after its block_job_completed() the
job is actually immediately freed; someone else may still be holding
references. In this case, the op blockers on the intermediate nodes make
the graph reconfiguration in the completion code fail.

Call block_job_remove_all_bdrv() manually so that we know for sure that
any blockers on intermediate nodes are given up.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/commit.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block/commit.c b/block/commit.c
index XXXXXXX..XXXXXXX 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -XXX,XX +XXX,XX @@ static void commit_complete(BlockJob *job, void *opaque)
     }
     g_free(s->backing_file_str);
     blk_unref(s->top);
+
+    /* If there is more than one reference to the job (e.g. if called from
+     * block_job_finish_sync()), block_job_completed() won't free it and
+     * therefore the blockers on the intermediate nodes remain. This would
+     * cause bdrv_set_backing_hd() to fail. */
+    block_job_remove_all_bdrv(job);
+
     block_job_completed(&s->common, ret);
     g_free(data);

--
1.8.3.1
Deleted patch
After _cleanup_qemu(), test cases should be able to start the next qemu
process and call _cleanup_qemu() for that one as well. For this to work
cleanly, we need to improve the cleanup so that the second invocation
doesn't try to kill the qemu instances from the first invocation a
second time (which would result in error messages).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/common.qemu | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -XXX,XX +XXX,XX @@ function _cleanup_qemu()
         rm -f "${QEMU_FIFO_IN}_${i}" "${QEMU_FIFO_OUT}_${i}"
         eval "exec ${QEMU_IN[$i]}<&-"   # close file descriptors
         eval "exec ${QEMU_OUT[$i]}<&-"
+
+        unset QEMU_IN[$i]
+        unset QEMU_OUT[$i]
     done
 }
--
1.8.3.1
Deleted patch
When qemu is exited, all running jobs should be cancelled successfully.
This adds a test for this for all types of block jobs that currently
exist in qemu.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 tests/qemu-iotests/185     | 206 +++++++++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/185.out |  59 +++++++++++++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 266 insertions(+)
 create mode 100755 tests/qemu-iotests/185
 create mode 100644 tests/qemu-iotests/185.out

diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/185
@@ -XXX,XX +XXX,XX @@
+#!/bin/bash
+#
+# Test exiting qemu while jobs are still running
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1	# failure is the default!
+
+MIG_SOCKET="${TEST_DIR}/migrate"
+
+_cleanup()
+{
+    rm -f "${TEST_IMG}.mid"
+    rm -f "${TEST_IMG}.copy"
+    _cleanup_test_img
+    _cleanup_qemu
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+size=64M
+TEST_IMG="${TEST_IMG}.base" _make_test_img $size
+
+echo
+echo === Starting VM ===
+echo
+
+qemu_comm_method="qmp"
+
+_launch_qemu \
+    -drive file="${TEST_IMG}.base",cache=$CACHEMODE,driver=$IMGFMT,id=disk
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
+
+echo
+echo === Creating backing chain ===
+echo
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'blockdev-snapshot-sync',
+       'arguments': { 'device': 'disk',
+                      'snapshot-file': '$TEST_IMG.mid',
+                      'format': '$IMGFMT',
+                      'mode': 'absolute-paths' } }" \
+    "return"
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io disk \"write 0 4M\"' } }" \
+    "return"
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'blockdev-snapshot-sync',
+       'arguments': { 'device': 'disk',
+                      'snapshot-file': '$TEST_IMG',
+                      'format': '$IMGFMT',
+                      'mode': 'absolute-paths' } }" \
+    "return"
+
+echo
+echo === Start commit job and exit qemu ===
+echo
+
+# Note that the reference output intentionally includes the 'offset' field in
+# BLOCK_JOB_CANCELLED events for all of the following block jobs. They are
+# predictable and any change in the offsets would hint at a bug in the job
+# throttling code.
+#
+# In order to achieve these predictable offsets, all of the following tests
+# use speed=65536. Each job will perform exactly one iteration before it has
+# to sleep at least for a second, which is plenty of time for the 'quit' QMP
+# command to be received (after receiving the command, the rest runs
+# synchronously, so jobs can arbitrarily continue or complete).
+#
+# The buffer size for commit and streaming is 512k (waiting for 8 seconds after
+# the first request), for active commit and mirror it's large enough to cover
+# the full 4M, and for backup it's the qcow2 cluster size, which we know is
+# 64k. As all of these are at least as large as the speed, we are sure that the
+# offset doesn't advance after the first iteration before qemu exits.
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'block-commit',
+       'arguments': { 'device': 'disk',
+                      'base':'$TEST_IMG.base',
+                      'top': '$TEST_IMG.mid',
+                      'speed': 65536 } }" \
+    "return"
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
+wait=1 _cleanup_qemu
+
+echo
+echo === Start active commit job and exit qemu ===
+echo
+
+_launch_qemu \
+    -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'block-commit',
+       'arguments': { 'device': 'disk',
+                      'base':'$TEST_IMG.base',
+                      'speed': 65536 } }" \
+    "return"
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
+wait=1 _cleanup_qemu
+
+echo
+echo === Start mirror job and exit qemu ===
+echo
+
+_launch_qemu \
+    -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'drive-mirror',
+       'arguments': { 'device': 'disk',
+                      'target': '$TEST_IMG.copy',
+                      'format': '$IMGFMT',
+                      'sync': 'full',
+                      'speed': 65536 } }" \
+    "return"
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
+wait=1 _cleanup_qemu
+
+echo
+echo === Start backup job and exit qemu ===
+echo
+
+_launch_qemu \
+    -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'drive-backup',
+       'arguments': { 'device': 'disk',
+                      'target': '$TEST_IMG.copy',
+                      'format': '$IMGFMT',
+                      'sync': 'full',
+                      'speed': 65536 } }" \
+    "return"
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
+wait=1 _cleanup_qemu
+
+echo
+echo === Start streaming job and exit qemu ===
+echo
+
+_launch_qemu \
+    -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
+
+_send_qemu_cmd $h \
+    "{ 'execute': 'block-stream',
+       'arguments': { 'device': 'disk',
+                      'speed': 65536 } }" \
+    "return"
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
+wait=1 _cleanup_qemu
+
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/185.out
@@ -XXX,XX +XXX,XX @@
+QA output created by 185
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+
+=== Starting VM ===
+
+{"return": {}}
+
+=== Creating backing chain ===
+
+Formatting 'TEST_DIR/t.qcow2.mid', fmt=qcow2 size=67108864 backing_file=TEST_DIR/t.qcow2.base backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+{"return": {}}
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 backing_file=TEST_DIR/t.qcow2.mid backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+{"return": {}}
+
+=== Start commit job and exit qemu ===
+
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}}
+
+=== Start active commit job and exit qemu ===
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}}
+
+=== Start mirror job and exit qemu ===
+
+{"return": {}}
+Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}}
+
+=== Start backup job and exit qemu ===
+
+{"return": {}}
+Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}}
+
+=== Start streaming job and exit qemu ===
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}}
+No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -XXX,XX +XXX,XX @@
 181 rw auto migration
 182 rw auto quick
 183 rw auto migration
+185 rw auto
--
1.8.3.1
From: Stefan Hajnoczi <stefanha@redhat.com>

Avoid duplicating the QEMU command-line.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/068 | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/068 b/tests/qemu-iotests/068
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/068
+++ b/tests/qemu-iotests/068
@@ -XXX,XX +XXX,XX @@ case "$QEMU_DEFAULT_MACHINE" in
       ;;
 esac
 
-# Give qemu some time to boot before saving the VM state
-bash -c 'sleep 1; echo -e "savevm 0\nquit"' |\
-    $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" |\
+_qemu()
+{
+    $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" \
+        "$@" |\
     _filter_qemu | _filter_hmp
+}
+
+# Give qemu some time to boot before saving the VM state
+bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu
 # Now try to continue from that VM state (this should just work)
-echo quit |\
-    $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" -loadvm 0 |\
-    _filter_qemu | _filter_hmp
+echo quit | _qemu -loadvm 0
 
 # success, all done
 echo "*** done"
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

The main loop thread increments/decrements BlockBackend->quiesce_counter
when drained sections begin/end. The counter is read in the I/O code
path. Therefore this field is used to communicate between threads
without a lock.

Acquire/release are not necessary because the BlockBackend->in_flight
counter already uses sequentially consistent accesses and running I/O
requests hold that counter when blk_wait_while_drained() is called.
qatomic_read() can be used.

Use qatomic_fetch_inc()/qatomic_fetch_dec() for modifications even
though sequentially consistent atomic accesses are not strictly required
here. They are, however, nicer to read than multiple calls to
qatomic_read() and qatomic_set(). Since beginning and ending drain is
not a hot path the extra cost doesn't matter.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230307210427.269214-2-stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
     QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
-    int quiesce_counter;
+    int quiesce_counter; /* atomic: written under BQL, read by other threads */
     CoQueue queued_requests;
     bool disable_request_queuing;
 
@@ -XXX,XX +XXX,XX @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
     blk->dev_opaque = opaque;
 
     /* Are we currently quiesced? Should we enforce this right now? */
-    if (blk->quiesce_counter && ops && ops->drained_begin) {
+    if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
         ops->drained_begin(opaque);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 {
     assert(blk->in_flight > 0);
 
-    if (blk->quiesce_counter && !blk->disable_request_queuing) {
+    if (qatomic_read(&blk->quiesce_counter) && !blk->disable_request_queuing) {
         blk_dec_in_flight(blk);
         qemu_co_queue_wait(&blk->queued_requests, NULL);
         blk_inc_in_flight(blk);
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child)
     BlockBackend *blk = child->opaque;
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 
-    if (++blk->quiesce_counter == 1) {
+    if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
         if (blk->dev_ops && blk->dev_ops->drained_begin) {
             blk->dev_ops->drained_begin(blk->dev_opaque);
         }
@@ -XXX,XX +XXX,XX @@ static bool blk_root_drained_poll(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
     bool busy = false;
-    assert(blk->quiesce_counter);
+    assert(qatomic_read(&blk->quiesce_counter));
 
     if (blk->dev_ops && blk->dev_ops->drained_poll) {
         busy = blk->dev_ops->drained_poll(blk->dev_opaque);
@@ -XXX,XX +XXX,XX @@ static bool blk_root_drained_poll(BdrvChild *child)
 static void blk_root_drained_end(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
-    assert(blk->quiesce_counter);
+    assert(qatomic_read(&blk->quiesce_counter));
 
     assert(blk->public.throttle_group_member.io_limits_disabled);
     qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
 
-    if (--blk->quiesce_counter == 0) {
+    if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
         if (blk->dev_ops && blk->dev_ops->drained_end) {
             blk->dev_ops->drained_end(blk->dev_opaque);
         }
--
2.40.0
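
To illustrate the quiesce_counter pattern from the patch above outside of
block-backend.c: a minimal sketch, assuming QEMU's "qemu/atomic.h" helpers;
the struct and function names are illustrative, not actual QEMU code. The
writer runs under the BQL and uses atomic read-modify-write, while readers
in IOThreads use plain atomic loads:

    #include "qemu/osdep.h"
    #include "qemu/atomic.h"

    typedef struct {
        int quiesce_counter; /* atomic: written under BQL, read anywhere */
    } Counter;

    /* Main loop thread (BQL held): nesting levels are counted atomically */
    static void drained_begin(Counter *c)
    {
        if (qatomic_fetch_inc(&c->quiesce_counter) == 0) {
            /* first level of nesting: actually quiesce users */
        }
    }

    static void drained_end(Counter *c)
    {
        if (qatomic_fetch_dec(&c->quiesce_counter) == 1) {
            /* last level of nesting: resume users */
        }
    }

    /* I/O thread: a lock-free read is enough; no acquire/release is
     * needed because, as the commit message argues, the in_flight
     * counter already orders these accesses. */
    static bool is_drained(Counter *c)
    {
        return qatomic_read(&c->quiesce_counter) > 0;
    }
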
From: Stefan Hajnoczi <stefanha@redhat.com>

Perform the savevm/loadvm test with both iothread on and off. This
covers the recently found savevm/loadvm hang when iothread is enabled.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/068     | 23 ++++++++++++++---------
 tests/qemu-iotests/068.out | 11 ++++++++++-
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/tests/qemu-iotests/068 b/tests/qemu-iotests/068
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/068
+++ b/tests/qemu-iotests/068
@@ -XXX,XX +XXX,XX @@ _supported_os Linux
 IMGOPTS="compat=1.1"
 IMG_SIZE=128K
 
-echo
-echo "=== Saving and reloading a VM state to/from a qcow2 image ==="
-echo
-_make_test_img $IMG_SIZE
-
 case "$QEMU_DEFAULT_MACHINE" in
   s390-ccw-virtio)
       platform_parm="-no-shutdown"
@@ -XXX,XX +XXX,XX @@ _qemu()
     _filter_qemu | _filter_hmp
 }
 
-# Give qemu some time to boot before saving the VM state
-bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu
-# Now try to continue from that VM state (this should just work)
-echo quit | _qemu -loadvm 0
+for extra_args in \
+    "" \
+    "-object iothread,id=iothread0 -set device.hba0.iothread=iothread0"; do
+    echo
+    echo "=== Saving and reloading a VM state to/from a qcow2 image ($extra_args) ==="
+    echo
+
+    _make_test_img $IMG_SIZE
+
+    # Give qemu some time to boot before saving the VM state
+    bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu $extra_args
+    # Now try to continue from that VM state (this should just work)
+    echo quit | _qemu $extra_args -loadvm 0
+done
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/068.out b/tests/qemu-iotests/068.out
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/068.out
+++ b/tests/qemu-iotests/068.out
@@ -XXX,XX +XXX,XX @@
 QA output created by 068
 
-=== Saving and reloading a VM state to/from a qcow2 image ===
+=== Saving and reloading a VM state to/from a qcow2 image () ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) savevm 0
+(qemu) quit
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) quit
+
+=== Saving and reloading a VM state to/from a qcow2 image (-object iothread,id=iothread0 -set device.hba0.iothread=iothread0) ===
 
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072
 QEMU X.Y.Z monitor - type 'help' for more information
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

This field is accessed by multiple threads without a lock. Use explicit
qatomic_read()/qatomic_set() calls. There is no need for acquire/release
because blk_set_disable_request_queuing() doesn't provide any
guarantees (it helps that it's used at BlockBackend creation time and
not when there is I/O in flight).

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230307210427.269214-3-stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
 
     int quiesce_counter; /* atomic: written under BQL, read by other threads */
     CoQueue queued_requests;
-    bool disable_request_queuing;
+    bool disable_request_queuing; /* atomic */
 
     VMChangeStateEntry *vmsh;
     bool force_allow_inactivate;
@@ -XXX,XX +XXX,XX @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
 void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
 {
     IO_CODE();
-    blk->disable_request_queuing = disable;
+    qatomic_set(&blk->disable_request_queuing, disable);
 }
 
 static int coroutine_fn GRAPH_RDLOCK
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 {
     assert(blk->in_flight > 0);
 
-    if (qatomic_read(&blk->quiesce_counter) && !blk->disable_request_queuing) {
+    if (qatomic_read(&blk->quiesce_counter) &&
+        !qatomic_read(&blk->disable_request_queuing)) {
         blk_dec_in_flight(blk);
         qemu_co_queue_wait(&blk->queued_requests, NULL);
         blk_inc_in_flight(blk);
--
2.40.0
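
The disable_request_queuing conversion above reduces to an even simpler
sketch (again assuming "qemu/atomic.h"; the names are hypothetical): a
plain atomic store/load pair is enough precisely because the setter
promises no ordering guarantees:

    static bool flag; /* atomic */

    static void set_flag(bool value)
    {
        qatomic_set(&flag, value);   /* no barrier semantics implied */
    }

    static bool get_flag(void)
    {
        return qatomic_read(&flag);  /* safe to call from any thread */
    }
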
Now that we process a request in the same coroutine from beginning to
end and don't drop out of it any more, we can look like a proper
coroutine-based driver and simply call qed_aio_next_io() and get a
return value from it instead of spawning an additional coroutine that
reenters the parent when it's done.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 101 +++++++++++++-----------------------------------------------
 block/qed.h |   3 +-
 2 files changed, 22 insertions(+), 82 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@
 #include "qapi/qmp/qerror.h"
 #include "sysemu/block-backend.h"
 
-static const AIOCBInfo qed_aiocb_info = {
-    .aiocb_size         = sizeof(QEDAIOCB),
-};
-
 static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
                           const char *filename)
 {
@@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
     return l2_table;
 }
 
-static void qed_aio_next_io(QEDAIOCB *acb);
-
-static void qed_aio_start_io(QEDAIOCB *acb)
-{
-    qed_aio_next_io(acb);
-}
-
 static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
     assert(!s->allocating_write_reqs_plugged);
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
 
 static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
 {
-    return acb->common.bs->opaque;
+    return acb->bs->opaque;
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
     }
 }
 
-static void qed_aio_complete_bh(void *opaque)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    BlockCompletionFunc *cb = acb->common.cb;
-    void *user_opaque = acb->common.opaque;
-    int ret = acb->bh_ret;
-
-    qemu_aio_unref(acb);
-
-    /* Invoke callback */
-    qed_acquire(s);
-    cb(user_opaque, ret);
-    qed_release(s);
-}
-
-static void qed_aio_complete(QEDAIOCB *acb, int ret)
+static void qed_aio_complete(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
 
-    trace_qed_aio_complete(s, acb, ret);
-
     /* Free resources */
     qemu_iovec_destroy(&acb->cur_qiov);
     qed_unref_l2_cache_entry(acb->request.l2_table);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
         acb->qiov->iov[0].iov_base = NULL;
     }
 
-    /* Arrange for a bh to invoke the completion function */
-    acb->bh_ret = ret;
-    aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
-                            qed_aio_complete_bh, acb);
-
     /* Start next allocating write request waiting behind this one. Note that
      * requests enqueue themselves when they first hit an unallocated cluster
      * but they wait until the entire request is finished before waking up the
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
     struct iovec *iov = acb->qiov->iov;
 
     if (!iov->iov_base) {
-        iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
+        iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len);
         if (iov->iov_base == NULL) {
             return -ENOMEM;
         }
@@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
-    BlockDriverState *bs = acb->common.bs;
+    BlockDriverState *bs = acb->bs;
 
     /* Adjust offset into cluster */
     offset += qed_offset_into_cluster(s, acb->cur_pos);
@@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
 /**
  * Begin next I/O or complete the request
  */
-static void qed_aio_next_io(QEDAIOCB *acb)
+static int qed_aio_next_io(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset;
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb)
 
         /* Complete request */
         if (acb->cur_pos >= acb->end_pos) {
-            qed_aio_complete(acb, 0);
-            return;
+            ret = 0;
+            break;
         }
 
         /* Find next cluster and start I/O */
         len = acb->end_pos - acb->cur_pos;
         ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset);
         if (ret < 0) {
-            qed_aio_complete(acb, ret);
-            return;
+            break;
         }
 
         if (acb->flags & QED_AIOCB_WRITE) {
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb)
         }
 
         if (ret < 0 && ret != -EAGAIN) {
-            qed_aio_complete(acb, ret);
-            return;
+            break;
         }
     }
-}
 
-typedef struct QEDRequestCo {
-    Coroutine *co;
-    bool done;
-    int ret;
-} QEDRequestCo;
-
-static void qed_co_request_cb(void *opaque, int ret)
-{
-    QEDRequestCo *co = opaque;
-
-    co->done = true;
-    co->ret = ret;
-    qemu_coroutine_enter_if_inactive(co->co);
+    trace_qed_aio_complete(s, acb, ret);
+    qed_aio_complete(acb);
+    return ret;
 }
 
 static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
                                        QEMUIOVector *qiov, int nb_sectors,
                                        int flags)
 {
-    QEDRequestCo co = {
-        .co = qemu_coroutine_self(),
-        .done = false,
+    QEDAIOCB acb = {
+        .bs = bs,
+        .cur_pos = (uint64_t) sector_num * BDRV_SECTOR_SIZE,
+        .end_pos = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE,
+        .qiov = qiov,
+        .flags = flags,
     };
-    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, qed_co_request_cb, &co);
-
-    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, &co, flags);
+    qemu_iovec_init(&acb.cur_qiov, qiov->niov);
 
-    acb->flags = flags;
-    acb->qiov = qiov;
-    acb->qiov_offset = 0;
-    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
-    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
-    acb->backing_qiov = NULL;
-    acb->request.l2_table = NULL;
-    qemu_iovec_init(&acb->cur_qiov, qiov->niov);
+    trace_qed_aio_setup(bs->opaque, &acb, sector_num, nb_sectors, NULL, flags);
 
     /* Start request */
-    qed_aio_start_io(acb);
-
-    if (!co.done) {
-        qemu_coroutine_yield();
-    }
-
-    return co.ret;
+    return qed_aio_next_io(&acb);
 }
 
 static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
diff --git a/block/qed.h b/block/qed.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -XXX,XX +XXX,XX @@ enum {
 };
 
 typedef struct QEDAIOCB {
-    BlockAIOCB common;
-    int bh_ret;                     /* final return status for completion bh */
+    BlockDriverState *bs;
     QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
     int flags;                      /* QED_AIOCB_* bits ORed together */
     uint64_t end_pos;               /* request end on block device, in bytes */
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

The CoQueue API offers thread-safety via the lock argument that
qemu_co_queue_wait() and qemu_co_enter_next() take. BlockBackend
currently does not make use of the lock argument. This means that
multiple threads submitting I/O requests can corrupt the CoQueue's
QSIMPLEQ.

Add a QemuMutex and pass it to CoQueue APIs so that the queue is
protected. While we're at it, also assert that the queue is empty when
the BlockBackend is deleted.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230307210427.269214-4-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
     QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
     int quiesce_counter; /* atomic: written under BQL, read by other threads */
+    QemuMutex queued_requests_lock; /* protects queued_requests */
     CoQueue queued_requests;
     bool disable_request_queuing; /* atomic */
 
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 
     block_acct_init(&blk->stats);
 
+    qemu_mutex_init(&blk->queued_requests_lock);
     qemu_co_queue_init(&blk->queued_requests);
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
@@ -XXX,XX +XXX,XX @@ static void blk_delete(BlockBackend *blk)
     assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
     assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
     assert(QLIST_EMPTY(&blk->aio_notifiers));
+    assert(qemu_co_queue_empty(&blk->queued_requests));
+    qemu_mutex_destroy(&blk->queued_requests_lock);
     QTAILQ_REMOVE(&block_backends, blk, link);
     drive_info_del(blk->legacy_dinfo);
     block_acct_cleanup(&blk->stats);
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 
     if (qatomic_read(&blk->quiesce_counter) &&
         !qatomic_read(&blk->disable_request_queuing)) {
+        /*
+         * Take lock before decrementing in flight counter so main loop thread
+         * waits for us to enqueue ourselves before it can leave the drained
+         * section.
+         */
+        qemu_mutex_lock(&blk->queued_requests_lock);
         blk_dec_in_flight(blk);
-        qemu_co_queue_wait(&blk->queued_requests, NULL);
+        qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
         blk_inc_in_flight(blk);
+        qemu_mutex_unlock(&blk->queued_requests_lock);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child)
         if (blk->dev_ops && blk->dev_ops->drained_end) {
             blk->dev_ops->drained_end(blk->dev_opaque);
         }
-        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+        qemu_mutex_lock(&blk->queued_requests_lock);
+        while (qemu_co_enter_next(&blk->queued_requests,
+                                  &blk->queued_requests_lock)) {
             /* Resume all queued requests */
         }
+        qemu_mutex_unlock(&blk->queued_requests_lock);
     }
 }
 
--
2.40.0
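
The thread-safe CoQueue usage introduced above follows a general pattern; a
minimal sketch, assuming QEMU's coroutine and thread APIs (qemu_co_queue_wait(),
qemu_co_enter_next(), QemuMutex), with illustrative names rather than the real
block-backend.c code:

    typedef struct {
        QemuMutex lock;    /* protects the CoQueue's internal QSIMPLEQ */
        CoQueue waiters;
    } WaitQueue;

    static void wait_queue_init(WaitQueue *q)
    {
        qemu_mutex_init(&q->lock);
        qemu_co_queue_init(&q->waiters);
    }

    /* Called from a coroutine that must park itself */
    static void coroutine_fn wait_queue_wait(WaitQueue *q)
    {
        qemu_mutex_lock(&q->lock);
        /* drops the lock while asleep and re-acquires it on wakeup */
        qemu_co_queue_wait(&q->waiters, &q->lock);
        qemu_mutex_unlock(&q->lock);
    }

    /* Called by whoever ends the quiescent period */
    static void wait_queue_wake_all(WaitQueue *q)
    {
        qemu_mutex_lock(&q->lock);
        while (qemu_co_enter_next(&q->waiters, &q->lock)) {
            /* resumes one queued coroutine per iteration */
        }
        qemu_mutex_unlock(&q->lock);
    }
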
From: Stefan Hajnoczi <stefanha@redhat.com>

AioContext was designed to allow nested acquire/release calls. It uses
a recursive mutex so callers don't need to worry about nesting...or so
we thought.

BDRV_POLL_WHILE() is used to wait for block I/O requests. It releases
the AioContext temporarily around aio_poll(). This gives IOThreads a
chance to acquire the AioContext to process I/O completions.

It turns out that recursive locking and BDRV_POLL_WHILE() don't mix.
BDRV_POLL_WHILE() only releases the AioContext once, so the IOThread
will not be able to acquire the AioContext if it was acquired
multiple times.

Instead of trying to release AioContext n times in BDRV_POLL_WHILE(),
this patch simply avoids nested locking in save_vmstate(). It's the
simplest fix and we should step back to consider the big picture with
all the recent changes to block layer threading.

This patch is the final fix to solve 'savevm' hanging with -object
iothread.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 migration/savevm.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index XXXXXXX..XXXXXXX 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp)
         goto the_end;
     }
 
+    /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
+     * for itself. BDRV_POLL_WHILE() does not support nested locking because
+     * it only releases the lock once. Therefore synchronous I/O will deadlock
+     * unless we release the AioContext before bdrv_all_create_snapshot().
+     */
+    aio_context_release(aio_context);
+    aio_context = NULL;
+
     ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
     if (ret < 0) {
         error_setg(errp, "Error while creating snapshot on '%s'",
@@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp)
     ret = 0;
 
 the_end:
-    aio_context_release(aio_context);
+    if (aio_context) {
+        aio_context_release(aio_context);
+    }
     if (saved_vm_running) {
         vm_start();
     }
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

There is no need for the AioContext lock in bdrv_drain_all() because
nothing in AIO_WAIT_WHILE() needs the lock and the condition is atomic.

AIO_WAIT_WHILE_UNLOCKED() has no use for the AioContext parameter other
than performing a check that is nowadays already done by the
GLOBAL_STATE_CODE()/IO_CODE() macros. Set the ctx argument to NULL here
to help us keep track of all converted callers. Eventually all callers
will have been converted and then the argument can be dropped entirely.

Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230309190855.414275-2-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/block-backend.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void blk_drain_all(void)
     bdrv_drain_all_begin();
 
     while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *ctx = blk_get_aio_context(blk);
-
-        aio_context_acquire(ctx);
-
         /* We may have -ENOMEDIUM completions in flight */
-        AIO_WAIT_WHILE(ctx, qatomic_read(&blk->in_flight) > 0);
-
-        aio_context_release(ctx);
+        AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
     }
 
     bdrv_drain_all_end();
--
2.40.0
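
The nested-locking hazard described in the savevm patch above can be
reproduced with any recursive mutex. A self-contained sketch in plain
pthreads (deliberately not QEMU code) of why "acquire twice, release once"
leaves the lock held:

    #include <pthread.h>

    int main(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutex_t m;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
        pthread_mutex_init(&m, &attr);

        pthread_mutex_lock(&m);    /* outer acquire, e.g. the monitor    */
        pthread_mutex_lock(&m);    /* nested acquire, e.g. save_vmstate  */
        pthread_mutex_unlock(&m);  /* BDRV_POLL_WHILE() releases once    */

        /* The recursion count is still 1 here: pthread_mutex_lock(&m)
         * in any other thread (the IOThread in this bug) would block
         * forever, which is exactly the reported 'savevm' hang. */

        pthread_mutex_unlock(&m);  /* what the fix effectively adds      */
        pthread_mutex_destroy(&m);
        pthread_mutexattr_destroy(&attr);
        return 0;
    }
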
With this change, qed_aio_write_prefill() and qed_aio_write_postfill()
collapse into a single function. This is reflected by a rename of the
combined function to qed_aio_write_cow().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 57 +++++++++++++++++++++++----------------------------------
 1 file changed, 23 insertions(+), 34 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
  * @pos: Byte position in device
  * @len: Number of bytes
  * @offset: Byte offset in image file
- * @cb: Completion function
- * @opaque: User data for completion function
 */
-static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
-                                       uint64_t len, uint64_t offset,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static int qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
+                                      uint64_t len, uint64_t offset)
 {
     QEMUIOVector qiov;
     QEMUIOVector *backing_qiov = NULL;
@@ -XXX,XX +XXX,XX @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
 
     /* Skip copy entirely if there is no work to do */
     if (len == 0) {
-        cb(opaque, 0);
-        return;
+        return 0;
     }
 
     iov = (struct iovec) {
@@ -XXX,XX +XXX,XX @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
     ret = 0;
 out:
     qemu_vfree(iov.iov_base);
-    cb(opaque, ret);
+    return ret;
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
 }
 
 /**
- * Populate back untouched region of new data cluster
+ * Populate untouched regions of new data cluster
 */
-static void qed_aio_write_postfill(void *opaque, int ret)
+static void qed_aio_write_cow(void *opaque, int ret)
 {
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
-    uint64_t start = acb->cur_pos + acb->cur_qiov.size;
-    uint64_t len =
-        qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
-    uint64_t offset = acb->cur_cluster +
-                      qed_offset_into_cluster(s, acb->cur_pos) +
-                      acb->cur_qiov.size;
+    uint64_t start, len, offset;
+
+    /* Populate front untouched region of new data cluster */
+    start = qed_start_of_cluster(s, acb->cur_pos);
+    len = qed_offset_into_cluster(s, acb->cur_pos);
 
+    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
+    ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
     if (ret) {
         qed_aio_complete(acb, ret);
         return;
     }
 
-    trace_qed_aio_write_postfill(s, acb, start, len, offset);
-    qed_copy_from_backing_file(s, start, len, offset,
-                               qed_aio_write_main, acb);
-}
+    /* Populate back untouched region of new data cluster */
+    start = acb->cur_pos + acb->cur_qiov.size;
+    len = qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
+    offset = acb->cur_cluster +
+             qed_offset_into_cluster(s, acb->cur_pos) +
+             acb->cur_qiov.size;
 
-/**
- * Populate front untouched region of new data cluster
- */
-static void qed_aio_write_prefill(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
-    uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
+    trace_qed_aio_write_postfill(s, acb, start, len, offset);
+    ret = qed_copy_from_backing_file(s, start, len, offset);
 
-    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
-    qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
-                               qed_aio_write_postfill, acb);
+    qed_aio_write_main(acb, ret);
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 
         cb = qed_aio_write_zero_cluster;
     } else {
-        cb = qed_aio_write_prefill;
+        cb = qed_aio_write_cow;
         acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     }
 
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

There is no change in behavior. Switch to AIO_WAIT_WHILE_UNLOCKED()
instead of AIO_WAIT_WHILE() to document that this code has already been
audited and converted. The AioContext argument is already NULL so
aio_context_release() is never called anyway.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230309190855.414275-3-stefanha@redhat.com>
Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/export/export.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/export/export.c b/block/export/export.c
index XXXXXXX..XXXXXXX 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -XXX,XX +XXX,XX @@ void blk_exp_close_all_type(BlockExportType type)
         blk_exp_request_shutdown(exp);
     }
 
-    AIO_WAIT_WHILE(NULL, blk_exp_has_type(type));
+    AIO_WAIT_WHILE_UNLOCKED(NULL, blk_exp_has_type(type));
 }
 
 void blk_exp_close_all(void)
--
2.40.0
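
The qed patch above is one instance of a conversion applied throughout the
series: once the caller itself runs in a coroutine, a helper that reported
completion through a callback can simply return its result. A before/after
sketch with hypothetical names (State and do_copy are stand-ins, not the
actual qed code; BlockCompletionFunc is QEMU's real callback typedef):

    typedef struct State State;
    static int do_copy(State *s, uint64_t pos, uint64_t len);

    /* Before: the result can only be delivered asynchronously. */
    static void copy_region(State *s, uint64_t pos, uint64_t len,
                            BlockCompletionFunc *cb, void *opaque)
    {
        int ret = do_copy(s, pos, len);
        cb(opaque, ret);               /* caller resumes in the callback */
    }

    /* After: the coroutine just gets the value back. */
    static int coroutine_fn copy_region_co(State *s, uint64_t pos,
                                           uint64_t len)
    {
        return do_copy(s, pos, len);   /* may yield internally */
    }
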
From: Stefan Hajnoczi <stefanha@redhat.com>

blk/bdrv_drain_all() only takes effect for a single instant and then
resumes block jobs, guest devices, and other external clients like the
NBD server. This can be handy when performing a synchronous drain
before terminating the program, for example.

Monitor commands usually need to quiesce I/O across an entire code
region so blk/bdrv_drain_all() is not suitable. They must use
bdrv_drain_all_begin/end() to mark the region. This prevents new I/O
requests from slipping in or worse - block jobs completing and modifying
the graph.

I audited other blk/bdrv_drain_all() callers but did not find anything
that needs a similar fix. This patch fixes the savevm/loadvm commands.
Although I haven't encountered a real-world issue, this makes the code
safer.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 migration/savevm.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index XXXXXXX..XXXXXXX 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp)
     }
     vm_stop(RUN_STATE_SAVE_VM);
 
+    bdrv_drain_all_begin();
+
     aio_context_acquire(aio_context);
 
     memset(sn, 0, sizeof(*sn));
@@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp)
     if (aio_context) {
         aio_context_release(aio_context);
     }
+
+    bdrv_drain_all_end();
+
     if (saved_vm_running) {
         vm_start();
     }
@@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp)
     }
 
     /* Flush all IO requests so they don't interfere with the new state. */
-    bdrv_drain_all();
+    bdrv_drain_all_begin();
 
     ret = bdrv_all_goto_snapshot(name, &bs);
     if (ret < 0) {
         error_setg(errp, "Error %d while activating snapshot '%s' on '%s'",
                    ret, name, bdrv_get_device_name(bs));
-        return ret;
+        goto err_drain;
     }
 
     /* restore the VM state */
     f = qemu_fopen_bdrv(bs_vm_state, 0);
     if (!f) {
         error_setg(errp, "Could not open VM state file");
-        return -EINVAL;
+        ret = -EINVAL;
+        goto err_drain;
     }
 
     qemu_system_reset(SHUTDOWN_CAUSE_NONE);
@@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp)
     ret = qemu_loadvm_state(f);
     aio_context_release(aio_context);
 
+    bdrv_drain_all_end();
+
     migration_incoming_state_destroy();
     if (ret < 0) {
         error_setg(errp, "Error %d while loading VM state", ret);
@@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp)
     }
 
     return 0;
+
+err_drain:
+    bdrv_drain_all_end();
+    return ret;
 }
 
 void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

The following conversion is safe and does not change behavior:

     GLOBAL_STATE_CODE();
     ...
-    AIO_WAIT_WHILE(qemu_get_aio_context(), ...);
+    AIO_WAIT_WHILE_UNLOCKED(NULL, ...);

Since we're in GLOBAL_STATE_CODE(), qemu_get_aio_context() is our home
thread's AioContext. Thus AIO_WAIT_WHILE() does not unlock the
AioContext:

    if (ctx_ && in_aio_context_home_thread(ctx_)) {                \
        while ((cond)) {                                           \
            aio_poll(ctx_, true);                                  \
            waited_ = true;                                        \
        }                                                          \

And that means AIO_WAIT_WHILE_UNLOCKED(NULL, ...) can be substituted.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230309190855.414275-4-stefanha@redhat.com>
Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/graph-lock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/graph-lock.c b/block/graph-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(void)
          * reader lock.
          */
         qatomic_set(&has_writer, 0);
-        AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1);
+        AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
         qatomic_set(&has_writer, 1);
 
         /*
--
2.40.0
diff view generated by jsdifflib
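The error-path rewrite in the savevm patch above (early returns becoming
goto err_drain) is the usual C cleanup idiom. A minimal, self-contained
sketch of the invariant it restores - every exit taken after drain_begin()
passes through drain_end() - using stand-in functions, not QEMU's real
bdrv_drain_all_begin/end():

    /* Toy model of the paired begin/end pattern; drain_begin, drain_end
     * and activate are illustrative stand-ins. */
    #include <stdio.h>

    static int depth;
    static void drain_begin(void) { depth++; }
    static void drain_end(void)   { depth--; }
    static int activate(int fail) { return fail ? -1 : 0; }

    static int load_with_drain(int fail)
    {
        int ret;

        drain_begin();
        ret = activate(fail);
        if (ret < 0) {
            goto err_drain;      /* was "return ret" before the fix */
        }
        drain_end();
        return 0;

    err_drain:
        drain_end();             /* keep begin/end balanced on errors */
        return ret;
    }

    int main(void)
    {
        load_with_drain(1);
        printf("depth after error path: %d\n", depth); /* prints 0 */
        return 0;
    }
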
From: Stefan Hajnoczi <stefanha@redhat.com>

Calling aio_poll() directly may have been fine previously, but this is
the future, man! The difference between an aio_poll() loop and
BDRV_POLL_WHILE() is that BDRV_POLL_WHILE() releases the AioContext
around aio_poll().

This allows the IOThread to run fd handlers or BHs to complete the
request. Failure to release the AioContext causes deadlocks.

Using BDRV_POLL_WHILE() partially fixes a 'savevm' hang with -object
iothread.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/io.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

bdrv_coroutine_enter(bs, co);
- while (data.ret == -EINPROGRESS) {
- aio_poll(bdrv_get_aio_context(bs), true);
- }
+ BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
return data.ret;
}
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Since the AioContext argument was already NULL, AIO_WAIT_WHILE() was
never going to unlock the AioContext. Therefore it is possible to
replace AIO_WAIT_WHILE() with AIO_WAIT_WHILE_UNLOCKED().

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230309190855.414275-5-stefanha@redhat.com>
Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
bdrv_drain_all_begin_nopoll();

/* Now poll the in-flight requests */
- AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
+ AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());

while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
}
--
2.40.0

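In the home thread, the AIO_WAIT_WHILE() body quoted two patches above
reduces to a plain poll loop: keep servicing events until the condition
clears. A minimal model, with service_events() as a stand-in for
aio_poll(ctx, true) and no QEMU types involved:

    #include <stdio.h>

    static int in_flight = 3;

    static void service_events(void)
    {
        in_flight--;               /* pretend one request completed */
    }

    #define WAIT_WHILE_UNLOCKED(cond)  \
        do {                           \
            while (cond) {             \
                service_events();      \
            }                          \
        } while (0)

    int main(void)
    {
        WAIT_WHILE_UNLOCKED(in_flight > 0);
        printf("in_flight=%d\n", in_flight);   /* prints 0 */
        return 0;
    }
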
Deleted patch

This adds documentation for the -blockdev options that apply to all
nodes independent of the block driver used.

All options that are shared by -blockdev and -drive are now explained in
the section for -blockdev. The documentation of -drive mentions that all
-blockdev options are accepted as well.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
qemu-options.hx | 108 +++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 79 insertions(+), 29 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -XXX,XX +XXX,XX @@ DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev,
" [,read-only=on|off][,detect-zeroes=on|off|unmap]\n"
" [,driver specific parameters...]\n"
" configure a block backend\n", QEMU_ARCH_ALL)
+STEXI
+@item -blockdev @var{option}[,@var{option}[,@var{option}[,...]]]
+@findex -blockdev
+
+Define a new block driver node.
+
+@table @option
+@item Valid options for any block driver node:
+
+@table @code
+@item driver
+Specifies the block driver to use for the given node.
+@item node-name
+This defines the name of the block driver node by which it will be referenced
+later. The name must be unique, i.e. it must not match the name of a different
+block driver node, or (if you use @option{-drive} as well) the ID of a drive.
+
+If no node name is specified, it is automatically generated. The generated node
+name is not intended to be predictable and changes between QEMU invocations.
+For the top level, an explicit node name must be specified.
+@item read-only
+Open the node read-only. Guest write attempts will fail.
+@item cache.direct
+The host page cache can be avoided with @option{cache.direct=on}. This will
+attempt to do disk IO directly to the guest's memory. QEMU may still perform an
+internal copy of the data.
+@item cache.no-flush
+In case you don't care about data integrity over host failures, you can use
+@option{cache.no-flush=on}. This option tells QEMU that it never needs to write
+any data to the disk but can instead keep things in cache. If anything goes
+wrong, like your host losing power, the disk storage getting disconnected
+accidentally, etc. your image will most probably be rendered unusable.
+@item discard=@var{discard}
+@var{discard} is one of "ignore" (or "off") or "unmap" (or "on") and controls
+whether @code{discard} (also known as @code{trim} or @code{unmap}) requests are
+ignored or passed to the filesystem. Some machine types may not support
+discard requests.
+@item detect-zeroes=@var{detect-zeroes}
+@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. You may even choose "unmap" if @var{discard} is set
+to "unmap" to allow a zero write to be converted to an @code{unmap} operation.
+@end table
+
+@end table
+
+ETEXI

DEF("drive", HAS_ARG, QEMU_OPTION_drive,
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
@@ -XXX,XX +XXX,XX @@ STEXI
@item -drive @var{option}[,@var{option}[,@var{option}[,...]]]
@findex -drive

-Define a new drive. Valid options are:
+Define a new drive. This includes creating a block driver node (the backend) as
+well as a guest device, and is mostly a shortcut for defining the corresponding
+@option{-blockdev} and @option{-device} options.
+
+@option{-drive} accepts all options that are accepted by @option{-blockdev}. In
+addition, it knows the following options:

@table @option
@item file=@var{file}
@@ -XXX,XX +XXX,XX @@ These options have the same definition as they have in @option{-hdachs}.
@var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
(see @option{-snapshot}).
@item cache=@var{cache}
-@var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" and controls how the host cache is used to access block data.
+@var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough"
+and controls how the host cache is used to access block data. This is a
+shortcut that sets the @option{cache.direct} and @option{cache.no-flush}
+options (as in @option{-blockdev}), and additionally @option{cache.writeback},
+which provides a default for the @option{write-cache} option of block guest
+devices (as in @option{-device}). The modes correspond to the following
+settings:
+
+@c Our texi2pod.pl script doesn't support @multitable, so fall back to using
+@c plain ASCII art (well, UTF-8 art really). This looks okay both in the manpage
+@c and the HTML output.
+@example
+@ │ cache.writeback cache.direct cache.no-flush
+─────────────┼─────────────────────────────────────────────────
+writeback │ on off off
+none │ on on off
+writethrough │ off off off
+directsync │ off on off
+unsafe │ on off on
+@end example
+
+The default mode is @option{cache=writeback}.
+
@item aio=@var{aio}
@var{aio} is "threads", or "native" and selects between pthread based disk I/O and native Linux AIO.
-@item discard=@var{discard}
-@var{discard} is one of "ignore" (or "off") or "unmap" (or "on") and controls whether @dfn{discard} (also known as @dfn{trim} or @dfn{unmap}) requests are ignored or passed to the filesystem. Some machine types may not support discard requests.
@item format=@var{format}
Specify which disk @var{format} will be used rather than detecting
the format. Can be used to specify format=raw to avoid interpreting
@@ -XXX,XX +XXX,XX @@ Specify which @var{action} to take on write and read errors. Valid actions are:
"report" (report the error to the guest), "enospc" (pause QEMU only if the
host disk is full; report the error to the guest otherwise).
The default setting is @option{werror=enospc} and @option{rerror=report}.
-@item readonly
-Open drive @option{file} as read-only. Guest write attempts will fail.
@item copy-on-read=@var{copy-on-read}
@var{copy-on-read} is "on" or "off" and enables whether to copy read backing
file sectors into the image file.
-@item detect-zeroes=@var{detect-zeroes}
-@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
-conversion of plain zero writes by the OS to driver specific optimized
-zero write commands. You may even choose "unmap" if @var{discard} is set
-to "unmap" to allow a zero write to be converted to an UNMAP operation.
@item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w}
Specify bandwidth throttling limits in bytes per second, either for all request
types or for reads or writes only. Small values can lead to timeouts or hangs
@@ -XXX,XX +XXX,XX @@ prevent guests from circumventing throttling limits by using many small disks
instead of a single larger disk.
@end table

-By default, the @option{cache=writeback} mode is used. It will report data
+By default, the @option{cache.writeback=on} mode is used. It will report data
writes as completed as soon as the data is present in the host page cache.
This is safe as long as your guest OS makes sure to correctly flush disk caches
where needed. If your guest OS does not handle volatile disk write caches
correctly and your host crashes or loses power, then the guest may experience
data corruption.

-For such guests, you should consider using @option{cache=writethrough}. This
+For such guests, you should consider using @option{cache.writeback=off}. This
means that the host page cache will be used to read and write data, but write
notification will be sent to the guest only after QEMU has made sure to flush
each write to the disk. Be aware that this has a major impact on performance.

-The host page cache can be avoided entirely with @option{cache=none}. This will
-attempt to do disk IO directly to the guest's memory. QEMU may still perform
-an internal copy of the data. Note that this is considered a writeback mode and
-the guest OS must handle the disk write cache correctly in order to avoid data
-corruption on host crashes.
-
-The host page cache can be avoided while only sending write notifications to
-the guest when the data has been flushed to the disk using
-@option{cache=directsync}.
-
-In case you don't care about data integrity over host failures, use
-@option{cache=unsafe}. This option tells QEMU that it never needs to write any
-data to the disk but can instead keep things in cache. If anything goes wrong,
-like your host losing power, the disk storage getting disconnected accidentally,
-etc. your image will most probably be rendered unusable. When using
-the @option{-snapshot} option, unsafe caching is always used.
+When using the @option{-snapshot} option, unsafe caching is always used.

Copy-on-read avoids accessing the same backing file sectors repeatedly and is
useful when the backing file is over a slow network. By default copy-on-read
--
1.8.3.1

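The cache mode table in the patch above maps each -drive cache=MODE
shortcut onto three boolean options. The same mapping, restated as a
small stand-alone C table - a plain restatement of the documented
values, not QEMU code:

    #include <stdbool.h>
    #include <stdio.h>

    struct cache_mode { const char *name; bool writeback, direct, no_flush; };

    /* Values copied from the cache mode table documented above. */
    static const struct cache_mode modes[] = {
        { "writeback",    true,  false, false },
        { "none",         true,  true,  false },
        { "writethrough", false, false, false },
        { "directsync",   false, true,  false },
        { "unsafe",       true,  false, true  },
    };

    int main(void)
    {
        for (unsigned i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
            printf("%-12s writeback=%d direct=%d no-flush=%d\n",
                   modes[i].name, modes[i].writeback, modes[i].direct,
                   modes[i].no_flush);
        }
        return 0;
    }
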
Deleted patch

This documents the driver-specific options for the raw, qcow2 and file
block drivers for the man page. For everything else, we refer to the
QAPI documentation.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
qemu-options.hx | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 114 insertions(+), 1 deletion(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -XXX,XX +XXX,XX @@ STEXI
@item -blockdev @var{option}[,@var{option}[,@var{option}[,...]]]
@findex -blockdev

-Define a new block driver node.
+Define a new block driver node. Some of the options apply to all block drivers,
+other options are only accepted for a specific block driver. See below for a
+list of generic options and options for the most common block drivers.
+
+Options that expect a reference to another node (e.g. @code{file}) can be
+given in two ways. Either you specify the node name of an already existing node
+(file=@var{node-name}), or you define a new node inline, adding options
+for the referenced node after a dot (file.filename=@var{path},file.aio=native).
+
+A block driver node created with @option{-blockdev} can be used for a guest
+device by specifying its node name for the @code{drive} property in a
+@option{-device} argument that defines a block device.

@table @option
@item Valid options for any block driver node:
@@ -XXX,XX +XXX,XX @@ zero write commands. You may even choose "unmap" if @var{discard} is set
to "unmap" to allow a zero write to be converted to an @code{unmap} operation.
@end table

+@item Driver-specific options for @code{file}
+
+This is the protocol-level block driver for accessing regular files.
+
+@table @code
+@item filename
+The path to the image file in the local filesystem
+@item aio
+Specifies the AIO backend (threads/native, default: threads)
+@end table
+Example:
+@example
+-blockdev driver=file,node-name=disk,filename=disk.img
+@end example
+
+@item Driver-specific options for @code{raw}
+
+This is the image format block driver for raw images. It is usually
+stacked on top of a protocol level block driver such as @code{file}.
+
+@table @code
+@item file
+Reference to or definition of the data source block driver node
+(e.g. a @code{file} driver node)
+@end table
+Example 1:
+@example
+-blockdev driver=file,node-name=disk_file,filename=disk.img
+-blockdev driver=raw,node-name=disk,file=disk_file
+@end example
+Example 2:
+@example
+-blockdev driver=raw,node-name=disk,file.driver=file,file.filename=disk.img
+@end example
+
+@item Driver-specific options for @code{qcow2}
+
+This is the image format block driver for qcow2 images. It is usually
+stacked on top of a protocol level block driver such as @code{file}.
+
+@table @code
+@item file
+Reference to or definition of the data source block driver node
+(e.g. a @code{file} driver node)
+
+@item backing
+Reference to or definition of the backing file block device (default is taken
+from the image file). It is allowed to pass an empty string here in order to
+disable the default backing file.
+
+@item lazy-refcounts
+Whether to enable the lazy refcounts feature (on/off; default is taken from the
+image file)
+
+@item cache-size
+The maximum total size of the L2 table and refcount block caches in bytes
+(default: 1048576 bytes or 8 clusters, whichever is larger)
+
+@item l2-cache-size
+The maximum size of the L2 table cache in bytes
+(default: 4/5 of the total cache size)
+
+@item refcount-cache-size
+The maximum size of the refcount block cache in bytes
+(default: 1/5 of the total cache size)
+
+@item cache-clean-interval
+Clean unused entries in the L2 and refcount caches. The interval is in seconds.
+The default value is 0 and it disables this feature.
+
+@item pass-discard-request
+Whether discard requests to the qcow2 device should be forwarded to the data
+source (on/off; default: on if discard=unmap is specified, off otherwise)
+
+@item pass-discard-snapshot
+Whether discard requests for the data source should be issued when a snapshot
+operation (e.g. deleting a snapshot) frees clusters in the qcow2 file (on/off;
+default: on)
+
+@item pass-discard-other
+Whether discard requests for the data source should be issued on other
+occasions where a cluster gets freed (on/off; default: off)
+
+@item overlap-check
+Which overlap checks to perform for writes to the image
+(none/constant/cached/all; default: cached). For details or finer
+granularity control refer to the QAPI documentation of @code{blockdev-add}.
+@end table
+
+Example 1:
+@example
+-blockdev driver=file,node-name=my_file,filename=/tmp/disk.qcow2
+-blockdev driver=qcow2,node-name=hda,file=my_file,overlap-check=none,cache-size=16777216
+@end example
+Example 2:
+@example
+-blockdev driver=qcow2,node-name=disk,file.driver=http,file.filename=http://example.com/image.qcow2
+@end example
+
+@item Driver-specific options for other drivers
+Please refer to the QAPI documentation of the @code{blockdev-add} QMP command.
+
@end table

ETEXI
--
1.8.3.1

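The qcow2 cache defaults documented above split the total cache 4/5 to
1/5 between the L2 and refcount caches. The arithmetic for the
documented 1 MiB minimum, as a stand-alone snippet (numbers are only
the documented defaults, nothing QEMU-specific):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long total = 1048576;          /* default cache-size */
        unsigned long long l2 = total * 4 / 5;       /* l2-cache-size */
        unsigned long long refcount = total / 5;     /* refcount-cache-size */
        printf("l2=%llu refcount=%llu\n", l2, refcount);
        return 0;
    }
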
From: Stefan Hajnoczi <stefanha@redhat.com>

Old kvm.ko versions only supported a tiny number of ioeventfds so
virtio-pci avoids ioeventfds when kvm_has_many_ioeventfds() returns 0.

Do not check kvm_has_many_ioeventfds() when KVM is disabled since it
always returns 0. Since commit 8c56c1a592b5092d91da8d8943c17777d6462a6f
("memory: emulate ioeventfd") it has been possible to use ioeventfds in
qtest or TCG mode.

This patch makes -device virtio-blk-pci,iothread=iothread0 work even
when KVM is disabled.

I have tested that virtio-blk-pci works under TCG both with and without
iothread.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/virtio/virtio-pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
bool pcie_port = pci_bus_is_express(pci_dev->bus) &&
!pci_bus_is_root(pci_dev->bus);

- if (!kvm_has_many_ioeventfds()) {
+ if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
}

--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

The HMP monitor runs in the main loop thread. Calling
AIO_WAIT_WHILE(qemu_get_aio_context(), ...) from the main loop thread is
equivalent to AIO_WAIT_WHILE_UNLOCKED(NULL, ...) because neither unlocks
the AioContext and the latter's assertion that we're in the main loop
succeeds.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230309190855.414275-6-stefanha@redhat.com>
Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
monitor/hmp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/hmp.c b/monitor/hmp.c
index XXXXXXX..XXXXXXX 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -XXX,XX +XXX,XX @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
monitor_set_cur(co, &mon->common);
aio_co_enter(qemu_get_aio_context(), co);
- AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done);
+ AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
}

qobject_unref(qdict);
--
2.40.0

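The shape of handle_hmp_command() above - enter a coroutine, then wait
on a completion flag - can be modelled without any QEMU infrastructure.
A toy version in which run_pending() stands in for the event loop that
eventually runs the coroutine to completion:

    #include <stdbool.h>
    #include <stdio.h>

    struct cmd { bool done; int steps; };

    static void run_pending(struct cmd *c)
    {
        if (--c->steps == 0) {
            c->done = true;        /* command handler finished */
        }
    }

    int main(void)
    {
        struct cmd data = { .done = false, .steps = 2 };
        while (!data.done) {       /* AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done) */
            run_pending(&data);
        }
        printf("done after polling\n");
        return 0;
    }
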
From: Alberto Garcia <berto@igalia.com>

We already have functions for doing these calculations, so let's use
them instead of doing everything by hand. This makes the code a bit
more readable.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2-cluster.c | 4 ++--
block/qcow2.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

/* find the cluster offset for the given disk offset */

- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ l2_index = offset_to_l2_index(s, offset);
*cluster_offset = be64_to_cpu(l2_table[l2_index]);

nb_clusters = size_to_clusters(s, bytes_needed);
@@ -XXX,XX +XXX,XX @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,

/* find the cluster offset for the given disk offset */

- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ l2_index = offset_to_l2_index(s, offset);

*new_l2_table = l2_table;
*new_l2_index = l2_index;
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
}

/* Tables must be cluster aligned */
- if (offset & (s->cluster_size - 1)) {
+ if (offset_into_cluster(s, offset) != 0) {
return -EINVAL;
}

--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

monitor_cleanup() is called from the main loop thread. Calling
AIO_WAIT_WHILE(qemu_get_aio_context(), ...) from the main loop thread is
equivalent to AIO_WAIT_WHILE_UNLOCKED(NULL, ...) because neither unlocks
the AioContext and the latter's assertion that we're in the main loop
succeeds.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230309190855.414275-7-stefanha@redhat.com>
Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
monitor/monitor.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monitor/monitor.c b/monitor/monitor.c
index XXXXXXX..XXXXXXX 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -XXX,XX +XXX,XX @@ void monitor_cleanup(void)
* We need to poll both qemu_aio_context and iohandler_ctx to make
* sure that the dispatcher coroutine keeps making progress and
* eventually terminates. qemu_aio_context is automatically
- * polled by calling AIO_WAIT_WHILE on it, but we must poll
+ * polled by calling AIO_WAIT_WHILE_UNLOCKED on it, but we must poll
* iohandler_ctx manually.
*
* Letting the iothread continue while shutting down the dispatcher
@@ -XXX,XX +XXX,XX @@ void monitor_cleanup(void)
aio_co_wake(qmp_dispatcher_co);
}

- AIO_WAIT_WHILE(qemu_get_aio_context(),
+ AIO_WAIT_WHILE_UNLOCKED(NULL,
(aio_poll(iohandler_get_aio_context(), false),
qatomic_mb_read(&qmp_dispatcher_co_busy)));

--
2.40.0

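The helpers the qcow2 patch above switches to wrap exactly the bit
arithmetic being removed. A self-contained sketch mirroring those
expressions (the real definitions live in block/qcow2.h; the struct
here is a cut-down stand-in for BDRVQcow2State):

    #include <stdint.h>
    #include <stdio.h>

    struct state { unsigned cluster_bits, cluster_size, l2_size; };

    /* Bytes into the containing cluster: offset & (cluster_size - 1) */
    static int64_t offset_into_cluster(struct state *s, int64_t offset)
    {
        return offset & (s->cluster_size - 1);
    }

    /* Index into the L2 table: (offset >> cluster_bits) & (l2_size - 1) */
    static int offset_to_l2_index(struct state *s, int64_t offset)
    {
        return (offset >> s->cluster_bits) & (s->l2_size - 1);
    }

    int main(void)
    {
        struct state s = { .cluster_bits = 16, .cluster_size = 1 << 16,
                           .l2_size = 1 << 13 };
        int64_t off = (5LL << 16) + 42;   /* cluster 5, byte 42 */
        printf("l2_index=%d into_cluster=%lld\n",
               offset_to_l2_index(&s, off),
               (long long)offset_into_cluster(&s, off));
        return 0;
    }
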
From: Max Reitz <mreitz@redhat.com>

The bs->exact_filename field may not be sufficient to store the full
blkverify node filename. In this case, we should not generate a filename
at all instead of an unusable one.

Cc: qemu-stable@nongnu.org
Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613172006.19685-3-mreitz@redhat.com
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/blkverify.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/blkverify.c b/block/blkverify.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -XXX,XX +XXX,XX @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
if (bs->file->bs->exact_filename[0]
&& s->test_file->bs->exact_filename[0])
{
- snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "blkverify:%s:%s",
- bs->file->bs->exact_filename,
- s->test_file->bs->exact_filename);
+ int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+ "blkverify:%s:%s",
+ bs->file->bs->exact_filename,
+ s->test_file->bs->exact_filename);
+ if (ret >= sizeof(bs->exact_filename)) {
+ /* An overflow makes the filename unusable, so do not report any */
+ bs->exact_filename[0] = 0;
+ }
}
}

--
1.8.3.1

From: Wilfred Mallawa <wilfred.mallawa@wdc.com>

Fix up a few minor typos

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Message-Id: <20230313003744.55476-1-wilfred.mallawa@opensource.wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/aio-wait.h | 2 +-
include/block/block_int-common.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -XXX,XX +XXX,XX @@ extern AioWait global_aio_wait;
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
* caller does not hold a lock) are involved in the polling condition.
* @cond: wait while this conditional expression is true
- * @unlock: whether to unlock and then lock again @ctx. This apples
+ * @unlock: whether to unlock and then lock again @ctx. This applies
* only when waiting for another AioContext from the main loop.
* Otherwise it's ignored.
*
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -XXX,XX +XXX,XX @@ extern QemuOptsList bdrv_create_opts_simple;
/*
* Common functions that are neither I/O nor Global State.
*
- * See include/block/block-commmon.h for more information about
+ * See include/block/block-common.h for more information about
* the Common API.
*/

--
2.40.0

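The blkverify fix above hinges on snprintf() returning the length it
would have written had the buffer been large enough, so a return value
greater than or equal to the buffer size signals truncation. A
stand-alone demonstration of that check (file names here are made up):

    #include <stdio.h>

    int main(void)
    {
        char buf[16];
        int ret = snprintf(buf, sizeof(buf), "blkverify:%s:%s",
                           "averylongfilename.img", "test.img");
        if (ret >= (int)sizeof(buf)) {
            buf[0] = '\0';   /* unusable if truncated, report nothing */
        }
        printf("result: \"%s\" (needed %d bytes)\n", buf, ret + 1);
        return 0;
    }
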
From: Stefan Hajnoczi <stefanha@redhat.com>

migration_incoming_state_destroy() uses qemu_fclose() on the vmstate
file. Make sure to call it inside an AioContext acquire/release region.

This fixes a 'qemu: qemu_mutex_unlock: Operation not permitted' abort
in loadvm.

This patch closes the vmstate file before ending the drained region.
Previously we closed the vmstate file after ending the drained region.
The order does not matter.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
migration/savevm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index XXXXXXX..XXXXXXX 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp)

aio_context_acquire(aio_context);
ret = qemu_loadvm_state(f);
+ migration_incoming_state_destroy();
aio_context_release(aio_context);

bdrv_drain_all_end();

- migration_incoming_state_destroy();
if (ret < 0) {
error_setg(errp, "Error %d while loading VM state", ret);
return ret;
--
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Not a coroutine_fn, you say?

static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
{
    BdrvChild *child;
    int64_t child_size, sum = 0;

    QLIST_FOREACH(child, &bs->children, next) {
        if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                           BDRV_CHILD_FILTERED))
        {
            child_size = bdrv_co_get_allocated_file_size(child->bs);
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Well what do we have here?!

I rest my case, your honor.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230308211435.346375-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ exit:
* sums the size of all data-bearing children. (This excludes backing
* children.)
*/
-static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn bdrv_sum_allocated_file_size(BlockDriverState *bs)
{
BdrvChild *child;
int64_t child_size, sum = 0;
--
2.40.0

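coroutine_fn is first of all an annotation: a coroutine_fn may only be
called from coroutine context, which is exactly the rule the patch
above restores for bdrv_sum_allocated_file_size(). A toy illustration
with a stand-in marker macro (QEMU's real macro can expand to a
static-analysis attribute; everything else here is invented for the
example):

    #include <stdio.h>

    /* Stand-in marker: callable only from coroutine context. */
    #define coroutine_fn

    static int coroutine_fn child_allocated_size(int child)
    {
        return child * 512;
    }

    static int coroutine_fn sum_allocated_size(const int *children, int n)
    {
        int sum = 0;
        for (int i = 0; i < n; i++) {
            /* coroutine_fn calling coroutine_fn: allowed */
            sum += child_allocated_size(children[i]);
        }
        return sum;
    }

    int main(void)
    {
        int kids[] = { 1, 2, 3 };
        printf("%d\n", sum_allocated_size(kids, 3));
        return 0;
    }
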
From: Alberto Garcia <berto@igalia.com>

This patch splits do_perform_cow() into three separate functions to
read, encrypt and write the COW regions.

perform_cow() can now read both regions first, then encrypt them and
finally write them to disk. The memory allocation is also done in
this function now, using one single buffer large enough to hold both
regions.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2-cluster.c | 117 +++++++++++++++++++++++++++++++++++++-------------
1 file changed, 87 insertions(+), 30 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
return 0;
}

-static int coroutine_fn do_perform_cow(BlockDriverState *bs,
- uint64_t src_cluster_offset,
- uint64_t cluster_offset,
- unsigned offset_in_cluster,
- unsigned bytes)
+static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
+ uint64_t src_cluster_offset,
+ unsigned offset_in_cluster,
+ uint8_t *buffer,
+ unsigned bytes)
{
- BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
- struct iovec iov;
+ struct iovec iov = { .iov_base = buffer, .iov_len = bytes };
int ret;

if (bytes == 0) {
return 0;
}

- iov.iov_len = bytes;
- iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
- if (iov.iov_base == NULL) {
- return -ENOMEM;
- }
-
qemu_iovec_init_external(&qiov, &iov, 1);

BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

if (!bs->drv) {
- ret = -ENOMEDIUM;
- goto out;
+ return -ENOMEDIUM;
}

/* Call .bdrv_co_readv() directly instead of using the public block-layer
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs,
ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
bytes, &qiov, 0);
if (ret < 0) {
- goto out;
+ return ret;
}

- if (bs->encrypted) {
+ return 0;
+}
+
+static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
+ uint64_t src_cluster_offset,
+ unsigned offset_in_cluster,
+ uint8_t *buffer,
+ unsigned bytes)
+{
+ if (bytes && bs->encrypted) {
+ BDRVQcow2State *s = bs->opaque;
int64_t sector = (src_cluster_offset + offset_in_cluster)
>> BDRV_SECTOR_BITS;
assert(s->cipher);
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
- if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
+ if (qcow2_encrypt_sectors(s, sector, buffer, buffer,
bytes >> BDRV_SECTOR_BITS, true, NULL) < 0) {
- ret = -EIO;
- goto out;
+ return false;
}
}
+ return true;
+}
+
+static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
+ uint64_t cluster_offset,
+ unsigned offset_in_cluster,
+ uint8_t *buffer,
+ unsigned bytes)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = { .iov_base = buffer, .iov_len = bytes };
+ int ret;
+
+ if (bytes == 0) {
+ return 0;
+ }
+
+ qemu_iovec_init_external(&qiov, &iov, 1);

ret = qcow2_pre_write_overlap_check(bs, 0,
cluster_offset + offset_in_cluster, bytes);
if (ret < 0) {
- goto out;
+ return ret;
}

BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
bytes, &qiov, 0);
if (ret < 0) {
- goto out;
+ return ret;
}

- ret = 0;
-out:
- qemu_vfree(iov.iov_base);
- return ret;
+ return 0;
}


@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
BDRVQcow2State *s = bs->opaque;
Qcow2COWRegion *start = &m->cow_start;
Qcow2COWRegion *end = &m->cow_end;
+ unsigned buffer_size;
+ uint8_t *start_buffer, *end_buffer;
int ret;

+ assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
+
if (start->nb_bytes == 0 && end->nb_bytes == 0) {
return 0;
}

+ /* Reserve a buffer large enough to store the data from both the
+ * start and end COW regions. Add some padding in the middle if
+ * necessary to make sure that the end region is optimally aligned */
+ buffer_size = QEMU_ALIGN_UP(start->nb_bytes, bdrv_opt_mem_align(bs)) +
+ end->nb_bytes;
+ start_buffer = qemu_try_blockalign(bs, buffer_size);
+ if (start_buffer == NULL) {
+ return -ENOMEM;
+ }
+ /* The part of the buffer where the end region is located */
+ end_buffer = start_buffer + buffer_size - end->nb_bytes;
+
qemu_co_mutex_unlock(&s->lock);
- ret = do_perform_cow(bs, m->offset, m->alloc_offset,
- start->offset, start->nb_bytes);
+ /* First we read the existing data from both COW regions */
+ ret = do_perform_cow_read(bs, m->offset, start->offset,
+ start_buffer, start->nb_bytes);
if (ret < 0) {
goto fail;
}

- ret = do_perform_cow(bs, m->offset, m->alloc_offset,
- end->offset, end->nb_bytes);
+ ret = do_perform_cow_read(bs, m->offset, end->offset,
+ end_buffer, end->nb_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Encrypt the data if necessary before writing it */
+ if (bs->encrypted) {
+ if (!do_perform_cow_encrypt(bs, m->offset, start->offset,
+ start_buffer, start->nb_bytes) ||
+ !do_perform_cow_encrypt(bs, m->offset, end->offset,
+ end_buffer, end->nb_bytes)) {
+ ret = -EIO;
+ goto fail;
+ }
+ }
+
+ /* And now we can write everything */
+ ret = do_perform_cow_write(bs, m->alloc_offset, start->offset,
+ start_buffer, start->nb_bytes);
+ if (ret < 0) {
+ goto fail;
+ }

+ ret = do_perform_cow_write(bs, m->alloc_offset, end->offset,
+ end_buffer, end->nb_bytes);
fail:
qemu_co_mutex_lock(&s->lock);

@@ -XXX,XX +XXX,XX @@ fail:
qcow2_cache_depends_on_flush(s->l2_table_cache);
}

+ qemu_vfree(start_buffer);
return ret;
}

--
1.8.3.1

From: Emanuele Giuseppe Esposito <eesposit@redhat.com>

Remove usage of aio_context_acquire by always submitting asynchronous
AIO to the current thread's LinuxAioState.

In order to prevent mistakes from the caller side, avoid passing LinuxAioState
in laio_io_{plug/unplug} and laio_co_submit, and document the functions
to make clear that they work in the current thread's AioContext.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20230203131731.851116-2-eesposit@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/aio.h | 4 ----
include/block/raw-aio.h | 18 ++++++++++++------
include/sysemu/block-backend-io.h | 5 +++++
block/file-posix.c | 10 +++-------
block/linux-aio.c | 29 +++++++++++++++++------------
5 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct AioContext {
struct ThreadPool *thread_pool;

#ifdef CONFIG_LINUX_AIO
- /*
- * State for native Linux AIO. Uses aio_context_acquire/release for
- * locking.
- */
struct LinuxAioState *linux_aio;
#endif
#ifdef CONFIG_LINUX_IO_URING
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -XXX,XX +XXX,XX @@
typedef struct LinuxAioState LinuxAioState;
LinuxAioState *laio_init(Error **errp);
void laio_cleanup(LinuxAioState *s);
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- uint64_t offset, QEMUIOVector *qiov, int type,
- uint64_t dev_max_batch);
+
+/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+ int type, uint64_t dev_max_batch);
+
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
- uint64_t dev_max_batch);
+
+/*
+ * laio_io_plug/unplug work in the thread's current AioContext, therefore the
+ * caller must ensure that they are paired in the same IOThread.
+ */
+void laio_io_plug(void);
+void laio_io_unplug(uint64_t dev_max_batch);
#endif
/* io_uring.c - Linux io_uring implementation */
#ifdef CONFIG_LINUX_IO_URING
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -XXX,XX +XXX,XX @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
int blk_get_max_iov(BlockBackend *blk);
int blk_get_max_hw_iov(BlockBackend *blk);

+/*
+ * blk_io_plug/unplug are thread-local operations. This means that multiple
+ * IOThreads can simultaneously call plug/unplug, but the caller must ensure
+ * that each unplug() is called in the same IOThread of the matching plug().
+ */
void coroutine_fn blk_co_io_plug(BlockBackend *blk);
void co_wrapper blk_io_plug(BlockBackend *blk);

diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
#endif
#ifdef CONFIG_LINUX_AIO
} else if (s->use_linux_aio) {
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
- return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
- s->aio_max_batch);
+ return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch);
#endif
}

@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_plug(BlockDriverState *bs)
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
if (s->use_linux_aio) {
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
- laio_io_plug(bs, aio);
+ laio_io_plug();
}
#endif
#ifdef CONFIG_LINUX_IO_URING
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs)
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
if (s->use_linux_aio) {
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
- laio_io_unplug(bs, aio, s->aio_max_batch);
+ laio_io_unplug(s->aio_max_batch);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@
#include "qemu/coroutine.h"
#include "qapi/error.h"

+/* Only used for assertions. */
+#include "qemu/coroutine_int.h"
+
#include <libaio.h>

/*
@@ -XXX,XX +XXX,XX @@ struct LinuxAioState {
io_context_t ctx;
EventNotifier e;

- /* io queue for submit at batch. Protected by AioContext lock. */
+ /* No locking required, only accessed from AioContext home thread */
LaioQueue io_q;
-
- /* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
* later. Coroutines cannot be entered recursively so avoid doing
* that!
*/
+ assert(laiocb->co->ctx == laiocb->ctx->aio_context);
if (!qemu_coroutine_entered(laiocb->co)) {
aio_co_wake(laiocb->co);
}
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions(LinuxAioState *s)

static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
- aio_context_acquire(s->aio_context);
qemu_laio_process_completions(s);

if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
- aio_context_release(s->aio_context);
}

static void qemu_laio_completion_bh(void *opaque)
@@ -XXX,XX +XXX,XX @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
return max_batch;
}

-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
+void laio_io_plug(void)
{
+ AioContext *ctx = qemu_get_current_aio_context();
+ LinuxAioState *s = aio_get_linux_aio(ctx);
+
s->io_q.plugged++;
}

-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
- uint64_t dev_max_batch)
+void laio_io_unplug(uint64_t dev_max_batch)
{
+ AioContext *ctx = qemu_get_current_aio_context();
+ LinuxAioState *s = aio_get_linux_aio(ctx);
+
assert(s->io_q.plugged);
s->io_q.plugged--;

@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
return 0;
}

-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
- uint64_t offset, QEMUIOVector *qiov, int type,
- uint64_t dev_max_batch)
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+ int type, uint64_t dev_max_batch)
{
int ret;
+ AioContext *ctx = qemu_get_current_aio_context();
struct qemu_laiocb laiocb = {
.co = qemu_coroutine_self(),
.nbytes = qiov->size,
- .ctx = s,
+ .ctx = aio_get_linux_aio(ctx),
.ret = -EINPROGRESS,
.is_read = (type == QEMU_AIO_READ),
.qiov = qiov,
--
2.40.0

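perform_cow() above sizes one allocation so that the end COW region
starts on an aligned boundary. The layout computation in isolation,
with ALIGN_UP standing in for QEMU_ALIGN_UP and the sizes and alignment
chosen purely for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define ALIGN_UP(n, a) (((n) + (a) - 1) / (a) * (a))

    int main(void)
    {
        size_t align = 4096;                /* e.g. bdrv_opt_mem_align() */
        size_t start_bytes = 1536, end_bytes = 2560;
        size_t buffer_size = ALIGN_UP(start_bytes, align) + end_bytes;
        /* aligned_alloc needs a size that is a multiple of the alignment */
        uint8_t *start_buffer = aligned_alloc(align,
                                              ALIGN_UP(buffer_size, align));
        uint8_t *end_buffer = start_buffer + buffer_size - end_bytes;

        /* end_buffer lands on the alignment boundary by construction */
        printf("buffer_size=%zu end offset=%td aligned=%d\n",
               buffer_size, end_buffer - start_buffer,
               (end_buffer - start_buffer) % align == 0);
        free(start_buffer);
        return 0;
    }
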
1
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
2
3
Remove usage of aio_context_acquire by always submitting asynchronous
4
AIO to the current thread's LuringState.
5
6
In order to prevent mistakes from the caller side, avoid passing LuringState
7
in luring_io_{plug/unplug} and luring_co_submit, and document the functions
8
to make clear that they work in the current thread's AioContext.
9
10
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
11
Message-Id: <20230203131731.851116-3-eesposit@redhat.com>
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
3
---
15
---
4
block/qed-table.c | 47 ++++++++++++-----------------------------------
16
include/block/aio.h | 4 ----
5
block/qed.c | 12 +++++++-----
17
include/block/raw-aio.h | 15 +++++++++++----
6
block/qed.h | 8 +++-----
18
block/file-posix.c | 12 ++++--------
7
3 files changed, 22 insertions(+), 45 deletions(-)
19
block/io_uring.c | 23 +++++++++++++++--------
20
4 files changed, 30 insertions(+), 24 deletions(-)
8
21
9
diff --git a/block/qed-table.c b/block/qed-table.c
22
diff --git a/include/block/aio.h b/include/block/aio.h
10
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
11
--- a/block/qed-table.c
24
--- a/include/block/aio.h
12
+++ b/block/qed-table.c
25
+++ b/include/block/aio.h
13
@@ -XXX,XX +XXX,XX @@ out:
26
@@ -XXX,XX +XXX,XX @@ struct AioContext {
14
* @index: Index of first element
27
struct LinuxAioState *linux_aio;
15
* @n: Number of elements
28
#endif
16
* @flush: Whether or not to sync to disk
29
#ifdef CONFIG_LINUX_IO_URING
17
- * @cb: Completion function
30
- /*
18
- * @opaque: Argument for completion function
31
- * State for Linux io_uring. Uses aio_context_acquire/release for
19
*/
32
- * locking.
20
-static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
33
- */
21
- unsigned int index, unsigned int n, bool flush,
34
struct LuringState *linux_io_uring;
22
- BlockCompletionFunc *cb, void *opaque)
35
23
+static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
36
/* State for file descriptor monitoring using Linux io_uring */
24
+ unsigned int index, unsigned int n, bool flush)
37
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/block/raw-aio.h
40
+++ b/include/block/raw-aio.h
41
@@ -XXX,XX +XXX,XX @@ void laio_io_unplug(uint64_t dev_max_batch);
42
typedef struct LuringState LuringState;
43
LuringState *luring_init(Error **errp);
44
void luring_cleanup(LuringState *s);
45
-int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
46
- uint64_t offset, QEMUIOVector *qiov, int type);
47
+
48
+/* luring_co_submit: submit I/O requests in the thread's current AioContext. */
49
+int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
50
+ QEMUIOVector *qiov, int type);
51
void luring_detach_aio_context(LuringState *s, AioContext *old_context);
52
void luring_attach_aio_context(LuringState *s, AioContext *new_context);
53
-void luring_io_plug(BlockDriverState *bs, LuringState *s);
54
-void luring_io_unplug(BlockDriverState *bs, LuringState *s);
55
+
56
+/*
57
+ * luring_io_plug/unplug work in the thread's current AioContext, therefore the
58
+ * caller must ensure that they are paired in the same IOThread.
59
+ */
60
+void luring_io_plug(void);
61
+void luring_io_unplug(void);
62
#endif
63
64
#ifdef _WIN32
65
diff --git a/block/file-posix.c b/block/file-posix.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/block/file-posix.c
68
+++ b/block/file-posix.c
69
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
70
type |= QEMU_AIO_MISALIGNED;
71
#ifdef CONFIG_LINUX_IO_URING
72
} else if (s->use_linux_io_uring) {
73
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
74
assert(qiov->size == bytes);
75
- return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
76
+ return luring_co_submit(bs, s->fd, offset, qiov, type);
77
#endif
78
#ifdef CONFIG_LINUX_AIO
79
} else if (s->use_linux_aio) {
80
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_plug(BlockDriverState *bs)
81
#endif
82
#ifdef CONFIG_LINUX_IO_URING
83
if (s->use_linux_io_uring) {
84
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
85
- luring_io_plug(bs, aio);
86
+ luring_io_plug();
87
}
88
#endif
89
}
90
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs)
91
#endif
92
#ifdef CONFIG_LINUX_IO_URING
93
if (s->use_linux_io_uring) {
94
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
95
- luring_io_unplug(bs, aio);
96
+ luring_io_unplug();
97
}
98
#endif
99
}
100
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
101
102
#ifdef CONFIG_LINUX_IO_URING
103
if (s->use_linux_io_uring) {
104
- LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
105
- return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
106
+ return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
107
}
108
#endif
109
return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
110
diff --git a/block/io_uring.c b/block/io_uring.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/block/io_uring.c
113
+++ b/block/io_uring.c
114
@@ -XXX,XX +XXX,XX @@
115
#include "qapi/error.h"
116
#include "trace.h"
117
118
+/* Only used for assertions. */
119
+#include "qemu/coroutine_int.h"
120
+
121
/* io_uring ring size */
122
#define MAX_ENTRIES 128
123
124
@@ -XXX,XX +XXX,XX @@ typedef struct LuringState {
125
126
struct io_uring ring;
127
128
- /* io queue for submit at batch. Protected by AioContext lock. */
129
+ /* No locking required, only accessed from AioContext home thread */
130
LuringQueue io_q;
131
132
- /* I/O completion processing. Only runs in I/O thread. */
133
QEMUBH *completion_bh;
134
} LuringState;
135
136
@@ -XXX,XX +XXX,XX @@ end:
137
* eventually runs later. Coroutines cannot be entered recursively
138
* so avoid doing that!
139
*/
140
+ assert(luringcb->co->ctx == s->aio_context);
141
if (!qemu_coroutine_entered(luringcb->co)) {
142
aio_co_wake(luringcb->co);
143
}
144
@@ -XXX,XX +XXX,XX @@ static int ioq_submit(LuringState *s)
145
146
static void luring_process_completions_and_submit(LuringState *s)
25
{
147
{
26
unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
148
- aio_context_acquire(s->aio_context);
27
unsigned int start, end, i;
149
luring_process_completions(s);
28
@@ -XXX,XX +XXX,XX @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
150
29
ret = 0;
151
if (!s->io_q.plugged && s->io_q.in_queue > 0) {
30
out:
152
ioq_submit(s);
31
qemu_vfree(new_table);
153
}
32
- cb(opaque, ret);
154
- aio_context_release(s->aio_context);
33
-}
34
-
35
-/**
36
- * Propagate return value from async callback
37
- */
38
-static void qed_sync_cb(void *opaque, int ret)
39
-{
40
- *(int *)opaque = ret;
41
+ return ret;
42
}
155
}
43
156
44
int qed_read_l1_table_sync(BDRVQEDState *s)
157
static void qemu_luring_completion_bh(void *opaque)
45
@@ -XXX,XX +XXX,XX @@ int qed_read_l1_table_sync(BDRVQEDState *s)
158
@@ -XXX,XX +XXX,XX @@ static void ioq_init(LuringQueue *io_q)
46
return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
159
io_q->blocked = false;
47
}
160
}
48
161
49
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
162
-void luring_io_plug(BlockDriverState *bs, LuringState *s)
50
- BlockCompletionFunc *cb, void *opaque)
163
+void luring_io_plug(void)
51
+int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
52
{
164
{
53
BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
165
+ AioContext *ctx = qemu_get_current_aio_context();
54
- qed_write_table(s, s->header.l1_table_offset,
166
+ LuringState *s = aio_get_linux_io_uring(ctx);
55
- s->l1_table, index, n, false, cb, opaque);
167
trace_luring_io_plug(s);
56
+ return qed_write_table(s, s->header.l1_table_offset,
168
s->io_q.plugged++;
57
+ s->l1_table, index, n, false);
58
}
169
}
59
170
60
int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
171
-void luring_io_unplug(BlockDriverState *bs, LuringState *s)
61
unsigned int n)
172
+void luring_io_unplug(void)
62
{
173
{
63
- int ret = -EINPROGRESS;
174
+ AioContext *ctx = qemu_get_current_aio_context();
64
-
175
+ LuringState *s = aio_get_linux_io_uring(ctx);
65
- qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
176
assert(s->io_q.plugged);
66
- BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
177
trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
67
-
178
s->io_q.in_queue, s->io_q.in_flight);
68
- return ret;
179
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
69
+ return qed_write_l1_table(s, index, n);
180
return 0;
70
}
181
}
71
182
72
int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
183
-int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
73
@@ -XXX,XX +XXX,XX @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
184
- uint64_t offset, QEMUIOVector *qiov, int type)
74
return qed_read_l2_table(s, request, offset);
185
+int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
75
}
186
+ QEMUIOVector *qiov, int type)
76
77
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
78
- unsigned int index, unsigned int n, bool flush,
79
- BlockCompletionFunc *cb, void *opaque)
80
+int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
81
+ unsigned int index, unsigned int n, bool flush)
82
{
187
{
83
BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
188
int ret;
84
- qed_write_table(s, request->l2_table->offset,
189
+ AioContext *ctx = qemu_get_current_aio_context();
85
- request->l2_table->table, index, n, flush, cb, opaque);
190
+ LuringState *s = aio_get_linux_io_uring(ctx);
86
+ return qed_write_table(s, request->l2_table->offset,
191
LuringAIOCB luringcb = {
87
+ request->l2_table->table, index, n, flush);
192
.co = qemu_coroutine_self(),
88
}
193
.ret = -EINPROGRESS,
89
90
int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
91
unsigned int index, unsigned int n, bool flush)
92
{
93
- int ret = -EINPROGRESS;
94
-
95
- qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
96
- BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
97
-
98
- return ret;
99
+ return qed_write_l2_table(s, request, index, n, flush);
100
}
101
diff --git a/block/qed.c b/block/qed.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/block/qed.c
104
+++ b/block/qed.c
105
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l1_update(void *opaque, int ret)
106
index = qed_l1_index(s, acb->cur_pos);
107
s->l1_table->offsets[index] = acb->request.l2_table->offset;
108
109
- qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
110
+ ret = qed_write_l1_table(s, index, 1);
111
+ qed_commit_l2_update(acb, ret);
112
}
113
114
/**
115
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
116
117
if (need_alloc) {
118
/* Write out the whole new L2 table */
119
- qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
120
- qed_aio_write_l1_update, acb);
121
+ ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true);
122
+ qed_aio_write_l1_update(acb, ret);
123
} else {
124
/* Write out only the updated part of the L2 table */
125
- qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
126
- qed_aio_next_io_cb, acb);
127
+ ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters,
128
+ false);
129
+ qed_aio_next_io(acb, ret);
130
}
131
return;
132
133
diff --git a/block/qed.h b/block/qed.h
134
index XXXXXXX..XXXXXXX 100644
135
--- a/block/qed.h
136
+++ b/block/qed.h
137
@@ -XXX,XX +XXX,XX @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
138
* Table I/O functions
139
*/
140
int qed_read_l1_table_sync(BDRVQEDState *s);
141
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
142
- BlockCompletionFunc *cb, void *opaque);
143
+int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
144
int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
145
unsigned int n);
146
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
147
uint64_t offset);
148
int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
149
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
150
- unsigned int index, unsigned int n, bool flush,
151
- BlockCompletionFunc *cb, void *opaque);
152
+int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
153
+ unsigned int index, unsigned int n, bool flush);
154
int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
155
unsigned int index, unsigned int n, bool flush);
156
157
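In sketch form, the new submission path above: luring_co_submit() now resolves the per-thread LuringState itself rather than taking it as a parameter. A minimal sketch, using only calls visible in the hunks above:

    /* Inside luring_co_submit(): io_uring state belongs to the
     * submitting thread's AioContext and is looked up on demand. */
    AioContext *ctx = qemu_get_current_aio_context();
    LuringState *s = aio_get_linux_io_uring(ctx);
    /* ... fill in the LuringAIOCB and submit on this thread's ring ... */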
--
194
--
158
1.8.3.1
195
2.40.0
159
160
1
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
2
3
Use qemu_get_current_aio_context() where possible, since we always
4
submit work to the current thread anyway.
5
6
We also want to be sure that the thread submitting the work is
7
the same as the one processing the pool, to avoid adding
8
synchronization to the pool list.
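A minimal sketch of the invariant this establishes at every submission point; the assert is the one added by the util/thread-pool.c hunk below, the rest of the body is abbreviated:

    BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
                                       ThreadPoolFunc *func, void *arg,
                                       BlockCompletionFunc *cb, void *opaque)
    {
        /* Submitter and pool share one AioContext, so the request
         * list needs no additional locking. */
        assert(pool->ctx == qemu_get_current_aio_context());
        /* ... allocate and enqueue the ThreadPoolElement ... */
    }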
9
10
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
11
Message-Id: <20230203131731.851116-4-eesposit@redhat.com>
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Reviewed-by: Eric Blake <eblake@redhat.com>
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
---
15
---
5
block/qed-cluster.c | 94 ++++++++++++++++++-----------------------------------
16
include/block/thread-pool.h | 5 +++++
6
block/qed-table.c | 15 +++------
17
block/file-posix.c | 21 ++++++++++-----------
7
block/qed.h | 3 +-
18
block/file-win32.c | 2 +-
8
3 files changed, 36 insertions(+), 76 deletions(-)
19
block/qcow2-threads.c | 2 +-
9
20
util/thread-pool.c | 9 ++++-----
10
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
21
5 files changed, 21 insertions(+), 18 deletions(-)
11
index XXXXXXX..XXXXXXX 100644
22
12
--- a/block/qed-cluster.c
23
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
13
+++ b/block/qed-cluster.c
24
index XXXXXXX..XXXXXXX 100644
14
@@ -XXX,XX +XXX,XX @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
25
--- a/include/block/thread-pool.h
15
return i - index;
26
+++ b/include/block/thread-pool.h
16
}
27
@@ -XXX,XX +XXX,XX @@ typedef struct ThreadPool ThreadPool;
17
28
ThreadPool *thread_pool_new(struct AioContext *ctx);
18
-typedef struct {
29
void thread_pool_free(ThreadPool *pool);
19
- BDRVQEDState *s;
30
20
- uint64_t pos;
31
+/*
21
- size_t len;
32
+ * thread_pool_submit* API: submit I/O requests in the thread's
22
-
33
+ * current AioContext.
23
- QEDRequest *request;
34
+ */
24
-
35
BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
25
- /* User callback */
36
ThreadPoolFunc *func, void *arg,
26
- QEDFindClusterFunc *cb;
37
BlockCompletionFunc *cb, void *opaque);
27
- void *opaque;
38
int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
28
-} QEDFindClusterCB;
39
ThreadPoolFunc *func, void *arg);
29
-
40
void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
30
-static void qed_find_cluster_cb(void *opaque, int ret)
41
+
31
-{
42
void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
32
- QEDFindClusterCB *find_cluster_cb = opaque;
43
33
- BDRVQEDState *s = find_cluster_cb->s;
44
#endif
34
- QEDRequest *request = find_cluster_cb->request;
45
diff --git a/block/file-posix.c b/block/file-posix.c
35
- uint64_t offset = 0;
46
index XXXXXXX..XXXXXXX 100644
36
- size_t len = 0;
47
--- a/block/file-posix.c
37
- unsigned int index;
48
+++ b/block/file-posix.c
38
- unsigned int n;
39
-
40
- qed_acquire(s);
41
- if (ret) {
42
- goto out;
43
- }
44
-
45
- index = qed_l2_index(s, find_cluster_cb->pos);
46
- n = qed_bytes_to_clusters(s,
47
- qed_offset_into_cluster(s, find_cluster_cb->pos) +
48
- find_cluster_cb->len);
49
- n = qed_count_contiguous_clusters(s, request->l2_table->table,
50
- index, n, &offset);
51
-
52
- if (qed_offset_is_unalloc_cluster(offset)) {
53
- ret = QED_CLUSTER_L2;
54
- } else if (qed_offset_is_zero_cluster(offset)) {
55
- ret = QED_CLUSTER_ZERO;
56
- } else if (qed_check_cluster_offset(s, offset)) {
57
- ret = QED_CLUSTER_FOUND;
58
- } else {
59
- ret = -EINVAL;
60
- }
61
-
62
- len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
63
- qed_offset_into_cluster(s, find_cluster_cb->pos));
64
-
65
-out:
66
- find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
67
- qed_release(s);
68
- g_free(find_cluster_cb);
69
-}
70
-
71
/**
72
* Find the offset of a data cluster
73
*
74
@@ -XXX,XX +XXX,XX @@ out:
49
@@ -XXX,XX +XXX,XX @@ out:
75
void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
50
return result;
76
size_t len, QEDFindClusterFunc *cb, void *opaque)
51
}
52
53
-static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
54
- ThreadPoolFunc func, void *arg)
55
+static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg)
77
{
56
{
78
- QEDFindClusterCB *find_cluster_cb;
57
/* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
79
uint64_t l2_offset;
58
- ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
80
+ uint64_t offset = 0;
59
+ ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
81
+ unsigned int index;
60
return thread_pool_submit_co(pool, func, arg);
82
+ unsigned int n;
61
}
83
+ int ret;
62
84
63
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
85
/* Limit length to L2 boundary. Requests are broken up at the L2 boundary
64
};
86
* so that a request acts on one L2 table at a time.
65
87
@@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
66
assert(qiov->size == bytes);
88
return;
67
- return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb);
89
}
68
+ return raw_thread_pool_submit(handle_aiocb_rw, &acb);
90
69
}
91
- find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
70
92
- find_cluster_cb->s = s;
71
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
93
- find_cluster_cb->pos = pos;
72
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
94
- find_cluster_cb->len = len;
73
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
95
- find_cluster_cb->cb = cb;
74
}
96
- find_cluster_cb->opaque = opaque;
75
#endif
97
- find_cluster_cb->request = request;
76
- return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
98
+ ret = qed_read_l2_table(s, request, l2_offset);
77
+ return raw_thread_pool_submit(handle_aiocb_flush, &acb);
99
+ qed_acquire(s);
78
}
100
+ if (ret) {
79
101
+ goto out;
80
static void raw_aio_attach_aio_context(BlockDriverState *bs,
102
+ }
81
@@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
103
+
82
},
104
+ index = qed_l2_index(s, pos);
83
};
105
+ n = qed_bytes_to_clusters(s,
84
106
+ qed_offset_into_cluster(s, pos) + len);
85
- return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb);
107
+ n = qed_count_contiguous_clusters(s, request->l2_table->table,
86
+ return raw_thread_pool_submit(handle_aiocb_truncate, &acb);
108
+ index, n, &offset);
87
}
109
+
88
110
+ if (qed_offset_is_unalloc_cluster(offset)) {
89
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
111
+ ret = QED_CLUSTER_L2;
90
@@ -XXX,XX +XXX,XX @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
112
+ } else if (qed_offset_is_zero_cluster(offset)) {
91
acb.aio_type |= QEMU_AIO_BLKDEV;
113
+ ret = QED_CLUSTER_ZERO;
92
}
114
+ } else if (qed_check_cluster_offset(s, offset)) {
93
115
+ ret = QED_CLUSTER_FOUND;
94
- ret = raw_thread_pool_submit(bs, handle_aiocb_discard, &acb);
116
+ } else {
95
+ ret = raw_thread_pool_submit(handle_aiocb_discard, &acb);
117
+ ret = -EINVAL;
96
raw_account_discard(s, bytes, ret);
118
+ }
119
+
120
+ len = MIN(len,
121
+ n * s->header.cluster_size - qed_offset_into_cluster(s, pos));
122
123
- qed_read_l2_table(s, request, l2_offset,
124
- qed_find_cluster_cb, find_cluster_cb);
125
+out:
126
+ cb(opaque, ret, offset, len);
127
+ qed_release(s);
128
}
129
diff --git a/block/qed-table.c b/block/qed-table.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/block/qed-table.c
132
+++ b/block/qed-table.c
133
@@ -XXX,XX +XXX,XX @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
134
return ret;
97
return ret;
135
}
98
}
136
99
@@ -XXX,XX +XXX,XX @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
137
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
100
handler = handle_aiocb_write_zeroes;
138
- BlockCompletionFunc *cb, void *opaque)
101
}
139
+int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
102
103
- return raw_thread_pool_submit(bs, handler, &acb);
104
+ return raw_thread_pool_submit(handler, &acb);
105
}
106
107
static int coroutine_fn raw_co_pwrite_zeroes(
108
@@ -XXX,XX +XXX,XX @@ raw_co_copy_range_to(BlockDriverState *bs,
109
},
110
};
111
112
- return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb);
113
+ return raw_thread_pool_submit(handle_aiocb_copy_range, &acb);
114
}
115
116
BlockDriver bdrv_file = {
117
@@ -XXX,XX +XXX,XX @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
118
struct sg_io_hdr *io_hdr = buf;
119
if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
120
io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
121
- return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
122
+ return pr_manager_execute(s->pr_mgr, qemu_get_current_aio_context(),
123
s->fd, io_hdr);
124
}
125
}
126
@@ -XXX,XX +XXX,XX @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
127
},
128
};
129
130
- return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb);
131
+ return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
132
}
133
#endif /* linux */
134
135
diff --git a/block/file-win32.c b/block/file-win32.c
136
index XXXXXXX..XXXXXXX 100644
137
--- a/block/file-win32.c
138
+++ b/block/file-win32.c
139
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
140
acb->aio_offset = offset;
141
142
trace_file_paio_submit(acb, opaque, offset, count, type);
143
- pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
144
+ pool = aio_get_thread_pool(qemu_get_current_aio_context());
145
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
146
}
147
148
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/block/qcow2-threads.c
151
+++ b/block/qcow2-threads.c
152
@@ -XXX,XX +XXX,XX @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
140
{
153
{
141
int ret;
154
int ret;
142
155
BDRVQcow2State *s = bs->opaque;
143
@@ -XXX,XX +XXX,XX @@ void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
156
- ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
144
/* Check for cached L2 entry */
157
+ ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
145
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
158
146
if (request->l2_table) {
159
qemu_co_mutex_lock(&s->lock);
147
- cb(opaque, 0);
160
while (s->nb_threads >= QCOW2_MAX_THREADS) {
148
- return;
161
diff --git a/util/thread-pool.c b/util/thread-pool.c
149
+ return 0;
162
index XXXXXXX..XXXXXXX 100644
150
}
163
--- a/util/thread-pool.c
151
164
+++ b/util/thread-pool.c
152
request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
165
@@ -XXX,XX +XXX,XX @@ struct ThreadPoolElement {
153
@@ -XXX,XX +XXX,XX @@ void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
166
/* Access to this list is protected by lock. */
154
}
167
QTAILQ_ENTRY(ThreadPoolElement) reqs;
155
qed_release(s);
168
156
169
- /* Access to this list is protected by the global mutex. */
157
- cb(opaque, ret);
170
+ /* This list is only written by the thread pool's mother thread. */
158
+ return ret;
171
QLIST_ENTRY(ThreadPoolElement) all;
159
}
172
};
160
173
161
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
174
@@ -XXX,XX +XXX,XX @@ static void thread_pool_completion_bh(void *opaque)
175
ThreadPool *pool = opaque;
176
ThreadPoolElement *elem, *next;
177
178
- aio_context_acquire(pool->ctx);
179
restart:
180
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
181
if (elem->state != THREAD_DONE) {
182
@@ -XXX,XX +XXX,XX @@ restart:
183
*/
184
qemu_bh_schedule(pool->completion_bh);
185
186
- aio_context_release(pool->ctx);
187
elem->common.cb(elem->common.opaque, elem->ret);
188
- aio_context_acquire(pool->ctx);
189
190
/* We can safely cancel the completion_bh here regardless of someone
191
* else having scheduled it meanwhile because we reenter the
192
@@ -XXX,XX +XXX,XX @@ restart:
193
qemu_aio_unref(elem);
194
}
195
}
196
- aio_context_release(pool->ctx);
197
}
198
199
static void thread_pool_cancel(BlockAIOCB *acb)
200
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
162
{
201
{
163
- int ret = -EINPROGRESS;
202
ThreadPoolElement *req;
164
-
203
165
- qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
204
+ /* Assert that the thread submitting work is the same running the pool */
166
- BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
205
+ assert(pool->ctx == qemu_get_current_aio_context());
167
-
206
+
168
- return ret;
207
req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
169
+ return qed_read_l2_table(s, request, offset);
208
req->func = func;
170
}
209
req->arg = arg;
171
172
void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
173
diff --git a/block/qed.h b/block/qed.h
174
index XXXXXXX..XXXXXXX 100644
175
--- a/block/qed.h
176
+++ b/block/qed.h
177
@@ -XXX,XX +XXX,XX @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
178
unsigned int n);
179
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
180
uint64_t offset);
181
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
182
- BlockCompletionFunc *cb, void *opaque);
183
+int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
184
void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
185
unsigned int index, unsigned int n, bool flush,
186
BlockCompletionFunc *cb, void *opaque);
187
--
210
--
188
1.8.3.1
211
2.40.0
189
190
1
From: Alberto Garcia <berto@igalia.com>
1
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
2
2
3
There used to be throttle_timers_{detach,attach}_aio_context() calls
3
thread_pool_submit_aio() is always called on a pool taken from
4
in bdrv_set_aio_context(), but since 7ca7f0f6db1fedd28d490795d778cf239
4
qemu_get_current_aio_context(), and that is the only intended
5
they are now in blk_set_aio_context().
5
use: each pool runs only in the same thread that is submitting
6
work to it; it can't run anywhere else.
6
7
7
Signed-off-by: Alberto Garcia <berto@igalia.com>
8
Therefore simplify the thread_pool_submit* API and remove the
9
ThreadPool function parameter.
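The call-site change is mechanical; condensed from the hunks below:

    /* Before: every caller fetched the pool explicitly. */
    ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
    thread_pool_submit_aio(pool, func, arg, cb, opaque);

    /* After: the pool is derived from the current AioContext internally. */
    thread_pool_submit_aio(func, arg, cb, opaque);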
10
11
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
12
Message-Id: <20230203131731.851116-5-eesposit@redhat.com>
13
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
16
---
11
block/throttle-groups.c | 2 +-
17
include/block/thread-pool.h | 10 ++++------
12
1 file changed, 1 insertion(+), 1 deletion(-)
18
backends/tpm/tpm_backend.c | 4 +---
19
block/file-posix.c | 4 +---
20
block/file-win32.c | 4 +---
21
block/qcow2-threads.c | 3 +--
22
hw/9pfs/coth.c | 3 +--
23
hw/ppc/spapr_nvdimm.c | 6 ++----
24
hw/virtio/virtio-pmem.c | 3 +--
25
scsi/pr-manager.c | 3 +--
26
scsi/qemu-pr-helper.c | 3 +--
27
tests/unit/test-thread-pool.c | 12 +++++-------
28
util/thread-pool.c | 16 ++++++++--------
29
12 files changed, 27 insertions(+), 44 deletions(-)
13
30
14
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
31
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
15
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
16
--- a/block/throttle-groups.c
33
--- a/include/block/thread-pool.h
17
+++ b/block/throttle-groups.c
34
+++ b/include/block/thread-pool.h
35
@@ -XXX,XX +XXX,XX @@ void thread_pool_free(ThreadPool *pool);
36
* thread_pool_submit* API: submit I/O requests in the thread's
37
* current AioContext.
38
*/
39
-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
40
- ThreadPoolFunc *func, void *arg,
41
- BlockCompletionFunc *cb, void *opaque);
42
-int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
43
- ThreadPoolFunc *func, void *arg);
44
-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
45
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
46
+ BlockCompletionFunc *cb, void *opaque);
47
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
48
+void thread_pool_submit(ThreadPoolFunc *func, void *arg);
49
50
void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
51
52
diff --git a/backends/tpm/tpm_backend.c b/backends/tpm/tpm_backend.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/backends/tpm/tpm_backend.c
55
+++ b/backends/tpm/tpm_backend.c
56
@@ -XXX,XX +XXX,XX @@ bool tpm_backend_had_startup_error(TPMBackend *s)
57
58
void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd)
59
{
60
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
61
-
62
if (s->cmd != NULL) {
63
error_report("There is a TPM request pending");
64
return;
65
@@ -XXX,XX +XXX,XX @@ void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd)
66
67
s->cmd = cmd;
68
object_ref(OBJECT(s));
69
- thread_pool_submit_aio(pool, tpm_backend_worker_thread, s,
70
+ thread_pool_submit_aio(tpm_backend_worker_thread, s,
71
tpm_backend_request_completed, s);
72
}
73
74
diff --git a/block/file-posix.c b/block/file-posix.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/block/file-posix.c
77
+++ b/block/file-posix.c
78
@@ -XXX,XX +XXX,XX @@ out:
79
80
static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg)
81
{
82
- /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
83
- ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
84
- return thread_pool_submit_co(pool, func, arg);
85
+ return thread_pool_submit_co(func, arg);
86
}
87
88
/*
89
diff --git a/block/file-win32.c b/block/file-win32.c
90
index XXXXXXX..XXXXXXX 100644
91
--- a/block/file-win32.c
92
+++ b/block/file-win32.c
93
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
94
BlockCompletionFunc *cb, void *opaque, int type)
95
{
96
RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
97
- ThreadPool *pool;
98
99
acb->bs = bs;
100
acb->hfile = hfile;
101
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
102
acb->aio_offset = offset;
103
104
trace_file_paio_submit(acb, opaque, offset, count, type);
105
- pool = aio_get_thread_pool(qemu_get_current_aio_context());
106
- return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
107
+ return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
108
}
109
110
int qemu_ftruncate64(int fd, int64_t length)
111
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/block/qcow2-threads.c
114
+++ b/block/qcow2-threads.c
115
@@ -XXX,XX +XXX,XX @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
116
{
117
int ret;
118
BDRVQcow2State *s = bs->opaque;
119
- ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
120
121
qemu_co_mutex_lock(&s->lock);
122
while (s->nb_threads >= QCOW2_MAX_THREADS) {
123
@@ -XXX,XX +XXX,XX @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
124
s->nb_threads++;
125
qemu_co_mutex_unlock(&s->lock);
126
127
- ret = thread_pool_submit_co(pool, func, arg);
128
+ ret = thread_pool_submit_co(func, arg);
129
130
qemu_co_mutex_lock(&s->lock);
131
s->nb_threads--;
132
diff --git a/hw/9pfs/coth.c b/hw/9pfs/coth.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/hw/9pfs/coth.c
135
+++ b/hw/9pfs/coth.c
136
@@ -XXX,XX +XXX,XX @@ static int coroutine_enter_func(void *arg)
137
void co_run_in_worker_bh(void *opaque)
138
{
139
Coroutine *co = opaque;
140
- thread_pool_submit_aio(aio_get_thread_pool(qemu_get_aio_context()),
141
- coroutine_enter_func, co, coroutine_enter_cb, co);
142
+ thread_pool_submit_aio(coroutine_enter_func, co, coroutine_enter_cb, co);
143
}
144
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
145
index XXXXXXX..XXXXXXX 100644
146
--- a/hw/ppc/spapr_nvdimm.c
147
+++ b/hw/ppc/spapr_nvdimm.c
148
@@ -XXX,XX +XXX,XX @@ static int spapr_nvdimm_flush_post_load(void *opaque, int version_id)
149
{
150
SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque;
151
SpaprNVDIMMDeviceFlushState *state;
152
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
153
HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem);
154
bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
155
bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm),
156
@@ -XXX,XX +XXX,XX @@ static int spapr_nvdimm_flush_post_load(void *opaque, int version_id)
157
}
158
159
QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
160
- thread_pool_submit_aio(pool, flush_worker_cb, state,
161
+ thread_pool_submit_aio(flush_worker_cb, state,
162
spapr_nvdimm_flush_completion_cb, state);
163
}
164
165
@@ -XXX,XX +XXX,XX @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
166
PCDIMMDevice *dimm;
167
HostMemoryBackend *backend = NULL;
168
SpaprNVDIMMDeviceFlushState *state;
169
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
170
int fd;
171
172
if (!drc || !drc->dev ||
173
@@ -XXX,XX +XXX,XX @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
174
175
state->drcidx = drc_index;
176
177
- thread_pool_submit_aio(pool, flush_worker_cb, state,
178
+ thread_pool_submit_aio(flush_worker_cb, state,
179
spapr_nvdimm_flush_completion_cb, state);
180
181
continue_token = state->continue_token;
182
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/hw/virtio/virtio-pmem.c
185
+++ b/hw/virtio/virtio-pmem.c
186
@@ -XXX,XX +XXX,XX @@ static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq)
187
VirtIODeviceRequest *req_data;
188
VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
189
HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev);
190
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
191
192
trace_virtio_pmem_flush_request();
193
req_data = virtqueue_pop(vq, sizeof(VirtIODeviceRequest));
194
@@ -XXX,XX +XXX,XX @@ static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq)
195
req_data->fd = memory_region_get_fd(&backend->mr);
196
req_data->pmem = pmem;
197
req_data->vdev = vdev;
198
- thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data);
199
+ thread_pool_submit_aio(worker_cb, req_data, done_cb, req_data);
200
}
201
202
static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config)
203
diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/scsi/pr-manager.c
206
+++ b/scsi/pr-manager.c
207
@@ -XXX,XX +XXX,XX @@ static int pr_manager_worker(void *opaque)
208
int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
209
struct sg_io_hdr *hdr)
210
{
211
- ThreadPool *pool = aio_get_thread_pool(ctx);
212
PRManagerData data = {
213
.pr_mgr = pr_mgr,
214
.fd = fd,
215
@@ -XXX,XX +XXX,XX @@ int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
216
217
/* The matching object_unref is in pr_manager_worker. */
218
object_ref(OBJECT(pr_mgr));
219
- return thread_pool_submit_co(pool, pr_manager_worker, &data);
220
+ return thread_pool_submit_co(pr_manager_worker, &data);
221
}
222
223
bool pr_manager_is_connected(PRManager *pr_mgr)
224
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
225
index XXXXXXX..XXXXXXX 100644
226
--- a/scsi/qemu-pr-helper.c
227
+++ b/scsi/qemu-pr-helper.c
228
@@ -XXX,XX +XXX,XX @@ static int do_sgio_worker(void *opaque)
229
static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
230
uint8_t *buf, int *sz, int dir)
231
{
232
- ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
233
int r;
234
235
PRHelperSGIOData data = {
236
@@ -XXX,XX +XXX,XX @@ static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
237
.dir = dir,
238
};
239
240
- r = thread_pool_submit_co(pool, do_sgio_worker, &data);
241
+ r = thread_pool_submit_co(do_sgio_worker, &data);
242
*sz = data.sz;
243
return r;
244
}
245
diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c
246
index XXXXXXX..XXXXXXX 100644
247
--- a/tests/unit/test-thread-pool.c
248
+++ b/tests/unit/test-thread-pool.c
18
@@ -XXX,XX +XXX,XX @@
249
@@ -XXX,XX +XXX,XX @@
19
* Again, all this is handled internally and is mostly transparent to
250
#include "qemu/main-loop.h"
20
* the outside. The 'throttle_timers' field however has an additional
251
21
* constraint because it may be temporarily invalid (see for example
252
static AioContext *ctx;
22
- * bdrv_set_aio_context()). Therefore in this file a thread will
253
-static ThreadPool *pool;
23
+ * blk_set_aio_context()). Therefore in this file a thread will
254
static int active;
24
* access some other BlockBackend's timers only after verifying that
255
25
* that BlockBackend has throttled requests in the queue.
256
typedef struct {
26
*/
257
@@ -XXX,XX +XXX,XX @@ static void done_cb(void *opaque, int ret)
258
static void test_submit(void)
259
{
260
WorkerTestData data = { .n = 0 };
261
- thread_pool_submit(pool, worker_cb, &data);
262
+ thread_pool_submit(worker_cb, &data);
263
while (data.n == 0) {
264
aio_poll(ctx, true);
265
}
266
@@ -XXX,XX +XXX,XX @@ static void test_submit(void)
267
static void test_submit_aio(void)
268
{
269
WorkerTestData data = { .n = 0, .ret = -EINPROGRESS };
270
- data.aiocb = thread_pool_submit_aio(pool, worker_cb, &data,
271
+ data.aiocb = thread_pool_submit_aio(worker_cb, &data,
272
done_cb, &data);
273
274
/* The callbacks are not called until after the first wait. */
275
@@ -XXX,XX +XXX,XX @@ static void co_test_cb(void *opaque)
276
active = 1;
277
data->n = 0;
278
data->ret = -EINPROGRESS;
279
- thread_pool_submit_co(pool, worker_cb, data);
280
+ thread_pool_submit_co(worker_cb, data);
281
282
/* The test continues in test_submit_co, after qemu_coroutine_enter... */
283
284
@@ -XXX,XX +XXX,XX @@ static void test_submit_many(void)
285
for (i = 0; i < 100; i++) {
286
data[i].n = 0;
287
data[i].ret = -EINPROGRESS;
288
- thread_pool_submit_aio(pool, worker_cb, &data[i], done_cb, &data[i]);
289
+ thread_pool_submit_aio(worker_cb, &data[i], done_cb, &data[i]);
290
}
291
292
active = 100;
293
@@ -XXX,XX +XXX,XX @@ static void do_test_cancel(bool sync)
294
for (i = 0; i < 100; i++) {
295
data[i].n = 0;
296
data[i].ret = -EINPROGRESS;
297
- data[i].aiocb = thread_pool_submit_aio(pool, long_cb, &data[i],
298
+ data[i].aiocb = thread_pool_submit_aio(long_cb, &data[i],
299
done_cb, &data[i]);
300
}
301
302
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
303
{
304
qemu_init_main_loop(&error_abort);
305
ctx = qemu_get_current_aio_context();
306
- pool = aio_get_thread_pool(ctx);
307
308
g_test_init(&argc, &argv, NULL);
309
g_test_add_func("/thread-pool/submit", test_submit);
310
diff --git a/util/thread-pool.c b/util/thread-pool.c
311
index XXXXXXX..XXXXXXX 100644
312
--- a/util/thread-pool.c
313
+++ b/util/thread-pool.c
314
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo thread_pool_aiocb_info = {
315
.get_aio_context = thread_pool_get_aio_context,
316
};
317
318
-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
319
- ThreadPoolFunc *func, void *arg,
320
- BlockCompletionFunc *cb, void *opaque)
321
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
322
+ BlockCompletionFunc *cb, void *opaque)
323
{
324
ThreadPoolElement *req;
325
+ AioContext *ctx = qemu_get_current_aio_context();
326
+ ThreadPool *pool = aio_get_thread_pool(ctx);
327
328
/* Assert that the thread submitting work is the same running the pool */
329
assert(pool->ctx == qemu_get_current_aio_context());
330
@@ -XXX,XX +XXX,XX @@ static void thread_pool_co_cb(void *opaque, int ret)
331
aio_co_wake(co->co);
332
}
333
334
-int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
335
- void *arg)
336
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
337
{
338
ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
339
assert(qemu_in_coroutine());
340
- thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc);
341
+ thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
342
qemu_coroutine_yield();
343
return tpc.ret;
344
}
345
346
-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
347
+void thread_pool_submit(ThreadPoolFunc *func, void *arg)
348
{
349
- thread_pool_submit_aio(pool, func, arg, NULL, NULL);
350
+ thread_pool_submit_aio(func, arg, NULL, NULL);
351
}
352
353
void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
27
--
354
--
28
1.8.3.1
355
2.40.0
29
30
1
Now that we stay in coroutine context for the whole request when doing
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
reads or writes, we can add coroutine_fn annotations to many functions
2
3
that can do I/O or yield directly.
3
Functions that can do I/O are prime candidates for being coroutine_fns. Make the
4
4
change for those that are themselves called only from coroutine_fns.
5
6
In addition, coroutine_fns should do I/O using bdrv_co_*() functions, for
7
which it is required to hold the BlockDriverState graph lock. So also annotate
8
functions on the I/O path with TSA attributes, making it possible to
9
switch them to use bdrv_co_*() functions.
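A representative conversion from the vvfat hunks below: a helper that is only reached from coroutine context and only reads the graph gains both markers, which in turn lets it use the coroutine I/O helpers (body abbreviated):

    static int coroutine_fn GRAPH_RDLOCK
    vvfat_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors)
    {
        /* ... can now call bdrv_co_pread() instead of bdrv_pread() ... */
    }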
10
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Message-Id: <20230309084456.304669-2-pbonzini@redhat.com>
13
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
15
---
8
block/qed-cluster.c | 5 +++--
16
block/vvfat.c | 58 ++++++++++++++++++++++++++-------------------------
9
block/qed.c | 44 ++++++++++++++++++++++++--------------------
17
1 file changed, 30 insertions(+), 28 deletions(-)
10
block/qed.h | 5 +++--
18
11
3 files changed, 30 insertions(+), 24 deletions(-)
19
diff --git a/block/vvfat.c b/block/vvfat.c
12
13
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
14
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
15
--- a/block/qed-cluster.c
21
--- a/block/vvfat.c
16
+++ b/block/qed-cluster.c
22
+++ b/block/vvfat.c
17
@@ -XXX,XX +XXX,XX @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
23
@@ -XXX,XX +XXX,XX @@ static BDRVVVFATState *vvv = NULL;
18
* On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
24
#endif
19
* table offset, respectively. len is number of contiguous unallocated bytes.
25
20
*/
26
static int enable_write_target(BlockDriverState *bs, Error **errp);
21
-int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
27
-static int is_consistent(BDRVVVFATState *s);
22
- size_t *len, uint64_t *img_offset)
28
+static int coroutine_fn is_consistent(BDRVVVFATState *s);
23
+int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
29
24
+ uint64_t pos, size_t *len,
30
static QemuOptsList runtime_opts = {
25
+ uint64_t *img_offset)
31
.name = "vvfat",
26
{
32
@@ -XXX,XX +XXX,XX @@ static void print_mapping(const mapping_t* mapping)
27
uint64_t l2_offset;
33
}
28
uint64_t offset = 0;
34
#endif
29
diff --git a/block/qed.c b/block/qed.c
35
30
index XXXXXXX..XXXXXXX 100644
36
-static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
31
--- a/block/qed.c
37
- uint8_t *buf, int nb_sectors)
32
+++ b/block/qed.c
38
+static int coroutine_fn GRAPH_RDLOCK
33
@@ -XXX,XX +XXX,XX @@ int qed_write_header_sync(BDRVQEDState *s)
39
+vvfat_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors)
34
* This function only updates known header fields in-place and does not affect
40
{
35
* extra data after the QED header.
41
BDRVVVFATState *s = bs->opaque;
36
*/
37
-static int qed_write_header(BDRVQEDState *s)
38
+static int coroutine_fn qed_write_header(BDRVQEDState *s)
39
{
40
/* We must write full sectors for O_DIRECT but cannot necessarily generate
41
* the data following the header if an unrecognized compat feature is
42
@@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
43
qemu_co_enter_next(&s->allocating_write_reqs);
44
}
45
46
-static void qed_need_check_timer_entry(void *opaque)
47
+static void coroutine_fn qed_need_check_timer_entry(void *opaque)
48
{
49
BDRVQEDState *s = opaque;
50
int ret;
51
@@ -XXX,XX +XXX,XX @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
52
* This function reads qiov->size bytes starting at pos from the backing file.
53
* If there is no backing file then zeroes are read.
54
*/
55
-static int qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
56
- QEMUIOVector *qiov,
57
- QEMUIOVector **backing_qiov)
58
+static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
59
+ QEMUIOVector *qiov,
60
+ QEMUIOVector **backing_qiov)
61
{
62
uint64_t backing_length = 0;
63
size_t size;
64
@@ -XXX,XX +XXX,XX @@ static int qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
65
* @len: Number of bytes
66
* @offset: Byte offset in image file
67
*/
68
-static int qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
69
- uint64_t len, uint64_t offset)
70
+static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
71
+ uint64_t pos, uint64_t len,
72
+ uint64_t offset)
73
{
74
QEMUIOVector qiov;
75
QEMUIOVector *backing_qiov = NULL;
76
@@ -XXX,XX +XXX,XX @@ out:
77
* The cluster offset may be an allocated byte offset in the image file, the
78
* zero cluster marker, or the unallocated cluster marker.
79
*/
80
-static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
81
- unsigned int n, uint64_t cluster)
82
+static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table,
83
+ int index, unsigned int n,
84
+ uint64_t cluster)
85
{
86
int i;
42
int i;
87
for (i = index; i < index + n; i++) {
43
@@ -XXX,XX +XXX,XX @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
88
@@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
44
DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64
45
" allocated\n", sector_num,
46
n >> BDRV_SECTOR_BITS));
47
- if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
48
- buf + i * 0x200, 0) < 0) {
49
+ if (bdrv_co_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
50
+ buf + i * 0x200, 0) < 0) {
51
return -1;
52
}
53
i += (n >> BDRV_SECTOR_BITS) - 1;
54
@@ -XXX,XX +XXX,XX @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
55
return 0;
56
}
57
58
-static int coroutine_fn
59
+static int coroutine_fn GRAPH_RDLOCK
60
vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
61
QEMUIOVector *qiov, BdrvRequestFlags flags)
62
{
63
@@ -XXX,XX +XXX,XX @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s,
89
}
64
}
90
}
65
}
91
66
92
-static void qed_aio_complete(QEDAIOCB *acb)
67
-static inline bool cluster_was_modified(BDRVVVFATState *s,
93
+static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
68
- uint32_t cluster_num)
94
{
69
+static inline bool coroutine_fn GRAPH_RDLOCK
95
BDRVQEDState *s = acb_to_s(acb);
70
+cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num)
96
71
{
97
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb)
72
int was_modified = 0;
98
/**
73
int i;
99
* Update L1 table with new L2 table offset and write it out
74
@@ -XXX,XX +XXX,XX @@ typedef enum {
100
*/
75
* Further, the files/directories handled by this function are
101
-static int qed_aio_write_l1_update(QEDAIOCB *acb)
76
* assumed to be *not* deleted (and *only* those).
102
+static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
77
*/
103
{
78
-static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
104
BDRVQEDState *s = acb_to_s(acb);
79
- direntry_t* direntry, const char* path)
105
CachedL2Table *l2_table = acb->request.l2_table;
80
+static uint32_t coroutine_fn GRAPH_RDLOCK
106
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_l1_update(QEDAIOCB *acb)
81
+get_cluster_count_for_direntry(BDRVVVFATState* s, direntry_t* direntry, const char* path)
107
/**
82
{
108
* Update L2 table with new cluster offsets and write them out
83
/*
109
*/
84
* This is a little bit tricky:
110
-static int qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
85
@@ -XXX,XX +XXX,XX @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
111
+static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
86
if (res) {
112
{
87
return -1;
113
BDRVQEDState *s = acb_to_s(acb);
88
}
114
bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
89
- res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
115
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
90
- BDRV_SECTOR_SIZE, s->cluster_buffer,
116
/**
91
- 0);
117
* Write data to the image file
92
+ res = bdrv_co_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
118
*/
93
+ BDRV_SECTOR_SIZE, s->cluster_buffer,
119
-static int qed_aio_write_main(QEDAIOCB *acb)
94
+ 0);
120
+static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
95
if (res < 0) {
121
{
96
return -2;
122
BDRVQEDState *s = acb_to_s(acb);
97
}
123
uint64_t offset = acb->cur_cluster +
98
@@ -XXX,XX +XXX,XX @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
124
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_main(QEDAIOCB *acb)
99
* It returns 0 upon inconsistency or error, and the number of clusters
125
/**
100
* used by the directory, its subdirectories and their files.
126
* Populate untouched regions of new data cluster
101
*/
127
*/
102
-static int check_directory_consistency(BDRVVVFATState *s,
128
-static int qed_aio_write_cow(QEDAIOCB *acb)
103
- int cluster_num, const char* path)
129
+static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
104
+static int coroutine_fn GRAPH_RDLOCK
130
{
105
+check_directory_consistency(BDRVVVFATState *s, int cluster_num, const char* path)
131
BDRVQEDState *s = acb_to_s(acb);
106
{
132
uint64_t start, len, offset;
107
int ret = 0;
133
@@ -XXX,XX +XXX,XX @@ static bool qed_should_set_need_check(BDRVQEDState *s)
108
unsigned char* cluster = g_malloc(s->cluster_size);
134
*
109
@@ -XXX,XX +XXX,XX @@ DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i))
135
* This path is taken when writing to previously unallocated clusters.
110
}
136
*/
111
137
-static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
112
/* returns 1 on success */
138
+static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
113
-static int is_consistent(BDRVVVFATState* s)
139
{
114
+static int coroutine_fn GRAPH_RDLOCK
140
BDRVQEDState *s = acb_to_s(acb);
115
+is_consistent(BDRVVVFATState* s)
141
int ret;
116
{
142
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
117
int i, check;
143
*
118
int used_clusters_count = 0;
144
* This path is taken when writing to already allocated clusters.
119
@@ -XXX,XX +XXX,XX @@ static int commit_mappings(BDRVVVFATState* s,
145
*/
120
return 0;
146
-static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
121
}
147
+static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
122
148
+ size_t len)
123
-static int commit_direntries(BDRVVVFATState* s,
149
{
124
- int dir_index, int parent_mapping_index)
150
/* Allocate buffer for zero writes */
125
+static int coroutine_fn GRAPH_RDLOCK
151
if (acb->flags & QED_AIOCB_ZERO) {
126
+commit_direntries(BDRVVVFATState* s, int dir_index, int parent_mapping_index)
152
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
127
{
153
* @offset: Cluster offset in bytes
128
direntry_t* direntry = array_get(&(s->directory), dir_index);
154
* @len: Length in bytes
129
uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
155
*/
130
@@ -XXX,XX +XXX,XX @@ static int commit_direntries(BDRVVVFATState* s,
156
-static int qed_aio_write_data(void *opaque, int ret,
131
157
- uint64_t offset, size_t len)
132
/* commit one file (adjust contents, adjust mapping),
158
+static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
133
return first_mapping_index */
159
+ uint64_t offset, size_t len)
134
-static int commit_one_file(BDRVVVFATState* s,
160
{
135
- int dir_index, uint32_t offset)
161
QEDAIOCB *acb = opaque;
136
+static int coroutine_fn GRAPH_RDLOCK
162
137
+commit_one_file(BDRVVVFATState* s, int dir_index, uint32_t offset)
163
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_data(void *opaque, int ret,
138
{
164
* @offset: Cluster offset in bytes
139
direntry_t* direntry = array_get(&(s->directory), dir_index);
165
* @len: Length in bytes
140
uint32_t c = begin_of_direntry(direntry);
166
*/
141
@@ -XXX,XX +XXX,XX @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
167
-static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
142
/*
168
+static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
143
* TODO: make sure that the short name is not matching *another* file
169
+ uint64_t offset, size_t len)
144
*/
170
{
145
-static int handle_commits(BDRVVVFATState* s)
171
QEDAIOCB *acb = opaque;
146
+static int coroutine_fn GRAPH_RDLOCK handle_commits(BDRVVVFATState* s)
172
BDRVQEDState *s = acb_to_s(acb);
147
{
173
@@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
148
int i, fail = 0;
174
/**
149
175
* Begin next I/O or complete the request
150
@@ -XXX,XX +XXX,XX @@ static int handle_deletes(BDRVVVFATState* s)
176
*/
151
* - recurse direntries from root (using bs->bdrv_pread)
177
-static int qed_aio_next_io(QEDAIOCB *acb)
152
* - delete files corresponding to mappings marked as deleted
178
+static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
153
*/
179
{
154
-static int do_commit(BDRVVVFATState* s)
180
BDRVQEDState *s = acb_to_s(acb);
155
+static int coroutine_fn GRAPH_RDLOCK do_commit(BDRVVVFATState* s)
181
uint64_t offset;
156
{
182
diff --git a/block/qed.h b/block/qed.h
157
int ret = 0;
183
index XXXXXXX..XXXXXXX 100644
158
184
--- a/block/qed.h
159
@@ -XXX,XX +XXX,XX @@ DLOG(checkpoint());
185
+++ b/block/qed.h
160
return 0;
186
@@ -XXX,XX +XXX,XX @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
161
}
187
/**
162
188
* Cluster functions
163
-static int try_commit(BDRVVVFATState* s)
189
*/
164
+static int coroutine_fn GRAPH_RDLOCK try_commit(BDRVVVFATState* s)
190
-int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
165
{
191
- size_t *len, uint64_t *img_offset);
166
vvfat_close_current_file(s);
192
+int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
167
DLOG(checkpoint());
193
+ uint64_t pos, size_t *len,
168
@@ -XXX,XX +XXX,XX @@ DLOG(checkpoint());
194
+ uint64_t *img_offset);
169
return do_commit(s);
195
170
}
196
/**
171
197
* Consistency check
172
-static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
173
- const uint8_t *buf, int nb_sectors)
174
+static int coroutine_fn GRAPH_RDLOCK
175
+vvfat_write(BlockDriverState *bs, int64_t sector_num,
176
+ const uint8_t *buf, int nb_sectors)
177
{
178
BDRVVVFATState *s = bs->opaque;
179
int i, ret;
180
@@ -XXX,XX +XXX,XX @@ DLOG(checkpoint());
181
* Use qcow backend. Commit later.
182
*/
183
DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors));
184
- ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE,
185
- nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
186
+ ret = bdrv_co_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE,
187
+ nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
188
if (ret < 0) {
189
fprintf(stderr, "Error writing to qcow backend\n");
190
return ret;
191
@@ -XXX,XX +XXX,XX @@ DLOG(checkpoint());
192
return 0;
193
}
194
195
-static int coroutine_fn
196
+static int coroutine_fn GRAPH_RDLOCK
197
vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
198
QEMUIOVector *qiov, BdrvRequestFlags flags)
199
{
198
--
200
--
199
1.8.3.1
201
2.40.0
200
201
1
From: Max Reitz <mreitz@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
The bs->exact_filename field may not be sufficient to store the full
3
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4
blkdebug node filename. In this case, we should not generate a filename
4
Message-Id: <20230309084456.304669-3-pbonzini@redhat.com>
5
at all instead of an unusable one.
5
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
6
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Cc: qemu-stable@nongnu.org
8
Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
Message-id: 20170613172006.19685-2-mreitz@redhat.com
11
Reviewed-by: Alberto Garcia <berto@igalia.com>
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
---
7
---
15
block/blkdebug.c | 10 +++++++---
8
block/blkdebug.c | 4 ++--
16
1 file changed, 7 insertions(+), 3 deletions(-)
9
1 file changed, 2 insertions(+), 2 deletions(-)
17
10
18
diff --git a/block/blkdebug.c b/block/blkdebug.c
11
diff --git a/block/blkdebug.c b/block/blkdebug.c
19
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
20
--- a/block/blkdebug.c
13
--- a/block/blkdebug.c
21
+++ b/block/blkdebug.c
14
+++ b/block/blkdebug.c
22
@@ -XXX,XX +XXX,XX @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
15
@@ -XXX,XX +XXX,XX @@ out:
23
}
16
return ret;
24
17
}
25
if (!force_json && bs->file->bs->exact_filename[0]) {
18
26
- snprintf(bs->exact_filename, sizeof(bs->exact_filename),
19
-static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
27
- "blkdebug:%s:%s", s->config_file ?: "",
20
- BlkdebugIOType iotype)
28
- bs->file->bs->exact_filename);
21
+static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset,
29
+ int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
22
+ uint64_t bytes, BlkdebugIOType iotype)
30
+ "blkdebug:%s:%s", s->config_file ?: "",
23
{
31
+ bs->file->bs->exact_filename);
24
BDRVBlkdebugState *s = bs->opaque;
32
+ if (ret >= sizeof(bs->exact_filename)) {
25
BlkdebugRule *rule = NULL;
33
+ /* An overflow makes the filename unusable, so do not report any */
34
+ bs->exact_filename[0] = 0;
35
+ }
36
}
37
38
opts = qdict_new();
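The exact_filename fix above comes down to checking snprintf()'s return value for truncation. A standalone sketch of the idiom, with illustrative buffer and input names that are not from the patch:

    const char *config_file = "blkdebug.cfg";   /* illustrative only */
    const char *inner_name = "test.qcow2";      /* illustrative only */
    char filename[128];
    int ret = snprintf(filename, sizeof(filename), "blkdebug:%s:%s",
                       config_file, inner_name);
    if (ret >= sizeof(filename)) {
        /* A truncated filename would be unusable, so report none at all. */
        filename[0] = 0;
    }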
39
--
26
--
40
1.8.3.1
27
2.40.0
41
42
1
From: Alberto Garcia <berto@igalia.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Qcow2COWRegion has two attributes:
3
mirror_flush calls the mixed function blk_flush(), but it is only called
4
from mirror_run; so call the coroutine version and make mirror_flush
5
a coroutine_fn too.
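The resulting change is minimal, as the hunk below shows; in sketch form:

    static int coroutine_fn mirror_flush(MirrorBlockJob *s)
    {
        int ret = blk_co_flush(s->target);    /* was: blk_flush() */
        /* ... error handling unchanged ... */
    }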
4
6
5
- The offset of the COW region from the start of the first cluster
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
6
touched by the I/O request. Since it's always going to be positive
8
Message-Id: <20230309084456.304669-4-pbonzini@redhat.com>
7
and the maximum request size is at most INT_MAX, we can use a
8
regular unsigned int to store this offset.
9
10
- The size of the COW region in bytes. This is guaranteed to be >= 0,
11
so we should use an unsigned type instead.
12
13
On x86_64 this reduces the size of Qcow2COWRegion from 16 to 8 bytes.
14
It will also help keep some assertions simpler now that we know that
15
there are no negative numbers.
16
17
The prototype of do_perform_cow() is also updated to reflect these
18
changes.
19
20
Signed-off-by: Alberto Garcia <berto@igalia.com>
21
Reviewed-by: Eric Blake <eblake@redhat.com>
22
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
23
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
24
---
11
---
25
block/qcow2-cluster.c | 4 ++--
12
block/mirror.c | 4 ++--
26
block/qcow2.h | 4 ++--
13
1 file changed, 2 insertions(+), 2 deletions(-)
27
2 files changed, 4 insertions(+), 4 deletions(-)
28
14
29
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
15
diff --git a/block/mirror.c b/block/mirror.c
30
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
31
--- a/block/qcow2-cluster.c
17
--- a/block/mirror.c
32
+++ b/block/qcow2-cluster.c
18
+++ b/block/mirror.c
33
@@ -XXX,XX +XXX,XX @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
19
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
34
static int coroutine_fn do_perform_cow(BlockDriverState *bs,
20
/* Called when going out of the streaming phase to flush the bulk of the
35
uint64_t src_cluster_offset,
21
* data to the medium, or just before completing.
36
uint64_t cluster_offset,
22
*/
37
- int offset_in_cluster,
23
-static int mirror_flush(MirrorBlockJob *s)
38
- int bytes)
24
+static int coroutine_fn mirror_flush(MirrorBlockJob *s)
39
+ unsigned offset_in_cluster,
40
+ unsigned bytes)
41
{
25
{
42
BDRVQcow2State *s = bs->opaque;
26
- int ret = blk_flush(s->target);
43
QEMUIOVector qiov;
27
+ int ret = blk_co_flush(s->target);
44
diff --git a/block/qcow2.h b/block/qcow2.h
28
if (ret < 0) {
45
index XXXXXXX..XXXXXXX 100644
29
if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
46
--- a/block/qcow2.h
30
s->ret = ret;
47
+++ b/block/qcow2.h
48
@@ -XXX,XX +XXX,XX @@ typedef struct Qcow2COWRegion {
49
* Offset of the COW region in bytes from the start of the first cluster
50
* touched by the request.
51
*/
52
- uint64_t offset;
53
+ unsigned offset;
54
55
/** Number of bytes to copy */
56
- int nb_bytes;
57
+ unsigned nb_bytes;
58
} Qcow2COWRegion;
59
60
/**
61
--
31
--
62
1.8.3.1
32
2.40.0
63
64
1
All callers pass ret = 0, so we can just remove it.
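Both series apply the same rule in this pair of patches: code that already runs as a coroutine_fn should call the coroutine variants directly instead of the mixed wrappers. Condensed from the nbd/server.c hunks below:

    /* nbd_do_cmd_read() is a coroutine_fn, so use blk_co_pread(). */
    ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);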
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
---
6
---
6
block/qed.c | 17 ++++++-----------
7
nbd/server.c | 48 ++++++++++++++++++++++++------------------------
7
1 file changed, 6 insertions(+), 11 deletions(-)
8
1 file changed, 24 insertions(+), 24 deletions(-)
8
9
9
diff --git a/block/qed.c b/block/qed.c
10
diff --git a/nbd/server.c b/nbd/server.c
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/block/qed.c
12
--- a/nbd/server.c
12
+++ b/block/qed.c
13
+++ b/nbd/server.c
13
@@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
14
@@ -XXX,XX +XXX,XX @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
14
return l2_table;
15
return 1;
15
}
16
}
16
17
17
-static void qed_aio_next_io(QEDAIOCB *acb, int ret);
18
-static int nbd_receive_request(NBDClient *client, NBDRequest *request,
18
+static void qed_aio_next_io(QEDAIOCB *acb);
19
- Error **errp)
19
20
+static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
20
static void qed_aio_start_io(QEDAIOCB *acb)
21
+ Error **errp)
21
{
22
{
22
- qed_aio_next_io(acb, 0);
23
uint8_t buf[NBD_REQUEST_SIZE];
23
+ qed_aio_next_io(acb);
24
uint32_t magic;
25
@@ -XXX,XX +XXX,XX @@ static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
26
stq_be_p(&reply->handle, handle);
24
}
27
}
25
28
26
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
29
-static int nbd_co_send_simple_reply(NBDClient *client,
27
@@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
30
- uint64_t handle,
28
/**
31
- uint32_t error,
29
* Begin next I/O or complete the request
32
- void *data,
33
- size_t len,
34
- Error **errp)
35
+static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
36
+ uint64_t handle,
37
+ uint32_t error,
38
+ void *data,
39
+ size_t len,
40
+ Error **errp)
41
{
42
NBDSimpleReply reply;
43
int nbd_err = system_errno_to_nbd_errno(error);
44
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
45
stl_be_p(&chunk.length, pnum);
46
ret = nbd_co_send_iov(client, iov, 1, errp);
47
} else {
48
- ret = blk_pread(exp->common.blk, offset + progress, pnum,
49
- data + progress, 0);
50
+ ret = blk_co_pread(exp->common.blk, offset + progress, pnum,
51
+ data + progress, 0);
52
if (ret < 0) {
53
error_setg_errno(errp, -ret, "reading from file failed");
54
break;
55
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blockalloc_to_extents(BlockBackend *blk,
56
 * @ea is converted to BE by the function
 * @last controls whether NBD_REPLY_FLAG_DONE is sent.
 */
-static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
-                               NBDExtentArray *ea,
-                               bool last, uint32_t context_id, Error **errp)
+static int coroutine_fn
+nbd_co_send_extents(NBDClient *client, uint64_t handle, NBDExtentArray *ea,
+                    bool last, uint32_t context_id, Error **errp)
 {
     NBDStructuredMeta chunk;
     struct iovec iov[] = {
@@ -XXX,XX +XXX,XX @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
     bdrv_dirty_bitmap_unlock(bitmap);
 }

-static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
-                              BdrvDirtyBitmap *bitmap, uint64_t offset,
-                              uint32_t length, bool dont_fragment, bool last,
-                              uint32_t context_id, Error **errp)
+static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
+                                           BdrvDirtyBitmap *bitmap, uint64_t offset,
+                                           uint32_t length, bool dont_fragment, bool last,
+                                           uint32_t context_id, Error **errp)
 {
     unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
     g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
  * to the client (although the caller may still need to disconnect after
  * reporting the error).
  */
-static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
-                                  Error **errp)
+static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
+                                               Error **errp)
 {
     NBDClient *client = req->client;
     int valid_flags;
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
                           data, request->len, errp);
     }

-    ret = blk_pread(exp->common.blk, request->from, request->len, data, 0);
+    ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);
     if (ret < 0) {
         return nbd_send_generic_reply(client, request->handle, ret,
                                       "reading from file failed", errp);
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
         if (request->flags & NBD_CMD_FLAG_FUA) {
             flags |= BDRV_REQ_FUA;
         }
-        ret = blk_pwrite(exp->common.blk, request->from, request->len, data,
-                         flags);
+        ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data,
+                            flags);
         return nbd_send_generic_reply(client, request->handle, ret,
                                       "writing to file failed", errp);

@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
         if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
             flags |= BDRV_REQ_NO_FALLBACK;
         }
-        ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len,
-                                flags);
+        ret = blk_co_pwrite_zeroes(exp->common.blk, request->from, request->len,
+                                   flags);
         return nbd_send_generic_reply(client, request->handle, ret,
                                       "writing to file failed", errp);
--
2.40.0

 */
-static void qed_aio_next_io(QEDAIOCB *acb, int ret)
+static void qed_aio_next_io(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset;
     size_t len;
+    int ret;

-    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
+    trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size);

     if (acb->backing_qiov) {
         qemu_iovec_destroy(acb->backing_qiov);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret)
         acb->backing_qiov = NULL;
     }

-    /* Handle I/O error */
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
     acb->qiov_offset += acb->cur_qiov.size;
     acb->cur_pos += acb->cur_qiov.size;
     qemu_iovec_reset(&acb->cur_qiov);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret)
         }
         return;
     }
-    qed_aio_next_io(acb, 0);
+    qed_aio_next_io(acb);
 }

 static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
--
1.8.3.1
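A side note on the conversion pattern in the NBD patch above, reduced to a minimal sketch: inside a coroutine_fn, the synchronous wrappers (blk_pread(), blk_pwrite(), blk_pwrite_zeroes()) are replaced by their blk_co_*() variants while the error handling stays unchanged. example_read is a made-up name; only the blk_co_pread() call itself is taken from the patch:

    static int coroutine_fn example_read(BlockBackend *blk, int64_t offset,
                                         int64_t bytes, void *buf)
    {
        /* blk_co_pread() yields instead of blocking the event loop */
        int ret = blk_co_pread(blk, offset, bytes, buf, 0);
        if (ret < 0) {
            return ret;    /* the caller turns this into an error reply */
        }
        return 0;
    }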
From: Alberto Garcia <berto@igalia.com>

Instead of calling perform_cow() twice with a different COW region
each time, call it just once and make perform_cow() handle both
regions.

This patch simply moves code around. The next one will do the actual
reordering of the COW operations.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs,
     struct iovec iov;
     int ret;

+    if (bytes == 0) {
+        return 0;
+    }
+
     iov.iov_len = bytes;
     iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
     if (iov.iov_base == NULL) {
@@ -XXX,XX +XXX,XX @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     return cluster_offset;
 }

-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
+    Qcow2COWRegion *start = &m->cow_start;
+    Qcow2COWRegion *end = &m->cow_end;
     int ret;

-    if (r->nb_bytes == 0) {
+    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
         return 0;
     }

     qemu_co_mutex_unlock(&s->lock);
-    ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
-    qemu_co_mutex_lock(&s->lock);
-
+    ret = do_perform_cow(bs, m->offset, m->alloc_offset,
+                         start->offset, start->nb_bytes);
     if (ret < 0) {
-        return ret;
+        goto fail;
     }

+    ret = do_perform_cow(bs, m->offset, m->alloc_offset,
+                         end->offset, end->nb_bytes);
+
+fail:
+    qemu_co_mutex_lock(&s->lock);
+
     /*
      * Before we update the L2 table to actually point to the new cluster, we
      * need to be sure that the refcounts have been increased and COW was
      * handled.
      */
-    qcow2_cache_depends_on_flush(s->l2_table_cache);
+    if (ret == 0) {
+        qcow2_cache_depends_on_flush(s->l2_table_cache);
+    }

-    return 0;
+    return ret;
 }

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
@@ -XXX,XX +XXX,XX @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     }

     /* copy content of unmodified sectors */
-    ret = perform_cow(bs, m, &m->cow_start);
-    if (ret < 0) {
-        goto err;
-    }
-
-    ret = perform_cow(bs, m, &m->cow_end);
+    ret = perform_cow(bs, m);
     if (ret < 0) {
         goto err;
     }
--
1.8.3.1

From: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20230309084456.304669-6-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/9pfs/9p.h    | 4 ++--
 hw/9pfs/codir.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -XXX,XX +XXX,XX @@ typedef struct V9fsDir {
     QemuMutex readdir_mutex_L;
 } V9fsDir;

-static inline void v9fs_readdir_lock(V9fsDir *dir)
+static inline void coroutine_fn v9fs_readdir_lock(V9fsDir *dir)
 {
     if (dir->proto_version == V9FS_PROTO_2000U) {
         qemu_co_mutex_lock(&dir->readdir_mutex_u);
@@ -XXX,XX +XXX,XX @@ static inline void v9fs_readdir_lock(V9fsDir *dir)
     }
 }

-static inline void v9fs_readdir_unlock(V9fsDir *dir)
+static inline void coroutine_fn v9fs_readdir_unlock(V9fsDir *dir)
 {
     if (dir->proto_version == V9FS_PROTO_2000U) {
         qemu_co_mutex_unlock(&dir->readdir_mutex_u);
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -XXX,XX +XXX,XX @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
  *
  * See v9fs_co_readdir_many() (as its only user) below for details.
  */
-static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
-                           struct V9fsDirEnt **entries, off_t offset,
-                           int32_t maxsize, bool dostat)
+static int coroutine_fn
+do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, struct V9fsDirEnt **entries,
+                off_t offset, int32_t maxsize, bool dostat)
 {
     V9fsState *s = pdu->s;
     V9fsString name;
--
2.40.0
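The 9pfs hunks above follow a general rule that is worth spelling out: qemu_co_mutex_lock() can suspend the calling coroutine, so any helper that (transitively) takes a CoMutex must be marked coroutine_fn, and the marking then propagates to every caller. A minimal sketch with made-up names:

    static inline void coroutine_fn example_lock(CoMutex *lock)
    {
        qemu_co_mutex_lock(lock);    /* may yield until the lock is free */
    }

    static void coroutine_fn example_caller(CoMutex *lock)
    {
        example_lock(lock);          /* hence coroutine_fn as well */
        /* ... critical section ... */
        qemu_co_mutex_unlock(lock);
    }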
Most of the qed code is now synchronous and matches the coroutine model.
One notable exception is the serialisation between requests, which can
still schedule a callback. Before we can replace this with coroutine
locks, let's convert the driver's external interfaces to the coroutine
versions.

We need to be careful to handle both requests that call the completion
callback directly from the calling coroutine (i.e. fully synchronous
code) and requests that involve a real callback, where we need to yield
and wait for the completion callback coming from outside the coroutine.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 97 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 42 insertions(+), 55 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb)
     }
 }

-static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov, int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque, int flags)
+typedef struct QEDRequestCo {
+    Coroutine *co;
+    bool done;
+    int ret;
+} QEDRequestCo;
+
+static void qed_co_request_cb(void *opaque, int ret)
 {
-    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
+    QEDRequestCo *co = opaque;

-    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
-                        opaque, flags);
+    co->done = true;
+    co->ret = ret;
+    qemu_coroutine_enter_if_inactive(co->co);
+}
+
+static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
+                                       QEMUIOVector *qiov, int nb_sectors,
+                                       int flags)
+{
+    QEDRequestCo co = {
+        .co = qemu_coroutine_self(),
+        .done = false,
+    };
+    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, qed_co_request_cb, &co);
+
+    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, &co, flags);

     acb->flags = flags;
     acb->qiov = qiov;
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,

     /* Start request */
     qed_aio_start_io(acb);
-    return &acb->common;
-}

-static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov, int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
-{
-    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    if (!co.done) {
+        qemu_coroutine_yield();
+    }
+
+    return co.ret;
 }

-static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
+                                          int64_t sector_num, int nb_sectors,
+                                          QEMUIOVector *qiov)
 {
-    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
-                         opaque, QED_AIOCB_WRITE);
+    return qed_co_request(bs, sector_num, qiov, nb_sectors, 0);
 }

-typedef struct {
-    Coroutine *co;
-    int ret;
-    bool done;
-} QEDWriteZeroesCB;
-
-static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
+static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
+                                           int64_t sector_num, int nb_sectors,
+                                           QEMUIOVector *qiov)
 {
-    QEDWriteZeroesCB *cb = opaque;
-
-    cb->done = true;
-    cb->ret = ret;
-    if (cb->co) {
-        aio_co_wake(cb->co);
-    }
+    return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
 }

 static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
                                                   int count,
                                                   BdrvRequestFlags flags)
 {
-    BlockAIOCB *blockacb;
     BDRVQEDState *s = bs->opaque;
-    QEDWriteZeroesCB cb = { .done = false };
     QEMUIOVector qiov;
     struct iovec iov;

@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
     iov.iov_len = count;

     qemu_iovec_init_external(&qiov, &iov, 1);
-    blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov,
-                             count >> BDRV_SECTOR_BITS,
-                             qed_co_pwrite_zeroes_cb, &cb,
-                             QED_AIOCB_WRITE | QED_AIOCB_ZERO);
-    if (!blockacb) {
-        return -EIO;
-    }
-    if (!cb.done) {
-        cb.co = qemu_coroutine_self();
-        qemu_coroutine_yield();
-    }
-    assert(cb.done);
-    return cb.ret;
+    return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov,
+                          count >> BDRV_SECTOR_BITS,
+                          QED_AIOCB_WRITE | QED_AIOCB_ZERO);
 }

 static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = {
     .bdrv_create              = bdrv_qed_create,
     .bdrv_has_zero_init       = bdrv_has_zero_init_1,
     .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
-    .bdrv_aio_readv           = bdrv_qed_aio_readv,
-    .bdrv_aio_writev          = bdrv_qed_aio_writev,
+    .bdrv_co_readv            = bdrv_qed_co_readv,
+    .bdrv_co_writev           = bdrv_qed_co_writev,
     .bdrv_co_pwrite_zeroes    = bdrv_qed_co_pwrite_zeroes,
     .bdrv_truncate            = bdrv_qed_truncate,
     .bdrv_getlength           = bdrv_qed_getlength,
--
1.8.3.1

From: Paolo Bonzini <pbonzini@redhat.com>

do_sgio can suspend via the coroutine function thread_pool_submit_co, so it
has to be coroutine_fn as well---and the same is true of all its direct and
indirect callers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20230309084456.304669-7-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 scsi/qemu-pr-helper.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index XXXXXXX..XXXXXXX 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -XXX,XX +XXX,XX @@ static int do_sgio_worker(void *opaque)
     return status;
 }

-static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
-                   uint8_t *buf, int *sz, int dir)
+static int coroutine_fn do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
+                                uint8_t *buf, int *sz, int dir)
 {
     int r;

@@ -XXX,XX +XXX,XX @@ static SCSISense mpath_generic_sense(int r)
     }
 }

-static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
+static int coroutine_fn mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
 {
     switch (r) {
     case MPATH_PR_SUCCESS:
@@ -XXX,XX +XXX,XX @@ static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
     }
 }

-static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
-                           uint8_t *data, int sz)
+static int coroutine_fn multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                                        uint8_t *data, int sz)
 {
     int rq_servact = cdb[1];
     struct prin_resp resp;
@@ -XXX,XX +XXX,XX @@ static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
     return mpath_reconstruct_sense(fd, r, sense);
 }

-static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
-                            const uint8_t *param, int sz)
+static int coroutine_fn multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                                         const uint8_t *param, int sz)
 {
     int rq_servact = cdb[1];
     int rq_scope = cdb[2] >> 4;
@@ -XXX,XX +XXX,XX @@ static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
 }
 #endif

-static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
-                    uint8_t *data, int *resp_sz)
+static int coroutine_fn do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                                 uint8_t *data, int *resp_sz)
 {
 #ifdef CONFIG_MPATH
     if (is_mpath(fd)) {
@@ -XXX,XX +XXX,XX @@ static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
                     SG_DXFER_FROM_DEV);
 }

-static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
-                     const uint8_t *param, int sz)
+static int coroutine_fn do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                                  const uint8_t *param, int sz)
 {
     int resp_sz;

--
2.40.0
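The QEDRequestCo pattern in the qed patch above generalises to any callback-based API whose completion may be either synchronous or asynchronous. A distilled sketch, with start_async_op() standing in as a hypothetical name for whatever actually submits the request:

    typedef struct ExampleCo {
        Coroutine *co;
        bool done;
        int ret;
    } ExampleCo;

    static void example_cb(void *opaque, int ret)
    {
        ExampleCo *c = opaque;

        c->done = true;
        c->ret = ret;
        /* wakes the submitter only if it has already yielded */
        qemu_coroutine_enter_if_inactive(c->co);
    }

    static int coroutine_fn example_request(void)
    {
        ExampleCo c = {
            .co = qemu_coroutine_self(),
            .done = false,
        };

        start_async_op(example_cb, &c);    /* hypothetical submission call */

        if (!c.done) {                     /* synchronous completion skips this */
            qemu_coroutine_yield();
        }
        return c.ret;
    }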
From: Stephen Bates <sbates@raithlin.com>

Add the ability for the NVMe model to support both the RDS and WDS
modes in the Controller Memory Buffer.

Although not currently supported in the upstreamed Linux kernel, a fork
with support exists [1] and user-space test programs that build on
this also exist [2].

Useful for testing CMB functionality in preparation for real
CMB-enabled NVMe devices (coming soon).

[1] https://github.com/sbates130272/linux-p2pmem
[2] https://github.com/sbates130272/p2pmem-test

Signed-off-by: Stephen Bates <sbates@raithlin.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/nvme.c | 83 +++++++++++++++++++++++++++++++++++++++------------------
 hw/block/nvme.h |  1 +
 2 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -XXX,XX +XXX,XX @@
  *  cmb_size_mb=<cmb_size_mb[optional]>
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
- * offset 0 in BAR2 and supports SQS only for now.
+ * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
  */

 #include "qemu/osdep.h"
@@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
     }
 }

-static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
-                             uint32_t len, NvmeCtrl *n)
+static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
+                             uint64_t prp2, uint32_t len, NvmeCtrl *n)
 {
     hwaddr trans_len = n->page_size - (prp1 % n->page_size);
     trans_len = MIN(len, trans_len);
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,

     if (!prp1) {
         return NVME_INVALID_FIELD | NVME_DNR;
+    } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
+               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
+        qsg->nsg = 0;
+        qemu_iovec_init(iov, num_prps);
+        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
+    } else {
+        pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
+        qemu_sglist_add(qsg, prp1, trans_len);
     }
-
-    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-    qemu_sglist_add(qsg, prp1, trans_len);
     len -= trans_len;
     if (len) {
         if (!prp2) {
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,

             nents = (len + n->page_size - 1) >> n->page_bits;
             prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-            pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans);
+            nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
             while (len != 0) {
                 uint64_t prp_ent = le64_to_cpu(prp_list[i]);

@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
                     i = 0;
                     nents = (len + n->page_size - 1) >> n->page_bits;
                     prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-                    pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list,
+                    nvme_addr_read(n, prp_ent, (void *)prp_list,
                                  prp_trans);
                     prp_ent = le64_to_cpu(prp_list[i]);
                 }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
                 }

                 trans_len = MIN(len, n->page_size);
-                qemu_sglist_add(qsg, prp_ent, trans_len);
+                if (qsg->nsg){
+                    qemu_sglist_add(qsg, prp_ent, trans_len);
+                } else {
+                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
+                }
                 len -= trans_len;
                 i++;
             }
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
             if (prp2 & (n->page_size - 1)) {
                 goto unmap;
             }
-            qemu_sglist_add(qsg, prp2, len);
+            if (qsg->nsg) {
+                qemu_sglist_add(qsg, prp2, len);
+            } else {
+                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
+            }
         }
     }
     return NVME_SUCCESS;
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     uint64_t prp1, uint64_t prp2)
 {
     QEMUSGList qsg;
+    QEMUIOVector iov;
+    uint16_t status = NVME_SUCCESS;

-    if (nvme_map_prp(&qsg, prp1, prp2, len, n)) {
+    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-    if (dma_buf_read(ptr, len, &qsg)) {
+    if (qsg.nsg > 0) {
+        if (dma_buf_read(ptr, len, &qsg)) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
         qemu_sglist_destroy(&qsg);
-        return NVME_INVALID_FIELD | NVME_DNR;
+    } else {
+        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+            status = NVME_INVALID_FIELD | NVME_DNR;
+        }
+        qemu_iovec_destroy(&iov);
     }
-    qemu_sglist_destroy(&qsg);
-    return NVME_SUCCESS;
+    return status;
 }

 static void nvme_post_cqes(void *opaque)
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
         return NVME_LBA_RANGE | NVME_DNR;
     }

-    if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) {
+    if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
         block_acct_invalid(blk_get_stats(n->conf.blk), acct);
         return NVME_INVALID_FIELD | NVME_DNR;
     }

-    assert((nlb << data_shift) == req->qsg.size);
-
-    req->has_sg = true;
     dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
-    req->aiocb = is_write ?
-        dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
-                      nvme_rw_cb, req) :
-        dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
-                     nvme_rw_cb, req);
+    if (req->qsg.nsg > 0) {
+        req->has_sg = true;
+        req->aiocb = is_write ?
+            dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
+                          nvme_rw_cb, req) :
+            dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
+                         nvme_rw_cb, req);
+    } else {
+        req->has_sg = false;
+        req->aiocb = is_write ?
+            blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
+                            req) :
+            blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
+                           req);
+    }

     return NVME_NO_COMPLETE;
 }
@@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev)
         NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
         NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
         NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 0);
+        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
+        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
         NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
         NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb);

+        n->cmbloc = n->bar.cmbloc;
+        n->cmbsz = n->bar.cmbsz;
+
         n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
         memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
                               "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeRequest {
     NvmeCqe                 cqe;
     BlockAcctCookie         acct;
     QEMUSGList              qsg;
+    QEMUIOVector            iov;
     QTAILQ_ENTRY(NvmeRequest)entry;
 } NvmeRequest;

--
1.8.3.1

From: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20230309084456.304669-8-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/unit/test-thread-pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-thread-pool.c
+++ b/tests/unit/test-thread-pool.c
@@ -XXX,XX +XXX,XX @@ static void test_submit_aio(void)
     g_assert_cmpint(data.ret, ==, 0);
 }

-static void co_test_cb(void *opaque)
+static void coroutine_fn co_test_cb(void *opaque)
 {
     WorkerTestData *data = opaque;

--
2.40.0
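One NVMe spec detail behind the CMBSZ programming in the NVMe CMB patch above (background knowledge, not something stated in the patch itself): SZU encodes the size granularity as 4 KiB << (4 * SZU), so SZU=2 selects 1 MiB units and SZ can simply be set to the size in megabytes, which is what the "/* MBs */" comment refers to. As a sanity check in plain C:

    /* SZU=2 -> 4 KiB * 16^2 = 1 MiB granularity (per the NVMe spec) */
    uint64_t szu_bytes = 4096ULL << (4 * 2);        /* == 1048576 bytes */
    uint64_t cmb_bytes = szu_bytes * cmb_size_mb;   /* total CMB size */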
From: Alberto Garcia <berto@igalia.com>

If the guest tries to write data that results in the allocation of a
new cluster, instead of writing the guest data first and then the data
from the COW regions, write everything together using one single I/O
operation.

This can improve the write performance by 25% or more, depending on
several factors such as the media type, the cluster size and the I/O
request size.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c | 40 ++++++++++++++++++++++--------
 block/qcow2.c         | 64 +++++++++++++++++++++++++++++++++++++++++++--------
 block/qcow2.h         |  7 ++++++
 3 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
     assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
     assert(start->offset + start->nb_bytes <= end->offset);
+    assert(!m->data_qiov || m->data_qiov->size == data_bytes);

     if (start->nb_bytes == 0 && end->nb_bytes == 0) {
         return 0;
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     /* The part of the buffer where the end region is located */
     end_buffer = start_buffer + buffer_size - end->nb_bytes;

-    qemu_iovec_init(&qiov, 1);
+    qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0));

     qemu_co_mutex_unlock(&s->lock);
     /* First we read the existing data from both COW regions. We
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
         }
     }

-    /* And now we can write everything */
-    qemu_iovec_reset(&qiov);
-    qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
-    ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
-    if (ret < 0) {
-        goto fail;
+    /* And now we can write everything. If we have the guest data we
+     * can write everything in one single operation */
+    if (m->data_qiov) {
+        qemu_iovec_reset(&qiov);
+        if (start->nb_bytes) {
+            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+        }
+        qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);
+        if (end->nb_bytes) {
+            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+        }
+        /* NOTE: we have a write_aio blkdebug event here followed by
+         * a cow_write one in do_perform_cow_write(), but there's only
+         * one single I/O operation */
+        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
+    } else {
+        /* If there's no guest data then write both COW regions separately */
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+        ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
     }

-    qemu_iovec_reset(&qiov);
-    qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
-    ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
 fail:
     qemu_co_mutex_lock(&s->lock);

diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ fail:
     return ret;
 }

+/* Check if it's possible to merge a write request with the writing of
+ * the data from the COW regions */
+static bool merge_cow(uint64_t offset, unsigned bytes,
+                      QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
+{
+    QCowL2Meta *m;
+
+    for (m = l2meta; m != NULL; m = m->next) {
+        /* If both COW regions are empty then there's nothing to merge */
+        if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
+            continue;
+        }
+
+        /* The data (middle) region must be immediately after the
+         * start region */
+        if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
+            continue;
+        }
+
+        /* The end region must be immediately after the data (middle)
+         * region */
+        if (m->offset + m->cow_end.offset != offset + bytes) {
+            continue;
+        }
+
+        /* Make sure that adding both COW regions to the QEMUIOVector
+         * does not exceed IOV_MAX */
+        if (hd_qiov->niov > IOV_MAX - 2) {
+            continue;
+        }
+
+        m->data_qiov = hd_qiov;
+        return true;
+    }
+
+    return false;
+}
+
 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                          uint64_t bytes, QEMUIOVector *qiov,
                                          int flags)
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
             goto fail;
         }

-        qemu_co_mutex_unlock(&s->lock);
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-        trace_qcow2_writev_data(qemu_coroutine_self(),
-                                cluster_offset + offset_in_cluster);
-        ret = bdrv_co_pwritev(bs->file,
-                              cluster_offset + offset_in_cluster,
-                              cur_bytes, &hd_qiov, 0);
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0) {
-            goto fail;
+        /* If we need to do COW, check if it's possible to merge the
+         * writing of the guest data together with that of the COW regions.
+         * If it's not possible (or not necessary) then write the
+         * guest data now. */
+        if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
+            qemu_co_mutex_unlock(&s->lock);
+            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+            trace_qcow2_writev_data(qemu_coroutine_self(),
+                                    cluster_offset + offset_in_cluster);
+            ret = bdrv_co_pwritev(bs->file,
+                                  cluster_offset + offset_in_cluster,
+                                  cur_bytes, &hd_qiov, 0);
+            qemu_co_mutex_lock(&s->lock);
+            if (ret < 0) {
+                goto fail;
+            }
         }

         while (l2meta != NULL) {
diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ typedef struct QCowL2Meta
      */
     Qcow2COWRegion cow_end;

+    /**
+     * The I/O vector with the data from the actual guest write request.
+     * If non-NULL, this is meant to be merged together with the data
+     * from @cow_start and @cow_end into one single write operation.
+     */
+    QEMUIOVector *data_qiov;
+
     /** Pointer to next L2Meta of the same write request */
     struct QCowL2Meta *next;

--
1.8.3.1

From: Paolo Bonzini <pbonzini@redhat.com>

Functions that can do I/O (including calling bdrv_is_allocated
and bdrv_block_status functions) are prime candidates for being
coroutine_fns. Make the change for those that are themselves called
only from coroutine_fns. Also annotate that they are called with the
graph rdlock taken, thus allowing them to call bdrv_co_*() functions
for I/O.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20230309084456.304669-9-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.h          | 15 ++++++++-------
 block/qcow2-bitmap.c   |  2 +-
 block/qcow2-cluster.c  | 21 +++++++++++++--------
 block/qcow2-refcount.c |  8 ++++----
 block/qcow2-snapshot.c | 25 +++++++++++++------------
 block/qcow2.c          | 27 ++++++++++++++-------------
 6 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
                             uint64_t new_refblock_offset);

 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+                                             int64_t nb_clusters);
+int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size);
 void qcow2_free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size,
                          enum qcow2_discard_type type);
@@ -XXX,XX +XXX,XX @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                 BlockDriverAmendStatusCB *status_cb,
                                 void *cb_opaque, Error **errp);
 int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs);
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs);

 /* qcow2-cluster.c functions */
@@ -XXX,XX +XXX,XX @@ void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
 int coroutine_fn GRAPH_RDLOCK
 qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);

-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
+void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
 int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
                           uint64_t bytes, enum qcow2_discard_type type,
                           bool full_discard);
@@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
                             Error **errp);

 void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
 int qcow2_write_snapshots(BlockDriverState *bs);

 int coroutine_fn GRAPH_RDLOCK
@@ -XXX,XX +XXX,XX @@ bool coroutine_fn qcow2_load_dirty_bitmaps(BlockDriverState *bs,
 bool qcow2_get_bitmap_info_list(BlockDriverState *bs,
                                 Qcow2BitmapInfoList **info_list, Error **errp);
 int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
+int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
 bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
                                           bool release_stored, Error **errp);
 int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -XXX,XX +XXX,XX @@ out:
 }

 /* Checks to see if it's safe to resize bitmaps */
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2BitmapList *bm_list;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ err:
 * Frees the allocated clusters because the request failed and they won't
 * actually be linked.
 */
-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
+void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     if (!has_data_file(bs) && !m->keep_old_clusters) {
@@ -XXX,XX +XXX,XX @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
 *
 * Returns 0 on success, -errno on failure.
 */
-static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
-                             uint64_t guest_offset, unsigned bytes,
-                             uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
+static int coroutine_fn calculate_l2_meta(BlockDriverState *bs,
+                                          uint64_t host_cluster_offset,
+                                          uint64_t guest_offset, unsigned bytes,
+                                          uint64_t *l2_slice, QCowL2Meta **m,
+                                          bool keep_old)
 {
     BDRVQcow2State *s = bs->opaque;
     int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
@@ -XXX,XX +XXX,XX @@ out:
 * function has been waiting for another request and the allocation must be
 * restarted, but the whole request should not be failed.
 */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-                                   uint64_t *host_offset, uint64_t *nb_clusters)
+static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs,
+                                                uint64_t guest_offset,
+                                                uint64_t *host_offset,
+                                                uint64_t *nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;

@@ -XXX,XX +XXX,XX @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
     return nb_clusters;
 }

-static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
-                               unsigned nb_subclusters)
+static int coroutine_fn
+zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
+                    unsigned nb_subclusters)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_slice;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
     return offset;
 }

-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters)
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+                                             int64_t nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t cluster_index, refcount;
@@ -XXX,XX +XXX,XX @@ int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,

 /* only used to allocate compressed sectors. We try to allocate
    contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t offset;
@@ -XXX,XX +XXX,XX @@ out:
     return ret;
 }

-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t i;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -XXX,XX +XXX,XX @@ void qcow2_free_snapshots(BlockDriverState *bs)
 * qcow2_check_refcounts() does not do anything with snapshots'
 * extra data.)
 */
-static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
-                                   int *nb_clusters_reduced,
-                                   int *extra_data_dropped,
-                                   Error **errp)
+static coroutine_fn GRAPH_RDLOCK
+int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
+                            int *nb_clusters_reduced,
+                            int *extra_data_dropped,
+                            Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowSnapshotHeader h;
@@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,

         /* Read statically sized part of the snapshot header */
         offset = ROUND_UP(offset, 8);
-        ret = bdrv_pread(bs->file, offset, sizeof(h), &h, 0);
+        ret = bdrv_co_pread(bs->file, offset, sizeof(h), &h, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
         }

         /* Read known extra data */
-        ret = bdrv_pread(bs->file, offset,
-                         MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
+        ret = bdrv_co_pread(bs->file, offset,
+                            MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
             /* Store unknown extra data */
             unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
             sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
-            ret = bdrv_pread(bs->file, offset, unknown_extra_data_size,
-                             sn->unknown_extra_data, 0);
+            ret = bdrv_co_pread(bs->file, offset, unknown_extra_data_size,
+                                sn->unknown_extra_data, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret,
                                  "Failed to read snapshot table");
@@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,

         /* Read snapshot ID */
         sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file, offset, id_str_size, sn->id_str, 0);
+        ret = bdrv_co_pread(bs->file, offset, id_str_size, sn->id_str, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,

         /* Read snapshot name */
         sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file, offset, name_size, sn->name, 0);
+        ret = bdrv_co_pread(bs->file, offset, name_size, sn->name, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -XXX,XX +XXX,XX @@ fail:
     return ret;
 }

-int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
 {
     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp)
 * unknown magic is skipped (future extension this version knows nothing about)
 * return 0 upon success, non-0 otherwise
 */
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
-                                 uint64_t end_offset, void **p_feature_table,
-                                 int flags, bool *need_update_header,
-                                 Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+                      uint64_t end_offset, void **p_feature_table,
+                      int flags, bool *need_update_header, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowExtension ext;
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         printf("attempting to read extended header in offset %lu\n", offset);
 #endif

-        ret = bdrv_pread(bs->file, offset, sizeof(ext), &ext, 0);
+        ret = bdrv_co_pread(bs->file, offset, sizeof(ext), &ext, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                              "pread fail from offset %" PRIu64, offset);
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                        sizeof(bs->backing_format));
                 return 2;
             }
-            ret = bdrv_pread(bs->file, offset, ext.len, bs->backing_format, 0);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, bs->backing_format, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                  "Could not read format name");
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         case QCOW2_EXT_MAGIC_FEATURE_TABLE:
             if (p_feature_table != NULL) {
                 void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file, offset, ext.len, feature_table, 0);
+                ret = bdrv_co_pread(bs->file, offset, ext.len, feature_table, 0);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                      "Could not read table");
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 return -EINVAL;
             }

-            ret = bdrv_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret,
                                  "Unable to read CRYPTO header extension");
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 break;
             }

-            ret = bdrv_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "bitmaps_ext: "
                                  "Could not read ext header");
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         case QCOW2_EXT_MAGIC_DATA_FILE:
         {
             s->image_data_file = g_malloc0(ext.len + 1);
-            ret = bdrv_pread(bs->file, offset, ext.len, s->image_data_file, 0);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, s->image_data_file, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret,
                                  "ERROR: Could not read data file name");
@@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
             uext->len = ext.len;
             QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);

-            ret = bdrv_pread(bs->file, offset, uext->len, uext->data, 0);
+            ret = bdrv_co_pread(bs->file, offset, uext->len, uext->data, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                  "Could not read data");
@@ -XXX,XX +XXX,XX @@ static void qcow2_update_options_abort(BlockDriverState *bs,
     qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
 }

-static int qcow2_update_options(BlockDriverState *bs, QDict *options,
-                                int flags, Error **errp)
+static int coroutine_fn
+qcow2_update_options(BlockDriverState *bs, QDict *options, int flags,
+                     Error **errp)
 {
     Qcow2ReopenState r = {};
     int ret;
--
2.40.0
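To make the merged-write path of the qcow2 COW patch above concrete: when merge_cow() succeeds, perform_cow() builds one QEMUIOVector that covers the head COW region, the guest data and the tail COW region back to back, so a single write replaces what used to be up to three. Its core, using the same calls as the patch (the emptiness checks omitted for brevity):

    qemu_iovec_reset(&qiov);
    qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);     /* head COW   */
    qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);    /* guest data */
    qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);         /* tail COW   */
    ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);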
From: Alberto Garcia <berto@igalia.com>

Instead of passing a single buffer pointer to do_perform_cow_write(),
pass a QEMUIOVector. This will allow us to merge the write requests
for the COW regions and the actual data into a single one.

Although do_perform_cow_read() does not strictly need to change its
API, we're doing it here as well for consistency.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c | 51 ++++++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
 static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
                                             uint64_t src_cluster_offset,
                                             unsigned offset_in_cluster,
-                                            uint8_t *buffer,
-                                            unsigned bytes)
+                                            QEMUIOVector *qiov)
 {
-    QEMUIOVector qiov;
-    struct iovec iov = { .iov_base = buffer, .iov_len = bytes };
     int ret;

-    if (bytes == 0) {
+    if (qiov->size == 0) {
         return 0;
     }

-    qemu_iovec_init_external(&qiov, &iov, 1);
-
     BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

     if (!bs->drv) {
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
      * which can lead to deadlock when block layer copy-on-read is enabled.
      */
     ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
-                                  bytes, &qiov, 0);
+                                  qiov->size, qiov, 0);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
 static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
                                              uint64_t cluster_offset,
                                              unsigned offset_in_cluster,
-                                             uint8_t *buffer,
-                                             unsigned bytes)
+                                             QEMUIOVector *qiov)
 {
-    QEMUIOVector qiov;
-    struct iovec iov = { .iov_base = buffer, .iov_len = bytes };
     int ret;

-    if (bytes == 0) {
+    if (qiov->size == 0) {
         return 0;
     }

-    qemu_iovec_init_external(&qiov, &iov, 1);
-
     ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + offset_in_cluster, bytes);
+            cluster_offset + offset_in_cluster, qiov->size);
     if (ret < 0) {
         return ret;
     }

     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
     ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
-                          bytes, &qiov, 0);
+                          qiov->size, qiov, 0);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
     bool merge_reads;
     uint8_t *start_buffer, *end_buffer;
+    QEMUIOVector qiov;
     int ret;

     assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     /* The part of the buffer where the end region is located */
     end_buffer = start_buffer + buffer_size - end->nb_bytes;

+    qemu_iovec_init(&qiov, 1);
+
     qemu_co_mutex_unlock(&s->lock);
     /* First we read the existing data from both COW regions. We
      * either read the whole region in one go, or the start and end
      * regions separately. */
     if (merge_reads) {
-        ret = do_perform_cow_read(bs, m->offset, start->offset,
-                                  start_buffer, buffer_size);
+        qemu_iovec_add(&qiov, start_buffer, buffer_size);
+        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
     } else {
-        ret = do_perform_cow_read(bs, m->offset, start->offset,
-                                  start_buffer, start->nb_bytes);
+        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
         if (ret < 0) {
             goto fail;
         }

-        ret = do_perform_cow_read(bs, m->offset, end->offset,
-                                  end_buffer, end->nb_bytes);
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+        ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
     }
     if (ret < 0) {
         goto fail;
@@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
     }

     /* And now we can write everything */
-    ret = do_perform_cow_write(bs, m->alloc_offset, start->offset,
-                               start_buffer, start->nb_bytes);
+    qemu_iovec_reset(&qiov);
+    qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+    ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
     if (ret < 0) {
         goto fail;
     }

-    ret = do_perform_cow_write(bs, m->alloc_offset, end->offset,
-                               end_buffer, end->nb_bytes);
+    qemu_iovec_reset(&qiov);
+    qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+    ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
 fail:
     qemu_co_mutex_lock(&s->lock);

@@ -XXX,XX +XXX,XX @@ fail:
     }

     qemu_vfree(start_buffer);
+    qemu_iovec_destroy(&qiov);
     return ret;
 }

--
1.8.3.1

From: Paolo Bonzini <pbonzini@redhat.com>

Functions that can do I/O are prime candidates for being coroutine_fns. Make the
change for the one that is itself called only from coroutine_fns. Unfortunately
vmdk does not use a coroutine_fn for the bulk of the open (like qcow2 does) so
vmdk_read_cid cannot have the same treatment.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20230309084456.304669-10-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -XXX,XX +XXX,XX @@ out:
     return ret;
 }

-static int vmdk_is_cid_valid(BlockDriverState *bs)
+static int coroutine_fn vmdk_is_cid_valid(BlockDriverState *bs)
 {
     BDRVVmdkState *s = bs->opaque;
     uint32_t cur_pcid;
--
2.40.0
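The QEMUIOVector lifecycle that the qcow2 patch above switches to, shown in isolation with made-up buffer names: one heap-backed vector is initialised once and reused across requests by resetting it, instead of wrapping each raw buffer with qemu_iovec_init_external():

    QEMUIOVector qiov;

    qemu_iovec_init(&qiov, 1);                   /* allocate iovec storage */

    qemu_iovec_add(&qiov, start_buffer, start_len);
    /* ... first request uses &qiov; qiov.size == start_len ... */

    qemu_iovec_reset(&qiov);                     /* empty it, keep storage */
    qemu_iovec_add(&qiov, end_buffer, end_len);
    /* ... second request uses &qiov ... */

    qemu_iovec_destroy(&qiov);                   /* release iovec storage */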
Deleted patch

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed-cluster.c | 39 ++++++++++++++++++++++-----------------
 block/qed.c         | 24 +++++++++++-------------
 block/qed.h         |  4 ++--
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/block/qed-cluster.c b/block/qed-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -XXX,XX +XXX,XX @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
 * @s: QED state
 * @request: L2 cache entry
 * @pos: Byte position in device
- * @len: Number of bytes
- * @cb: Completion function
- * @opaque: User data for completion function
+ * @len: Number of bytes (may be shortened on return)
+ * @img_offset: Contains offset in the image file on success
 *
 * This function translates a position in the block device to an offset in the
- * image file. It invokes the cb completion callback to report back the
- * translated offset or unallocated range in the image file.
+ * image file. The translated offset or unallocated range in the image file is
+ * reported back in *img_offset and *len.
 *
 * If the L2 table exists, request->l2_table points to the L2 table cache entry
 * and the caller must free the reference when they are finished. The cache
 * entry is exposed in this way to avoid callers having to read the L2 table
 * again later during request processing. If request->l2_table is non-NULL it
 * will be unreferenced before taking on the new cache entry.
+ *
+ * On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous
+ * range in the image file.
+ *
+ * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
+ * table offset, respectively. len is number of contiguous unallocated bytes.
 */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque)
+int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                     size_t *len, uint64_t *img_offset)
 {
     uint64_t l2_offset;
     uint64_t offset = 0;
@@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
     /* Limit length to L2 boundary. Requests are broken up at the L2 boundary
      * so that a request acts on one L2 table at a time.
      */
-    len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
+    *len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);

     l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
     if (qed_offset_is_unalloc_cluster(l2_offset)) {
-        cb(opaque, QED_CLUSTER_L1, 0, len);
-        return;
+        *img_offset = 0;
+        return QED_CLUSTER_L1;
     }
     if (!qed_check_table_offset(s, l2_offset)) {
-        cb(opaque, -EINVAL, 0, 0);
-        return;
+        *img_offset = *len = 0;
+        return -EINVAL;
     }

     ret = qed_read_l2_table(s, request, l2_offset);
@@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
     }

     index = qed_l2_index(s, pos);
-    n = qed_bytes_to_clusters(s,
-                              qed_offset_into_cluster(s, pos) + len);
+    n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len);
     n = qed_count_contiguous_clusters(s, request->l2_table->table,
                                       index, n, &offset);

@@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
         ret = -EINVAL;
     }

-    len = MIN(len,
-              n * s->header.cluster_size - qed_offset_into_cluster(s, pos));
+    *len = MIN(*len,
+               n * s->header.cluster_size - qed_offset_into_cluster(s, pos));

 out:
-    cb(opaque, ret, offset, len);
+    *img_offset = offset;
     qed_release(s);
+    return ret;
 }
diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
         .file = file,
     };
     QEDRequest request = { .l2_table = NULL };
+    uint64_t offset;
+    int ret;

-    qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
+    ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
+    qed_is_allocated_cb(&cb, ret, offset, len);

-    /* Now sleep if the callback wasn't invoked immediately */
-    while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
-        cb.co = qemu_coroutine_self();
-        qemu_coroutine_yield();
-    }
+    /* The callback was invoked immediately */
+    assert(cb.status != BDRV_BLOCK_OFFSET_MASK);

     qed_unref_l2_cache_entry(request.l2_table);

@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
 * or -errno
 * @offset: Cluster offset in bytes
 * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
 */
 static void qed_aio_write_data(void *opaque, int ret,
                                uint64_t offset, size_t len)
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_data(void *opaque, int ret,
 * or -errno
 * @offset: Cluster offset in bytes
 * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
 */
 static void qed_aio_read_data(void *opaque, int ret,
                               uint64_t offset, size_t len)
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret)
     BDRVQEDState *s = acb_to_s(acb);
     QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
                                 qed_aio_write_data : qed_aio_read_data;
+    uint64_t offset;
+    size_t len;

     trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);

@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret)
     }

     /* Find next cluster and start I/O */
-    qed_find_cluster(s, &acb->request,
-                     acb->cur_pos, acb->end_pos - acb->cur_pos,
-                     io_fn, acb);
+    len = acb->end_pos - acb->cur_pos;
+    ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset);
+    io_fn(acb, ret, offset, len);
 }

 static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
diff --git a/block/qed.h b/block/qed.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -XXX,XX +XXX,XX @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
 /**
 * Cluster functions
 */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque);
+int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                     size_t *len, uint64_t *img_offset);

 /**
 * Consistency check
 */
--
1.8.3.1
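The qed_find_cluster() change above follows a mechanical recipe that this series applies repeatedly: a (status, offset, len) completion callback becomes a return value plus out parameters, and the length turns into an in/out parameter because the callee may shorten the request. Sketched with hypothetical names:

    /* before: void find(uint64_t pos, size_t len, FindFunc *cb, void *opaque); */
    int find(uint64_t pos, size_t *len, uint64_t *img_offset);

    void caller(uint64_t pos, size_t want)
    {
        uint64_t offset;
        size_t len = want;                   /* in/out: may be shortened */
        int ret = find(pos, &len, &offset);

        /* this replaces the old cb(opaque, ret, offset, len) invocation */
        handle_result(ret, offset, len);
    }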
Deleted patch

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ int qed_write_header_sync(BDRVQEDState *s)
 * This function only updates known header fields in-place and does not affect
 * extra data after the QED header.
 */
-static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
-                             void *opaque)
+static int qed_write_header(BDRVQEDState *s)
 {
     /* We must write full sectors for O_DIRECT but cannot necessarily generate
      * the data following the header if an unrecognized compat feature is
@@ -XXX,XX +XXX,XX @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
     ret = 0;
 out:
     qemu_vfree(buf);
-    cb(opaque, ret);
+    return ret;
 }

 static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
@@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
     }
 }

-static void qed_finish_clear_need_check(void *opaque, int ret)
-{
-    /* Do nothing */
-}
-
-static void qed_flush_after_clear_need_check(void *opaque, int ret)
-{
-    BDRVQEDState *s = opaque;
-
-    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
-
-    /* No need to wait until flush completes */
-    qed_unplug_allocating_write_reqs(s);
-}
-
 static void qed_clear_need_check(void *opaque, int ret)
 {
     BDRVQEDState *s = opaque;
@@ -XXX,XX +XXX,XX @@ static void qed_clear_need_check(void *opaque, int ret)
     }

     s->header.features &= ~QED_F_NEED_CHECK;
-    qed_write_header(s, qed_flush_after_clear_need_check, s);
+    ret = qed_write_header(s);
+    (void) ret;
+
+    qed_unplug_allocating_write_reqs(s);
+
+    ret = bdrv_flush(s->bs);
+    (void) ret;
 }

 static void qed_need_check_timer_cb(void *opaque)
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
     BlockCompletionFunc *cb;
+    int ret;

     /* Cancel timer when the first allocating request comes in */
     if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)

     if (qed_should_set_need_check(s)) {
         s->header.features |= QED_F_NEED_CHECK;
-        qed_write_header(s, cb, acb);
+        ret = qed_write_header(s);
+        cb(acb, ret);
     } else {
         cb(acb, 0);
     }
--
1.8.3.1
Deleted patch

Note that this code is generally not running in coroutine context, so
this is an actual blocking synchronous operation. We'll fix this in a
moment.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 61 +++++++++++++++++++------------------------------------------
 1 file changed, 19 insertions(+), 42 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_aio_start_io(QEDAIOCB *acb)
     qed_aio_next_io(acb, 0);
 }

-static void qed_aio_next_io_cb(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-
-    qed_aio_next_io(acb, ret);
-}
-
 static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
     assert(!s->allocating_write_reqs_plugged);
@@ -XXX,XX +XXX,XX @@ err:
     qed_aio_complete(acb, ret);
 }

-static void qed_aio_write_l2_update_cb(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
-}
-
-/**
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use. A crash during an
- * allocating write could result in empty clusters in the image. If the write
- * only touched a subregion of the cluster, then backing image sectors have
- * been lost in the untouched region. The solution is to flush after writing a
- * new data cluster and before updating the L2 table.
- */
-static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-
-    if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
-        qed_aio_complete(acb, -EIO);
-    }
-}
-
 /**
  * Write data to the image file
  */
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset = acb->cur_cluster +
                       qed_offset_into_cluster(s, acb->cur_pos);
-    BlockCompletionFunc *next_fn;

     trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);

@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
         return;
     }

+    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+    ret = bdrv_pwritev(s->bs->file, offset, &acb->cur_qiov);
+    if (ret >= 0) {
+        ret = 0;
+    }
+
     if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
-        next_fn = qed_aio_next_io_cb;
+        qed_aio_next_io(acb, ret);
     } else {
         if (s->bs->backing) {
-            next_fn = qed_aio_write_flush_before_l2_update;
-        } else {
-            next_fn = qed_aio_write_l2_update_cb;
+            /*
+             * Flush new data clusters before updating the L2 table
+             *
+             * This flush is necessary when a backing file is in use. A crash
+             * during an allocating write could result in empty clusters in the
+             * image. If the write only touched a subregion of the cluster,
+             * then backing image sectors have been lost in the untouched
+             * region. The solution is to flush after writing a new data
+             * cluster and before updating the L2 table.
+             */
+            ret = bdrv_flush(s->bs->file->bs);
         }
+        qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
     }
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
-                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-                    next_fn, acb);
 }

 /**
--
1.8.3.1

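The crash-consistency ordering described in the inlined comment can be summarised in a standalone sketch (write_data/flush_file/update_l2 are illustrative stubs, not QEMU functions): the new data cluster must be durable before the L2 table is allowed to point at it.

    static int write_data(void) { return 0; }   /* write guest data */
    static int flush_file(void) { return 0; }   /* durability barrier */
    static int update_l2(void)  { return 0; }   /* publish the mapping */

    static int allocating_write(int have_backing_file)
    {
        int ret;

        ret = write_data();          /* 1. put the new cluster on disk */
        if (ret < 0) {
            return ret;
        }
        if (have_backing_file) {
            ret = flush_file();      /* 2. make it durable first... */
            if (ret < 0) {
                return ret;
            }
        }
        return update_l2();          /* 3. ...then point the L2 table at it */
    }
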
Deleted patch

qed_commit_l2_update() is unconditionally called at the end of
qed_aio_write_l1_update(). Inline it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
 }

 /**
- * Commit the current L2 table to the cache
+ * Update L1 table with new L2 table offset and write it out
  */
-static void qed_commit_l2_update(void *opaque, int ret)
+static void qed_aio_write_l1_update(void *opaque, int ret)
 {
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     CachedL2Table *l2_table = acb->request.l2_table;
     uint64_t l2_offset = l2_table->offset;
+    int index;
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }

+    index = qed_l1_index(s, acb->cur_pos);
+    s->l1_table->offsets[index] = l2_table->offset;
+
+    ret = qed_write_l1_table(s, index, 1);
+
+    /* Commit the current L2 table to the cache */
     qed_commit_l2_cache_entry(&s->l2_cache, l2_table);

     /* This is guaranteed to succeed because we just committed the entry to the
@@ -XXX,XX +XXX,XX @@ static void qed_commit_l2_update(void *opaque, int ret)
     qed_aio_next_io(acb, ret);
 }

-/**
- * Update L1 table with new L2 table offset and write it out
- */
-static void qed_aio_write_l1_update(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    int index;
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    index = qed_l1_index(s, acb->cur_pos);
-    s->l1_table->offsets[index] = acb->request.l2_table->offset;
-
-    ret = qed_write_l1_table(s, index, 1);
-    qed_commit_l2_update(acb, ret);
-}

 /**
  * Update L2 table with new cluster offsets and write them out
--
1.8.3.1

Deleted patch

Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
 /**
  * Update L1 table with new L2 table offset and write it out
  */
-static void qed_aio_write_l1_update(void *opaque, int ret)
+static int qed_aio_write_l1_update(QEDAIOCB *acb)
 {
-    QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     CachedL2Table *l2_table = acb->request.l2_table;
     uint64_t l2_offset = l2_table->offset;
-    int index;
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
+    int index, ret;

     index = qed_l1_index(s, acb->cur_pos);
     s->l1_table->offsets[index] = l2_table->offset;
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l1_update(void *opaque, int ret)
     acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
     assert(acb->request.l2_table != NULL);

-    qed_aio_next_io(acb, ret);
+    return ret;
 }


@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
     if (need_alloc) {
         /* Write out the whole new L2 table */
         ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true);
-        qed_aio_write_l1_update(acb, ret);
+        if (ret) {
+            goto err;
+        }
+        ret = qed_aio_write_l1_update(acb);
+        qed_aio_next_io(acb, ret);
+
     } else {
         /* Write out only the updated part of the L2 table */
         ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters,
--
1.8.3.1

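The refactoring pattern used in this and the following patches, as a minimal sketch (stub names, not the QEMU code): helpers stop completing the request themselves and just return a negative errno, so the request is completed in exactly one place and the call chain cannot recurse.

    static int write_l2_table(void) { return 0; }   /* illustrative stub */
    static int write_l1_table(void) { return 0; }   /* illustrative stub */

    static void complete(int ret)
    {
        /* finish the request exactly once */
    }

    static void update_tables(void)
    {
        int ret;

        ret = write_l2_table();
        if (ret < 0) {
            goto out;                /* no recursion into complete() here */
        }
        ret = write_l1_table();
    out:
        complete(ret);               /* single completion point */
    }
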
Deleted patch

Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_l1_update(QEDAIOCB *acb)
 /**
  * Update L2 table with new cluster offsets and write them out
  */
-static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
+static int qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
     BDRVQEDState *s = acb_to_s(acb);
     bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
-    int index;
-
-    if (ret) {
-        goto err;
-    }
+    int index, ret;

     if (need_alloc) {
         qed_unref_l2_cache_entry(acb->request.l2_table);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
         /* Write out the whole new L2 table */
         ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true);
         if (ret) {
-            goto err;
+            return ret;
         }
-        ret = qed_aio_write_l1_update(acb);
-        qed_aio_next_io(acb, ret);
-
+        return qed_aio_write_l1_update(acb);
     } else {
         /* Write out only the updated part of the L2 table */
         ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters,
                                  false);
+        if (ret) {
+            return ret;
+        }
     }
-    return;
-
-err:
-    qed_aio_complete(acb, ret);
+    return 0;
 }

 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
          */
         ret = bdrv_flush(s->bs->file->bs);
     }
-    qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+    if (ret) {
+        goto err;
+    }
+    ret = qed_aio_write_l2_update(acb, acb->cur_cluster);
+    if (ret) {
+        goto err;
+    }
+    qed_aio_next_io(acb, 0);
     }
+    return;
+
+err:
+    qed_aio_complete(acb, ret);
 }

 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_zero_cluster(void *opaque, int ret)
         return;
     }

-    qed_aio_write_l2_update(acb, 0, 1);
+    ret = qed_aio_write_l2_update(acb, 1);
+    if (ret < 0) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+    qed_aio_next_io(acb, 0);
 }

 /**
--
1.8.3.1

Deleted patch

Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

While refactoring qed_aio_write_alloc() to accommodate the change,
qed_aio_write_zero_cluster() ended up with a single line, so I chose to
inline that line and remove the function completely.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 58 +++++++++++++++++++-------------------------------------
 1 file changed, 21 insertions(+), 37 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_main(QEDAIOCB *acb)
 /**
  * Populate untouched regions of new data cluster
  */
-static void qed_aio_write_cow(void *opaque, int ret)
+static int qed_aio_write_cow(QEDAIOCB *acb)
 {
-    QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t start, len, offset;
+    int ret;

     /* Populate front untouched region of new data cluster */
     start = qed_start_of_cluster(s, acb->cur_pos);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_cow(void *opaque, int ret)

     trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
     ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster);
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
+    if (ret < 0) {
+        return ret;
     }

     /* Populate back untouched region of new data cluster */
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_cow(void *opaque, int ret)

     trace_qed_aio_write_postfill(s, acb, start, len, offset);
     ret = qed_copy_from_backing_file(s, start, len, offset);
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    ret = qed_aio_write_main(acb);
     if (ret < 0) {
-        qed_aio_complete(acb, ret);
-        return;
+        return ret;
     }
-    qed_aio_next_io(acb, 0);
+
+    return qed_aio_write_main(acb);
 }

 /**
@@ -XXX,XX +XXX,XX @@ static bool qed_should_set_need_check(BDRVQEDState *s)
     return !(s->header.features & QED_F_NEED_CHECK);
 }

-static void qed_aio_write_zero_cluster(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    ret = qed_aio_write_l2_update(acb, 1);
-    if (ret < 0) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-    qed_aio_next_io(acb, 0);
-}
-
 /**
  * Write new data cluster
  *
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_zero_cluster(void *opaque, int ret)
 static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
-    BlockCompletionFunc *cb;
     int ret;

     /* Cancel timer when the first allocating request comes in */
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
             qed_aio_start_io(acb);
             return;
         }
-
-        cb = qed_aio_write_zero_cluster;
     } else {
-        cb = qed_aio_write_cow;
         acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     }

     if (qed_should_set_need_check(s)) {
         s->header.features |= QED_F_NEED_CHECK;
         ret = qed_write_header(s);
-        cb(acb, ret);
+        if (ret < 0) {
+            qed_aio_complete(acb, ret);
+            return;
+        }
+    }
+
+    if (acb->flags & QED_AIOCB_ZERO) {
+        ret = qed_aio_write_l2_update(acb, 1);
     } else {
-        cb(acb, 0);
+        ret = qed_aio_write_cow(acb);
     }
+    if (ret < 0) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+    qed_aio_next_io(acb, 0);
 }

 /**
--
1.8.3.1

From: Wang Liang <wangliangzz@inspur.com>

hmp_commit() calls blk_is_available() from a non-coroutine context (and in
the main loop). blk_is_available() is a co_wrapper_mixed_bdrv_rdlock
function, and in the non-coroutine context it calls AIO_WAIT_WHILE(),
which crashes if the aio_context lock is not taken before.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1615
Signed-off-by: Wang Liang <wangliangzz@inspur.com>
Message-Id: <20230424103902.45265-1-wangliangzz@126.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/monitor/block-hmp-cmds.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index XXXXXXX..XXXXXXX 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -XXX,XX +XXX,XX @@ void hmp_commit(Monitor *mon, const QDict *qdict)
         error_report("Device '%s' not found", device);
         return;
     }
-    if (!blk_is_available(blk)) {
-        error_report("Device '%s' has no medium", device);
-        return;
-    }

     bs = bdrv_skip_implicit_filters(blk_bs(blk));
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);

+    if (!blk_is_available(blk)) {
+        error_report("Device '%s' has no medium", device);
+        aio_context_release(aio_context);
+        return;
+    }
+
     ret = bdrv_commit(bs);

     aio_context_release(aio_context);
--
2.40.0

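The shape of the hmp_commit fix, reduced to a standalone sketch (lock/unlock and the other names are illustrative stand-ins for aio_context_acquire()/aio_context_release() and the QEMU calls, not the real API): anything that can expand to AIO_WAIT_WHILE() must run with the AioContext lock held, and every early return must drop the lock again.

    #include <stdbool.h>
    #include <stdio.h>

    static void lock(void)           { /* ~ aio_context_acquire() */ }
    static void unlock(void)         { /* ~ aio_context_release() */ }
    static bool medium_present(void) { return true; }  /* may poll: lock */
    static void do_commit(void)      { }

    static void commit_command(void)
    {
        lock();
        if (!medium_present()) {          /* checked under the lock now */
            fprintf(stderr, "no medium\n");
            unlock();                     /* don't leak the lock on error */
            return;
        }
        do_commit();
        unlock();
    }
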
Deleted patch

Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but
just return an error code and let the caller handle it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static bool qed_should_set_need_check(BDRVQEDState *s)
  *
  * This path is taken when writing to previously unallocated clusters.
  */
-static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
     int ret;
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     }
     if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
         s->allocating_write_reqs_plugged) {
-        return; /* wait for existing request to finish */
+        return -EINPROGRESS; /* wait for existing request to finish */
     }

     acb->cur_nclusters = qed_bytes_to_clusters(s,
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     if (acb->flags & QED_AIOCB_ZERO) {
         /* Skip ahead if the clusters are already zero */
         if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
-            qed_aio_start_io(acb);
-            return;
+            return 0;
         }
     } else {
         acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
         s->header.features |= QED_F_NEED_CHECK;
         ret = qed_write_header(s);
         if (ret < 0) {
-            qed_aio_complete(acb, ret);
-            return;
+            return ret;
         }
     }

@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
         ret = qed_aio_write_cow(acb);
     }
     if (ret < 0) {
-        qed_aio_complete(acb, ret);
-        return;
+        return ret;
     }
-    qed_aio_next_io(acb, 0);
+    return 0;
 }

 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
  *
  * This path is taken when writing to already allocated clusters.
  */
-static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
+static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
 {
-    int ret;
-
     /* Allocate buffer for zero writes */
     if (acb->flags & QED_AIOCB_ZERO) {
         struct iovec *iov = acb->qiov->iov;
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
         if (!iov->iov_base) {
             iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
             if (iov->iov_base == NULL) {
-                qed_aio_complete(acb, -ENOMEM);
-                return;
+                return -ENOMEM;
             }
             memset(iov->iov_base, 0, iov->iov_len);
         }
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
     qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);

     /* Do the actual write */
-    ret = qed_aio_write_main(acb);
-    if (ret < 0) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-    qed_aio_next_io(acb, 0);
+    return qed_aio_write_main(acb);
 }

 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_data(void *opaque, int ret,

     switch (ret) {
     case QED_CLUSTER_FOUND:
-        qed_aio_write_inplace(acb, offset, len);
+        ret = qed_aio_write_inplace(acb, offset, len);
         break;

     case QED_CLUSTER_L2:
     case QED_CLUSTER_L1:
     case QED_CLUSTER_ZERO:
-        qed_aio_write_alloc(acb, len);
+        ret = qed_aio_write_alloc(acb, len);
         break;

     default:
-        qed_aio_complete(acb, ret);
+        assert(ret < 0);
         break;
     }
+
+    if (ret < 0) {
+        if (ret != -EINPROGRESS) {
+            qed_aio_complete(acb, ret);
+        }
+        return;
+    }
+    qed_aio_next_io(acb, 0);
 }

 /**
--
1.8.3.1

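One detail worth calling out: -EINPROGRESS is used here as an in-band sentinel meaning "parked behind another allocating request; someone else will resume us", so the dispatcher must complete the request on every negative value except that one. A compact sketch with stub names (not the QEMU code):

    #include <errno.h>

    static int do_step(void)    { return 0; }  /* may return -EINPROGRESS */
    static void complete(int r) { }
    static void next_io(void)   { }

    static void dispatch(void)
    {
        int ret = do_step();

        if (ret < 0) {
            if (ret != -EINPROGRESS) {
                complete(ret);   /* real failure: finish the request */
            }
            return;              /* parked: the queue owner resumes us */
        }
        next_io();               /* success: continue with the next chunk */
    }
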
Deleted patch

Now that we're running in coroutine context, the ad-hoc serialisation
code (which drops a request that has to wait out of coroutine context)
can be replaced by a CoQueue.

This means that when we resume a serialised request, it is running in
coroutine context again and its I/O isn't blocking any more.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 49 +++++++++++++++++--------------------------------
 block/qed.h |  3 ++-
 2 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_plug_allocating_write_reqs(BDRVQEDState *s)

 static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 {
-    QEDAIOCB *acb;
-
     assert(s->allocating_write_reqs_plugged);

     s->allocating_write_reqs_plugged = false;
-
-    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
-    if (acb) {
-        qed_aio_start_io(acb);
-    }
+    qemu_co_enter_next(&s->allocating_write_reqs);
 }

 static void qed_clear_need_check(void *opaque, int ret)
@@ -XXX,XX +XXX,XX @@ static void qed_need_check_timer_cb(void *opaque)
     BDRVQEDState *s = opaque;

     /* The timer should only fire when allocating writes have drained */
-    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+    assert(!s->allocating_acb);

     trace_qed_need_check_timer_cb(s);

@@ -XXX,XX +XXX,XX @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags,
     int ret;

     s->bs = bs;
-    QSIMPLEQ_INIT(&s->allocating_write_reqs);
+    qemu_co_queue_init(&s->allocating_write_reqs);

     ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
     if (ret < 0) {
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete_bh(void *opaque)
     qed_release(s);
 }

-static void qed_resume_alloc_bh(void *opaque)
-{
-    qed_aio_start_io(opaque);
-}
-
 static void qed_aio_complete(QEDAIOCB *acb, int ret)
 {
     BDRVQEDState *s = acb_to_s(acb);
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
      * next request in the queue. This ensures that we don't cycle through
      * requests multiple times but rather finish one at a time completely.
      */
-    if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
-        QEDAIOCB *next_acb;
-        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
-        next_acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
-        if (next_acb) {
-            aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
-                                    qed_resume_alloc_bh, next_acb);
+    if (acb == s->allocating_acb) {
+        s->allocating_acb = NULL;
+        if (!qemu_co_queue_empty(&s->allocating_write_reqs)) {
+            qemu_co_enter_next(&s->allocating_write_reqs);
         } else if (s->header.features & QED_F_NEED_CHECK) {
             qed_start_need_check_timer(s);
         }
@@ -XXX,XX +XXX,XX @@ static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     int ret;

     /* Cancel timer when the first allocating request comes in */
-    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+    if (s->allocating_acb == NULL) {
         qed_cancel_need_check_timer(s);
     }

     /* Freeze this request if another allocating write is in progress */
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
-        QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
-    }
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
-        s->allocating_write_reqs_plugged) {
-        return -EINPROGRESS; /* wait for existing request to finish */
+    if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) {
+        if (s->allocating_acb != NULL) {
+            qemu_co_queue_wait(&s->allocating_write_reqs, NULL);
+            assert(s->allocating_acb == NULL);
+        }
+        s->allocating_acb = acb;
+        return -EAGAIN; /* start over with looking up table entries */
     }

     acb->cur_nclusters = qed_bytes_to_clusters(s,
@@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb)
         ret = qed_aio_read_data(acb, ret, offset, len);
     }

-    if (ret < 0) {
-        if (ret != -EINPROGRESS) {
-            qed_aio_complete(acb, ret);
-        }
+    if (ret < 0 && ret != -EAGAIN) {
+        qed_aio_complete(acb, ret);
         return;
     }
 }
diff --git a/block/qed.h b/block/qed.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
     uint32_t l2_mask;

     /* Allocating write request queue */
-    QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
+    QEDAIOCB *allocating_acb;
+    CoQueue allocating_write_reqs;
     bool allocating_write_reqs_plugged;

     /* Periodic flush and clear need check flag */
--
1.8.3.1

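For readers unfamiliar with CoQueue, the same serialisation idea can be expressed with pthreads standing in for coroutines; a condition variable plays the role of qemu_co_queue_wait()/qemu_co_enter_next(). This is an analogy, not the QEMU primitive (coroutines yield cooperatively instead of blocking a thread):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  queue = PTHREAD_COND_INITIALIZER;
    static bool allocating;           /* ~ s->allocating_acb != NULL */

    static void allocating_write(void)
    {
        pthread_mutex_lock(&lock);
        while (allocating) {
            pthread_cond_wait(&queue, &lock);  /* ~ qemu_co_queue_wait() */
        }
        allocating = true;
        pthread_mutex_unlock(&lock);

        /* ... perform the allocating write ... */

        pthread_mutex_lock(&lock);
        allocating = false;
        pthread_cond_signal(&queue);           /* ~ qemu_co_enter_next() */
        pthread_mutex_unlock(&lock);
    }
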
Deleted patch

This fixes the last place where we degraded from AIO to actual blocking
synchronous I/O requests. Putting it into a coroutine means that instead
of blocking, the coroutine simply yields while doing I/O.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
     qemu_co_enter_next(&s->allocating_write_reqs);
 }

-static void qed_clear_need_check(void *opaque, int ret)
+static void qed_need_check_timer_entry(void *opaque)
 {
     BDRVQEDState *s = opaque;
+    int ret;

-    if (ret) {
+    /* The timer should only fire when allocating writes have drained */
+    assert(!s->allocating_acb);
+
+    trace_qed_need_check_timer_cb(s);
+
+    qed_acquire(s);
+    qed_plug_allocating_write_reqs(s);
+
+    /* Ensure writes are on disk before clearing flag */
+    ret = bdrv_co_flush(s->bs->file->bs);
+    qed_release(s);
+    if (ret < 0) {
         qed_unplug_allocating_write_reqs(s);
         return;
     }
@@ -XXX,XX +XXX,XX @@ static void qed_clear_need_check(void *opaque, int ret)

     qed_unplug_allocating_write_reqs(s);

-    ret = bdrv_flush(s->bs);
+    ret = bdrv_co_flush(s->bs);
     (void) ret;
 }

 static void qed_need_check_timer_cb(void *opaque)
 {
-    BDRVQEDState *s = opaque;
-
-    /* The timer should only fire when allocating writes have drained */
-    assert(!s->allocating_acb);
-
-    trace_qed_need_check_timer_cb(s);
-
-    qed_acquire(s);
-    qed_plug_allocating_write_reqs(s);
-
-    /* Ensure writes are on disk before clearing flag */
-    bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
-    qed_release(s);
+    Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
+    qemu_coroutine_enter(co);
 }

 void qed_acquire(BDRVQEDState *s)
--
1.8.3.1

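The two calls at the end are the standard QEMU pattern for entering coroutine context from a plain callback; in outline (usage shape only, not compilable standalone; the entry function takes a single void * argument):

    /* A timer (or BH, or fd) callback runs outside coroutine context, so
     * it cannot yield. Spawning a coroutine moves the potentially blocking
     * work somewhere that can:
     *
     *     static void coroutine_fn entry(void *opaque)
     *     {
     *         ...bdrv_co_flush() and friends may yield here...
     *     }
     *
     *     static void timer_cb(void *opaque)
     *     {
     *         Coroutine *co = qemu_coroutine_create(entry, opaque);
     *         qemu_coroutine_enter(co);  // runs entry() until first yield
     *     }
     */
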
Deleted patch

All functions that are marked coroutine_fn can directly call the
bdrv_co_* version of functions instead of going through the wrapper.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
     };
     qemu_iovec_init_external(&qiov, &iov, 1);

-    ret = bdrv_preadv(s->bs->file, 0, &qiov);
+    ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0);
     if (ret < 0) {
         goto out;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
     /* Update header */
     qed_header_cpu_to_le(&s->header, (QEDHeader *) buf);

-    ret = bdrv_pwritev(s->bs->file, 0, &qiov);
+    ret = bdrv_co_pwritev(s->bs->file, 0, qiov.size, &qiov, 0);
     if (ret < 0) {
         goto out;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
     qemu_iovec_concat(*backing_qiov, qiov, 0, size);

     BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-    ret = bdrv_preadv(s->bs->backing, pos, *backing_qiov);
+    ret = bdrv_co_preadv(s->bs->backing, pos, size, *backing_qiov, 0);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
     }

     BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_pwritev(s->bs->file, offset, &qiov);
+    ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0);
     if (ret < 0) {
         goto out;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
     trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);

     BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_pwritev(s->bs->file, offset, &acb->cur_qiov);
+    ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
+                          &acb->cur_qiov, 0);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
      * region. The solution is to flush after writing a new data
      * cluster and before updating the L2 table.
      */
-    ret = bdrv_flush(s->bs->file->bs);
+    ret = bdrv_co_flush(s->bs->file->bs);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
     }

     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    ret = bdrv_preadv(bs->file, offset, &acb->cur_qiov);
+    ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
+                         &acb->cur_qiov, 0);
     if (ret < 0) {
         return ret;
     }
--
1.8.3.1

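Note the signature difference the conversion has to account for, visible in every hunk above: the wrappers took (child, offset, qiov) and derived the size from the qiov, while the coroutine versions take an explicit byte count plus a flags argument. In outline:

    ret = bdrv_preadv(child, offset, &qiov);                  /* wrapper */
    ret = bdrv_co_preadv(child, offset, qiov.size, &qiov, 0); /* direct  */

The trailing 0 is the flags argument (BdrvRequestFlags); the patch passes no flags anywhere.
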
Deleted patch

From: "sochin.jiang" <sochin.jiang@huawei.com>

img_commit could fall into an infinite loop calling run_block_job() if
its blockjob fails on an I/O error; fix this already-known problem.

Signed-off-by: sochin.jiang <sochin.jiang@huawei.com>
Message-id: 1497509253-28941-1-git-send-email-sochin.jiang@huawei.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 blockjob.c               |  4 ++--
 include/block/blockjob.h | 18 ++++++++++++++++++
 qemu-img.c               | 20 +++++++++++++-------
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index XXXXXXX..XXXXXXX 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -XXX,XX +XXX,XX @@ static void block_job_resume(BlockJob *job)
     block_job_enter(job);
 }

-static void block_job_ref(BlockJob *job)
+void block_job_ref(BlockJob *job)
 {
     ++job->refcnt;
 }
@@ -XXX,XX +XXX,XX @@ static void block_job_attached_aio_context(AioContext *new_context,
                                            void *opaque);
 static void block_job_detach_aio_context(void *opaque);

-static void block_job_unref(BlockJob *job)
+void block_job_unref(BlockJob *job)
 {
     if (--job->refcnt == 0) {
         BlockDriverState *bs = blk_bs(job->blk);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -XXX,XX +XXX,XX @@ void block_job_iostatus_reset(BlockJob *job);
 BlockJobTxn *block_job_txn_new(void);

 /**
+ * block_job_ref:
+ *
+ * Add a reference to BlockJob refcnt, it will be decreased with
+ * block_job_unref, and then be freed if it comes to be the last
+ * reference.
+ */
+void block_job_ref(BlockJob *job);
+
+/**
+ * block_job_unref:
+ *
+ * Release a reference that was previously acquired with block_job_ref
+ * or block_job_create. If it's the last reference to the object, it will be
+ * freed.
+ */
+void block_job_unref(BlockJob *job);
+
+/**
  * block_job_txn_unref:
  *
  * Release a reference that was previously acquired with block_job_txn_add_job
diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ static void common_block_job_cb(void *opaque, int ret)
 static void run_block_job(BlockJob *job, Error **errp)
 {
     AioContext *aio_context = blk_get_aio_context(job->blk);
+    int ret = 0;

-    /* FIXME In error cases, the job simply goes away and we access a dangling
-     * pointer below. */
     aio_context_acquire(aio_context);
+    block_job_ref(job);
     do {
         aio_poll(aio_context, true);
         qemu_progress_print(job->len ?
                             ((float)job->offset / job->len * 100.f) : 0.0f, 0);
-    } while (!job->ready);
+    } while (!job->ready && !job->completed);

-    block_job_complete_sync(job, errp);
+    if (!job->completed) {
+        ret = block_job_complete_sync(job, errp);
+    } else {
+        ret = job->ret;
+    }
+    block_job_unref(job);
     aio_context_release(aio_context);

-    /* A block job may finish instantaneously without publishing any progress,
-     * so just signal completion here */
-    qemu_progress_print(100.f, 0);
+    /* publish completion progress only when success */
+    if (!ret) {
+        qemu_progress_print(100.f, 0);
+    }
 }

 static int img_commit(int argc, char **argv)
--
1.8.3.1

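The general pattern behind the qemu-img fix, as a standalone sketch (stub types and functions, not the QEMU job API): take a reference before a polling loop that can observe the object's completion, so a failing job cannot be freed while the loop still dereferences it.

    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct Job {
        int refcnt;
        bool completed;
        int ret;
    } Job;

    static void poll_once(Job *j) { /* drives the job; may complete it */ }

    static void job_ref(Job *j)   { ++j->refcnt; }
    static void job_unref(Job *j) { if (--j->refcnt == 0) { free(j); } }

    static int run_job(Job *j)
    {
        int ret;

        job_ref(j);                  /* keep j alive across the loop */
        while (!j->completed) {
            poll_once(j);            /* may drop the job's own reference */
        }
        ret = j->ret;                /* safe: our reference is still held */
        job_unref(j);
        return ret;
    }
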
Deleted patch

From: Max Reitz <mreitz@redhat.com>

uri_parse(...)->scheme may be NULL. In fact, probably every field may be
NULL, and the callers do test this for all of the other fields but not
for scheme (except for block/gluster.c; block/vxhs.c does not access
that field at all).

We can easily fix this by using g_strcmp0() instead of strcmp().

Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20170613205726.13544-1-mreitz@redhat.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/nbd.c      | 6 +++---
 block/nfs.c      | 2 +-
 block/sheepdog.c | 6 +++---
 block/ssh.c      | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -XXX,XX +XXX,XX @@ static int nbd_parse_uri(const char *filename, QDict *options)
     }

     /* transport */
-    if (!strcmp(uri->scheme, "nbd")) {
+    if (!g_strcmp0(uri->scheme, "nbd")) {
         is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+tcp")) {
+    } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) {
         is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+unix")) {
+    } else if (!g_strcmp0(uri->scheme, "nbd+unix")) {
         is_unix = true;
     } else {
         ret = -EINVAL;
diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
         error_setg(errp, "Invalid URI specified");
         goto out;
     }
-    if (strcmp(uri->scheme, "nfs") != 0) {
+    if (g_strcmp0(uri->scheme, "nfs") != 0) {
         error_setg(errp, "URI scheme must be 'nfs'");
         goto out;
     }
diff --git a/block/sheepdog.c b/block/sheepdog.c
index XXXXXXX..XXXXXXX 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -XXX,XX +XXX,XX @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
     }

     /* transport */
-    if (!strcmp(uri->scheme, "sheepdog")) {
+    if (!g_strcmp0(uri->scheme, "sheepdog")) {
         is_unix = false;
-    } else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
+    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
         is_unix = false;
-    } else if (!strcmp(uri->scheme, "sheepdog+unix")) {
+    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
         is_unix = true;
     } else {
         error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
diff --git a/block/ssh.c b/block/ssh.c
index XXXXXXX..XXXXXXX 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -XXX,XX +XXX,XX @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
         return -EINVAL;
     }

-    if (strcmp(uri->scheme, "ssh") != 0) {
+    if (g_strcmp0(uri->scheme, "ssh") != 0) {
         error_setg(errp, "URI scheme must be 'ssh'");
         goto err;
     }
--
1.8.3.1

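A minimal GLib example of the difference (compile with pkg-config --cflags --libs glib-2.0): strcmp() on a NULL pointer is undefined behaviour, while g_strcmp0() defines NULL as sorting before any non-NULL string, which is exactly what a URI with no scheme needs.

    #include <glib.h>

    int main(void)
    {
        const char *scheme = NULL;  /* what a scheme-less URI can yield */

        /* strcmp(scheme, "nfs") would dereference NULL here. */
        g_assert(g_strcmp0(scheme, "nfs") != 0);  /* NULL != "nfs", safe */
        g_assert(g_strcmp0("nfs", "nfs") == 0);
        g_assert(g_strcmp0(NULL, NULL) == 0);
        return 0;
    }
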