1 | The following changes since commit 4c8c1cc544dbd5e2564868e61c5037258e393832: | 1 | The following changes since commit ac5f7bf8e208cd7893dbb1a9520559e569a4677c: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging (2017-06-22 19:01:58 +0100) | 3 | Merge tag 'migration-20230424-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-04-24 15:00:39 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://repo.or.cz/qemu/kevin.git tags/for-upstream | ||
7 | 8 | ||
8 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 9 | for you to fetch changes up to 8c1e8fb2e7fc2cbeb57703e143965a4cd3ad301a: |
9 | 10 | ||
10 | for you to fetch changes up to 1512008812410ca4054506a7c44343088abdd977: | 11 | block/monitor: Fix crash when executing HMP commit (2023-04-25 15:11:57 +0200) |
11 | |||
12 | Merge remote-tracking branch 'mreitz/tags/pull-block-2017-06-23' into queue-block (2017-06-23 14:09:12 +0200) | ||
13 | 12 | ||
14 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
15 | |||
16 | Block layer patches | 14 | Block layer patches |
17 | 15 | ||
16 | - Protect BlockBackend.queued_requests with its own lock | ||
17 | - Switch to AIO_WAIT_WHILE_UNLOCKED() where possible | ||
18 | - AioContext removal: LinuxAioState/LuringState/ThreadPool | ||
19 | - Add more coroutine_fn annotations, use bdrv/blk_co_* | ||
20 | - Fix crash when executing hmp_commit | ||
21 | |||
18 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
19 | Alberto Garcia (9): | 23 | Emanuele Giuseppe Esposito (4): |
20 | throttle: Update throttle-groups.c documentation | 24 | linux-aio: use LinuxAioState from the running thread |
21 | qcow2: Remove unused Error variable in do_perform_cow() | 25 | io_uring: use LuringState from the running thread |
22 | qcow2: Use unsigned int for both members of Qcow2COWRegion | 26 | thread-pool: use ThreadPool from the running thread |
23 | qcow2: Make perform_cow() call do_perform_cow() twice | 27 | thread-pool: avoid passing the pool parameter every time |
24 | qcow2: Split do_perform_cow() into _read(), _encrypt() and _write() | ||
25 | qcow2: Allow reading both COW regions with only one request | ||
26 | qcow2: Pass a QEMUIOVector to do_perform_cow_{read,write}() | ||
27 | qcow2: Merge the writing of the COW regions with the guest data | ||
28 | qcow2: Use offset_into_cluster() and offset_to_l2_index() | ||
29 | 28 | ||
30 | Kevin Wolf (37): | 29 | Paolo Bonzini (9): |
31 | commit: Fix completion with extra reference | 30 | vvfat: mark various functions as coroutine_fn |
32 | qemu-iotests: Allow starting new qemu after cleanup | 31 | blkdebug: add missing coroutine_fn annotation |
33 | qemu-iotests: Test exiting qemu with running job | 32 | mirror: make mirror_flush a coroutine_fn, do not use co_wrappers |
34 | doc: Document generic -blockdev options | 33 | nbd: mark more coroutine_fns, do not use co_wrappers |
35 | doc: Document driver-specific -blockdev options | 34 | 9pfs: mark more coroutine_fns |
36 | qed: Use bottom half to resume waiting requests | 35 | qemu-pr-helper: mark more coroutine_fns |
37 | qed: Make qed_read_table() synchronous | 36 | tests: mark more coroutine_fns |
38 | qed: Remove callback from qed_read_table() | 37 | qcow2: mark various functions as coroutine_fn and GRAPH_RDLOCK |
39 | qed: Remove callback from qed_read_l2_table() | 38 | vmdk: make vmdk_is_cid_valid a coroutine_fn |
40 | qed: Remove callback from qed_find_cluster() | ||
41 | qed: Make qed_read_backing_file() synchronous | ||
42 | qed: Make qed_copy_from_backing_file() synchronous | ||
43 | qed: Remove callback from qed_copy_from_backing_file() | ||
44 | qed: Make qed_write_header() synchronous | ||
45 | qed: Remove callback from qed_write_header() | ||
46 | qed: Make qed_write_table() synchronous | ||
47 | qed: Remove GenericCB | ||
48 | qed: Remove callback from qed_write_table() | ||
49 | qed: Make qed_aio_read_data() synchronous | ||
50 | qed: Make qed_aio_write_main() synchronous | ||
51 | qed: Inline qed_commit_l2_update() | ||
52 | qed: Add return value to qed_aio_write_l1_update() | ||
53 | qed: Add return value to qed_aio_write_l2_update() | ||
54 | qed: Add return value to qed_aio_write_main() | ||
55 | qed: Add return value to qed_aio_write_cow() | ||
56 | qed: Add return value to qed_aio_write_inplace/alloc() | ||
57 | qed: Add return value to qed_aio_read/write_data() | ||
58 | qed: Remove ret argument from qed_aio_next_io() | ||
59 | qed: Remove recursion in qed_aio_next_io() | ||
60 | qed: Implement .bdrv_co_readv/writev | ||
61 | qed: Use CoQueue for serialising allocations | ||
62 | qed: Simplify request handling | ||
63 | qed: Use a coroutine for need_check_timer | ||
64 | qed: Add coroutine_fn to I/O path functions | ||
65 | qed: Use bdrv_co_* for coroutine_fns | ||
66 | block: Remove bdrv_aio_readv/writev/flush() | ||
67 | Merge remote-tracking branch 'mreitz/tags/pull-block-2017-06-23' into queue-block | ||
68 | |||
69 | Manos Pitsidianakis (1): | ||
70 | block: change variable names in BlockDriverState | ||
71 | |||
72 | Max Reitz (3): | ||
73 | blkdebug: Catch bs->exact_filename overflow | ||
74 | blkverify: Catch bs->exact_filename overflow | ||
75 | block: Do not strcmp() with NULL uri->scheme | ||
76 | 39 | ||
77 | Stefan Hajnoczi (10): | 40 | Stefan Hajnoczi (10): |
78 | block: count bdrv_co_rw_vmstate() requests | 41 | block: make BlockBackend->quiesce_counter atomic |
79 | block: use BDRV_POLL_WHILE() in bdrv_rw_vmstate() | 42 | block: make BlockBackend->disable_request_queuing atomic |
80 | migration: avoid recursive AioContext locking in save_vmstate() | 43 | block: protect BlockBackend->queued_requests with a lock |
81 | migration: use bdrv_drain_all_begin/end() instead bdrv_drain_all() | 44 | block: don't acquire AioContext lock in bdrv_drain_all() |
82 | virtio-pci: use ioeventfd even when KVM is disabled | 45 | block: convert blk_exp_close_all_type() to AIO_WAIT_WHILE_UNLOCKED() |
83 | migration: hold AioContext lock for loadvm qemu_fclose() | 46 | block: convert bdrv_graph_wrlock() to AIO_WAIT_WHILE_UNLOCKED() |
84 | qemu-iotests: 068: extract _qemu() function | 47 | block: convert bdrv_drain_all_begin() to AIO_WAIT_WHILE_UNLOCKED() |
85 | qemu-iotests: 068: use -drive/-device instead of -hda | 48 | hmp: convert handle_hmp_command() to AIO_WAIT_WHILE_UNLOCKED() |
86 | qemu-iotests: 068: test iothread mode | 49 | monitor: convert monitor_cleanup() to AIO_WAIT_WHILE_UNLOCKED() |
87 | qemu-img: don't shadow opts variable in img_dd() | 50 | block: add missing coroutine_fn to bdrv_sum_allocated_file_size() |
88 | 51 | ||
89 | Stephen Bates (1): | 52 | Wang Liang (1): |
90 | nvme: Add support for Read Data and Write Data in CMBs. | 53 | block/monitor: Fix crash when executing HMP commit |
91 | 54 | ||
92 | sochin.jiang (1): | 55 | Wilfred Mallawa (1): |
93 | fix: avoid an infinite loop or a dangling pointer problem in img_commit | 56 | include/block: fixup typos |
94 | 57 | ||
95 | block/Makefile.objs | 2 +- | 58 | block/qcow2.h | 15 +++++----- |
96 | block/blkdebug.c | 46 +-- | 59 | hw/9pfs/9p.h | 4 +-- |
97 | block/blkreplay.c | 8 +- | 60 | include/block/aio-wait.h | 2 +- |
98 | block/blkverify.c | 12 +- | 61 | include/block/aio.h | 8 ------ |
99 | block/block-backend.c | 22 +- | 62 | include/block/block_int-common.h | 2 +- |
100 | block/commit.c | 7 + | 63 | include/block/raw-aio.h | 33 +++++++++++++++------- |
101 | block/file-posix.c | 34 +- | 64 | include/block/thread-pool.h | 15 ++++++---- |
102 | block/io.c | 240 ++----------- | 65 | include/sysemu/block-backend-io.h | 5 ++++ |
103 | block/iscsi.c | 20 +- | 66 | backends/tpm/tpm_backend.c | 4 +-- |
104 | block/mirror.c | 8 +- | 67 | block.c | 2 +- |
105 | block/nbd-client.c | 8 +- | 68 | block/blkdebug.c | 4 +-- |
106 | block/nbd-client.h | 4 +- | 69 | block/block-backend.c | 45 ++++++++++++++++++------------ |
107 | block/nbd.c | 6 +- | 70 | block/export/export.c | 2 +- |
108 | block/nfs.c | 2 +- | 71 | block/file-posix.c | 45 ++++++++++++------------------ |
109 | block/qcow2-cluster.c | 201 ++++++++--- | 72 | block/file-win32.c | 4 +-- |
110 | block/qcow2.c | 94 +++-- | 73 | block/graph-lock.c | 2 +- |
111 | block/qcow2.h | 11 +- | 74 | block/io.c | 2 +- |
112 | block/qed-cluster.c | 124 +++---- | 75 | block/io_uring.c | 23 ++++++++++------ |
113 | block/qed-gencb.c | 33 -- | 76 | block/linux-aio.c | 29 ++++++++++++-------- |
114 | block/qed-table.c | 261 +++++--------- | 77 | block/mirror.c | 4 +-- |
115 | block/qed.c | 779 ++++++++++++++++------------------------- | 78 | block/monitor/block-hmp-cmds.c | 10 ++++--- |
116 | block/qed.h | 54 +-- | 79 | block/qcow2-bitmap.c | 2 +- |
117 | block/raw-format.c | 8 +- | 80 | block/qcow2-cluster.c | 21 ++++++++------ |
118 | block/rbd.c | 4 +- | 81 | block/qcow2-refcount.c | 8 +++--- |
119 | block/sheepdog.c | 12 +- | 82 | block/qcow2-snapshot.c | 25 +++++++++-------- |
120 | block/ssh.c | 2 +- | 83 | block/qcow2-threads.c | 3 +- |
121 | block/throttle-groups.c | 2 +- | 84 | block/qcow2.c | 27 +++++++++--------- |
122 | block/trace-events | 3 - | 85 | block/vmdk.c | 2 +- |
123 | blockjob.c | 4 +- | 86 | block/vvfat.c | 58 ++++++++++++++++++++------------------- |
124 | hw/block/nvme.c | 83 +++-- | 87 | hw/9pfs/codir.c | 6 ++-- |
125 | hw/block/nvme.h | 1 + | 88 | hw/9pfs/coth.c | 3 +- |
126 | hw/virtio/virtio-pci.c | 2 +- | 89 | hw/ppc/spapr_nvdimm.c | 6 ++-- |
127 | include/block/block.h | 16 +- | 90 | hw/virtio/virtio-pmem.c | 3 +- |
128 | include/block/block_int.h | 6 +- | 91 | monitor/hmp.c | 2 +- |
129 | include/block/blockjob.h | 18 + | 92 | monitor/monitor.c | 4 +-- |
130 | include/sysemu/block-backend.h | 20 +- | 93 | nbd/server.c | 48 ++++++++++++++++---------------- |
131 | migration/savevm.c | 32 +- | 94 | scsi/pr-manager.c | 3 +- |
132 | qemu-img.c | 29 +- | 95 | scsi/qemu-pr-helper.c | 25 ++++++++--------- |
133 | qemu-io-cmds.c | 46 +-- | 96 | tests/unit/test-thread-pool.c | 14 ++++------ |
134 | qemu-options.hx | 221 ++++++++++-- | 97 | util/thread-pool.c | 25 ++++++++--------- |
135 | tests/qemu-iotests/068 | 37 +- | 98 | 40 files changed, 283 insertions(+), 262 deletions(-) |
136 | tests/qemu-iotests/068.out | 11 +- | ||
137 | tests/qemu-iotests/185 | 206 +++++++++++ | ||
138 | tests/qemu-iotests/185.out | 59 ++++ | ||
139 | tests/qemu-iotests/common.qemu | 3 + | ||
140 | tests/qemu-iotests/group | 1 + | ||
141 | 46 files changed, 1477 insertions(+), 1325 deletions(-) | ||
142 | delete mode 100644 block/qed-gencb.c | ||
143 | create mode 100755 tests/qemu-iotests/185 | ||
144 | create mode 100644 tests/qemu-iotests/185.out | ||
145 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | commit_complete() can't assume that after its block_job_completed() the | ||
2 | job is actually immediately freed; someone else may still be holding | ||
3 | references. In this case, the op blockers on the intermediate nodes make | ||
4 | the graph reconfiguration in the completion code fail. | ||
5 | 1 | ||
6 | Call block_job_remove_all_bdrv() manually so that we know for sure that | ||
7 | any blockers on intermediate nodes are given up. | ||
8 | |||
9 | Cc: qemu-stable@nongnu.org | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
13 | --- | ||
14 | block/commit.c | 7 +++++++ | ||
15 | 1 file changed, 7 insertions(+) | ||
16 | |||
17 | diff --git a/block/commit.c b/block/commit.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/commit.c | ||
20 | +++ b/block/commit.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static void commit_complete(BlockJob *job, void *opaque) | ||
22 | } | ||
23 | g_free(s->backing_file_str); | ||
24 | blk_unref(s->top); | ||
25 | + | ||
26 | + /* If there is more than one reference to the job (e.g. if called from | ||
27 | + * block_job_finish_sync()), block_job_completed() won't free it and | ||
28 | + * therefore the blockers on the intermediate nodes remain. This would | ||
29 | + * cause bdrv_set_backing_hd() to fail. */ | ||
30 | + block_job_remove_all_bdrv(job); | ||
31 | + | ||
32 | block_job_completed(&s->common, ret); | ||
33 | g_free(data); | ||
34 | |||
35 | -- | ||
36 | 1.8.3.1 | ||
37 | |||
38 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | After _cleanup_qemu(), test cases should be able to start the next qemu | ||
2 | process and call _cleanup_qemu() for that one as well. For this to work | ||
3 | cleanly, we need to improve the cleanup so that the second invocation | ||
4 | doesn't try to kill the qemu instances from the first invocation a | ||
5 | second time (which would result in error messages). | ||
6 | 1 | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
8 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
9 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
10 | --- | ||
11 | tests/qemu-iotests/common.qemu | 3 +++ | ||
12 | 1 file changed, 3 insertions(+) | ||
13 | |||
14 | diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tests/qemu-iotests/common.qemu | ||
17 | +++ b/tests/qemu-iotests/common.qemu | ||
18 | @@ -XXX,XX +XXX,XX @@ function _cleanup_qemu() | ||
19 | rm -f "${QEMU_FIFO_IN}_${i}" "${QEMU_FIFO_OUT}_${i}" | ||
20 | eval "exec ${QEMU_IN[$i]}<&-" # close file descriptors | ||
21 | eval "exec ${QEMU_OUT[$i]}<&-" | ||
22 | + | ||
23 | + unset QEMU_IN[$i] | ||
24 | + unset QEMU_OUT[$i] | ||
25 | done | ||
26 | } | ||
27 | -- | ||
28 | 1.8.3.1 | ||
29 | |||
30 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When qemu is exited, all running jobs should be cancelled successfully. | ||
2 | This adds a test for this for all types of block jobs that currently | ||
3 | exist in qemu. | ||
4 | 1 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | --- | ||
8 | tests/qemu-iotests/185 | 206 +++++++++++++++++++++++++++++++++++++++++++++ | ||
9 | tests/qemu-iotests/185.out | 59 +++++++++++++ | ||
10 | tests/qemu-iotests/group | 1 + | ||
11 | 3 files changed, 266 insertions(+) | ||
12 | create mode 100755 tests/qemu-iotests/185 | ||
13 | create mode 100644 tests/qemu-iotests/185.out | ||
14 | |||
15 | diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 | ||
16 | new file mode 100755 | ||
17 | index XXXXXXX..XXXXXXX | ||
18 | --- /dev/null | ||
19 | +++ b/tests/qemu-iotests/185 | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | +#!/bin/bash | ||
22 | +# | ||
23 | +# Test exiting qemu while jobs are still running | ||
24 | +# | ||
25 | +# Copyright (C) 2017 Red Hat, Inc. | ||
26 | +# | ||
27 | +# This program is free software; you can redistribute it and/or modify | ||
28 | +# it under the terms of the GNU General Public License as published by | ||
29 | +# the Free Software Foundation; either version 2 of the License, or | ||
30 | +# (at your option) any later version. | ||
31 | +# | ||
32 | +# This program is distributed in the hope that it will be useful, | ||
33 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
34 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
35 | +# GNU General Public License for more details. | ||
36 | +# | ||
37 | +# You should have received a copy of the GNU General Public License | ||
38 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
39 | +# | ||
40 | + | ||
41 | +# creator | ||
42 | +owner=kwolf@redhat.com | ||
43 | + | ||
44 | +seq=`basename $0` | ||
45 | +echo "QA output created by $seq" | ||
46 | + | ||
47 | +here=`pwd` | ||
48 | +status=1 # failure is the default! | ||
49 | + | ||
50 | +MIG_SOCKET="${TEST_DIR}/migrate" | ||
51 | + | ||
52 | +_cleanup() | ||
53 | +{ | ||
54 | + rm -f "${TEST_IMG}.mid" | ||
55 | + rm -f "${TEST_IMG}.copy" | ||
56 | + _cleanup_test_img | ||
57 | + _cleanup_qemu | ||
58 | +} | ||
59 | +trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
60 | + | ||
61 | +# get standard environment, filters and checks | ||
62 | +. ./common.rc | ||
63 | +. ./common.filter | ||
64 | +. ./common.qemu | ||
65 | + | ||
66 | +_supported_fmt qcow2 | ||
67 | +_supported_proto file | ||
68 | +_supported_os Linux | ||
69 | + | ||
70 | +size=64M | ||
71 | +TEST_IMG="${TEST_IMG}.base" _make_test_img $size | ||
72 | + | ||
73 | +echo | ||
74 | +echo === Starting VM === | ||
75 | +echo | ||
76 | + | ||
77 | +qemu_comm_method="qmp" | ||
78 | + | ||
79 | +_launch_qemu \ | ||
80 | + -drive file="${TEST_IMG}.base",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
81 | +h=$QEMU_HANDLE | ||
82 | +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
83 | + | ||
84 | +echo | ||
85 | +echo === Creating backing chain === | ||
86 | +echo | ||
87 | + | ||
88 | +_send_qemu_cmd $h \ | ||
89 | + "{ 'execute': 'blockdev-snapshot-sync', | ||
90 | + 'arguments': { 'device': 'disk', | ||
91 | + 'snapshot-file': '$TEST_IMG.mid', | ||
92 | + 'format': '$IMGFMT', | ||
93 | + 'mode': 'absolute-paths' } }" \ | ||
94 | + "return" | ||
95 | + | ||
96 | +_send_qemu_cmd $h \ | ||
97 | + "{ 'execute': 'human-monitor-command', | ||
98 | + 'arguments': { 'command-line': | ||
99 | + 'qemu-io disk \"write 0 4M\"' } }" \ | ||
100 | + "return" | ||
101 | + | ||
102 | +_send_qemu_cmd $h \ | ||
103 | + "{ 'execute': 'blockdev-snapshot-sync', | ||
104 | + 'arguments': { 'device': 'disk', | ||
105 | + 'snapshot-file': '$TEST_IMG', | ||
106 | + 'format': '$IMGFMT', | ||
107 | + 'mode': 'absolute-paths' } }" \ | ||
108 | + "return" | ||
109 | + | ||
110 | +echo | ||
111 | +echo === Start commit job and exit qemu === | ||
112 | +echo | ||
113 | + | ||
114 | +# Note that the reference output intentionally includes the 'offset' field in | ||
115 | +# BLOCK_JOB_CANCELLED events for all of the following block jobs. They are | ||
116 | +# predictable and any change in the offsets would hint at a bug in the job | ||
117 | +# throttling code. | ||
118 | +# | ||
119 | +# In order to achieve these predictable offsets, all of the following tests | ||
120 | +# use speed=65536. Each job will perform exactly one iteration before it has | ||
121 | +# to sleep at least for a second, which is plenty of time for the 'quit' QMP | ||
122 | +# command to be received (after receiving the command, the rest runs | ||
123 | +# synchronously, so jobs can arbitrarily continue or complete). | ||
124 | +# | ||
125 | +# The buffer size for commit and streaming is 512k (waiting for 8 seconds after | ||
126 | +# the first request), for active commit and mirror it's large enough to cover | ||
127 | +# the full 4M, and for backup it's the qcow2 cluster size, which we know is | ||
128 | +# 64k. As all of these are at least as large as the speed, we are sure that the | ||
129 | +# offset doesn't advance after the first iteration before qemu exits. | ||
130 | + | ||
131 | +_send_qemu_cmd $h \ | ||
132 | + "{ 'execute': 'block-commit', | ||
133 | + 'arguments': { 'device': 'disk', | ||
134 | + 'base':'$TEST_IMG.base', | ||
135 | + 'top': '$TEST_IMG.mid', | ||
136 | + 'speed': 65536 } }" \ | ||
137 | + "return" | ||
138 | + | ||
139 | +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" | ||
140 | +wait=1 _cleanup_qemu | ||
141 | + | ||
142 | +echo | ||
143 | +echo === Start active commit job and exit qemu === | ||
144 | +echo | ||
145 | + | ||
146 | +_launch_qemu \ | ||
147 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
148 | +h=$QEMU_HANDLE | ||
149 | +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
150 | + | ||
151 | +_send_qemu_cmd $h \ | ||
152 | + "{ 'execute': 'block-commit', | ||
153 | + 'arguments': { 'device': 'disk', | ||
154 | + 'base':'$TEST_IMG.base', | ||
155 | + 'speed': 65536 } }" \ | ||
156 | + "return" | ||
157 | + | ||
158 | +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" | ||
159 | +wait=1 _cleanup_qemu | ||
160 | + | ||
161 | +echo | ||
162 | +echo === Start mirror job and exit qemu === | ||
163 | +echo | ||
164 | + | ||
165 | +_launch_qemu \ | ||
166 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
167 | +h=$QEMU_HANDLE | ||
168 | +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
169 | + | ||
170 | +_send_qemu_cmd $h \ | ||
171 | + "{ 'execute': 'drive-mirror', | ||
172 | + 'arguments': { 'device': 'disk', | ||
173 | + 'target': '$TEST_IMG.copy', | ||
174 | + 'format': '$IMGFMT', | ||
175 | + 'sync': 'full', | ||
176 | + 'speed': 65536 } }" \ | ||
177 | + "return" | ||
178 | + | ||
179 | +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" | ||
180 | +wait=1 _cleanup_qemu | ||
181 | + | ||
182 | +echo | ||
183 | +echo === Start backup job and exit qemu === | ||
184 | +echo | ||
185 | + | ||
186 | +_launch_qemu \ | ||
187 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
188 | +h=$QEMU_HANDLE | ||
189 | +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
190 | + | ||
191 | +_send_qemu_cmd $h \ | ||
192 | + "{ 'execute': 'drive-backup', | ||
193 | + 'arguments': { 'device': 'disk', | ||
194 | + 'target': '$TEST_IMG.copy', | ||
195 | + 'format': '$IMGFMT', | ||
196 | + 'sync': 'full', | ||
197 | + 'speed': 65536 } }" \ | ||
198 | + "return" | ||
199 | + | ||
200 | +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" | ||
201 | +wait=1 _cleanup_qemu | ||
202 | + | ||
203 | +echo | ||
204 | +echo === Start streaming job and exit qemu === | ||
205 | +echo | ||
206 | + | ||
207 | +_launch_qemu \ | ||
208 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
209 | +h=$QEMU_HANDLE | ||
210 | +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
211 | + | ||
212 | +_send_qemu_cmd $h \ | ||
213 | + "{ 'execute': 'block-stream', | ||
214 | + 'arguments': { 'device': 'disk', | ||
215 | + 'speed': 65536 } }" \ | ||
216 | + "return" | ||
217 | + | ||
218 | +_send_qemu_cmd $h "{ 'execute': 'quit' }" "return" | ||
219 | +wait=1 _cleanup_qemu | ||
220 | + | ||
221 | +_check_test_img | ||
222 | + | ||
223 | +# success, all done | ||
224 | +echo "*** done" | ||
225 | +rm -f $seq.full | ||
226 | +status=0 | ||
227 | diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out | ||
228 | new file mode 100644 | ||
229 | index XXXXXXX..XXXXXXX | ||
230 | --- /dev/null | ||
231 | +++ b/tests/qemu-iotests/185.out | ||
232 | @@ -XXX,XX +XXX,XX @@ | ||
233 | +QA output created by 185 | ||
234 | +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 | ||
235 | + | ||
236 | +=== Starting VM === | ||
237 | + | ||
238 | +{"return": {}} | ||
239 | + | ||
240 | +=== Creating backing chain === | ||
241 | + | ||
242 | +Formatting 'TEST_DIR/t.qcow2.mid', fmt=qcow2 size=67108864 backing_file=TEST_DIR/t.qcow2.base backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
243 | +{"return": {}} | ||
244 | +wrote 4194304/4194304 bytes at offset 0 | ||
245 | +4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
246 | +{"return": ""} | ||
247 | +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 backing_file=TEST_DIR/t.qcow2.mid backing_fmt=qcow2 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
248 | +{"return": {}} | ||
249 | + | ||
250 | +=== Start commit job and exit qemu === | ||
251 | + | ||
252 | +{"return": {}} | ||
253 | +{"return": {}} | ||
254 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} | ||
255 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}} | ||
256 | + | ||
257 | +=== Start active commit job and exit qemu === | ||
258 | + | ||
259 | +{"return": {}} | ||
260 | +{"return": {}} | ||
261 | +{"return": {}} | ||
262 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} | ||
263 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}} | ||
264 | + | ||
265 | +=== Start mirror job and exit qemu === | ||
266 | + | ||
267 | +{"return": {}} | ||
268 | +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
269 | +{"return": {}} | ||
270 | +{"return": {}} | ||
271 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} | ||
272 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}} | ||
273 | + | ||
274 | +=== Start backup job and exit qemu === | ||
275 | + | ||
276 | +{"return": {}} | ||
277 | +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 | ||
278 | +{"return": {}} | ||
279 | +{"return": {}} | ||
280 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} | ||
281 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}} | ||
282 | + | ||
283 | +=== Start streaming job and exit qemu === | ||
284 | + | ||
285 | +{"return": {}} | ||
286 | +{"return": {}} | ||
287 | +{"return": {}} | ||
288 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} | ||
289 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} | ||
290 | +No errors were found on the image. | ||
291 | +*** done | ||
292 | diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group | ||
293 | index XXXXXXX..XXXXXXX 100644 | ||
294 | --- a/tests/qemu-iotests/group | ||
295 | +++ b/tests/qemu-iotests/group | ||
296 | @@ -XXX,XX +XXX,XX @@ | ||
297 | 181 rw auto migration | ||
298 | 182 rw auto quick | ||
299 | 183 rw auto migration | ||
300 | +185 rw auto | ||
301 | -- | ||
302 | 1.8.3.1 | ||
303 | |||
304 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Avoid duplicating the QEMU command-line. | 3 | The main loop thread increments/decrements BlockBackend->quiesce_counter |
4 | when drained sections begin/end. The counter is read in the I/O code | ||
5 | path. Therefore this field is used to communicate between threads | ||
6 | without a lock. | ||
7 | |||
8 | Acquire/release are not necessary because the BlockBackend->in_flight | ||
9 | counter already uses sequentially consistent accesses and running I/O | ||
10 | requests hold that counter when blk_wait_while_drained() is called. | ||
11 | qatomic_read() can be used. | ||
12 | |||
13 | Use qatomic_fetch_inc()/qatomic_fetch_dec() for modifications even | ||
14 | though sequentially consistent atomic accesses are not strictly required | ||
15 | here. They are, however, nicer to read than multiple calls to | ||
16 | qatomic_read() and qatomic_set(). Since beginning and ending drain is | ||
17 | not a hot path the extra cost doesn't matter. | ||
4 | 18 | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 19 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
20 | Message-Id: <20230307210427.269214-2-stefanha@redhat.com> | ||
21 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
22 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 23 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
7 | --- | 24 | --- |
8 | tests/qemu-iotests/068 | 15 +++++++++------ | 25 | block/block-backend.c | 14 +++++++------- |
9 | 1 file changed, 9 insertions(+), 6 deletions(-) | 26 | 1 file changed, 7 insertions(+), 7 deletions(-) |
10 | 27 | ||
11 | diff --git a/tests/qemu-iotests/068 b/tests/qemu-iotests/068 | 28 | diff --git a/block/block-backend.c b/block/block-backend.c |
12 | index XXXXXXX..XXXXXXX 100755 | 29 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tests/qemu-iotests/068 | 30 | --- a/block/block-backend.c |
14 | +++ b/tests/qemu-iotests/068 | 31 | +++ b/block/block-backend.c |
15 | @@ -XXX,XX +XXX,XX @@ case "$QEMU_DEFAULT_MACHINE" in | 32 | @@ -XXX,XX +XXX,XX @@ struct BlockBackend { |
16 | ;; | 33 | NotifierList remove_bs_notifiers, insert_bs_notifiers; |
17 | esac | 34 | QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; |
18 | 35 | ||
19 | -# Give qemu some time to boot before saving the VM state | 36 | - int quiesce_counter; |
20 | -bash -c 'sleep 1; echo -e "savevm 0\nquit"' |\ | 37 | + int quiesce_counter; /* atomic: written under BQL, read by other threads */ |
21 | - $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" |\ | 38 | CoQueue queued_requests; |
22 | +_qemu() | 39 | bool disable_request_queuing; |
23 | +{ | 40 | |
24 | + $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" \ | 41 | @@ -XXX,XX +XXX,XX @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, |
25 | + "$@" |\ | 42 | blk->dev_opaque = opaque; |
26 | _filter_qemu | _filter_hmp | 43 | |
27 | +} | 44 | /* Are we currently quiesced? Should we enforce this right now? */ |
28 | + | 45 | - if (blk->quiesce_counter && ops && ops->drained_begin) { |
29 | +# Give qemu some time to boot before saving the VM state | 46 | + if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) { |
30 | +bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu | 47 | ops->drained_begin(opaque); |
31 | # Now try to continue from that VM state (this should just work) | 48 | } |
32 | -echo quit |\ | 49 | } |
33 | - $QEMU $platform_parm -nographic -monitor stdio -serial none -hda "$TEST_IMG" -loadvm 0 |\ | 50 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) |
34 | - _filter_qemu | _filter_hmp | 51 | { |
35 | +echo quit | _qemu -loadvm 0 | 52 | assert(blk->in_flight > 0); |
36 | 53 | ||
37 | # success, all done | 54 | - if (blk->quiesce_counter && !blk->disable_request_queuing) { |
38 | echo "*** done" | 55 | + if (qatomic_read(&blk->quiesce_counter) && !blk->disable_request_queuing) { |
56 | blk_dec_in_flight(blk); | ||
57 | qemu_co_queue_wait(&blk->queued_requests, NULL); | ||
58 | blk_inc_in_flight(blk); | ||
59 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child) | ||
60 | BlockBackend *blk = child->opaque; | ||
61 | ThrottleGroupMember *tgm = &blk->public.throttle_group_member; | ||
62 | |||
63 | - if (++blk->quiesce_counter == 1) { | ||
64 | + if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) { | ||
65 | if (blk->dev_ops && blk->dev_ops->drained_begin) { | ||
66 | blk->dev_ops->drained_begin(blk->dev_opaque); | ||
67 | } | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool blk_root_drained_poll(BdrvChild *child) | ||
69 | { | ||
70 | BlockBackend *blk = child->opaque; | ||
71 | bool busy = false; | ||
72 | - assert(blk->quiesce_counter); | ||
73 | + assert(qatomic_read(&blk->quiesce_counter)); | ||
74 | |||
75 | if (blk->dev_ops && blk->dev_ops->drained_poll) { | ||
76 | busy = blk->dev_ops->drained_poll(blk->dev_opaque); | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool blk_root_drained_poll(BdrvChild *child) | ||
78 | static void blk_root_drained_end(BdrvChild *child) | ||
79 | { | ||
80 | BlockBackend *blk = child->opaque; | ||
81 | - assert(blk->quiesce_counter); | ||
82 | + assert(qatomic_read(&blk->quiesce_counter)); | ||
83 | |||
84 | assert(blk->public.throttle_group_member.io_limits_disabled); | ||
85 | qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled); | ||
86 | |||
87 | - if (--blk->quiesce_counter == 0) { | ||
88 | + if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) { | ||
89 | if (blk->dev_ops && blk->dev_ops->drained_end) { | ||
90 | blk->dev_ops->drained_end(blk->dev_opaque); | ||
91 | } | ||
39 | -- | 92 | -- |
40 | 1.8.3.1 | 93 | 2.40.0 |
41 | 94 | ||
42 | 95 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Perform the savevm/loadvm test with both iothread on and off. This | 3 | This field is accessed by multiple threads without a lock. Use explicit |
4 | covers the recently found savevm/loadvm hang when iothread is enabled. | 4 | qatomic_read()/qatomic_set() calls. There is no need for acquire/release |
5 | because blk_set_disable_request_queuing() doesn't provide any | ||
6 | guarantees (it helps that it's used at BlockBackend creation time and | ||
7 | not when there is I/O in flight). | ||
5 | 8 | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
11 | Message-Id: <20230307210427.269214-3-stefanha@redhat.com> | ||
12 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | --- | 15 | --- |
9 | tests/qemu-iotests/068 | 23 ++++++++++++++--------- | 16 | block/block-backend.c | 7 ++++--- |
10 | tests/qemu-iotests/068.out | 11 ++++++++++- | 17 | 1 file changed, 4 insertions(+), 3 deletions(-) |
11 | 2 files changed, 24 insertions(+), 10 deletions(-) | ||
12 | 18 | ||
13 | diff --git a/tests/qemu-iotests/068 b/tests/qemu-iotests/068 | 19 | diff --git a/block/block-backend.c b/block/block-backend.c |
14 | index XXXXXXX..XXXXXXX 100755 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tests/qemu-iotests/068 | 21 | --- a/block/block-backend.c |
16 | +++ b/tests/qemu-iotests/068 | 22 | +++ b/block/block-backend.c |
17 | @@ -XXX,XX +XXX,XX @@ _supported_os Linux | 23 | @@ -XXX,XX +XXX,XX @@ struct BlockBackend { |
18 | IMGOPTS="compat=1.1" | 24 | |
19 | IMG_SIZE=128K | 25 | int quiesce_counter; /* atomic: written under BQL, read by other threads */ |
20 | 26 | CoQueue queued_requests; | |
21 | -echo | 27 | - bool disable_request_queuing; |
22 | -echo "=== Saving and reloading a VM state to/from a qcow2 image ===" | 28 | + bool disable_request_queuing; /* atomic */ |
23 | -echo | 29 | |
24 | -_make_test_img $IMG_SIZE | 30 | VMChangeStateEntry *vmsh; |
25 | - | 31 | bool force_allow_inactivate; |
26 | case "$QEMU_DEFAULT_MACHINE" in | 32 | @@ -XXX,XX +XXX,XX @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) |
27 | s390-ccw-virtio) | 33 | void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) |
28 | platform_parm="-no-shutdown" | 34 | { |
29 | @@ -XXX,XX +XXX,XX @@ _qemu() | 35 | IO_CODE(); |
30 | _filter_qemu | _filter_hmp | 36 | - blk->disable_request_queuing = disable; |
37 | + qatomic_set(&blk->disable_request_queuing, disable); | ||
31 | } | 38 | } |
32 | 39 | ||
33 | -# Give qemu some time to boot before saving the VM state | 40 | static int coroutine_fn GRAPH_RDLOCK |
34 | -bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu | 41 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) |
35 | -# Now try to continue from that VM state (this should just work) | 42 | { |
36 | -echo quit | _qemu -loadvm 0 | 43 | assert(blk->in_flight > 0); |
37 | +for extra_args in \ | 44 | |
38 | + "" \ | 45 | - if (qatomic_read(&blk->quiesce_counter) && !blk->disable_request_queuing) { |
39 | + "-object iothread,id=iothread0 -set device.hba0.iothread=iothread0"; do | 46 | + if (qatomic_read(&blk->quiesce_counter) && |
40 | + echo | 47 | + !qatomic_read(&blk->disable_request_queuing)) { |
41 | + echo "=== Saving and reloading a VM state to/from a qcow2 image ($extra_args) ===" | 48 | blk_dec_in_flight(blk); |
42 | + echo | 49 | qemu_co_queue_wait(&blk->queued_requests, NULL); |
43 | + | 50 | blk_inc_in_flight(blk); |
44 | + _make_test_img $IMG_SIZE | ||
45 | + | ||
46 | + # Give qemu some time to boot before saving the VM state | ||
47 | + bash -c 'sleep 1; echo -e "savevm 0\nquit"' | _qemu $extra_args | ||
48 | + # Now try to continue from that VM state (this should just work) | ||
49 | + echo quit | _qemu $extra_args -loadvm 0 | ||
50 | +done | ||
51 | |||
52 | # success, all done | ||
53 | echo "*** done" | ||
54 | diff --git a/tests/qemu-iotests/068.out b/tests/qemu-iotests/068.out | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/tests/qemu-iotests/068.out | ||
57 | +++ b/tests/qemu-iotests/068.out | ||
58 | @@ -XXX,XX +XXX,XX @@ | ||
59 | QA output created by 068 | ||
60 | |||
61 | -=== Saving and reloading a VM state to/from a qcow2 image === | ||
62 | +=== Saving and reloading a VM state to/from a qcow2 image () === | ||
63 | + | ||
64 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 | ||
65 | +QEMU X.Y.Z monitor - type 'help' for more information | ||
66 | +(qemu) savevm 0 | ||
67 | +(qemu) quit | ||
68 | +QEMU X.Y.Z monitor - type 'help' for more information | ||
69 | +(qemu) quit | ||
70 | + | ||
71 | +=== Saving and reloading a VM state to/from a qcow2 image (-object iothread,id=iothread0 -set device.hba0.iothread=iothread0) === | ||
72 | |||
73 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 | ||
74 | QEMU X.Y.Z monitor - type 'help' for more information | ||
75 | -- | 51 | -- |
76 | 1.8.3.1 | 52 | 2.40.0 |
77 | 53 | ||
78 | 54 | diff view generated by jsdifflib |
1 | Now that we process a request in the same coroutine from beginning to | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | end and don't drop out of it any more, we can look like a proper | ||
3 | coroutine-based driver and simply call qed_aio_next_io() and get a | ||
4 | return value from it instead of spawning an additional coroutine that | ||
5 | reenters the parent when it's done. | ||
6 | 2 | ||
3 | The CoQueue API offers thread-safety via the lock argument that | ||
4 | qemu_co_queue_wait() and qemu_co_enter_next() take. BlockBackend | ||
5 | currently does not make use of the lock argument. This means that | ||
6 | multiple threads submitting I/O requests can corrupt the CoQueue's | ||
7 | QSIMPLEQ. | ||
8 | |||
9 | Add a QemuMutex and pass it to CoQueue APIs so that the queue is | ||
10 | protected. While we're at it, also assert that the queue is empty when | ||
11 | the BlockBackend is deleted. | ||
12 | |||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Reviewed-by: Hanna Czenczek <hreitz@redhat.com> | ||
15 | Message-Id: <20230307210427.269214-4-stefanha@redhat.com> | ||
16 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | 18 | --- |
10 | block/qed.c | 101 +++++++++++++----------------------------------------------- | 19 | block/block-backend.c | 18 ++++++++++++++++-- |
11 | block/qed.h | 3 +- | 20 | 1 file changed, 16 insertions(+), 2 deletions(-) |
12 | 2 files changed, 22 insertions(+), 82 deletions(-) | ||
13 | 21 | ||
14 | diff --git a/block/qed.c b/block/qed.c | 22 | diff --git a/block/block-backend.c b/block/block-backend.c |
15 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/qed.c | 24 | --- a/block/block-backend.c |
17 | +++ b/block/qed.c | 25 | +++ b/block/block-backend.c |
18 | @@ -XXX,XX +XXX,XX @@ | 26 | @@ -XXX,XX +XXX,XX @@ struct BlockBackend { |
19 | #include "qapi/qmp/qerror.h" | 27 | QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; |
20 | #include "sysemu/block-backend.h" | 28 | |
21 | 29 | int quiesce_counter; /* atomic: written under BQL, read by other threads */ | |
22 | -static const AIOCBInfo qed_aiocb_info = { | 30 | + QemuMutex queued_requests_lock; /* protects queued_requests */ |
23 | - .aiocb_size = sizeof(QEDAIOCB), | 31 | CoQueue queued_requests; |
24 | -}; | 32 | bool disable_request_queuing; /* atomic */ |
25 | - | 33 | |
26 | static int bdrv_qed_probe(const uint8_t *buf, int buf_size, | 34 | @@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) |
27 | const char *filename) | 35 | |
28 | { | 36 | block_acct_init(&blk->stats); |
29 | @@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) | 37 | |
30 | return l2_table; | 38 | + qemu_mutex_init(&blk->queued_requests_lock); |
31 | } | 39 | qemu_co_queue_init(&blk->queued_requests); |
32 | 40 | notifier_list_init(&blk->remove_bs_notifiers); | |
33 | -static void qed_aio_next_io(QEDAIOCB *acb); | 41 | notifier_list_init(&blk->insert_bs_notifiers); |
34 | - | 42 | @@ -XXX,XX +XXX,XX @@ static void blk_delete(BlockBackend *blk) |
35 | -static void qed_aio_start_io(QEDAIOCB *acb) | 43 | assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); |
36 | -{ | 44 | assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); |
37 | - qed_aio_next_io(acb); | 45 | assert(QLIST_EMPTY(&blk->aio_notifiers)); |
38 | -} | 46 | + assert(qemu_co_queue_empty(&blk->queued_requests)); |
39 | - | 47 | + qemu_mutex_destroy(&blk->queued_requests_lock); |
40 | static void qed_plug_allocating_write_reqs(BDRVQEDState *s) | 48 | QTAILQ_REMOVE(&block_backends, blk, link); |
41 | { | 49 | drive_info_del(blk->legacy_dinfo); |
42 | assert(!s->allocating_write_reqs_plugged); | 50 | block_acct_cleanup(&blk->stats); |
43 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, | 51 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) |
44 | 52 | ||
45 | static BDRVQEDState *acb_to_s(QEDAIOCB *acb) | 53 | if (qatomic_read(&blk->quiesce_counter) && |
46 | { | 54 | !qatomic_read(&blk->disable_request_queuing)) { |
47 | - return acb->common.bs->opaque; | 55 | + /* |
48 | + return acb->bs->opaque; | 56 | + * Take lock before decrementing in flight counter so main loop thread |
49 | } | 57 | + * waits for us to enqueue ourselves before it can leave the drained |
50 | 58 | + * section. | |
51 | /** | 59 | + */ |
52 | @@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, | 60 | + qemu_mutex_lock(&blk->queued_requests_lock); |
61 | blk_dec_in_flight(blk); | ||
62 | - qemu_co_queue_wait(&blk->queued_requests, NULL); | ||
63 | + qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock); | ||
64 | blk_inc_in_flight(blk); | ||
65 | + qemu_mutex_unlock(&blk->queued_requests_lock); | ||
53 | } | 66 | } |
54 | } | 67 | } |
55 | 68 | ||
56 | -static void qed_aio_complete_bh(void *opaque) | 69 | @@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child) |
57 | -{ | 70 | if (blk->dev_ops && blk->dev_ops->drained_end) { |
58 | - QEDAIOCB *acb = opaque; | 71 | blk->dev_ops->drained_end(blk->dev_opaque); |
59 | - BDRVQEDState *s = acb_to_s(acb); | 72 | } |
60 | - BlockCompletionFunc *cb = acb->common.cb; | 73 | - while (qemu_co_enter_next(&blk->queued_requests, NULL)) { |
61 | - void *user_opaque = acb->common.opaque; | 74 | + qemu_mutex_lock(&blk->queued_requests_lock); |
62 | - int ret = acb->bh_ret; | 75 | + while (qemu_co_enter_next(&blk->queued_requests, |
63 | - | 76 | + &blk->queued_requests_lock)) { |
64 | - qemu_aio_unref(acb); | 77 | /* Resume all queued requests */ |
65 | - | 78 | } |
66 | - /* Invoke callback */ | 79 | + qemu_mutex_unlock(&blk->queued_requests_lock); |
67 | - qed_acquire(s); | ||
68 | - cb(user_opaque, ret); | ||
69 | - qed_release(s); | ||
70 | -} | ||
71 | - | ||
72 | -static void qed_aio_complete(QEDAIOCB *acb, int ret) | ||
73 | +static void qed_aio_complete(QEDAIOCB *acb) | ||
74 | { | ||
75 | BDRVQEDState *s = acb_to_s(acb); | ||
76 | |||
77 | - trace_qed_aio_complete(s, acb, ret); | ||
78 | - | ||
79 | /* Free resources */ | ||
80 | qemu_iovec_destroy(&acb->cur_qiov); | ||
81 | qed_unref_l2_cache_entry(acb->request.l2_table); | ||
82 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) | ||
83 | acb->qiov->iov[0].iov_base = NULL; | ||
84 | } | 80 | } |
85 | |||
86 | - /* Arrange for a bh to invoke the completion function */ | ||
87 | - acb->bh_ret = ret; | ||
88 | - aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), | ||
89 | - qed_aio_complete_bh, acb); | ||
90 | - | ||
91 | /* Start next allocating write request waiting behind this one. Note that | ||
92 | * requests enqueue themselves when they first hit an unallocated cluster | ||
93 | * but they wait until the entire request is finished before waking up the | ||
94 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | ||
95 | struct iovec *iov = acb->qiov->iov; | ||
96 | |||
97 | if (!iov->iov_base) { | ||
98 | - iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len); | ||
99 | + iov->iov_base = qemu_try_blockalign(acb->bs, iov->iov_len); | ||
100 | if (iov->iov_base == NULL) { | ||
101 | return -ENOMEM; | ||
102 | } | ||
103 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len) | ||
104 | { | ||
105 | QEDAIOCB *acb = opaque; | ||
106 | BDRVQEDState *s = acb_to_s(acb); | ||
107 | - BlockDriverState *bs = acb->common.bs; | ||
108 | + BlockDriverState *bs = acb->bs; | ||
109 | |||
110 | /* Adjust offset into cluster */ | ||
111 | offset += qed_offset_into_cluster(s, acb->cur_pos); | ||
112 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len) | ||
113 | /** | ||
114 | * Begin next I/O or complete the request | ||
115 | */ | ||
116 | -static void qed_aio_next_io(QEDAIOCB *acb) | ||
117 | +static int qed_aio_next_io(QEDAIOCB *acb) | ||
118 | { | ||
119 | BDRVQEDState *s = acb_to_s(acb); | ||
120 | uint64_t offset; | ||
121 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb) | ||
122 | |||
123 | /* Complete request */ | ||
124 | if (acb->cur_pos >= acb->end_pos) { | ||
125 | - qed_aio_complete(acb, 0); | ||
126 | - return; | ||
127 | + ret = 0; | ||
128 | + break; | ||
129 | } | ||
130 | |||
131 | /* Find next cluster and start I/O */ | ||
132 | len = acb->end_pos - acb->cur_pos; | ||
133 | ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset); | ||
134 | if (ret < 0) { | ||
135 | - qed_aio_complete(acb, ret); | ||
136 | - return; | ||
137 | + break; | ||
138 | } | ||
139 | |||
140 | if (acb->flags & QED_AIOCB_WRITE) { | ||
141 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb) | ||
142 | } | ||
143 | |||
144 | if (ret < 0 && ret != -EAGAIN) { | ||
145 | - qed_aio_complete(acb, ret); | ||
146 | - return; | ||
147 | + break; | ||
148 | } | ||
149 | } | ||
150 | -} | ||
151 | |||
152 | -typedef struct QEDRequestCo { | ||
153 | - Coroutine *co; | ||
154 | - bool done; | ||
155 | - int ret; | ||
156 | -} QEDRequestCo; | ||
157 | - | ||
158 | -static void qed_co_request_cb(void *opaque, int ret) | ||
159 | -{ | ||
160 | - QEDRequestCo *co = opaque; | ||
161 | - | ||
162 | - co->done = true; | ||
163 | - co->ret = ret; | ||
164 | - qemu_coroutine_enter_if_inactive(co->co); | ||
165 | + trace_qed_aio_complete(s, acb, ret); | ||
166 | + qed_aio_complete(acb); | ||
167 | + return ret; | ||
168 | } | 81 | } |
169 | 82 | ||
170 | static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num, | ||
171 | QEMUIOVector *qiov, int nb_sectors, | ||
172 | int flags) | ||
173 | { | ||
174 | - QEDRequestCo co = { | ||
175 | - .co = qemu_coroutine_self(), | ||
176 | - .done = false, | ||
177 | + QEDAIOCB acb = { | ||
178 | + .bs = bs, | ||
179 | + .cur_pos = (uint64_t) sector_num * BDRV_SECTOR_SIZE, | ||
180 | + .end_pos = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE, | ||
181 | + .qiov = qiov, | ||
182 | + .flags = flags, | ||
183 | }; | ||
184 | - QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, qed_co_request_cb, &co); | ||
185 | - | ||
186 | - trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, &co, flags); | ||
187 | + qemu_iovec_init(&acb.cur_qiov, qiov->niov); | ||
188 | |||
189 | - acb->flags = flags; | ||
190 | - acb->qiov = qiov; | ||
191 | - acb->qiov_offset = 0; | ||
192 | - acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; | ||
193 | - acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; | ||
194 | - acb->backing_qiov = NULL; | ||
195 | - acb->request.l2_table = NULL; | ||
196 | - qemu_iovec_init(&acb->cur_qiov, qiov->niov); | ||
197 | + trace_qed_aio_setup(bs->opaque, &acb, sector_num, nb_sectors, NULL, flags); | ||
198 | |||
199 | /* Start request */ | ||
200 | - qed_aio_start_io(acb); | ||
201 | - | ||
202 | - if (!co.done) { | ||
203 | - qemu_coroutine_yield(); | ||
204 | - } | ||
205 | - | ||
206 | - return co.ret; | ||
207 | + return qed_aio_next_io(&acb); | ||
208 | } | ||
209 | |||
210 | static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs, | ||
211 | diff --git a/block/qed.h b/block/qed.h | ||
212 | index XXXXXXX..XXXXXXX 100644 | ||
213 | --- a/block/qed.h | ||
214 | +++ b/block/qed.h | ||
215 | @@ -XXX,XX +XXX,XX @@ enum { | ||
216 | }; | ||
217 | |||
218 | typedef struct QEDAIOCB { | ||
219 | - BlockAIOCB common; | ||
220 | - int bh_ret; /* final return status for completion bh */ | ||
221 | + BlockDriverState *bs; | ||
222 | QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */ | ||
223 | int flags; /* QED_AIOCB_* bits ORed together */ | ||
224 | uint64_t end_pos; /* request end on block device, in bytes */ | ||
225 | -- | 83 | -- |
226 | 1.8.3.1 | 84 | 2.40.0 |
227 | |||
228 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | AioContext was designed to allow nested acquire/release calls. It uses | 3 | There is no need for the AioContext lock in bdrv_drain_all() because |
4 | a recursive mutex so callers don't need to worry about nesting...or so | 4 | nothing in AIO_WAIT_WHILE() needs the lock and the condition is atomic. |
5 | we thought. | ||
6 | 5 | ||
7 | BDRV_POLL_WHILE() is used to wait for block I/O requests. It releases | 6 | AIO_WAIT_WHILE_UNLOCKED() has no use for the AioContext parameter other |
8 | the AioContext temporarily around aio_poll(). This gives IOThreads a | 7 | than performing a check that is nowadays already done by the |
9 | chance to acquire the AioContext to process I/O completions. | 8 | GLOBAL_STATE_CODE()/IO_CODE() macros. Set the ctx argument to NULL here |
9 | to help us keep track of all converted callers. Eventually all callers | ||
10 | will have been converted and then the argument can be dropped entirely. | ||
10 | 11 | ||
11 | It turns out that recursive locking and BDRV_POLL_WHILE() don't mix. | 12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
12 | BDRV_POLL_WHILE() only releases the AioContext once, so the IOThread | ||
13 | will not be able to acquire the AioContext if it was acquired | ||
14 | multiple times. | ||
15 | |||
16 | Instead of trying to release AioContext n times in BDRV_POLL_WHILE(), | ||
17 | this patch simply avoids nested locking in save_vmstate(). It's the | ||
18 | simplest fix and we should step back to consider the big picture with | ||
19 | all the recent changes to block layer threading. | ||
20 | |||
21 | This patch is the final fix to solve 'savevm' hanging with -object | ||
22 | iothread. | ||
23 | |||
24 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
25 | Reviewed-by: Eric Blake <eblake@redhat.com> | 14 | Message-Id: <20230309190855.414275-2-stefanha@redhat.com> |
26 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | 15 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
16 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
27 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
28 | --- | 18 | --- |
29 | migration/savevm.c | 12 +++++++++++- | 19 | block/block-backend.c | 8 +------- |
30 | 1 file changed, 11 insertions(+), 1 deletion(-) | 20 | 1 file changed, 1 insertion(+), 7 deletions(-) |
31 | 21 | ||
32 | diff --git a/migration/savevm.c b/migration/savevm.c | 22 | diff --git a/block/block-backend.c b/block/block-backend.c |
33 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/migration/savevm.c | 24 | --- a/block/block-backend.c |
35 | +++ b/migration/savevm.c | 25 | +++ b/block/block-backend.c |
36 | @@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp) | 26 | @@ -XXX,XX +XXX,XX @@ void blk_drain_all(void) |
37 | goto the_end; | 27 | bdrv_drain_all_begin(); |
28 | |||
29 | while ((blk = blk_all_next(blk)) != NULL) { | ||
30 | - AioContext *ctx = blk_get_aio_context(blk); | ||
31 | - | ||
32 | - aio_context_acquire(ctx); | ||
33 | - | ||
34 | /* We may have -ENOMEDIUM completions in flight */ | ||
35 | - AIO_WAIT_WHILE(ctx, qatomic_read(&blk->in_flight) > 0); | ||
36 | - | ||
37 | - aio_context_release(ctx); | ||
38 | + AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0); | ||
38 | } | 39 | } |
39 | 40 | ||
40 | + /* The bdrv_all_create_snapshot() call that follows acquires the AioContext | 41 | bdrv_drain_all_end(); |
41 | + * for itself. BDRV_POLL_WHILE() does not support nested locking because | ||
42 | + * it only releases the lock once. Therefore synchronous I/O will deadlock | ||
43 | + * unless we release the AioContext before bdrv_all_create_snapshot(). | ||
44 | + */ | ||
45 | + aio_context_release(aio_context); | ||
46 | + aio_context = NULL; | ||
47 | + | ||
48 | ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs); | ||
49 | if (ret < 0) { | ||
50 | error_setg(errp, "Error while creating snapshot on '%s'", | ||
51 | @@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp) | ||
52 | ret = 0; | ||
53 | |||
54 | the_end: | ||
55 | - aio_context_release(aio_context); | ||
56 | + if (aio_context) { | ||
57 | + aio_context_release(aio_context); | ||
58 | + } | ||
59 | if (saved_vm_running) { | ||
60 | vm_start(); | ||
61 | } | ||
62 | -- | 42 | -- |
63 | 1.8.3.1 | 43 | 2.40.0 |
64 | |||
65 | diff view generated by jsdifflib |
1 | With this change, qed_aio_write_prefill() and qed_aio_write_postfill() | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | collapse into a single function. This is reflected by a rename of the | ||
3 | combined function to qed_aio_write_cow(). | ||
4 | 2 | ||
3 | There is no change in behavior. Switch to AIO_WAIT_WHILE_UNLOCKED() | ||
4 | instead of AIO_WAIT_WHILE() to document that this code has already been | ||
5 | audited and converted. The AioContext argument is already NULL so | ||
6 | aio_context_release() is never called anyway. | ||
7 | |||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Message-Id: <20230309190855.414275-3-stefanha@redhat.com> | ||
13 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | --- | 15 | --- |
9 | block/qed.c | 57 +++++++++++++++++++++++---------------------------------- | 16 | block/export/export.c | 2 +- |
10 | 1 file changed, 23 insertions(+), 34 deletions(-) | 17 | 1 file changed, 1 insertion(+), 1 deletion(-) |
11 | 18 | ||
12 | diff --git a/block/qed.c b/block/qed.c | 19 | diff --git a/block/export/export.c b/block/export/export.c |
13 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/block/qed.c | 21 | --- a/block/export/export.c |
15 | +++ b/block/qed.c | 22 | +++ b/block/export/export.c |
16 | @@ -XXX,XX +XXX,XX @@ static int qed_read_backing_file(BDRVQEDState *s, uint64_t pos, | 23 | @@ -XXX,XX +XXX,XX @@ void blk_exp_close_all_type(BlockExportType type) |
17 | * @pos: Byte position in device | 24 | blk_exp_request_shutdown(exp); |
18 | * @len: Number of bytes | ||
19 | * @offset: Byte offset in image file | ||
20 | - * @cb: Completion function | ||
21 | - * @opaque: User data for completion function | ||
22 | */ | ||
23 | -static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, | ||
24 | - uint64_t len, uint64_t offset, | ||
25 | - BlockCompletionFunc *cb, | ||
26 | - void *opaque) | ||
27 | +static int qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, | ||
28 | + uint64_t len, uint64_t offset) | ||
29 | { | ||
30 | QEMUIOVector qiov; | ||
31 | QEMUIOVector *backing_qiov = NULL; | ||
32 | @@ -XXX,XX +XXX,XX @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, | ||
33 | |||
34 | /* Skip copy entirely if there is no work to do */ | ||
35 | if (len == 0) { | ||
36 | - cb(opaque, 0); | ||
37 | - return; | ||
38 | + return 0; | ||
39 | } | 25 | } |
40 | 26 | ||
41 | iov = (struct iovec) { | 27 | - AIO_WAIT_WHILE(NULL, blk_exp_has_type(type)); |
42 | @@ -XXX,XX +XXX,XX @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, | 28 | + AIO_WAIT_WHILE_UNLOCKED(NULL, blk_exp_has_type(type)); |
43 | ret = 0; | ||
44 | out: | ||
45 | qemu_vfree(iov.iov_base); | ||
46 | - cb(opaque, ret); | ||
47 | + return ret; | ||
48 | } | 29 | } |
49 | 30 | ||
50 | /** | 31 | void blk_exp_close_all(void) |
51 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret) | ||
52 | } | ||
53 | |||
54 | /** | ||
55 | - * Populate back untouched region of new data cluster | ||
56 | + * Populate untouched regions of new data cluster | ||
57 | */ | ||
58 | -static void qed_aio_write_postfill(void *opaque, int ret) | ||
59 | +static void qed_aio_write_cow(void *opaque, int ret) | ||
60 | { | ||
61 | QEDAIOCB *acb = opaque; | ||
62 | BDRVQEDState *s = acb_to_s(acb); | ||
63 | - uint64_t start = acb->cur_pos + acb->cur_qiov.size; | ||
64 | - uint64_t len = | ||
65 | - qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; | ||
66 | - uint64_t offset = acb->cur_cluster + | ||
67 | - qed_offset_into_cluster(s, acb->cur_pos) + | ||
68 | - acb->cur_qiov.size; | ||
69 | + uint64_t start, len, offset; | ||
70 | + | ||
71 | + /* Populate front untouched region of new data cluster */ | ||
72 | + start = qed_start_of_cluster(s, acb->cur_pos); | ||
73 | + len = qed_offset_into_cluster(s, acb->cur_pos); | ||
74 | |||
75 | + trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); | ||
76 | + ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster); | ||
77 | if (ret) { | ||
78 | qed_aio_complete(acb, ret); | ||
79 | return; | ||
80 | } | ||
81 | |||
82 | - trace_qed_aio_write_postfill(s, acb, start, len, offset); | ||
83 | - qed_copy_from_backing_file(s, start, len, offset, | ||
84 | - qed_aio_write_main, acb); | ||
85 | -} | ||
86 | + /* Populate back untouched region of new data cluster */ | ||
87 | + start = acb->cur_pos + acb->cur_qiov.size; | ||
88 | + len = qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; | ||
89 | + offset = acb->cur_cluster + | ||
90 | + qed_offset_into_cluster(s, acb->cur_pos) + | ||
91 | + acb->cur_qiov.size; | ||
92 | |||
93 | -/** | ||
94 | - * Populate front untouched region of new data cluster | ||
95 | - */ | ||
96 | -static void qed_aio_write_prefill(void *opaque, int ret) | ||
97 | -{ | ||
98 | - QEDAIOCB *acb = opaque; | ||
99 | - BDRVQEDState *s = acb_to_s(acb); | ||
100 | - uint64_t start = qed_start_of_cluster(s, acb->cur_pos); | ||
101 | - uint64_t len = qed_offset_into_cluster(s, acb->cur_pos); | ||
102 | + trace_qed_aio_write_postfill(s, acb, start, len, offset); | ||
103 | + ret = qed_copy_from_backing_file(s, start, len, offset); | ||
104 | |||
105 | - trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); | ||
106 | - qed_copy_from_backing_file(s, start, len, acb->cur_cluster, | ||
107 | - qed_aio_write_postfill, acb); | ||
108 | + qed_aio_write_main(acb, ret); | ||
109 | } | ||
110 | |||
111 | /** | ||
112 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
113 | |||
114 | cb = qed_aio_write_zero_cluster; | ||
115 | } else { | ||
116 | - cb = qed_aio_write_prefill; | ||
117 | + cb = qed_aio_write_cow; | ||
118 | acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); | ||
119 | } | ||
120 | |||
121 | -- | 32 | -- |
122 | 1.8.3.1 | 33 | 2.40.0 |
123 | 34 | ||
124 | 35 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | blk/bdrv_drain_all() only takes effect for a single instant and then | 3 | The following conversion is safe and does not change behavior: |
4 | resumes block jobs, guest devices, and other external clients like the | ||
5 | NBD server. This can be handy when performing a synchronous drain | ||
6 | before terminating the program, for example. | ||
7 | 4 | ||
8 | Monitor commands usually need to quiesce I/O across an entire code | 5 | GLOBAL_STATE_CODE(); |
9 | region so blk/bdrv_drain_all() is not suitable. They must use | 6 | ... |
10 | bdrv_drain_all_begin/end() to mark the region. This prevents new I/O | 7 | - AIO_WAIT_WHILE(qemu_get_aio_context(), ...); |
11 | requests from slipping in or worse - block jobs completing and modifying | 8 | + AIO_WAIT_WHILE_UNLOCKED(NULL, ...); |
12 | the graph. | ||
13 | 9 | ||
14 | I audited other blk/bdrv_drain_all() callers but did not find anything | 10 | Since we're in GLOBAL_STATE_CODE(), qemu_get_aio_context() is our home |
15 | that needs a similar fix. This patch fixes the savevm/loadvm commands. | 11 | thread's AioContext. Thus AIO_WAIT_WHILE() does not unlock the |
16 | Although I haven't encountered a read world issue this makes the code | 12 | AioContext: |
17 | safer. | ||
18 | 13 | ||
19 | Suggested-by: Kevin Wolf <kwolf@redhat.com> | 14 | if (ctx_ && in_aio_context_home_thread(ctx_)) { \ |
15 | while ((cond)) { \ | ||
16 | aio_poll(ctx_, true); \ | ||
17 | waited_ = true; \ | ||
18 | } \ | ||
19 | |||
20 | And that means AIO_WAIT_WHILE_UNLOCKED(NULL, ...) can be substituted. | ||
21 | |||
22 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
23 | Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
24 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
20 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 25 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
21 | Reviewed-by: Eric Blake <eblake@redhat.com> | 26 | Message-Id: <20230309190855.414275-4-stefanha@redhat.com> |
27 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
22 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 28 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
23 | --- | 29 | --- |
24 | migration/savevm.c | 18 +++++++++++++++--- | 30 | block/graph-lock.c | 2 +- |
25 | 1 file changed, 15 insertions(+), 3 deletions(-) | 31 | 1 file changed, 1 insertion(+), 1 deletion(-) |
26 | 32 | ||
27 | diff --git a/migration/savevm.c b/migration/savevm.c | 33 | diff --git a/block/graph-lock.c b/block/graph-lock.c |
28 | index XXXXXXX..XXXXXXX 100644 | 34 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/migration/savevm.c | 35 | --- a/block/graph-lock.c |
30 | +++ b/migration/savevm.c | 36 | +++ b/block/graph-lock.c |
31 | @@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp) | 37 | @@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(void) |
32 | } | 38 | * reader lock. |
33 | vm_stop(RUN_STATE_SAVE_VM); | 39 | */ |
34 | 40 | qatomic_set(&has_writer, 0); | |
35 | + bdrv_drain_all_begin(); | 41 | - AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1); |
36 | + | 42 | + AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1); |
37 | aio_context_acquire(aio_context); | 43 | qatomic_set(&has_writer, 1); |
38 | 44 | ||
39 | memset(sn, 0, sizeof(*sn)); | 45 | /* |
40 | @@ -XXX,XX +XXX,XX @@ int save_snapshot(const char *name, Error **errp) | ||
41 | if (aio_context) { | ||
42 | aio_context_release(aio_context); | ||
43 | } | ||
44 | + | ||
45 | + bdrv_drain_all_end(); | ||
46 | + | ||
47 | if (saved_vm_running) { | ||
48 | vm_start(); | ||
49 | } | ||
50 | @@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp) | ||
51 | } | ||
52 | |||
53 | /* Flush all IO requests so they don't interfere with the new state. */ | ||
54 | - bdrv_drain_all(); | ||
55 | + bdrv_drain_all_begin(); | ||
56 | |||
57 | ret = bdrv_all_goto_snapshot(name, &bs); | ||
58 | if (ret < 0) { | ||
59 | error_setg(errp, "Error %d while activating snapshot '%s' on '%s'", | ||
60 | ret, name, bdrv_get_device_name(bs)); | ||
61 | - return ret; | ||
62 | + goto err_drain; | ||
63 | } | ||
64 | |||
65 | /* restore the VM state */ | ||
66 | f = qemu_fopen_bdrv(bs_vm_state, 0); | ||
67 | if (!f) { | ||
68 | error_setg(errp, "Could not open VM state file"); | ||
69 | - return -EINVAL; | ||
70 | + ret = -EINVAL; | ||
71 | + goto err_drain; | ||
72 | } | ||
73 | |||
74 | qemu_system_reset(SHUTDOWN_CAUSE_NONE); | ||
75 | @@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp) | ||
76 | ret = qemu_loadvm_state(f); | ||
77 | aio_context_release(aio_context); | ||
78 | |||
79 | + bdrv_drain_all_end(); | ||
80 | + | ||
81 | migration_incoming_state_destroy(); | ||
82 | if (ret < 0) { | ||
83 | error_setg(errp, "Error %d while loading VM state", ret); | ||
84 | @@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp) | ||
85 | } | ||
86 | |||
87 | return 0; | ||
88 | + | ||
89 | +err_drain: | ||
90 | + bdrv_drain_all_end(); | ||
91 | + return ret; | ||
92 | } | ||
93 | |||
94 | void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) | ||
95 | -- | 46 | -- |
96 | 1.8.3.1 | 47 | 2.40.0 |
97 | 48 | ||
98 | 49 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Calling aio_poll() directly may have been fine previously, but this is | 3 | Since the AioContext argument was already NULL, AIO_WAIT_WHILE() was |
4 | the future, man! The difference between an aio_poll() loop and | 4 | never going to unlock the AioContext. Therefore it is possible to |
5 | BDRV_POLL_WHILE() is that BDRV_POLL_WHILE() releases the AioContext | 5 | replace AIO_WAIT_WHILE() with AIO_WAIT_WHILE_UNLOCKED(). |
6 | around aio_poll(). | ||
7 | 6 | ||
8 | This allows the IOThread to run fd handlers or BHs to complete the | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | request. Failure to release the AioContext causes deadlocks. | 8 | Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | 9 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | |
11 | Using BDRV_POLL_WHILE() partially fixes a 'savevm' hang with -object | ||
12 | iothread. | ||
13 | |||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
15 | Reviewed-by: Eric Blake <eblake@redhat.com> | 11 | Message-Id: <20230309190855.414275-5-stefanha@redhat.com> |
16 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | 12 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> |
17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
18 | --- | 14 | --- |
19 | block/io.c | 4 +--- | 15 | block/io.c | 2 +- |
20 | 1 file changed, 1 insertion(+), 3 deletions(-) | 16 | 1 file changed, 1 insertion(+), 1 deletion(-) |
21 | 17 | ||
22 | diff --git a/block/io.c b/block/io.c | 18 | diff --git a/block/io.c b/block/io.c |
23 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/block/io.c | 20 | --- a/block/io.c |
25 | +++ b/block/io.c | 21 | +++ b/block/io.c |
26 | @@ -XXX,XX +XXX,XX @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, | 22 | @@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void) |
27 | Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data); | 23 | bdrv_drain_all_begin_nopoll(); |
28 | 24 | ||
29 | bdrv_coroutine_enter(bs, co); | 25 | /* Now poll the in-flight requests */ |
30 | - while (data.ret == -EINPROGRESS) { | 26 | - AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll()); |
31 | - aio_poll(bdrv_get_aio_context(bs), true); | 27 | + AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll()); |
32 | - } | 28 | |
33 | + BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS); | 29 | while ((bs = bdrv_next_all_states(bs))) { |
34 | return data.ret; | 30 | bdrv_drain_assert_idle(bs); |
35 | } | ||
36 | } | ||
37 | -- | 31 | -- |
38 | 1.8.3.1 | 32 | 2.40.0 |
39 | 33 | ||
40 | 34 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This adds documentation for the -blockdev options that apply to all | ||
2 | nodes independent of the block driver used. | ||
3 | 1 | ||
4 | All options that are shared by -blockdev and -drive are now explained in | ||
5 | the section for -blockdev. The documentation of -drive mentions that all | ||
6 | -blockdev options are accepted as well. | ||
7 | |||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
11 | --- | ||
12 | qemu-options.hx | 108 +++++++++++++++++++++++++++++++++++++++++--------------- | ||
13 | 1 file changed, 79 insertions(+), 29 deletions(-) | ||
14 | |||
15 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/qemu-options.hx | ||
18 | +++ b/qemu-options.hx | ||
19 | @@ -XXX,XX +XXX,XX @@ DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev, | ||
20 | " [,read-only=on|off][,detect-zeroes=on|off|unmap]\n" | ||
21 | " [,driver specific parameters...]\n" | ||
22 | " configure a block backend\n", QEMU_ARCH_ALL) | ||
23 | +STEXI | ||
24 | +@item -blockdev @var{option}[,@var{option}[,@var{option}[,...]]] | ||
25 | +@findex -blockdev | ||
26 | + | ||
27 | +Define a new block driver node. | ||
28 | + | ||
29 | +@table @option | ||
30 | +@item Valid options for any block driver node: | ||
31 | + | ||
32 | +@table @code | ||
33 | +@item driver | ||
34 | +Specifies the block driver to use for the given node. | ||
35 | +@item node-name | ||
36 | +This defines the name of the block driver node by which it will be referenced | ||
37 | +later. The name must be unique, i.e. it must not match the name of a different | ||
38 | +block driver node, or (if you use @option{-drive} as well) the ID of a drive. | ||
39 | + | ||
40 | +If no node name is specified, it is automatically generated. The generated node | ||
41 | +name is not intended to be predictable and changes between QEMU invocations. | ||
42 | +For the top level, an explicit node name must be specified. | ||
43 | +@item read-only | ||
44 | +Open the node read-only. Guest write attempts will fail. | ||
45 | +@item cache.direct | ||
46 | +The host page cache can be avoided with @option{cache.direct=on}. This will | ||
47 | +attempt to do disk IO directly to the guest's memory. QEMU may still perform an | ||
48 | +internal copy of the data. | ||
49 | +@item cache.no-flush | ||
50 | +In case you don't care about data integrity over host failures, you can use | ||
51 | +@option{cache.no-flush=on}. This option tells QEMU that it never needs to write | ||
52 | +any data to the disk but can instead keep things in cache. If anything goes | ||
53 | +wrong, like your host losing power, the disk storage getting disconnected | ||
54 | +accidentally, etc. your image will most probably be rendered unusable. | ||
55 | +@item discard=@var{discard} | ||
56 | +@var{discard} is one of "ignore" (or "off") or "unmap" (or "on") and controls | ||
57 | +whether @code{discard} (also known as @code{trim} or @code{unmap}) requests are | ||
58 | +ignored or passed to the filesystem. Some machine types may not support | ||
59 | +discard requests. | ||
60 | +@item detect-zeroes=@var{detect-zeroes} | ||
61 | +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic | ||
62 | +conversion of plain zero writes by the OS to driver specific optimized | ||
63 | +zero write commands. You may even choose "unmap" if @var{discard} is set | ||
64 | +to "unmap" to allow a zero write to be converted to an @code{unmap} operation. | ||
65 | +@end table | ||
66 | + | ||
67 | +@end table | ||
68 | + | ||
69 | +ETEXI | ||
70 | |||
71 | DEF("drive", HAS_ARG, QEMU_OPTION_drive, | ||
72 | "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n" | ||
73 | @@ -XXX,XX +XXX,XX @@ STEXI | ||
74 | @item -drive @var{option}[,@var{option}[,@var{option}[,...]]] | ||
75 | @findex -drive | ||
76 | |||
77 | -Define a new drive. Valid options are: | ||
78 | +Define a new drive. This includes creating a block driver node (the backend) as | ||
79 | +well as a guest device, and is mostly a shortcut for defining the corresponding | ||
80 | +@option{-blockdev} and @option{-device} options. | ||
81 | + | ||
82 | +@option{-drive} accepts all options that are accepted by @option{-blockdev}. In | ||
83 | +addition, it knows the following options: | ||
84 | |||
85 | @table @option | ||
86 | @item file=@var{file} | ||
87 | @@ -XXX,XX +XXX,XX @@ These options have the same definition as they have in @option{-hdachs}. | ||
88 | @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive | ||
89 | (see @option{-snapshot}). | ||
90 | @item cache=@var{cache} | ||
91 | -@var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" and controls how the host cache is used to access block data. | ||
92 | +@var{cache} is "none", "writeback", "unsafe", "directsync" or "writethrough" | ||
93 | +and controls how the host cache is used to access block data. This is a | ||
94 | +shortcut that sets the @option{cache.direct} and @option{cache.no-flush} | ||
95 | +options (as in @option{-blockdev}), and additionally @option{cache.writeback}, | ||
96 | +which provides a default for the @option{write-cache} option of block guest | ||
97 | +devices (as in @option{-device}). The modes correspond to the following | ||
98 | +settings: | ||
99 | + | ||
100 | +@c Our texi2pod.pl script doesn't support @multitable, so fall back to using | ||
101 | +@c plain ASCII art (well, UTF-8 art really). This looks okay both in the manpage | ||
102 | +@c and the HTML output. | ||
103 | +@example | ||
104 | +@ │ cache.writeback cache.direct cache.no-flush | ||
105 | +─────────────┼───────────────────────────────────────────────── | ||
106 | +writeback │ on off off | ||
107 | +none │ on on off | ||
108 | +writethrough │ off off off | ||
109 | +directsync │ off on off | ||
110 | +unsafe │ on off on | ||
111 | +@end example | ||
112 | + | ||
113 | +The default mode is @option{cache=writeback}. | ||
114 | + | ||
115 | @item aio=@var{aio} | ||
116 | @var{aio} is "threads", or "native" and selects between pthread based disk I/O and native Linux AIO. | ||
117 | -@item discard=@var{discard} | ||
118 | -@var{discard} is one of "ignore" (or "off") or "unmap" (or "on") and controls whether @dfn{discard} (also known as @dfn{trim} or @dfn{unmap}) requests are ignored or passed to the filesystem. Some machine types may not support discard requests. | ||
119 | @item format=@var{format} | ||
120 | Specify which disk @var{format} will be used rather than detecting | ||
121 | the format. Can be used to specify format=raw to avoid interpreting | ||
122 | @@ -XXX,XX +XXX,XX @@ Specify which @var{action} to take on write and read errors. Valid actions are: | ||
123 | "report" (report the error to the guest), "enospc" (pause QEMU only if the | ||
124 | host disk is full; report the error to the guest otherwise). | ||
125 | The default setting is @option{werror=enospc} and @option{rerror=report}. | ||
126 | -@item readonly | ||
127 | -Open drive @option{file} as read-only. Guest write attempts will fail. | ||
128 | @item copy-on-read=@var{copy-on-read} | ||
129 | @var{copy-on-read} is "on" or "off" and enables whether to copy read backing | ||
130 | file sectors into the image file. | ||
131 | -@item detect-zeroes=@var{detect-zeroes} | ||
132 | -@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic | ||
133 | -conversion of plain zero writes by the OS to driver specific optimized | ||
134 | -zero write commands. You may even choose "unmap" if @var{discard} is set | ||
135 | -to "unmap" to allow a zero write to be converted to an UNMAP operation. | ||
136 | @item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w} | ||
137 | Specify bandwidth throttling limits in bytes per second, either for all request | ||
138 | types or for reads or writes only. Small values can lead to timeouts or hangs | ||
139 | @@ -XXX,XX +XXX,XX @@ prevent guests from circumventing throttling limits by using many small disks | ||
140 | instead of a single larger disk. | ||
141 | @end table | ||
142 | |||
143 | -By default, the @option{cache=writeback} mode is used. It will report data | ||
144 | +By default, the @option{cache.writeback=on} mode is used. It will report data | ||
145 | writes as completed as soon as the data is present in the host page cache. | ||
146 | This is safe as long as your guest OS makes sure to correctly flush disk caches | ||
147 | where needed. If your guest OS does not handle volatile disk write caches | ||
148 | correctly and your host crashes or loses power, then the guest may experience | ||
149 | data corruption. | ||
150 | |||
151 | -For such guests, you should consider using @option{cache=writethrough}. This | ||
152 | +For such guests, you should consider using @option{cache.writeback=off}. This | ||
153 | means that the host page cache will be used to read and write data, but write | ||
154 | notification will be sent to the guest only after QEMU has made sure to flush | ||
155 | each write to the disk. Be aware that this has a major impact on performance. | ||
156 | |||
157 | -The host page cache can be avoided entirely with @option{cache=none}. This will | ||
158 | -attempt to do disk IO directly to the guest's memory. QEMU may still perform | ||
159 | -an internal copy of the data. Note that this is considered a writeback mode and | ||
160 | -the guest OS must handle the disk write cache correctly in order to avoid data | ||
161 | -corruption on host crashes. | ||
162 | - | ||
163 | -The host page cache can be avoided while only sending write notifications to | ||
164 | -the guest when the data has been flushed to the disk using | ||
165 | -@option{cache=directsync}. | ||
166 | - | ||
167 | -In case you don't care about data integrity over host failures, use | ||
168 | -@option{cache=unsafe}. This option tells QEMU that it never needs to write any | ||
169 | -data to the disk but can instead keep things in cache. If anything goes wrong, | ||
170 | -like your host losing power, the disk storage getting disconnected accidentally, | ||
171 | -etc. your image will most probably be rendered unusable. When using | ||
172 | -the @option{-snapshot} option, unsafe caching is always used. | ||
173 | +When using the @option{-snapshot} option, unsafe caching is always used. | ||
174 | |||
175 | Copy-on-read avoids accessing the same backing file sectors repeatedly and is | ||
176 | useful when the backing file is over a slow network. By default copy-on-read | ||
177 | -- | ||
178 | 1.8.3.1 | ||
179 | |||
180 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This documents the driver-specific options for the raw, qcow2 and file | ||
2 | block drivers for the man page. For everything else, we refer to the | ||
3 | QAPI documentation. | ||
4 | 1 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Reviewed-by: Max Reitz <mreitz@redhat.com> | ||
8 | --- | ||
9 | qemu-options.hx | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- | ||
10 | 1 file changed, 114 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/qemu-options.hx | ||
15 | +++ b/qemu-options.hx | ||
16 | @@ -XXX,XX +XXX,XX @@ STEXI | ||
17 | @item -blockdev @var{option}[,@var{option}[,@var{option}[,...]]] | ||
18 | @findex -blockdev | ||
19 | |||
20 | -Define a new block driver node. | ||
21 | +Define a new block driver node. Some of the options apply to all block drivers, | ||
22 | +other options are only accepted for a specific block driver. See below for a | ||
23 | +list of generic options and options for the most common block drivers. | ||
24 | + | ||
25 | +Options that expect a reference to another node (e.g. @code{file}) can be | ||
26 | +given in two ways. Either you specify the node name of an already existing node | ||
27 | +(file=@var{node-name}), or you define a new node inline, adding options | ||
28 | +for the referenced node after a dot (file.filename=@var{path},file.aio=native). | ||
29 | + | ||
30 | +A block driver node created with @option{-blockdev} can be used for a guest | ||
31 | +device by specifying its node name for the @code{drive} property in a | ||
32 | +@option{-device} argument that defines a block device. | ||
33 | |||
34 | @table @option | ||
35 | @item Valid options for any block driver node: | ||
36 | @@ -XXX,XX +XXX,XX @@ zero write commands. You may even choose "unmap" if @var{discard} is set | ||
37 | to "unmap" to allow a zero write to be converted to an @code{unmap} operation. | ||
38 | @end table | ||
39 | |||
40 | +@item Driver-specific options for @code{file} | ||
41 | + | ||
42 | +This is the protocol-level block driver for accessing regular files. | ||
43 | + | ||
44 | +@table @code | ||
45 | +@item filename | ||
46 | +The path to the image file in the local filesystem | ||
47 | +@item aio | ||
48 | +Specifies the AIO backend (threads/native, default: threads) | ||
49 | +@end table | ||
50 | +Example: | ||
51 | +@example | ||
52 | +-blockdev driver=file,node-name=disk,filename=disk.img | ||
53 | +@end example | ||
54 | + | ||
55 | +@item Driver-specific options for @code{raw} | ||
56 | + | ||
57 | +This is the image format block driver for raw images. It is usually | ||
58 | +stacked on top of a protocol level block driver such as @code{file}. | ||
59 | + | ||
60 | +@table @code | ||
61 | +@item file | ||
62 | +Reference to or definition of the data source block driver node | ||
63 | +(e.g. a @code{file} driver node) | ||
64 | +@end table | ||
65 | +Example 1: | ||
66 | +@example | ||
67 | +-blockdev driver=file,node-name=disk_file,filename=disk.img | ||
68 | +-blockdev driver=raw,node-name=disk,file=disk_file | ||
69 | +@end example | ||
70 | +Example 2: | ||
71 | +@example | ||
72 | +-blockdev driver=raw,node-name=disk,file.driver=file,file.filename=disk.img | ||
73 | +@end example | ||
74 | + | ||
75 | +@item Driver-specific options for @code{qcow2} | ||
76 | + | ||
77 | +This is the image format block driver for qcow2 images. It is usually | ||
78 | +stacked on top of a protocol level block driver such as @code{file}. | ||
79 | + | ||
80 | +@table @code | ||
81 | +@item file | ||
82 | +Reference to or definition of the data source block driver node | ||
83 | +(e.g. a @code{file} driver node) | ||
84 | + | ||
85 | +@item backing | ||
86 | +Reference to or definition of the backing file block device (default is taken | ||
87 | +from the image file). It is allowed to pass an empty string here in order to | ||
88 | +disable the default backing file. | ||
89 | + | ||
90 | +@item lazy-refcounts | ||
91 | +Whether to enable the lazy refcounts feature (on/off; default is taken from the | ||
92 | +image file) | ||
93 | + | ||
94 | +@item cache-size | ||
95 | +The maximum total size of the L2 table and refcount block caches in bytes | ||
96 | +(default: 1048576 bytes or 8 clusters, whichever is larger) | ||
97 | + | ||
98 | +@item l2-cache-size | ||
99 | +The maximum size of the L2 table cache in bytes | ||
100 | +(default: 4/5 of the total cache size) | ||
101 | + | ||
102 | +@item refcount-cache-size | ||
103 | +The maximum size of the refcount block cache in bytes | ||
104 | +(default: 1/5 of the total cache size) | ||
105 | + | ||
106 | +@item cache-clean-interval | ||
107 | +Clean unused entries in the L2 and refcount caches. The interval is in seconds. | ||
108 | +The default value is 0 and it disables this feature. | ||
109 | + | ||
110 | +@item pass-discard-request | ||
111 | +Whether discard requests to the qcow2 device should be forwarded to the data | ||
112 | +source (on/off; default: on if discard=unmap is specified, off otherwise) | ||
113 | + | ||
114 | +@item pass-discard-snapshot | ||
115 | +Whether discard requests for the data source should be issued when a snapshot | ||
116 | +operation (e.g. deleting a snapshot) frees clusters in the qcow2 file (on/off; | ||
117 | +default: on) | ||
118 | + | ||
119 | +@item pass-discard-other | ||
120 | +Whether discard requests for the data source should be issued on other | ||
121 | +occasions where a cluster gets freed (on/off; default: off) | ||
122 | + | ||
123 | +@item overlap-check | ||
124 | +Which overlap checks to perform for writes to the image | ||
125 | +(none/constant/cached/all; default: cached). For details or finer | ||
126 | +granularity control refer to the QAPI documentation of @code{blockdev-add}. | ||
127 | +@end table | ||
128 | + | ||
129 | +Example 1: | ||
130 | +@example | ||
131 | +-blockdev driver=file,node-name=my_file,filename=/tmp/disk.qcow2 | ||
132 | +-blockdev driver=qcow2,node-name=hda,file=my_file,overlap-check=none,cache-size=16777216 | ||
133 | +@end example | ||
134 | +Example 2: | ||
135 | +@example | ||
136 | +-blockdev driver=qcow2,node-name=disk,file.driver=http,file.filename=http://example.com/image.qcow2 | ||
137 | +@end example | ||
138 | + | ||
139 | +@item Driver-specific options for other drivers | ||
140 | +Please refer to the QAPI documentation of the @code{blockdev-add} QMP command. | ||
141 | + | ||
142 | @end table | ||
143 | |||
144 | ETEXI | ||
145 | -- | ||
146 | 1.8.3.1 | ||
147 | |||
148 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Old kvm.ko versions only supported a tiny number of ioeventfds so | 3 | The HMP monitor runs in the main loop thread. Calling |
4 | virtio-pci avoids ioeventfds when kvm_has_many_ioeventfds() returns 0. | 4 | AIO_WAIT_WHILE(qemu_get_aio_context(), ...) from the main loop thread is |
5 | equivalent to AIO_WAIT_WHILE_UNLOCKED(NULL, ...) because neither unlocks | ||
6 | the AioContext and the latter's assertion that we're in the main loop | ||
7 | succeeds. | ||
5 | 8 | ||
6 | Do not check kvm_has_many_ioeventfds() when KVM is disabled since it | 9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | always returns 0. Since commit 8c56c1a592b5092d91da8d8943c17777d6462a6f | 10 | Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | ("memory: emulate ioeventfd") it has been possible to use ioeventfds in | 11 | Reviewed-by: Markus Armbruster <armbru@redhat.com> |
9 | qtest or TCG mode. | 12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
10 | |||
11 | This patch makes -device virtio-blk-pci,iothread=iothread0 work even | ||
12 | when KVM is disabled. | ||
13 | |||
14 | I have tested that virtio-blk-pci works under TCG both with and without | ||
15 | iothread. | ||
16 | |||
17 | Cc: Michael S. Tsirkin <mst@redhat.com> | ||
18 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
19 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | 14 | Message-Id: <20230309190855.414275-6-stefanha@redhat.com> |
15 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
20 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
21 | --- | 17 | --- |
22 | hw/virtio/virtio-pci.c | 2 +- | 18 | monitor/hmp.c | 2 +- |
23 | 1 file changed, 1 insertion(+), 1 deletion(-) | 19 | 1 file changed, 1 insertion(+), 1 deletion(-) |
24 | 20 | ||
25 | diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c | 21 | diff --git a/monitor/hmp.c b/monitor/hmp.c |
26 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/hw/virtio/virtio-pci.c | 23 | --- a/monitor/hmp.c |
28 | +++ b/hw/virtio/virtio-pci.c | 24 | +++ b/monitor/hmp.c |
29 | @@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) | 25 | @@ -XXX,XX +XXX,XX @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline) |
30 | bool pcie_port = pci_bus_is_express(pci_dev->bus) && | 26 | Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data); |
31 | !pci_bus_is_root(pci_dev->bus); | 27 | monitor_set_cur(co, &mon->common); |
32 | 28 | aio_co_enter(qemu_get_aio_context(), co); | |
33 | - if (!kvm_has_many_ioeventfds()) { | 29 | - AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done); |
34 | + if (kvm_enabled() && !kvm_has_many_ioeventfds()) { | 30 | + AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done); |
35 | proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; | ||
36 | } | 31 | } |
37 | 32 | ||
33 | qobject_unref(qdict); | ||
38 | -- | 34 | -- |
39 | 1.8.3.1 | 35 | 2.40.0 |
40 | 36 | ||
41 | 37 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We already have functions for doing these calculations, so let's use | 3 | monitor_cleanup() is called from the main loop thread. Calling |
4 | them instead of doing everything by hand. This makes the code a bit | 4 | AIO_WAIT_WHILE(qemu_get_aio_context(), ...) from the main loop thread is |
5 | more readable. | 5 | equivalent to AIO_WAIT_WHILE_UNLOCKED(NULL, ...) because neither unlocks |
6 | the AioContext and the latter's assertion that we're in the main loop | ||
7 | succeeds. | ||
6 | 8 | ||
7 | Signed-off-by: Alberto Garcia <berto@igalia.com> | 9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Reviewed-by: Markus Armbruster <armbru@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Message-Id: <20230309190855.414275-7-stefanha@redhat.com> | ||
15 | Reviewed-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 16 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 17 | --- |
10 | block/qcow2-cluster.c | 4 ++-- | 18 | monitor/monitor.c | 4 ++-- |
11 | block/qcow2.c | 2 +- | 19 | 1 file changed, 2 insertions(+), 2 deletions(-) |
12 | 2 files changed, 3 insertions(+), 3 deletions(-) | ||
13 | 20 | ||
14 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 21 | diff --git a/monitor/monitor.c b/monitor/monitor.c |
15 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/qcow2-cluster.c | 23 | --- a/monitor/monitor.c |
17 | +++ b/block/qcow2-cluster.c | 24 | +++ b/monitor/monitor.c |
18 | @@ -XXX,XX +XXX,XX @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, | 25 | @@ -XXX,XX +XXX,XX @@ void monitor_cleanup(void) |
19 | 26 | * We need to poll both qemu_aio_context and iohandler_ctx to make | |
20 | /* find the cluster offset for the given disk offset */ | 27 | * sure that the dispatcher coroutine keeps making progress and |
21 | 28 | * eventually terminates. qemu_aio_context is automatically | |
22 | - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); | 29 | - * polled by calling AIO_WAIT_WHILE on it, but we must poll |
23 | + l2_index = offset_to_l2_index(s, offset); | 30 | + * polled by calling AIO_WAIT_WHILE_UNLOCKED on it, but we must poll |
24 | *cluster_offset = be64_to_cpu(l2_table[l2_index]); | 31 | * iohandler_ctx manually. |
25 | 32 | * | |
26 | nb_clusters = size_to_clusters(s, bytes_needed); | 33 | * Letting the iothread continue while shutting down the dispatcher |
27 | @@ -XXX,XX +XXX,XX @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, | 34 | @@ -XXX,XX +XXX,XX @@ void monitor_cleanup(void) |
28 | 35 | aio_co_wake(qmp_dispatcher_co); | |
29 | /* find the cluster offset for the given disk offset */ | ||
30 | |||
31 | - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); | ||
32 | + l2_index = offset_to_l2_index(s, offset); | ||
33 | |||
34 | *new_l2_table = l2_table; | ||
35 | *new_l2_index = l2_index; | ||
36 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/block/qcow2.c | ||
39 | +++ b/block/qcow2.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static int validate_table_offset(BlockDriverState *bs, uint64_t offset, | ||
41 | } | 36 | } |
42 | 37 | ||
43 | /* Tables must be cluster aligned */ | 38 | - AIO_WAIT_WHILE(qemu_get_aio_context(), |
44 | - if (offset & (s->cluster_size - 1)) { | 39 | + AIO_WAIT_WHILE_UNLOCKED(NULL, |
45 | + if (offset_into_cluster(s, offset) != 0) { | 40 | (aio_poll(iohandler_get_aio_context(), false), |
46 | return -EINVAL; | 41 | qatomic_mb_read(&qmp_dispatcher_co_busy))); |
47 | } | ||
48 | 42 | ||
49 | -- | 43 | -- |
50 | 1.8.3.1 | 44 | 2.40.0 |
51 | 45 | ||
52 | 46 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Wilfred Mallawa <wilfred.mallawa@wdc.com> |
---|---|---|---|
2 | 2 | ||
3 | The bs->exact_filename field may not be sufficient to store the full | 3 | Fixup a few minor typos |
4 | blkverify node filename. In this case, we should not generate a filename | ||
5 | at all instead of an unusable one. | ||
6 | 4 | ||
7 | Cc: qemu-stable@nongnu.org | 5 | Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com> |
8 | Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com> | 6 | Message-Id: <20230313003744.55476-1-wilfred.mallawa@opensource.wdc.com> |
9 | Signed-off-by: Max Reitz <mreitz@redhat.com> | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Message-id: 20170613172006.19685-3-mreitz@redhat.com | 8 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | 9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
14 | --- | 10 | --- |
15 | block/blkverify.c | 12 ++++++++---- | 11 | include/block/aio-wait.h | 2 +- |
16 | 1 file changed, 8 insertions(+), 4 deletions(-) | 12 | include/block/block_int-common.h | 2 +- |
13 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
17 | 14 | ||
18 | diff --git a/block/blkverify.c b/block/blkverify.c | 15 | diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/blkverify.c | 17 | --- a/include/block/aio-wait.h |
21 | +++ b/block/blkverify.c | 18 | +++ b/include/block/aio-wait.h |
22 | @@ -XXX,XX +XXX,XX @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options) | 19 | @@ -XXX,XX +XXX,XX @@ extern AioWait global_aio_wait; |
23 | if (bs->file->bs->exact_filename[0] | 20 | * @ctx: the aio context, or NULL if multiple aio contexts (for which the |
24 | && s->test_file->bs->exact_filename[0]) | 21 | * caller does not hold a lock) are involved in the polling condition. |
25 | { | 22 | * @cond: wait while this conditional expression is true |
26 | - snprintf(bs->exact_filename, sizeof(bs->exact_filename), | 23 | - * @unlock: whether to unlock and then lock again @ctx. This apples |
27 | - "blkverify:%s:%s", | 24 | + * @unlock: whether to unlock and then lock again @ctx. This applies |
28 | - bs->file->bs->exact_filename, | 25 | * only when waiting for another AioContext from the main loop. |
29 | - s->test_file->bs->exact_filename); | 26 | * Otherwise it's ignored. |
30 | + int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), | 27 | * |
31 | + "blkverify:%s:%s", | 28 | diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h |
32 | + bs->file->bs->exact_filename, | 29 | index XXXXXXX..XXXXXXX 100644 |
33 | + s->test_file->bs->exact_filename); | 30 | --- a/include/block/block_int-common.h |
34 | + if (ret >= sizeof(bs->exact_filename)) { | 31 | +++ b/include/block/block_int-common.h |
35 | + /* An overflow makes the filename unusable, so do not report any */ | 32 | @@ -XXX,XX +XXX,XX @@ extern QemuOptsList bdrv_create_opts_simple; |
36 | + bs->exact_filename[0] = 0; | 33 | /* |
37 | + } | 34 | * Common functions that are neither I/O nor Global State. |
38 | } | 35 | * |
39 | } | 36 | - * See include/block/block-commmon.h for more information about |
37 | + * See include/block/block-common.h for more information about | ||
38 | * the Common API. | ||
39 | */ | ||
40 | 40 | ||
41 | -- | 41 | -- |
42 | 1.8.3.1 | 42 | 2.40.0 |
43 | 43 | ||
44 | 44 | diff view generated by jsdifflib |
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | migration_incoming_state_destroy() uses qemu_fclose() on the vmstate | 3 | Not a coroutine_fn, you say? |
4 | file. Make sure to call it inside an AioContext acquire/release region. | ||
5 | 4 | ||
6 | This fixes a 'qemu: qemu_mutex_unlock: Operation not permitted' abort | 5 | static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) |
7 | in loadvm. | 6 | { |
7 | BdrvChild *child; | ||
8 | int64_t child_size, sum = 0; | ||
8 | 9 | ||
9 | This patch closes the vmstate file before ending the drained region. | 10 | QLIST_FOREACH(child, &bs->children, next) { |
10 | Previously we closed the vmstate file after ending the drained region. | 11 | if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | |
11 | The order does not matter. | 12 | BDRV_CHILD_FILTERED)) |
13 | { | ||
14 | child_size = bdrv_co_get_allocated_file_size(child->bs); | ||
15 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
16 | |||
17 | Well what do we have here?! | ||
18 | |||
19 | I rest my case, your honor. | ||
12 | 20 | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 21 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
22 | Message-Id: <20230308211435.346375-1-stefanha@redhat.com> | ||
23 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 24 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 25 | --- |
16 | migration/savevm.c | 2 +- | 26 | block.c | 2 +- |
17 | 1 file changed, 1 insertion(+), 1 deletion(-) | 27 | 1 file changed, 1 insertion(+), 1 deletion(-) |
18 | 28 | ||
19 | diff --git a/migration/savevm.c b/migration/savevm.c | 29 | diff --git a/block.c b/block.c |
20 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/migration/savevm.c | 31 | --- a/block.c |
22 | +++ b/migration/savevm.c | 32 | +++ b/block.c |
23 | @@ -XXX,XX +XXX,XX @@ int load_snapshot(const char *name, Error **errp) | 33 | @@ -XXX,XX +XXX,XX @@ exit: |
24 | 34 | * sums the size of all data-bearing children. (This excludes backing | |
25 | aio_context_acquire(aio_context); | 35 | * children.) |
26 | ret = qemu_loadvm_state(f); | 36 | */ |
27 | + migration_incoming_state_destroy(); | 37 | -static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) |
28 | aio_context_release(aio_context); | 38 | +static int64_t coroutine_fn bdrv_sum_allocated_file_size(BlockDriverState *bs) |
29 | 39 | { | |
30 | bdrv_drain_all_end(); | 40 | BdrvChild *child; |
31 | 41 | int64_t child_size, sum = 0; | |
32 | - migration_incoming_state_destroy(); | ||
33 | if (ret < 0) { | ||
34 | error_setg(errp, "Error %d while loading VM state", ret); | ||
35 | return ret; | ||
36 | -- | 42 | -- |
37 | 1.8.3.1 | 43 | 2.40.0 |
38 | |||
39 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Emanuele Giuseppe Esposito <eesposit@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch splits do_perform_cow() into three separate functions to | 3 | Remove usage of aio_context_acquire by always submitting asynchronous |
4 | read, encrypt and write the COW regions. | 4 | AIO to the current thread's LinuxAioState. |
5 | 5 | ||
6 | perform_cow() can now read both regions first, then encrypt them and | 6 | In order to prevent mistakes from the caller side, avoid passing LinuxAioState |
7 | finally write them to disk. The memory allocation is also done in | 7 | in laio_io_{plug/unplug} and laio_co_submit, and document the functions |
8 | this function now, using one single buffer large enough to hold both | 8 | to make clear that they work in the current thread's AioContext. |
9 | regions. | 9 | |
10 | 10 | Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> | |
11 | Signed-off-by: Alberto Garcia <berto@igalia.com> | 11 | Message-Id: <20230203131731.851116-2-eesposit@redhat.com> |
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 15 | --- |
15 | block/qcow2-cluster.c | 117 +++++++++++++++++++++++++++++++++++++------------- | 16 | include/block/aio.h | 4 ---- |
16 | 1 file changed, 87 insertions(+), 30 deletions(-) | 17 | include/block/raw-aio.h | 18 ++++++++++++------ |
17 | 18 | include/sysemu/block-backend-io.h | 5 +++++ | |
18 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 19 | block/file-posix.c | 10 +++------- |
19 | index XXXXXXX..XXXXXXX 100644 | 20 | block/linux-aio.c | 29 +++++++++++++++++------------ |
20 | --- a/block/qcow2-cluster.c | 21 | 5 files changed, 37 insertions(+), 29 deletions(-) |
21 | +++ b/block/qcow2-cluster.c | 22 | |
22 | @@ -XXX,XX +XXX,XX @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, | 23 | diff --git a/include/block/aio.h b/include/block/aio.h |
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/include/block/aio.h | ||
26 | +++ b/include/block/aio.h | ||
27 | @@ -XXX,XX +XXX,XX @@ struct AioContext { | ||
28 | struct ThreadPool *thread_pool; | ||
29 | |||
30 | #ifdef CONFIG_LINUX_AIO | ||
31 | - /* | ||
32 | - * State for native Linux AIO. Uses aio_context_acquire/release for | ||
33 | - * locking. | ||
34 | - */ | ||
35 | struct LinuxAioState *linux_aio; | ||
36 | #endif | ||
37 | #ifdef CONFIG_LINUX_IO_URING | ||
38 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/include/block/raw-aio.h | ||
41 | +++ b/include/block/raw-aio.h | ||
42 | @@ -XXX,XX +XXX,XX @@ | ||
43 | typedef struct LinuxAioState LinuxAioState; | ||
44 | LinuxAioState *laio_init(Error **errp); | ||
45 | void laio_cleanup(LinuxAioState *s); | ||
46 | -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, | ||
47 | - uint64_t offset, QEMUIOVector *qiov, int type, | ||
48 | - uint64_t dev_max_batch); | ||
49 | + | ||
50 | +/* laio_co_submit: submit I/O requests in the thread's current AioContext. */ | ||
51 | +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, | ||
52 | + int type, uint64_t dev_max_batch); | ||
53 | + | ||
54 | void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); | ||
55 | void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); | ||
56 | -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); | ||
57 | -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, | ||
58 | - uint64_t dev_max_batch); | ||
59 | + | ||
60 | +/* | ||
61 | + * laio_io_plug/unplug work in the thread's current AioContext, therefore the | ||
62 | + * caller must ensure that they are paired in the same IOThread. | ||
63 | + */ | ||
64 | +void laio_io_plug(void); | ||
65 | +void laio_io_unplug(uint64_t dev_max_batch); | ||
66 | #endif | ||
67 | /* io_uring.c - Linux io_uring implementation */ | ||
68 | #ifdef CONFIG_LINUX_IO_URING | ||
69 | diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/include/sysemu/block-backend-io.h | ||
72 | +++ b/include/sysemu/block-backend-io.h | ||
73 | @@ -XXX,XX +XXX,XX @@ void blk_iostatus_set_err(BlockBackend *blk, int error); | ||
74 | int blk_get_max_iov(BlockBackend *blk); | ||
75 | int blk_get_max_hw_iov(BlockBackend *blk); | ||
76 | |||
77 | +/* | ||
78 | + * blk_io_plug/unplug are thread-local operations. This means that multiple | ||
79 | + * IOThreads can simultaneously call plug/unplug, but the caller must ensure | ||
80 | + * that each unplug() is called in the same IOThread of the matching plug(). | ||
81 | + */ | ||
82 | void coroutine_fn blk_co_io_plug(BlockBackend *blk); | ||
83 | void co_wrapper blk_io_plug(BlockBackend *blk); | ||
84 | |||
85 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/block/file-posix.c | ||
88 | +++ b/block/file-posix.c | ||
89 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, | ||
90 | #endif | ||
91 | #ifdef CONFIG_LINUX_AIO | ||
92 | } else if (s->use_linux_aio) { | ||
93 | - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
94 | assert(qiov->size == bytes); | ||
95 | - return laio_co_submit(bs, aio, s->fd, offset, qiov, type, | ||
96 | - s->aio_max_batch); | ||
97 | + return laio_co_submit(s->fd, offset, qiov, type, s->aio_max_batch); | ||
98 | #endif | ||
99 | } | ||
100 | |||
101 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_plug(BlockDriverState *bs) | ||
102 | BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
103 | #ifdef CONFIG_LINUX_AIO | ||
104 | if (s->use_linux_aio) { | ||
105 | - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
106 | - laio_io_plug(bs, aio); | ||
107 | + laio_io_plug(); | ||
108 | } | ||
109 | #endif | ||
110 | #ifdef CONFIG_LINUX_IO_URING | ||
111 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs) | ||
112 | BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
113 | #ifdef CONFIG_LINUX_AIO | ||
114 | if (s->use_linux_aio) { | ||
115 | - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
116 | - laio_io_unplug(bs, aio, s->aio_max_batch); | ||
117 | + laio_io_unplug(s->aio_max_batch); | ||
118 | } | ||
119 | #endif | ||
120 | #ifdef CONFIG_LINUX_IO_URING | ||
121 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/block/linux-aio.c | ||
124 | +++ b/block/linux-aio.c | ||
125 | @@ -XXX,XX +XXX,XX @@ | ||
126 | #include "qemu/coroutine.h" | ||
127 | #include "qapi/error.h" | ||
128 | |||
129 | +/* Only used for assertions. */ | ||
130 | +#include "qemu/coroutine_int.h" | ||
131 | + | ||
132 | #include <libaio.h> | ||
133 | |||
134 | /* | ||
135 | @@ -XXX,XX +XXX,XX @@ struct LinuxAioState { | ||
136 | io_context_t ctx; | ||
137 | EventNotifier e; | ||
138 | |||
139 | - /* io queue for submit at batch. Protected by AioContext lock. */ | ||
140 | + /* No locking required, only accessed from AioContext home thread */ | ||
141 | LaioQueue io_q; | ||
142 | - | ||
143 | - /* I/O completion processing. Only runs in I/O thread. */ | ||
144 | QEMUBH *completion_bh; | ||
145 | int event_idx; | ||
146 | int event_max; | ||
147 | @@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) | ||
148 | * later. Coroutines cannot be entered recursively so avoid doing | ||
149 | * that! | ||
150 | */ | ||
151 | + assert(laiocb->co->ctx == laiocb->ctx->aio_context); | ||
152 | if (!qemu_coroutine_entered(laiocb->co)) { | ||
153 | aio_co_wake(laiocb->co); | ||
154 | } | ||
155 | @@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions(LinuxAioState *s) | ||
156 | |||
157 | static void qemu_laio_process_completions_and_submit(LinuxAioState *s) | ||
158 | { | ||
159 | - aio_context_acquire(s->aio_context); | ||
160 | qemu_laio_process_completions(s); | ||
161 | |||
162 | if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { | ||
163 | ioq_submit(s); | ||
164 | } | ||
165 | - aio_context_release(s->aio_context); | ||
166 | } | ||
167 | |||
168 | static void qemu_laio_completion_bh(void *opaque) | ||
169 | @@ -XXX,XX +XXX,XX @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch) | ||
170 | return max_batch; | ||
171 | } | ||
172 | |||
173 | -void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) | ||
174 | +void laio_io_plug(void) | ||
175 | { | ||
176 | + AioContext *ctx = qemu_get_current_aio_context(); | ||
177 | + LinuxAioState *s = aio_get_linux_aio(ctx); | ||
178 | + | ||
179 | s->io_q.plugged++; | ||
180 | } | ||
181 | |||
182 | -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, | ||
183 | - uint64_t dev_max_batch) | ||
184 | +void laio_io_unplug(uint64_t dev_max_batch) | ||
185 | { | ||
186 | + AioContext *ctx = qemu_get_current_aio_context(); | ||
187 | + LinuxAioState *s = aio_get_linux_aio(ctx); | ||
188 | + | ||
189 | assert(s->io_q.plugged); | ||
190 | s->io_q.plugged--; | ||
191 | |||
192 | @@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, | ||
23 | return 0; | 193 | return 0; |
24 | } | 194 | } |
25 | 195 | ||
26 | -static int coroutine_fn do_perform_cow(BlockDriverState *bs, | 196 | -int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, |
27 | - uint64_t src_cluster_offset, | 197 | - uint64_t offset, QEMUIOVector *qiov, int type, |
28 | - uint64_t cluster_offset, | 198 | - uint64_t dev_max_batch) |
29 | - unsigned offset_in_cluster, | 199 | +int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, |
30 | - unsigned bytes) | 200 | + int type, uint64_t dev_max_batch) |
31 | +static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, | 201 | { |
32 | + uint64_t src_cluster_offset, | ||
33 | + unsigned offset_in_cluster, | ||
34 | + uint8_t *buffer, | ||
35 | + unsigned bytes) | ||
36 | { | ||
37 | - BDRVQcow2State *s = bs->opaque; | ||
38 | QEMUIOVector qiov; | ||
39 | - struct iovec iov; | ||
40 | + struct iovec iov = { .iov_base = buffer, .iov_len = bytes }; | ||
41 | int ret; | 202 | int ret; |
42 | 203 | + AioContext *ctx = qemu_get_current_aio_context(); | |
43 | if (bytes == 0) { | 204 | struct qemu_laiocb laiocb = { |
44 | return 0; | 205 | .co = qemu_coroutine_self(), |
45 | } | 206 | .nbytes = qiov->size, |
46 | 207 | - .ctx = s, | |
47 | - iov.iov_len = bytes; | 208 | + .ctx = aio_get_linux_aio(ctx), |
48 | - iov.iov_base = qemu_try_blockalign(bs, iov.iov_len); | 209 | .ret = -EINPROGRESS, |
49 | - if (iov.iov_base == NULL) { | 210 | .is_read = (type == QEMU_AIO_READ), |
50 | - return -ENOMEM; | 211 | .qiov = qiov, |
51 | - } | ||
52 | - | ||
53 | qemu_iovec_init_external(&qiov, &iov, 1); | ||
54 | |||
55 | BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); | ||
56 | |||
57 | if (!bs->drv) { | ||
58 | - ret = -ENOMEDIUM; | ||
59 | - goto out; | ||
60 | + return -ENOMEDIUM; | ||
61 | } | ||
62 | |||
63 | /* Call .bdrv_co_readv() directly instead of using the public block-layer | ||
64 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs, | ||
65 | ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster, | ||
66 | bytes, &qiov, 0); | ||
67 | if (ret < 0) { | ||
68 | - goto out; | ||
69 | + return ret; | ||
70 | } | ||
71 | |||
72 | - if (bs->encrypted) { | ||
73 | + return 0; | ||
74 | +} | ||
75 | + | ||
76 | +static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, | ||
77 | + uint64_t src_cluster_offset, | ||
78 | + unsigned offset_in_cluster, | ||
79 | + uint8_t *buffer, | ||
80 | + unsigned bytes) | ||
81 | +{ | ||
82 | + if (bytes && bs->encrypted) { | ||
83 | + BDRVQcow2State *s = bs->opaque; | ||
84 | int64_t sector = (src_cluster_offset + offset_in_cluster) | ||
85 | >> BDRV_SECTOR_BITS; | ||
86 | assert(s->cipher); | ||
87 | assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); | ||
88 | assert((bytes & ~BDRV_SECTOR_MASK) == 0); | ||
89 | - if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base, | ||
90 | + if (qcow2_encrypt_sectors(s, sector, buffer, buffer, | ||
91 | bytes >> BDRV_SECTOR_BITS, true, NULL) < 0) { | ||
92 | - ret = -EIO; | ||
93 | - goto out; | ||
94 | + return false; | ||
95 | } | ||
96 | } | ||
97 | + return true; | ||
98 | +} | ||
99 | + | ||
100 | +static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, | ||
101 | + uint64_t cluster_offset, | ||
102 | + unsigned offset_in_cluster, | ||
103 | + uint8_t *buffer, | ||
104 | + unsigned bytes) | ||
105 | +{ | ||
106 | + QEMUIOVector qiov; | ||
107 | + struct iovec iov = { .iov_base = buffer, .iov_len = bytes }; | ||
108 | + int ret; | ||
109 | + | ||
110 | + if (bytes == 0) { | ||
111 | + return 0; | ||
112 | + } | ||
113 | + | ||
114 | + qemu_iovec_init_external(&qiov, &iov, 1); | ||
115 | |||
116 | ret = qcow2_pre_write_overlap_check(bs, 0, | ||
117 | cluster_offset + offset_in_cluster, bytes); | ||
118 | if (ret < 0) { | ||
119 | - goto out; | ||
120 | + return ret; | ||
121 | } | ||
122 | |||
123 | BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); | ||
124 | ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, | ||
125 | bytes, &qiov, 0); | ||
126 | if (ret < 0) { | ||
127 | - goto out; | ||
128 | + return ret; | ||
129 | } | ||
130 | |||
131 | - ret = 0; | ||
132 | -out: | ||
133 | - qemu_vfree(iov.iov_base); | ||
134 | - return ret; | ||
135 | + return 0; | ||
136 | } | ||
137 | |||
138 | |||
139 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | ||
140 | BDRVQcow2State *s = bs->opaque; | ||
141 | Qcow2COWRegion *start = &m->cow_start; | ||
142 | Qcow2COWRegion *end = &m->cow_end; | ||
143 | + unsigned buffer_size; | ||
144 | + uint8_t *start_buffer, *end_buffer; | ||
145 | int ret; | ||
146 | |||
147 | + assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); | ||
148 | + | ||
149 | if (start->nb_bytes == 0 && end->nb_bytes == 0) { | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | + /* Reserve a buffer large enough to store the data from both the | ||
154 | + * start and end COW regions. Add some padding in the middle if | ||
155 | + * necessary to make sure that the end region is optimally aligned */ | ||
156 | + buffer_size = QEMU_ALIGN_UP(start->nb_bytes, bdrv_opt_mem_align(bs)) + | ||
157 | + end->nb_bytes; | ||
158 | + start_buffer = qemu_try_blockalign(bs, buffer_size); | ||
159 | + if (start_buffer == NULL) { | ||
160 | + return -ENOMEM; | ||
161 | + } | ||
162 | + /* The part of the buffer where the end region is located */ | ||
163 | + end_buffer = start_buffer + buffer_size - end->nb_bytes; | ||
164 | + | ||
165 | qemu_co_mutex_unlock(&s->lock); | ||
166 | - ret = do_perform_cow(bs, m->offset, m->alloc_offset, | ||
167 | - start->offset, start->nb_bytes); | ||
168 | + /* First we read the existing data from both COW regions */ | ||
169 | + ret = do_perform_cow_read(bs, m->offset, start->offset, | ||
170 | + start_buffer, start->nb_bytes); | ||
171 | if (ret < 0) { | ||
172 | goto fail; | ||
173 | } | ||
174 | |||
175 | - ret = do_perform_cow(bs, m->offset, m->alloc_offset, | ||
176 | - end->offset, end->nb_bytes); | ||
177 | + ret = do_perform_cow_read(bs, m->offset, end->offset, | ||
178 | + end_buffer, end->nb_bytes); | ||
179 | + if (ret < 0) { | ||
180 | + goto fail; | ||
181 | + } | ||
182 | + | ||
183 | + /* Encrypt the data if necessary before writing it */ | ||
184 | + if (bs->encrypted) { | ||
185 | + if (!do_perform_cow_encrypt(bs, m->offset, start->offset, | ||
186 | + start_buffer, start->nb_bytes) || | ||
187 | + !do_perform_cow_encrypt(bs, m->offset, end->offset, | ||
188 | + end_buffer, end->nb_bytes)) { | ||
189 | + ret = -EIO; | ||
190 | + goto fail; | ||
191 | + } | ||
192 | + } | ||
193 | + | ||
194 | + /* And now we can write everything */ | ||
195 | + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, | ||
196 | + start_buffer, start->nb_bytes); | ||
197 | + if (ret < 0) { | ||
198 | + goto fail; | ||
199 | + } | ||
200 | |||
201 | + ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, | ||
202 | + end_buffer, end->nb_bytes); | ||
203 | fail: | ||
204 | qemu_co_mutex_lock(&s->lock); | ||
205 | |||
206 | @@ -XXX,XX +XXX,XX @@ fail: | ||
207 | qcow2_cache_depends_on_flush(s->l2_table_cache); | ||
208 | } | ||
209 | |||
210 | + qemu_vfree(start_buffer); | ||
211 | return ret; | ||
212 | } | ||
213 | |||
214 | -- | 212 | -- |
215 | 1.8.3.1 | 213 | 2.40.0 |
216 | |||
217 | diff view generated by jsdifflib |
1 | From: Emanuele Giuseppe Esposito <eesposit@redhat.com> | ||
---|---|---|---|
2 | |||
3 | Remove usage of aio_context_acquire by always submitting asynchronous | ||
4 | AIO to the current thread's LuringState. | ||
5 | |||
6 | In order to prevent mistakes from the caller side, avoid passing LuringState | ||
7 | in luring_io_{plug/unplug} and luring_co_submit, and document the functions | ||
8 | to make clear that they work in the current thread's AioContext. | ||
9 | |||
10 | Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> | ||
11 | Message-Id: <20230203131731.851116-3-eesposit@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
2 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
3 | --- | 15 | --- |
4 | block/qed-table.c | 47 ++++++++++++----------------------------------- | 16 | include/block/aio.h | 4 ---- |
5 | block/qed.c | 12 +++++++----- | 17 | include/block/raw-aio.h | 15 +++++++++++---- |
6 | block/qed.h | 8 +++----- | 18 | block/file-posix.c | 12 ++++-------- |
7 | 3 files changed, 22 insertions(+), 45 deletions(-) | 19 | block/io_uring.c | 23 +++++++++++++++-------- |
20 | 4 files changed, 30 insertions(+), 24 deletions(-) | ||
8 | 21 | ||
9 | diff --git a/block/qed-table.c b/block/qed-table.c | 22 | diff --git a/include/block/aio.h b/include/block/aio.h |
10 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/block/qed-table.c | 24 | --- a/include/block/aio.h |
12 | +++ b/block/qed-table.c | 25 | +++ b/include/block/aio.h |
13 | @@ -XXX,XX +XXX,XX @@ out: | 26 | @@ -XXX,XX +XXX,XX @@ struct AioContext { |
14 | * @index: Index of first element | 27 | struct LinuxAioState *linux_aio; |
15 | * @n: Number of elements | 28 | #endif |
16 | * @flush: Whether or not to sync to disk | 29 | #ifdef CONFIG_LINUX_IO_URING |
17 | - * @cb: Completion function | 30 | - /* |
18 | - * @opaque: Argument for completion function | 31 | - * State for Linux io_uring. Uses aio_context_acquire/release for |
19 | */ | 32 | - * locking. |
20 | -static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, | 33 | - */ |
21 | - unsigned int index, unsigned int n, bool flush, | 34 | struct LuringState *linux_io_uring; |
22 | - BlockCompletionFunc *cb, void *opaque) | 35 | |
23 | +static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, | 36 | /* State for file descriptor monitoring using Linux io_uring */ |
24 | + unsigned int index, unsigned int n, bool flush) | 37 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h |
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/include/block/raw-aio.h | ||
40 | +++ b/include/block/raw-aio.h | ||
41 | @@ -XXX,XX +XXX,XX @@ void laio_io_unplug(uint64_t dev_max_batch); | ||
42 | typedef struct LuringState LuringState; | ||
43 | LuringState *luring_init(Error **errp); | ||
44 | void luring_cleanup(LuringState *s); | ||
45 | -int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, | ||
46 | - uint64_t offset, QEMUIOVector *qiov, int type); | ||
47 | + | ||
48 | +/* luring_co_submit: submit I/O requests in the thread's current AioContext. */ | ||
49 | +int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, | ||
50 | + QEMUIOVector *qiov, int type); | ||
51 | void luring_detach_aio_context(LuringState *s, AioContext *old_context); | ||
52 | void luring_attach_aio_context(LuringState *s, AioContext *new_context); | ||
53 | -void luring_io_plug(BlockDriverState *bs, LuringState *s); | ||
54 | -void luring_io_unplug(BlockDriverState *bs, LuringState *s); | ||
55 | + | ||
56 | +/* | ||
57 | + * luring_io_plug/unplug work in the thread's current AioContext, therefore the | ||
58 | + * caller must ensure that they are paired in the same IOThread. | ||
59 | + */ | ||
60 | +void luring_io_plug(void); | ||
61 | +void luring_io_unplug(void); | ||
62 | #endif | ||
63 | |||
64 | #ifdef _WIN32 | ||
65 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/block/file-posix.c | ||
68 | +++ b/block/file-posix.c | ||
69 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, | ||
70 | type |= QEMU_AIO_MISALIGNED; | ||
71 | #ifdef CONFIG_LINUX_IO_URING | ||
72 | } else if (s->use_linux_io_uring) { | ||
73 | - LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
74 | assert(qiov->size == bytes); | ||
75 | - return luring_co_submit(bs, aio, s->fd, offset, qiov, type); | ||
76 | + return luring_co_submit(bs, s->fd, offset, qiov, type); | ||
77 | #endif | ||
78 | #ifdef CONFIG_LINUX_AIO | ||
79 | } else if (s->use_linux_aio) { | ||
80 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_plug(BlockDriverState *bs) | ||
81 | #endif | ||
82 | #ifdef CONFIG_LINUX_IO_URING | ||
83 | if (s->use_linux_io_uring) { | ||
84 | - LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
85 | - luring_io_plug(bs, aio); | ||
86 | + luring_io_plug(); | ||
87 | } | ||
88 | #endif | ||
89 | } | ||
90 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_io_unplug(BlockDriverState *bs) | ||
91 | #endif | ||
92 | #ifdef CONFIG_LINUX_IO_URING | ||
93 | if (s->use_linux_io_uring) { | ||
94 | - LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
95 | - luring_io_unplug(bs, aio); | ||
96 | + luring_io_unplug(); | ||
97 | } | ||
98 | #endif | ||
99 | } | ||
100 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) | ||
101 | |||
102 | #ifdef CONFIG_LINUX_IO_URING | ||
103 | if (s->use_linux_io_uring) { | ||
104 | - LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
105 | - return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH); | ||
106 | + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); | ||
107 | } | ||
108 | #endif | ||
109 | return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); | ||
110 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/block/io_uring.c | ||
113 | +++ b/block/io_uring.c | ||
114 | @@ -XXX,XX +XXX,XX @@ | ||
115 | #include "qapi/error.h" | ||
116 | #include "trace.h" | ||
117 | |||
118 | +/* Only used for assertions. */ | ||
119 | +#include "qemu/coroutine_int.h" | ||
120 | + | ||
121 | /* io_uring ring size */ | ||
122 | #define MAX_ENTRIES 128 | ||
123 | |||
124 | @@ -XXX,XX +XXX,XX @@ typedef struct LuringState { | ||
125 | |||
126 | struct io_uring ring; | ||
127 | |||
128 | - /* io queue for submit at batch. Protected by AioContext lock. */ | ||
129 | + /* No locking required, only accessed from AioContext home thread */ | ||
130 | LuringQueue io_q; | ||
131 | |||
132 | - /* I/O completion processing. Only runs in I/O thread. */ | ||
133 | QEMUBH *completion_bh; | ||
134 | } LuringState; | ||
135 | |||
136 | @@ -XXX,XX +XXX,XX @@ end: | ||
137 | * eventually runs later. Coroutines cannot be entered recursively | ||
138 | * so avoid doing that! | ||
139 | */ | ||
140 | + assert(luringcb->co->ctx == s->aio_context); | ||
141 | if (!qemu_coroutine_entered(luringcb->co)) { | ||
142 | aio_co_wake(luringcb->co); | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static int ioq_submit(LuringState *s) | ||
145 | |||
146 | static void luring_process_completions_and_submit(LuringState *s) | ||
25 | { | 147 | { |
26 | unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; | 148 | - aio_context_acquire(s->aio_context); |
27 | unsigned int start, end, i; | 149 | luring_process_completions(s); |
28 | @@ -XXX,XX +XXX,XX @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, | 150 | |
29 | ret = 0; | 151 | if (!s->io_q.plugged && s->io_q.in_queue > 0) { |
30 | out: | 152 | ioq_submit(s); |
31 | qemu_vfree(new_table); | 153 | } |
32 | - cb(opaque, ret); | 154 | - aio_context_release(s->aio_context); |
33 | -} | ||
34 | - | ||
35 | -/** | ||
36 | - * Propagate return value from async callback | ||
37 | - */ | ||
38 | -static void qed_sync_cb(void *opaque, int ret) | ||
39 | -{ | ||
40 | - *(int *)opaque = ret; | ||
41 | + return ret; | ||
42 | } | 155 | } |
43 | 156 | ||
44 | int qed_read_l1_table_sync(BDRVQEDState *s) | 157 | static void qemu_luring_completion_bh(void *opaque) |
45 | @@ -XXX,XX +XXX,XX @@ int qed_read_l1_table_sync(BDRVQEDState *s) | 158 | @@ -XXX,XX +XXX,XX @@ static void ioq_init(LuringQueue *io_q) |
46 | return qed_read_table(s, s->header.l1_table_offset, s->l1_table); | 159 | io_q->blocked = false; |
47 | } | 160 | } |
48 | 161 | ||
49 | -void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, | 162 | -void luring_io_plug(BlockDriverState *bs, LuringState *s) |
50 | - BlockCompletionFunc *cb, void *opaque) | 163 | +void luring_io_plug(void) |
51 | +int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n) | ||
52 | { | 164 | { |
53 | BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE); | 165 | + AioContext *ctx = qemu_get_current_aio_context(); |
54 | - qed_write_table(s, s->header.l1_table_offset, | 166 | + LuringState *s = aio_get_linux_io_uring(ctx); |
55 | - s->l1_table, index, n, false, cb, opaque); | 167 | trace_luring_io_plug(s); |
56 | + return qed_write_table(s, s->header.l1_table_offset, | 168 | s->io_q.plugged++; |
57 | + s->l1_table, index, n, false); | ||
58 | } | 169 | } |
59 | 170 | ||
60 | int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, | 171 | -void luring_io_unplug(BlockDriverState *bs, LuringState *s) |
61 | unsigned int n) | 172 | +void luring_io_unplug(void) |
62 | { | 173 | { |
63 | - int ret = -EINPROGRESS; | 174 | + AioContext *ctx = qemu_get_current_aio_context(); |
64 | - | 175 | + LuringState *s = aio_get_linux_io_uring(ctx); |
65 | - qed_write_l1_table(s, index, n, qed_sync_cb, &ret); | 176 | assert(s->io_q.plugged); |
66 | - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); | 177 | trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged, |
67 | - | 178 | s->io_q.in_queue, s->io_q.in_flight); |
68 | - return ret; | 179 | @@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, |
69 | + return qed_write_l1_table(s, index, n); | 180 | return 0; |
70 | } | 181 | } |
71 | 182 | ||
72 | int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset) | 183 | -int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, |
73 | @@ -XXX,XX +XXX,XX @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset | 184 | - uint64_t offset, QEMUIOVector *qiov, int type) |
74 | return qed_read_l2_table(s, request, offset); | 185 | +int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, |
75 | } | 186 | + QEMUIOVector *qiov, int type) |
76 | |||
77 | -void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | ||
78 | - unsigned int index, unsigned int n, bool flush, | ||
79 | - BlockCompletionFunc *cb, void *opaque) | ||
80 | +int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | ||
81 | + unsigned int index, unsigned int n, bool flush) | ||
82 | { | 187 | { |
83 | BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE); | 188 | int ret; |
84 | - qed_write_table(s, request->l2_table->offset, | 189 | + AioContext *ctx = qemu_get_current_aio_context(); |
85 | - request->l2_table->table, index, n, flush, cb, opaque); | 190 | + LuringState *s = aio_get_linux_io_uring(ctx); |
86 | + return qed_write_table(s, request->l2_table->offset, | 191 | LuringAIOCB luringcb = { |
87 | + request->l2_table->table, index, n, flush); | 192 | .co = qemu_coroutine_self(), |
88 | } | 193 | .ret = -EINPROGRESS, |
89 | |||
90 | int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | ||
91 | unsigned int index, unsigned int n, bool flush) | ||
92 | { | ||
93 | - int ret = -EINPROGRESS; | ||
94 | - | ||
95 | - qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); | ||
96 | - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); | ||
97 | - | ||
98 | - return ret; | ||
99 | + return qed_write_l2_table(s, request, index, n, flush); | ||
100 | } | ||
101 | diff --git a/block/qed.c b/block/qed.c | ||
102 | index XXXXXXX..XXXXXXX 100644 | ||
103 | --- a/block/qed.c | ||
104 | +++ b/block/qed.c | ||
105 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l1_update(void *opaque, int ret) | ||
106 | index = qed_l1_index(s, acb->cur_pos); | ||
107 | s->l1_table->offsets[index] = acb->request.l2_table->offset; | ||
108 | |||
109 | - qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb); | ||
110 | + ret = qed_write_l1_table(s, index, 1); | ||
111 | + qed_commit_l2_update(acb, ret); | ||
112 | } | ||
113 | |||
114 | /** | ||
115 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) | ||
116 | |||
117 | if (need_alloc) { | ||
118 | /* Write out the whole new L2 table */ | ||
119 | - qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true, | ||
120 | - qed_aio_write_l1_update, acb); | ||
121 | + ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true); | ||
122 | + qed_aio_write_l1_update(acb, ret); | ||
123 | } else { | ||
124 | /* Write out only the updated part of the L2 table */ | ||
125 | - qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false, | ||
126 | - qed_aio_next_io_cb, acb); | ||
127 | + ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, | ||
128 | + false); | ||
129 | + qed_aio_next_io(acb, ret); | ||
130 | } | ||
131 | return; | ||
132 | |||
133 | diff --git a/block/qed.h b/block/qed.h | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/block/qed.h | ||
136 | +++ b/block/qed.h | ||
137 | @@ -XXX,XX +XXX,XX @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table); | ||
138 | * Table I/O functions | ||
139 | */ | ||
140 | int qed_read_l1_table_sync(BDRVQEDState *s); | ||
141 | -void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, | ||
142 | - BlockCompletionFunc *cb, void *opaque); | ||
143 | +int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n); | ||
144 | int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, | ||
145 | unsigned int n); | ||
146 | int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | ||
147 | uint64_t offset); | ||
148 | int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset); | ||
149 | -void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | ||
150 | - unsigned int index, unsigned int n, bool flush, | ||
151 | - BlockCompletionFunc *cb, void *opaque); | ||
152 | +int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | ||
153 | + unsigned int index, unsigned int n, bool flush); | ||
154 | int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | ||
155 | unsigned int index, unsigned int n, bool flush); | ||
156 | |||
157 | -- | 194 | -- |
158 | 1.8.3.1 | 195 | 2.40.0 |
159 | |||
160 | diff view generated by jsdifflib |
1 | From: Emanuele Giuseppe Esposito <eesposit@redhat.com> | ||
---|---|---|---|
2 | |||
3 | Use qemu_get_current_aio_context() where possible, since we always | ||
4 | submit work to the current thread anyways. | ||
5 | |||
6 | We want to also be sure that the thread submitting the work is | ||
7 | the same as the one processing the pool, to avoid adding | ||
8 | synchronization to the pool list. | ||
9 | |||
10 | Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> | ||
11 | Message-Id: <20230203131731.851116-4-eesposit@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
2 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
3 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
4 | --- | 15 | --- |
5 | block/qed-cluster.c | 94 ++++++++++++++++++----------------------------------- | 16 | include/block/thread-pool.h | 5 +++++ |
6 | block/qed-table.c | 15 +++------ | 17 | block/file-posix.c | 21 ++++++++++----------- |
7 | block/qed.h | 3 +- | 18 | block/file-win32.c | 2 +- |
8 | 3 files changed, 36 insertions(+), 76 deletions(-) | 19 | block/qcow2-threads.c | 2 +- |
9 | 20 | util/thread-pool.c | 9 ++++----- | |
10 | diff --git a/block/qed-cluster.c b/block/qed-cluster.c | 21 | 5 files changed, 21 insertions(+), 18 deletions(-) |
11 | index XXXXXXX..XXXXXXX 100644 | 22 | |
12 | --- a/block/qed-cluster.c | 23 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h |
13 | +++ b/block/qed-cluster.c | 24 | index XXXXXXX..XXXXXXX 100644 |
14 | @@ -XXX,XX +XXX,XX @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s, | 25 | --- a/include/block/thread-pool.h |
15 | return i - index; | 26 | +++ b/include/block/thread-pool.h |
16 | } | 27 | @@ -XXX,XX +XXX,XX @@ typedef struct ThreadPool ThreadPool; |
17 | 28 | ThreadPool *thread_pool_new(struct AioContext *ctx); | |
18 | -typedef struct { | 29 | void thread_pool_free(ThreadPool *pool); |
19 | - BDRVQEDState *s; | 30 | |
20 | - uint64_t pos; | 31 | +/* |
21 | - size_t len; | 32 | + * thread_pool_submit* API: submit I/O requests in the thread's |
22 | - | 33 | + * current AioContext. |
23 | - QEDRequest *request; | 34 | + */ |
24 | - | 35 | BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, |
25 | - /* User callback */ | 36 | ThreadPoolFunc *func, void *arg, |
26 | - QEDFindClusterFunc *cb; | 37 | BlockCompletionFunc *cb, void *opaque); |
27 | - void *opaque; | 38 | int coroutine_fn thread_pool_submit_co(ThreadPool *pool, |
28 | -} QEDFindClusterCB; | 39 | ThreadPoolFunc *func, void *arg); |
29 | - | 40 | void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); |
30 | -static void qed_find_cluster_cb(void *opaque, int ret) | 41 | + |
31 | -{ | 42 | void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); |
32 | - QEDFindClusterCB *find_cluster_cb = opaque; | 43 | |
33 | - BDRVQEDState *s = find_cluster_cb->s; | 44 | #endif |
34 | - QEDRequest *request = find_cluster_cb->request; | 45 | diff --git a/block/file-posix.c b/block/file-posix.c |
35 | - uint64_t offset = 0; | 46 | index XXXXXXX..XXXXXXX 100644 |
36 | - size_t len = 0; | 47 | --- a/block/file-posix.c |
37 | - unsigned int index; | 48 | +++ b/block/file-posix.c |
38 | - unsigned int n; | ||
39 | - | ||
40 | - qed_acquire(s); | ||
41 | - if (ret) { | ||
42 | - goto out; | ||
43 | - } | ||
44 | - | ||
45 | - index = qed_l2_index(s, find_cluster_cb->pos); | ||
46 | - n = qed_bytes_to_clusters(s, | ||
47 | - qed_offset_into_cluster(s, find_cluster_cb->pos) + | ||
48 | - find_cluster_cb->len); | ||
49 | - n = qed_count_contiguous_clusters(s, request->l2_table->table, | ||
50 | - index, n, &offset); | ||
51 | - | ||
52 | - if (qed_offset_is_unalloc_cluster(offset)) { | ||
53 | - ret = QED_CLUSTER_L2; | ||
54 | - } else if (qed_offset_is_zero_cluster(offset)) { | ||
55 | - ret = QED_CLUSTER_ZERO; | ||
56 | - } else if (qed_check_cluster_offset(s, offset)) { | ||
57 | - ret = QED_CLUSTER_FOUND; | ||
58 | - } else { | ||
59 | - ret = -EINVAL; | ||
60 | - } | ||
61 | - | ||
62 | - len = MIN(find_cluster_cb->len, n * s->header.cluster_size - | ||
63 | - qed_offset_into_cluster(s, find_cluster_cb->pos)); | ||
64 | - | ||
65 | -out: | ||
66 | - find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len); | ||
67 | - qed_release(s); | ||
68 | - g_free(find_cluster_cb); | ||
69 | -} | ||
70 | - | ||
71 | /** | ||
72 | * Find the offset of a data cluster | ||
73 | * | ||
74 | @@ -XXX,XX +XXX,XX @@ out: | 49 | @@ -XXX,XX +XXX,XX @@ out: |
75 | void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | 50 | return result; |
76 | size_t len, QEDFindClusterFunc *cb, void *opaque) | 51 | } |
52 | |||
53 | -static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs, | ||
54 | - ThreadPoolFunc func, void *arg) | ||
55 | +static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg) | ||
77 | { | 56 | { |
78 | - QEDFindClusterCB *find_cluster_cb; | 57 | /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ |
79 | uint64_t l2_offset; | 58 | - ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); |
80 | + uint64_t offset = 0; | 59 | + ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context()); |
81 | + unsigned int index; | 60 | return thread_pool_submit_co(pool, func, arg); |
82 | + unsigned int n; | 61 | } |
83 | + int ret; | 62 | |
84 | 63 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, | |
85 | /* Limit length to L2 boundary. Requests are broken up at the L2 boundary | 64 | }; |
86 | * so that a request acts on one L2 table at a time. | 65 | |
87 | @@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | 66 | assert(qiov->size == bytes); |
88 | return; | 67 | - return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb); |
89 | } | 68 | + return raw_thread_pool_submit(handle_aiocb_rw, &acb); |
90 | 69 | } | |
91 | - find_cluster_cb = g_malloc(sizeof(*find_cluster_cb)); | 70 | |
92 | - find_cluster_cb->s = s; | 71 | static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, |
93 | - find_cluster_cb->pos = pos; | 72 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) |
94 | - find_cluster_cb->len = len; | 73 | return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); |
95 | - find_cluster_cb->cb = cb; | 74 | } |
96 | - find_cluster_cb->opaque = opaque; | 75 | #endif |
97 | - find_cluster_cb->request = request; | 76 | - return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); |
98 | + ret = qed_read_l2_table(s, request, l2_offset); | 77 | + return raw_thread_pool_submit(handle_aiocb_flush, &acb); |
99 | + qed_acquire(s); | 78 | } |
100 | + if (ret) { | 79 | |
101 | + goto out; | 80 | static void raw_aio_attach_aio_context(BlockDriverState *bs, |
102 | + } | 81 | @@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, |
103 | + | 82 | }, |
104 | + index = qed_l2_index(s, pos); | 83 | }; |
105 | + n = qed_bytes_to_clusters(s, | 84 | |
106 | + qed_offset_into_cluster(s, pos) + len); | 85 | - return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb); |
107 | + n = qed_count_contiguous_clusters(s, request->l2_table->table, | 86 | + return raw_thread_pool_submit(handle_aiocb_truncate, &acb); |
108 | + index, n, &offset); | 87 | } |
109 | + | 88 | |
110 | + if (qed_offset_is_unalloc_cluster(offset)) { | 89 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, |
111 | + ret = QED_CLUSTER_L2; | 90 | @@ -XXX,XX +XXX,XX @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes, |
112 | + } else if (qed_offset_is_zero_cluster(offset)) { | 91 | acb.aio_type |= QEMU_AIO_BLKDEV; |
113 | + ret = QED_CLUSTER_ZERO; | 92 | } |
114 | + } else if (qed_check_cluster_offset(s, offset)) { | 93 | |
115 | + ret = QED_CLUSTER_FOUND; | 94 | - ret = raw_thread_pool_submit(bs, handle_aiocb_discard, &acb); |
116 | + } else { | 95 | + ret = raw_thread_pool_submit(handle_aiocb_discard, &acb); |
117 | + ret = -EINVAL; | 96 | raw_account_discard(s, bytes, ret); |
118 | + } | ||
119 | + | ||
120 | + len = MIN(len, | ||
121 | + n * s->header.cluster_size - qed_offset_into_cluster(s, pos)); | ||
122 | |||
123 | - qed_read_l2_table(s, request, l2_offset, | ||
124 | - qed_find_cluster_cb, find_cluster_cb); | ||
125 | +out: | ||
126 | + cb(opaque, ret, offset, len); | ||
127 | + qed_release(s); | ||
128 | } | ||
129 | diff --git a/block/qed-table.c b/block/qed-table.c | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/block/qed-table.c | ||
132 | +++ b/block/qed-table.c | ||
133 | @@ -XXX,XX +XXX,XX @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, | ||
134 | return ret; | 97 | return ret; |
135 | } | 98 | } |
136 | 99 | @@ -XXX,XX +XXX,XX @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, | |
137 | -void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, | 100 | handler = handle_aiocb_write_zeroes; |
138 | - BlockCompletionFunc *cb, void *opaque) | 101 | } |
139 | +int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset) | 102 | |
103 | - return raw_thread_pool_submit(bs, handler, &acb); | ||
104 | + return raw_thread_pool_submit(handler, &acb); | ||
105 | } | ||
106 | |||
107 | static int coroutine_fn raw_co_pwrite_zeroes( | ||
108 | @@ -XXX,XX +XXX,XX @@ raw_co_copy_range_to(BlockDriverState *bs, | ||
109 | }, | ||
110 | }; | ||
111 | |||
112 | - return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb); | ||
113 | + return raw_thread_pool_submit(handle_aiocb_copy_range, &acb); | ||
114 | } | ||
115 | |||
116 | BlockDriver bdrv_file = { | ||
117 | @@ -XXX,XX +XXX,XX @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) | ||
118 | struct sg_io_hdr *io_hdr = buf; | ||
119 | if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT || | ||
120 | io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) { | ||
121 | - return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs), | ||
122 | + return pr_manager_execute(s->pr_mgr, qemu_get_current_aio_context(), | ||
123 | s->fd, io_hdr); | ||
124 | } | ||
125 | } | ||
126 | @@ -XXX,XX +XXX,XX @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) | ||
127 | }, | ||
128 | }; | ||
129 | |||
130 | - return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb); | ||
131 | + return raw_thread_pool_submit(handle_aiocb_ioctl, &acb); | ||
132 | } | ||
133 | #endif /* linux */ | ||
134 | |||
135 | diff --git a/block/file-win32.c b/block/file-win32.c | ||
136 | index XXXXXXX..XXXXXXX 100644 | ||
137 | --- a/block/file-win32.c | ||
138 | +++ b/block/file-win32.c | ||
139 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, | ||
140 | acb->aio_offset = offset; | ||
141 | |||
142 | trace_file_paio_submit(acb, opaque, offset, count, type); | ||
143 | - pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); | ||
144 | + pool = aio_get_thread_pool(qemu_get_current_aio_context()); | ||
145 | return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); | ||
146 | } | ||
147 | |||
148 | diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c | ||
149 | index XXXXXXX..XXXXXXX 100644 | ||
150 | --- a/block/qcow2-threads.c | ||
151 | +++ b/block/qcow2-threads.c | ||
152 | @@ -XXX,XX +XXX,XX @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg) | ||
140 | { | 153 | { |
141 | int ret; | 154 | int ret; |
142 | 155 | BDRVQcow2State *s = bs->opaque; | |
143 | @@ -XXX,XX +XXX,XX @@ void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, | 156 | - ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); |
144 | /* Check for cached L2 entry */ | 157 | + ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context()); |
145 | request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); | 158 | |
146 | if (request->l2_table) { | 159 | qemu_co_mutex_lock(&s->lock); |
147 | - cb(opaque, 0); | 160 | while (s->nb_threads >= QCOW2_MAX_THREADS) { |
148 | - return; | 161 | diff --git a/util/thread-pool.c b/util/thread-pool.c |
149 | + return 0; | 162 | index XXXXXXX..XXXXXXX 100644 |
150 | } | 163 | --- a/util/thread-pool.c |
151 | 164 | +++ b/util/thread-pool.c | |
152 | request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); | 165 | @@ -XXX,XX +XXX,XX @@ struct ThreadPoolElement { |
153 | @@ -XXX,XX +XXX,XX @@ void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, | 166 | /* Access to this list is protected by lock. */ |
154 | } | 167 | QTAILQ_ENTRY(ThreadPoolElement) reqs; |
155 | qed_release(s); | 168 | |
156 | 169 | - /* Access to this list is protected by the global mutex. */ | |
157 | - cb(opaque, ret); | 170 | + /* This list is only written by the thread pool's mother thread. */ |
158 | + return ret; | 171 | QLIST_ENTRY(ThreadPoolElement) all; |
159 | } | 172 | }; |
160 | 173 | ||
161 | int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset) | 174 | @@ -XXX,XX +XXX,XX @@ static void thread_pool_completion_bh(void *opaque) |
175 | ThreadPool *pool = opaque; | ||
176 | ThreadPoolElement *elem, *next; | ||
177 | |||
178 | - aio_context_acquire(pool->ctx); | ||
179 | restart: | ||
180 | QLIST_FOREACH_SAFE(elem, &pool->head, all, next) { | ||
181 | if (elem->state != THREAD_DONE) { | ||
182 | @@ -XXX,XX +XXX,XX @@ restart: | ||
183 | */ | ||
184 | qemu_bh_schedule(pool->completion_bh); | ||
185 | |||
186 | - aio_context_release(pool->ctx); | ||
187 | elem->common.cb(elem->common.opaque, elem->ret); | ||
188 | - aio_context_acquire(pool->ctx); | ||
189 | |||
190 | /* We can safely cancel the completion_bh here regardless of someone | ||
191 | * else having scheduled it meanwhile because we reenter the | ||
192 | @@ -XXX,XX +XXX,XX @@ restart: | ||
193 | qemu_aio_unref(elem); | ||
194 | } | ||
195 | } | ||
196 | - aio_context_release(pool->ctx); | ||
197 | } | ||
198 | |||
199 | static void thread_pool_cancel(BlockAIOCB *acb) | ||
200 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, | ||
162 | { | 201 | { |
163 | - int ret = -EINPROGRESS; | 202 | ThreadPoolElement *req; |
164 | - | 203 | |
165 | - qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); | 204 | + /* Assert that the thread submitting work is the same running the pool */ |
166 | - BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); | 205 | + assert(pool->ctx == qemu_get_current_aio_context()); |
167 | - | 206 | + |
168 | - return ret; | 207 | req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque); |
169 | + return qed_read_l2_table(s, request, offset); | 208 | req->func = func; |
170 | } | 209 | req->arg = arg; |
171 | |||
172 | void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | ||
173 | diff --git a/block/qed.h b/block/qed.h | ||
174 | index XXXXXXX..XXXXXXX 100644 | ||
175 | --- a/block/qed.h | ||
176 | +++ b/block/qed.h | ||
177 | @@ -XXX,XX +XXX,XX @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, | ||
178 | unsigned int n); | ||
179 | int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | ||
180 | uint64_t offset); | ||
181 | -void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, | ||
182 | - BlockCompletionFunc *cb, void *opaque); | ||
183 | +int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset); | ||
184 | void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | ||
185 | unsigned int index, unsigned int n, bool flush, | ||
186 | BlockCompletionFunc *cb, void *opaque); | ||
187 | -- | 210 | -- |
188 | 1.8.3.1 | 211 | 2.40.0 |
189 | |||
190 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Emanuele Giuseppe Esposito <eesposit@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | There used to be throttle_timers_{detach,attach}_aio_context() calls | 3 | thread_pool_submit_aio() is always called on a pool taken from |
4 | in bdrv_set_aio_context(), but since 7ca7f0f6db1fedd28d490795d778cf239 | 4 | qemu_get_current_aio_context(), and that is the only intended |
5 | they are now in blk_set_aio_context(). | 5 | use: each pool runs only in the same thread that is submitting |
6 | work to it, it can't run anywhere else. | ||
6 | 7 | ||
7 | Signed-off-by: Alberto Garcia <berto@igalia.com> | 8 | Therefore simplify the thread_pool_submit* API and remove the |
9 | ThreadPool function parameter. | ||
10 | |||
11 | Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> | ||
12 | Message-Id: <20230203131731.851116-5-eesposit@redhat.com> | ||
13 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
9 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
10 | --- | 16 | --- |
11 | block/throttle-groups.c | 2 +- | 17 | include/block/thread-pool.h | 10 ++++------ |
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | 18 | backends/tpm/tpm_backend.c | 4 +--- |
19 | block/file-posix.c | 4 +--- | ||
20 | block/file-win32.c | 4 +--- | ||
21 | block/qcow2-threads.c | 3 +-- | ||
22 | hw/9pfs/coth.c | 3 +-- | ||
23 | hw/ppc/spapr_nvdimm.c | 6 ++---- | ||
24 | hw/virtio/virtio-pmem.c | 3 +-- | ||
25 | scsi/pr-manager.c | 3 +-- | ||
26 | scsi/qemu-pr-helper.c | 3 +-- | ||
27 | tests/unit/test-thread-pool.c | 12 +++++------- | ||
28 | util/thread-pool.c | 16 ++++++++-------- | ||
29 | 12 files changed, 27 insertions(+), 44 deletions(-) | ||
13 | 30 | ||
14 | diff --git a/block/throttle-groups.c b/block/throttle-groups.c | 31 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h |
15 | index XXXXXXX..XXXXXXX 100644 | 32 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/throttle-groups.c | 33 | --- a/include/block/thread-pool.h |
17 | +++ b/block/throttle-groups.c | 34 | +++ b/include/block/thread-pool.h |
35 | @@ -XXX,XX +XXX,XX @@ void thread_pool_free(ThreadPool *pool); | ||
36 | * thread_pool_submit* API: submit I/O requests in the thread's | ||
37 | * current AioContext. | ||
38 | */ | ||
39 | -BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, | ||
40 | - ThreadPoolFunc *func, void *arg, | ||
41 | - BlockCompletionFunc *cb, void *opaque); | ||
42 | -int coroutine_fn thread_pool_submit_co(ThreadPool *pool, | ||
43 | - ThreadPoolFunc *func, void *arg); | ||
44 | -void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); | ||
45 | +BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | ||
46 | + BlockCompletionFunc *cb, void *opaque); | ||
47 | +int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); | ||
48 | +void thread_pool_submit(ThreadPoolFunc *func, void *arg); | ||
49 | |||
50 | void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); | ||
51 | |||
52 | diff --git a/backends/tpm/tpm_backend.c b/backends/tpm/tpm_backend.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/backends/tpm/tpm_backend.c | ||
55 | +++ b/backends/tpm/tpm_backend.c | ||
56 | @@ -XXX,XX +XXX,XX @@ bool tpm_backend_had_startup_error(TPMBackend *s) | ||
57 | |||
58 | void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd) | ||
59 | { | ||
60 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); | ||
61 | - | ||
62 | if (s->cmd != NULL) { | ||
63 | error_report("There is a TPM request pending"); | ||
64 | return; | ||
65 | @@ -XXX,XX +XXX,XX @@ void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd) | ||
66 | |||
67 | s->cmd = cmd; | ||
68 | object_ref(OBJECT(s)); | ||
69 | - thread_pool_submit_aio(pool, tpm_backend_worker_thread, s, | ||
70 | + thread_pool_submit_aio(tpm_backend_worker_thread, s, | ||
71 | tpm_backend_request_completed, s); | ||
72 | } | ||
73 | |||
74 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/block/file-posix.c | ||
77 | +++ b/block/file-posix.c | ||
78 | @@ -XXX,XX +XXX,XX @@ out: | ||
79 | |||
80 | static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg) | ||
81 | { | ||
82 | - /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */ | ||
83 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context()); | ||
84 | - return thread_pool_submit_co(pool, func, arg); | ||
85 | + return thread_pool_submit_co(func, arg); | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | diff --git a/block/file-win32.c b/block/file-win32.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/block/file-win32.c | ||
92 | +++ b/block/file-win32.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, | ||
94 | BlockCompletionFunc *cb, void *opaque, int type) | ||
95 | { | ||
96 | RawWin32AIOData *acb = g_new(RawWin32AIOData, 1); | ||
97 | - ThreadPool *pool; | ||
98 | |||
99 | acb->bs = bs; | ||
100 | acb->hfile = hfile; | ||
101 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, | ||
102 | acb->aio_offset = offset; | ||
103 | |||
104 | trace_file_paio_submit(acb, opaque, offset, count, type); | ||
105 | - pool = aio_get_thread_pool(qemu_get_current_aio_context()); | ||
106 | - return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); | ||
107 | + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); | ||
108 | } | ||
109 | |||
110 | int qemu_ftruncate64(int fd, int64_t length) | ||
111 | diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c | ||
112 | index XXXXXXX..XXXXXXX 100644 | ||
113 | --- a/block/qcow2-threads.c | ||
114 | +++ b/block/qcow2-threads.c | ||
115 | @@ -XXX,XX +XXX,XX @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg) | ||
116 | { | ||
117 | int ret; | ||
118 | BDRVQcow2State *s = bs->opaque; | ||
119 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context()); | ||
120 | |||
121 | qemu_co_mutex_lock(&s->lock); | ||
122 | while (s->nb_threads >= QCOW2_MAX_THREADS) { | ||
123 | @@ -XXX,XX +XXX,XX @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg) | ||
124 | s->nb_threads++; | ||
125 | qemu_co_mutex_unlock(&s->lock); | ||
126 | |||
127 | - ret = thread_pool_submit_co(pool, func, arg); | ||
128 | + ret = thread_pool_submit_co(func, arg); | ||
129 | |||
130 | qemu_co_mutex_lock(&s->lock); | ||
131 | s->nb_threads--; | ||
132 | diff --git a/hw/9pfs/coth.c b/hw/9pfs/coth.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/hw/9pfs/coth.c | ||
135 | +++ b/hw/9pfs/coth.c | ||
136 | @@ -XXX,XX +XXX,XX @@ static int coroutine_enter_func(void *arg) | ||
137 | void co_run_in_worker_bh(void *opaque) | ||
138 | { | ||
139 | Coroutine *co = opaque; | ||
140 | - thread_pool_submit_aio(aio_get_thread_pool(qemu_get_aio_context()), | ||
141 | - coroutine_enter_func, co, coroutine_enter_cb, co); | ||
142 | + thread_pool_submit_aio(coroutine_enter_func, co, coroutine_enter_cb, co); | ||
143 | } | ||
144 | diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c | ||
145 | index XXXXXXX..XXXXXXX 100644 | ||
146 | --- a/hw/ppc/spapr_nvdimm.c | ||
147 | +++ b/hw/ppc/spapr_nvdimm.c | ||
148 | @@ -XXX,XX +XXX,XX @@ static int spapr_nvdimm_flush_post_load(void *opaque, int version_id) | ||
149 | { | ||
150 | SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque; | ||
151 | SpaprNVDIMMDeviceFlushState *state; | ||
152 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); | ||
153 | HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem); | ||
154 | bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); | ||
155 | bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm), | ||
156 | @@ -XXX,XX +XXX,XX @@ static int spapr_nvdimm_flush_post_load(void *opaque, int version_id) | ||
157 | } | ||
158 | |||
159 | QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) { | ||
160 | - thread_pool_submit_aio(pool, flush_worker_cb, state, | ||
161 | + thread_pool_submit_aio(flush_worker_cb, state, | ||
162 | spapr_nvdimm_flush_completion_cb, state); | ||
163 | } | ||
164 | |||
165 | @@ -XXX,XX +XXX,XX @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr, | ||
166 | PCDIMMDevice *dimm; | ||
167 | HostMemoryBackend *backend = NULL; | ||
168 | SpaprNVDIMMDeviceFlushState *state; | ||
169 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); | ||
170 | int fd; | ||
171 | |||
172 | if (!drc || !drc->dev || | ||
173 | @@ -XXX,XX +XXX,XX @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr, | ||
174 | |||
175 | state->drcidx = drc_index; | ||
176 | |||
177 | - thread_pool_submit_aio(pool, flush_worker_cb, state, | ||
178 | + thread_pool_submit_aio(flush_worker_cb, state, | ||
179 | spapr_nvdimm_flush_completion_cb, state); | ||
180 | |||
181 | continue_token = state->continue_token; | ||
182 | diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/hw/virtio/virtio-pmem.c | ||
185 | +++ b/hw/virtio/virtio-pmem.c | ||
186 | @@ -XXX,XX +XXX,XX @@ static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq) | ||
187 | VirtIODeviceRequest *req_data; | ||
188 | VirtIOPMEM *pmem = VIRTIO_PMEM(vdev); | ||
189 | HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev); | ||
190 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); | ||
191 | |||
192 | trace_virtio_pmem_flush_request(); | ||
193 | req_data = virtqueue_pop(vq, sizeof(VirtIODeviceRequest)); | ||
194 | @@ -XXX,XX +XXX,XX @@ static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq) | ||
195 | req_data->fd = memory_region_get_fd(&backend->mr); | ||
196 | req_data->pmem = pmem; | ||
197 | req_data->vdev = vdev; | ||
198 | - thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data); | ||
199 | + thread_pool_submit_aio(worker_cb, req_data, done_cb, req_data); | ||
200 | } | ||
201 | |||
202 | static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config) | ||
203 | diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c | ||
204 | index XXXXXXX..XXXXXXX 100644 | ||
205 | --- a/scsi/pr-manager.c | ||
206 | +++ b/scsi/pr-manager.c | ||
207 | @@ -XXX,XX +XXX,XX @@ static int pr_manager_worker(void *opaque) | ||
208 | int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd, | ||
209 | struct sg_io_hdr *hdr) | ||
210 | { | ||
211 | - ThreadPool *pool = aio_get_thread_pool(ctx); | ||
212 | PRManagerData data = { | ||
213 | .pr_mgr = pr_mgr, | ||
214 | .fd = fd, | ||
215 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd, | ||
216 | |||
217 | /* The matching object_unref is in pr_manager_worker. */ | ||
218 | object_ref(OBJECT(pr_mgr)); | ||
219 | - return thread_pool_submit_co(pool, pr_manager_worker, &data); | ||
220 | + return thread_pool_submit_co(pr_manager_worker, &data); | ||
221 | } | ||
222 | |||
223 | bool pr_manager_is_connected(PRManager *pr_mgr) | ||
224 | diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c | ||
225 | index XXXXXXX..XXXXXXX 100644 | ||
226 | --- a/scsi/qemu-pr-helper.c | ||
227 | +++ b/scsi/qemu-pr-helper.c | ||
228 | @@ -XXX,XX +XXX,XX @@ static int do_sgio_worker(void *opaque) | ||
229 | static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense, | ||
230 | uint8_t *buf, int *sz, int dir) | ||
231 | { | ||
232 | - ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); | ||
233 | int r; | ||
234 | |||
235 | PRHelperSGIOData data = { | ||
236 | @@ -XXX,XX +XXX,XX @@ static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense, | ||
237 | .dir = dir, | ||
238 | }; | ||
239 | |||
240 | - r = thread_pool_submit_co(pool, do_sgio_worker, &data); | ||
241 | + r = thread_pool_submit_co(do_sgio_worker, &data); | ||
242 | *sz = data.sz; | ||
243 | return r; | ||
244 | } | ||
245 | diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c | ||
246 | index XXXXXXX..XXXXXXX 100644 | ||
247 | --- a/tests/unit/test-thread-pool.c | ||
248 | +++ b/tests/unit/test-thread-pool.c | ||
18 | @@ -XXX,XX +XXX,XX @@ | 249 | @@ -XXX,XX +XXX,XX @@ |
19 | * Again, all this is handled internally and is mostly transparent to | 250 | #include "qemu/main-loop.h" |
20 | * the outside. The 'throttle_timers' field however has an additional | 251 | |
21 | * constraint because it may be temporarily invalid (see for example | 252 | static AioContext *ctx; |
22 | - * bdrv_set_aio_context()). Therefore in this file a thread will | 253 | -static ThreadPool *pool; |
23 | + * blk_set_aio_context()). Therefore in this file a thread will | 254 | static int active; |
24 | * access some other BlockBackend's timers only after verifying that | 255 | |
25 | * that BlockBackend has throttled requests in the queue. | 256 | typedef struct { |
26 | */ | 257 | @@ -XXX,XX +XXX,XX @@ static void done_cb(void *opaque, int ret) |
258 | static void test_submit(void) | ||
259 | { | ||
260 | WorkerTestData data = { .n = 0 }; | ||
261 | - thread_pool_submit(pool, worker_cb, &data); | ||
262 | + thread_pool_submit(worker_cb, &data); | ||
263 | while (data.n == 0) { | ||
264 | aio_poll(ctx, true); | ||
265 | } | ||
266 | @@ -XXX,XX +XXX,XX @@ static void test_submit(void) | ||
267 | static void test_submit_aio(void) | ||
268 | { | ||
269 | WorkerTestData data = { .n = 0, .ret = -EINPROGRESS }; | ||
270 | - data.aiocb = thread_pool_submit_aio(pool, worker_cb, &data, | ||
271 | + data.aiocb = thread_pool_submit_aio(worker_cb, &data, | ||
272 | done_cb, &data); | ||
273 | |||
274 | /* The callbacks are not called until after the first wait. */ | ||
275 | @@ -XXX,XX +XXX,XX @@ static void co_test_cb(void *opaque) | ||
276 | active = 1; | ||
277 | data->n = 0; | ||
278 | data->ret = -EINPROGRESS; | ||
279 | - thread_pool_submit_co(pool, worker_cb, data); | ||
280 | + thread_pool_submit_co(worker_cb, data); | ||
281 | |||
282 | /* The test continues in test_submit_co, after qemu_coroutine_enter... */ | ||
283 | |||
284 | @@ -XXX,XX +XXX,XX @@ static void test_submit_many(void) | ||
285 | for (i = 0; i < 100; i++) { | ||
286 | data[i].n = 0; | ||
287 | data[i].ret = -EINPROGRESS; | ||
288 | - thread_pool_submit_aio(pool, worker_cb, &data[i], done_cb, &data[i]); | ||
289 | + thread_pool_submit_aio(worker_cb, &data[i], done_cb, &data[i]); | ||
290 | } | ||
291 | |||
292 | active = 100; | ||
293 | @@ -XXX,XX +XXX,XX @@ static void do_test_cancel(bool sync) | ||
294 | for (i = 0; i < 100; i++) { | ||
295 | data[i].n = 0; | ||
296 | data[i].ret = -EINPROGRESS; | ||
297 | - data[i].aiocb = thread_pool_submit_aio(pool, long_cb, &data[i], | ||
298 | + data[i].aiocb = thread_pool_submit_aio(long_cb, &data[i], | ||
299 | done_cb, &data[i]); | ||
300 | } | ||
301 | |||
302 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
303 | { | ||
304 | qemu_init_main_loop(&error_abort); | ||
305 | ctx = qemu_get_current_aio_context(); | ||
306 | - pool = aio_get_thread_pool(ctx); | ||
307 | |||
308 | g_test_init(&argc, &argv, NULL); | ||
309 | g_test_add_func("/thread-pool/submit", test_submit); | ||
310 | diff --git a/util/thread-pool.c b/util/thread-pool.c | ||
311 | index XXXXXXX..XXXXXXX 100644 | ||
312 | --- a/util/thread-pool.c | ||
313 | +++ b/util/thread-pool.c | ||
314 | @@ -XXX,XX +XXX,XX @@ static const AIOCBInfo thread_pool_aiocb_info = { | ||
315 | .get_aio_context = thread_pool_get_aio_context, | ||
316 | }; | ||
317 | |||
318 | -BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, | ||
319 | - ThreadPoolFunc *func, void *arg, | ||
320 | - BlockCompletionFunc *cb, void *opaque) | ||
321 | +BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | ||
322 | + BlockCompletionFunc *cb, void *opaque) | ||
323 | { | ||
324 | ThreadPoolElement *req; | ||
325 | + AioContext *ctx = qemu_get_current_aio_context(); | ||
326 | + ThreadPool *pool = aio_get_thread_pool(ctx); | ||
327 | |||
328 | /* Assert that the thread submitting work is the same running the pool */ | ||
329 | assert(pool->ctx == qemu_get_current_aio_context()); | ||
330 | @@ -XXX,XX +XXX,XX @@ static void thread_pool_co_cb(void *opaque, int ret) | ||
331 | aio_co_wake(co->co); | ||
332 | } | ||
333 | |||
334 | -int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func, | ||
335 | - void *arg) | ||
336 | +int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) | ||
337 | { | ||
338 | ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS }; | ||
339 | assert(qemu_in_coroutine()); | ||
340 | - thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc); | ||
341 | + thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc); | ||
342 | qemu_coroutine_yield(); | ||
343 | return tpc.ret; | ||
344 | } | ||
345 | |||
346 | -void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) | ||
347 | +void thread_pool_submit(ThreadPoolFunc *func, void *arg) | ||
348 | { | ||
349 | - thread_pool_submit_aio(pool, func, arg, NULL, NULL); | ||
350 | + thread_pool_submit_aio(func, arg, NULL, NULL); | ||
351 | } | ||
352 | |||
353 | void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) | ||
27 | -- | 354 | -- |
28 | 1.8.3.1 | 355 | 2.40.0 |
29 | |||
30 | diff view generated by jsdifflib |
1 | Now that we stay in coroutine context for the whole request when doing | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | reads or writes, we can add coroutine_fn annotations to many functions | 2 | |
3 | that can do I/O or yield directly. | 3 | Functions that can do I/O are prime candidates for being coroutine_fns. Make the |
4 | 4 | change for those that are themselves called only from coroutine_fns. | |
5 | |||
6 | In addition, coroutine_fns should do I/O using bdrv_co_*() functions, for | ||
7 | which it is required to hold the BlockDriverState graph lock. So also annotate | ||
8 | functions on the I/O path with TSA attributes, making it possible to | ||
9 | switch them to use bdrv_co_*() functions. | ||
10 | |||
11 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
12 | Message-Id: <20230309084456.304669-2-pbonzini@redhat.com> | ||
13 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | --- | 15 | --- |
8 | block/qed-cluster.c | 5 +++-- | 16 | block/vvfat.c | 58 ++++++++++++++++++++++++++------------------------- |
9 | block/qed.c | 44 ++++++++++++++++++++++++-------------------- | 17 | 1 file changed, 30 insertions(+), 28 deletions(-) |
10 | block/qed.h | 5 +++-- | 18 | |
11 | 3 files changed, 30 insertions(+), 24 deletions(-) | 19 | diff --git a/block/vvfat.c b/block/vvfat.c |
12 | |||
13 | diff --git a/block/qed-cluster.c b/block/qed-cluster.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block/qed-cluster.c | 21 | --- a/block/vvfat.c |
16 | +++ b/block/qed-cluster.c | 22 | +++ b/block/vvfat.c |
17 | @@ -XXX,XX +XXX,XX @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s, | 23 | @@ -XXX,XX +XXX,XX @@ static BDRVVVFATState *vvv = NULL; |
18 | * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1 | 24 | #endif |
19 | * table offset, respectively. len is number of contiguous unallocated bytes. | 25 | |
20 | */ | 26 | static int enable_write_target(BlockDriverState *bs, Error **errp); |
21 | -int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | 27 | -static int is_consistent(BDRVVVFATState *s); |
22 | - size_t *len, uint64_t *img_offset) | 28 | +static int coroutine_fn is_consistent(BDRVVVFATState *s); |
23 | +int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request, | 29 | |
24 | + uint64_t pos, size_t *len, | 30 | static QemuOptsList runtime_opts = { |
25 | + uint64_t *img_offset) | 31 | .name = "vvfat", |
26 | { | 32 | @@ -XXX,XX +XXX,XX @@ static void print_mapping(const mapping_t* mapping) |
27 | uint64_t l2_offset; | 33 | } |
28 | uint64_t offset = 0; | 34 | #endif |
29 | diff --git a/block/qed.c b/block/qed.c | 35 | |
30 | index XXXXXXX..XXXXXXX 100644 | 36 | -static int vvfat_read(BlockDriverState *bs, int64_t sector_num, |
31 | --- a/block/qed.c | 37 | - uint8_t *buf, int nb_sectors) |
32 | +++ b/block/qed.c | 38 | +static int coroutine_fn GRAPH_RDLOCK |
33 | @@ -XXX,XX +XXX,XX @@ int qed_write_header_sync(BDRVQEDState *s) | 39 | +vvfat_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) |
34 | * This function only updates known header fields in-place and does not affect | 40 | { |
35 | * extra data after the QED header. | 41 | BDRVVVFATState *s = bs->opaque; |
36 | */ | ||
37 | -static int qed_write_header(BDRVQEDState *s) | ||
38 | +static int coroutine_fn qed_write_header(BDRVQEDState *s) | ||
39 | { | ||
40 | /* We must write full sectors for O_DIRECT but cannot necessarily generate | ||
41 | * the data following the header if an unrecognized compat feature is | ||
42 | @@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) | ||
43 | qemu_co_enter_next(&s->allocating_write_reqs); | ||
44 | } | ||
45 | |||
46 | -static void qed_need_check_timer_entry(void *opaque) | ||
47 | +static void coroutine_fn qed_need_check_timer_entry(void *opaque) | ||
48 | { | ||
49 | BDRVQEDState *s = opaque; | ||
50 | int ret; | ||
51 | @@ -XXX,XX +XXX,XX @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb) | ||
52 | * This function reads qiov->size bytes starting at pos from the backing file. | ||
53 | * If there is no backing file then zeroes are read. | ||
54 | */ | ||
55 | -static int qed_read_backing_file(BDRVQEDState *s, uint64_t pos, | ||
56 | - QEMUIOVector *qiov, | ||
57 | - QEMUIOVector **backing_qiov) | ||
58 | +static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos, | ||
59 | + QEMUIOVector *qiov, | ||
60 | + QEMUIOVector **backing_qiov) | ||
61 | { | ||
62 | uint64_t backing_length = 0; | ||
63 | size_t size; | ||
64 | @@ -XXX,XX +XXX,XX @@ static int qed_read_backing_file(BDRVQEDState *s, uint64_t pos, | ||
65 | * @len: Number of bytes | ||
66 | * @offset: Byte offset in image file | ||
67 | */ | ||
68 | -static int qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, | ||
69 | - uint64_t len, uint64_t offset) | ||
70 | +static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, | ||
71 | + uint64_t pos, uint64_t len, | ||
72 | + uint64_t offset) | ||
73 | { | ||
74 | QEMUIOVector qiov; | ||
75 | QEMUIOVector *backing_qiov = NULL; | ||
76 | @@ -XXX,XX +XXX,XX @@ out: | ||
77 | * The cluster offset may be an allocated byte offset in the image file, the | ||
78 | * zero cluster marker, or the unallocated cluster marker. | ||
79 | */ | ||
80 | -static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, | ||
81 | - unsigned int n, uint64_t cluster) | ||
82 | +static void coroutine_fn qed_update_l2_table(BDRVQEDState *s, QEDTable *table, | ||
83 | + int index, unsigned int n, | ||
84 | + uint64_t cluster) | ||
85 | { | ||
86 | int i; | 42 | int i; |
87 | for (i = index; i < index + n; i++) { | 43 | @@ -XXX,XX +XXX,XX @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, |
88 | @@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, | 44 | DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 |
45 | " allocated\n", sector_num, | ||
46 | n >> BDRV_SECTOR_BITS)); | ||
47 | - if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n, | ||
48 | - buf + i * 0x200, 0) < 0) { | ||
49 | + if (bdrv_co_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n, | ||
50 | + buf + i * 0x200, 0) < 0) { | ||
51 | return -1; | ||
52 | } | ||
53 | i += (n >> BDRV_SECTOR_BITS) - 1; | ||
54 | @@ -XXX,XX +XXX,XX @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | -static int coroutine_fn | ||
59 | +static int coroutine_fn GRAPH_RDLOCK | ||
60 | vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, | ||
61 | QEMUIOVector *qiov, BdrvRequestFlags flags) | ||
62 | { | ||
63 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s, | ||
89 | } | 64 | } |
90 | } | 65 | } |
91 | 66 | ||
92 | -static void qed_aio_complete(QEDAIOCB *acb) | 67 | -static inline bool cluster_was_modified(BDRVVVFATState *s, |
93 | +static void coroutine_fn qed_aio_complete(QEDAIOCB *acb) | 68 | - uint32_t cluster_num) |
94 | { | 69 | +static inline bool coroutine_fn GRAPH_RDLOCK |
95 | BDRVQEDState *s = acb_to_s(acb); | 70 | +cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num) |
96 | 71 | { | |
97 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb) | 72 | int was_modified = 0; |
98 | /** | 73 | int i; |
99 | * Update L1 table with new L2 table offset and write it out | 74 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
100 | */ | 75 | * Further, the files/directories handled by this function are |
101 | -static int qed_aio_write_l1_update(QEDAIOCB *acb) | 76 | * assumed to be *not* deleted (and *only* those). |
102 | +static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb) | 77 | */ |
103 | { | 78 | -static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, |
104 | BDRVQEDState *s = acb_to_s(acb); | 79 | - direntry_t* direntry, const char* path) |
105 | CachedL2Table *l2_table = acb->request.l2_table; | 80 | +static uint32_t coroutine_fn GRAPH_RDLOCK |
106 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_l1_update(QEDAIOCB *acb) | 81 | +get_cluster_count_for_direntry(BDRVVVFATState* s, direntry_t* direntry, const char* path) |
107 | /** | 82 | { |
108 | * Update L2 table with new cluster offsets and write them out | 83 | /* |
109 | */ | 84 | * This is a little bit tricky: |
110 | -static int qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) | 85 | @@ -XXX,XX +XXX,XX @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, |
111 | +static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) | 86 | if (res) { |
112 | { | 87 | return -1; |
113 | BDRVQEDState *s = acb_to_s(acb); | 88 | } |
114 | bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; | 89 | - res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, |
115 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) | 90 | - BDRV_SECTOR_SIZE, s->cluster_buffer, |
116 | /** | 91 | - 0); |
117 | * Write data to the image file | 92 | + res = bdrv_co_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, |
118 | */ | 93 | + BDRV_SECTOR_SIZE, s->cluster_buffer, |
119 | -static int qed_aio_write_main(QEDAIOCB *acb) | 94 | + 0); |
120 | +static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) | 95 | if (res < 0) { |
121 | { | 96 | return -2; |
122 | BDRVQEDState *s = acb_to_s(acb); | 97 | } |
123 | uint64_t offset = acb->cur_cluster + | 98 | @@ -XXX,XX +XXX,XX @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, |
124 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_main(QEDAIOCB *acb) | 99 | * It returns 0 upon inconsistency or error, and the number of clusters |
125 | /** | 100 | * used by the directory, its subdirectories and their files. |
126 | * Populate untouched regions of new data cluster | 101 | */ |
127 | */ | 102 | -static int check_directory_consistency(BDRVVVFATState *s, |
128 | -static int qed_aio_write_cow(QEDAIOCB *acb) | 103 | - int cluster_num, const char* path) |
129 | +static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb) | 104 | +static int coroutine_fn GRAPH_RDLOCK |
130 | { | 105 | +check_directory_consistency(BDRVVVFATState *s, int cluster_num, const char* path) |
131 | BDRVQEDState *s = acb_to_s(acb); | 106 | { |
132 | uint64_t start, len, offset; | 107 | int ret = 0; |
133 | @@ -XXX,XX +XXX,XX @@ static bool qed_should_set_need_check(BDRVQEDState *s) | 108 | unsigned char* cluster = g_malloc(s->cluster_size); |
134 | * | 109 | @@ -XXX,XX +XXX,XX @@ DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i)) |
135 | * This path is taken when writing to previously unallocated clusters. | 110 | } |
136 | */ | 111 | |
137 | -static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | 112 | /* returns 1 on success */ |
138 | +static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | 113 | -static int is_consistent(BDRVVVFATState* s) |
139 | { | 114 | +static int coroutine_fn GRAPH_RDLOCK |
140 | BDRVQEDState *s = acb_to_s(acb); | 115 | +is_consistent(BDRVVVFATState* s) |
141 | int ret; | 116 | { |
142 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | 117 | int i, check; |
143 | * | 118 | int used_clusters_count = 0; |
144 | * This path is taken when writing to already allocated clusters. | 119 | @@ -XXX,XX +XXX,XX @@ static int commit_mappings(BDRVVVFATState* s, |
145 | */ | 120 | return 0; |
146 | -static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | 121 | } |
147 | +static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, | 122 | |
148 | + size_t len) | 123 | -static int commit_direntries(BDRVVVFATState* s, |
149 | { | 124 | - int dir_index, int parent_mapping_index) |
150 | /* Allocate buffer for zero writes */ | 125 | +static int coroutine_fn GRAPH_RDLOCK |
151 | if (acb->flags & QED_AIOCB_ZERO) { | 126 | +commit_direntries(BDRVVVFATState* s, int dir_index, int parent_mapping_index) |
152 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | 127 | { |
153 | * @offset: Cluster offset in bytes | 128 | direntry_t* direntry = array_get(&(s->directory), dir_index); |
154 | * @len: Length in bytes | 129 | uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry); |
155 | */ | 130 | @@ -XXX,XX +XXX,XX @@ static int commit_direntries(BDRVVVFATState* s, |
156 | -static int qed_aio_write_data(void *opaque, int ret, | 131 | |
157 | - uint64_t offset, size_t len) | 132 | /* commit one file (adjust contents, adjust mapping), |
158 | +static int coroutine_fn qed_aio_write_data(void *opaque, int ret, | 133 | return first_mapping_index */ |
159 | + uint64_t offset, size_t len) | 134 | -static int commit_one_file(BDRVVVFATState* s, |
160 | { | 135 | - int dir_index, uint32_t offset) |
161 | QEDAIOCB *acb = opaque; | 136 | +static int coroutine_fn GRAPH_RDLOCK |
162 | 137 | +commit_one_file(BDRVVVFATState* s, int dir_index, uint32_t offset) | |
163 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_data(void *opaque, int ret, | 138 | { |
164 | * @offset: Cluster offset in bytes | 139 | direntry_t* direntry = array_get(&(s->directory), dir_index); |
165 | * @len: Length in bytes | 140 | uint32_t c = begin_of_direntry(direntry); |
166 | */ | 141 | @@ -XXX,XX +XXX,XX @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s) |
167 | -static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len) | 142 | /* |
168 | +static int coroutine_fn qed_aio_read_data(void *opaque, int ret, | 143 | * TODO: make sure that the short name is not matching *another* file |
169 | + uint64_t offset, size_t len) | 144 | */ |
170 | { | 145 | -static int handle_commits(BDRVVVFATState* s) |
171 | QEDAIOCB *acb = opaque; | 146 | +static int coroutine_fn GRAPH_RDLOCK handle_commits(BDRVVVFATState* s) |
172 | BDRVQEDState *s = acb_to_s(acb); | 147 | { |
173 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len) | 148 | int i, fail = 0; |
174 | /** | 149 | |
175 | * Begin next I/O or complete the request | 150 | @@ -XXX,XX +XXX,XX @@ static int handle_deletes(BDRVVVFATState* s) |
176 | */ | 151 | * - recurse direntries from root (using bs->bdrv_pread) |
177 | -static int qed_aio_next_io(QEDAIOCB *acb) | 152 | * - delete files corresponding to mappings marked as deleted |
178 | +static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb) | 153 | */ |
179 | { | 154 | -static int do_commit(BDRVVVFATState* s) |
180 | BDRVQEDState *s = acb_to_s(acb); | 155 | +static int coroutine_fn GRAPH_RDLOCK do_commit(BDRVVVFATState* s) |
181 | uint64_t offset; | 156 | { |
182 | diff --git a/block/qed.h b/block/qed.h | 157 | int ret = 0; |
183 | index XXXXXXX..XXXXXXX 100644 | 158 | |
184 | --- a/block/qed.h | 159 | @@ -XXX,XX +XXX,XX @@ DLOG(checkpoint()); |
185 | +++ b/block/qed.h | 160 | return 0; |
186 | @@ -XXX,XX +XXX,XX @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | 161 | } |
187 | /** | 162 | |
188 | * Cluster functions | 163 | -static int try_commit(BDRVVVFATState* s) |
189 | */ | 164 | +static int coroutine_fn GRAPH_RDLOCK try_commit(BDRVVVFATState* s) |
190 | -int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | 165 | { |
191 | - size_t *len, uint64_t *img_offset); | 166 | vvfat_close_current_file(s); |
192 | +int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request, | 167 | DLOG(checkpoint()); |
193 | + uint64_t pos, size_t *len, | 168 | @@ -XXX,XX +XXX,XX @@ DLOG(checkpoint()); |
194 | + uint64_t *img_offset); | 169 | return do_commit(s); |
195 | 170 | } | |
196 | /** | 171 | |
197 | * Consistency check | 172 | -static int vvfat_write(BlockDriverState *bs, int64_t sector_num, |
173 | - const uint8_t *buf, int nb_sectors) | ||
174 | +static int coroutine_fn GRAPH_RDLOCK | ||
175 | +vvfat_write(BlockDriverState *bs, int64_t sector_num, | ||
176 | + const uint8_t *buf, int nb_sectors) | ||
177 | { | ||
178 | BDRVVVFATState *s = bs->opaque; | ||
179 | int i, ret; | ||
180 | @@ -XXX,XX +XXX,XX @@ DLOG(checkpoint()); | ||
181 | * Use qcow backend. Commit later. | ||
182 | */ | ||
183 | DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); | ||
184 | - ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, | ||
185 | - nb_sectors * BDRV_SECTOR_SIZE, buf, 0); | ||
186 | + ret = bdrv_co_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, | ||
187 | + nb_sectors * BDRV_SECTOR_SIZE, buf, 0); | ||
188 | if (ret < 0) { | ||
189 | fprintf(stderr, "Error writing to qcow backend\n"); | ||
190 | return ret; | ||
191 | @@ -XXX,XX +XXX,XX @@ DLOG(checkpoint()); | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | -static int coroutine_fn | ||
196 | +static int coroutine_fn GRAPH_RDLOCK | ||
197 | vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, | ||
198 | QEMUIOVector *qiov, BdrvRequestFlags flags) | ||
199 | { | ||
198 | -- | 200 | -- |
199 | 1.8.3.1 | 201 | 2.40.0 |
200 | |||
201 | diff view generated by jsdifflib |
1 | From: Max Reitz <mreitz@redhat.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | The bs->exact_filename field may not be sufficient to store the full | 3 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
4 | blkdebug node filename. In this case, we should not generate a filename | 4 | Message-Id: <20230309084456.304669-3-pbonzini@redhat.com> |
5 | at all instead of an unusable one. | 5 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
6 | 6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | |
7 | Cc: qemu-stable@nongnu.org | ||
8 | Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com> | ||
9 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
10 | Message-id: 20170613172006.19685-2-mreitz@redhat.com | ||
11 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
12 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
14 | --- | 7 | --- |
15 | block/blkdebug.c | 10 +++++++--- | 8 | block/blkdebug.c | 4 ++-- |
16 | 1 file changed, 7 insertions(+), 3 deletions(-) | 9 | 1 file changed, 2 insertions(+), 2 deletions(-) |
17 | 10 | ||
18 | diff --git a/block/blkdebug.c b/block/blkdebug.c | 11 | diff --git a/block/blkdebug.c b/block/blkdebug.c |
19 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/blkdebug.c | 13 | --- a/block/blkdebug.c |
21 | +++ b/block/blkdebug.c | 14 | +++ b/block/blkdebug.c |
22 | @@ -XXX,XX +XXX,XX @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options) | 15 | @@ -XXX,XX +XXX,XX @@ out: |
23 | } | 16 | return ret; |
24 | 17 | } | |
25 | if (!force_json && bs->file->bs->exact_filename[0]) { | 18 | |
26 | - snprintf(bs->exact_filename, sizeof(bs->exact_filename), | 19 | -static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, |
27 | - "blkdebug:%s:%s", s->config_file ?: "", | 20 | - BlkdebugIOType iotype) |
28 | - bs->file->bs->exact_filename); | 21 | +static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset, |
29 | + int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename), | 22 | + uint64_t bytes, BlkdebugIOType iotype) |
30 | + "blkdebug:%s:%s", s->config_file ?: "", | 23 | { |
31 | + bs->file->bs->exact_filename); | 24 | BDRVBlkdebugState *s = bs->opaque; |
32 | + if (ret >= sizeof(bs->exact_filename)) { | 25 | BlkdebugRule *rule = NULL; |
33 | + /* An overflow makes the filename unusable, so do not report any */ | ||
34 | + bs->exact_filename[0] = 0; | ||
35 | + } | ||
36 | } | ||
37 | |||
38 | opts = qdict_new(); | ||
39 | -- | 26 | -- |
40 | 1.8.3.1 | 27 | 2.40.0 |
41 | |||
42 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Qcow2COWRegion has two attributes: | 3 | mirror_flush calls a mixed function blk_flush but it is only called |
4 | from mirror_run; so call the coroutine version and make mirror_flush | ||
5 | a coroutine_fn too. | ||
4 | 6 | ||
5 | - The offset of the COW region from the start of the first cluster | 7 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
6 | touched by the I/O request. Since it's always going to be positive | 8 | Message-Id: <20230309084456.304669-4-pbonzini@redhat.com> |
7 | and the maximum request size is at most INT_MAX, we can use a | ||
8 | regular unsigned int to store this offset. | ||
9 | |||
10 | - The size of the COW region in bytes. This is guaranteed to be >= 0, | ||
11 | so we should use an unsigned type instead. | ||
12 | |||
13 | In x86_64 this reduces the size of Qcow2COWRegion from 16 to 8 bytes. | ||
14 | It will also help keep some assertions simpler now that we know that | ||
15 | there are no negative numbers. | ||
16 | |||
17 | The prototype of do_perform_cow() is also updated to reflect these | ||
18 | changes. | ||
19 | |||
20 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
21 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
22 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 9 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
23 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
24 | --- | 11 | --- |
25 | block/qcow2-cluster.c | 4 ++-- | 12 | block/mirror.c | 4 ++-- |
26 | block/qcow2.h | 4 ++-- | 13 | 1 file changed, 2 insertions(+), 2 deletions(-) |
27 | 2 files changed, 4 insertions(+), 4 deletions(-) | ||
28 | 14 | ||
29 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 15 | diff --git a/block/mirror.c b/block/mirror.c |
30 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/block/qcow2-cluster.c | 17 | --- a/block/mirror.c |
32 | +++ b/block/qcow2-cluster.c | 18 | +++ b/block/mirror.c |
33 | @@ -XXX,XX +XXX,XX @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, | 19 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) |
34 | static int coroutine_fn do_perform_cow(BlockDriverState *bs, | 20 | /* Called when going out of the streaming phase to flush the bulk of the |
35 | uint64_t src_cluster_offset, | 21 | * data to the medium, or just before completing. |
36 | uint64_t cluster_offset, | 22 | */ |
37 | - int offset_in_cluster, | 23 | -static int mirror_flush(MirrorBlockJob *s) |
38 | - int bytes) | 24 | +static int coroutine_fn mirror_flush(MirrorBlockJob *s) |
39 | + unsigned offset_in_cluster, | ||
40 | + unsigned bytes) | ||
41 | { | 25 | { |
42 | BDRVQcow2State *s = bs->opaque; | 26 | - int ret = blk_flush(s->target); |
43 | QEMUIOVector qiov; | 27 | + int ret = blk_co_flush(s->target); |
44 | diff --git a/block/qcow2.h b/block/qcow2.h | 28 | if (ret < 0) { |
45 | index XXXXXXX..XXXXXXX 100644 | 29 | if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) { |
46 | --- a/block/qcow2.h | 30 | s->ret = ret; |
47 | +++ b/block/qcow2.h | ||
48 | @@ -XXX,XX +XXX,XX @@ typedef struct Qcow2COWRegion { | ||
49 | * Offset of the COW region in bytes from the start of the first cluster | ||
50 | * touched by the request. | ||
51 | */ | ||
52 | - uint64_t offset; | ||
53 | + unsigned offset; | ||
54 | |||
55 | /** Number of bytes to copy */ | ||
56 | - int nb_bytes; | ||
57 | + unsigned nb_bytes; | ||
58 | } Qcow2COWRegion; | ||
59 | |||
60 | /** | ||
61 | -- | 31 | -- |
62 | 1.8.3.1 | 32 | 2.40.0 |
63 | |||
64 | diff view generated by jsdifflib |
1 | All callers pass ret = 0, so we can just remove it. | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
4 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
3 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
4 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
5 | --- | 6 | --- |
6 | block/qed.c | 17 ++++++----------- | 7 | nbd/server.c | 48 ++++++++++++++++++++++++------------------------ |
7 | 1 file changed, 6 insertions(+), 11 deletions(-) | 8 | 1 file changed, 24 insertions(+), 24 deletions(-) |
8 | 9 | ||
9 | diff --git a/block/qed.c b/block/qed.c | 10 | diff --git a/nbd/server.c b/nbd/server.c |
10 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/block/qed.c | 12 | --- a/nbd/server.c |
12 | +++ b/block/qed.c | 13 | +++ b/nbd/server.c |
13 | @@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) | 14 | @@ -XXX,XX +XXX,XX @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) |
14 | return l2_table; | 15 | return 1; |
15 | } | 16 | } |
16 | 17 | ||
17 | -static void qed_aio_next_io(QEDAIOCB *acb, int ret); | 18 | -static int nbd_receive_request(NBDClient *client, NBDRequest *request, |
18 | +static void qed_aio_next_io(QEDAIOCB *acb); | 19 | - Error **errp) |
19 | 20 | +static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request, | |
20 | static void qed_aio_start_io(QEDAIOCB *acb) | 21 | + Error **errp) |
21 | { | 22 | { |
22 | - qed_aio_next_io(acb, 0); | 23 | uint8_t buf[NBD_REQUEST_SIZE]; |
23 | + qed_aio_next_io(acb); | 24 | uint32_t magic; |
25 | @@ -XXX,XX +XXX,XX @@ static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error, | ||
26 | stq_be_p(&reply->handle, handle); | ||
24 | } | 27 | } |
25 | 28 | ||
26 | static void qed_plug_allocating_write_reqs(BDRVQEDState *s) | 29 | -static int nbd_co_send_simple_reply(NBDClient *client, |
27 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len) | 30 | - uint64_t handle, |
28 | /** | 31 | - uint32_t error, |
29 | * Begin next I/O or complete the request | 32 | - void *data, |
33 | - size_t len, | ||
34 | - Error **errp) | ||
35 | +static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client, | ||
36 | + uint64_t handle, | ||
37 | + uint32_t error, | ||
38 | + void *data, | ||
39 | + size_t len, | ||
40 | + Error **errp) | ||
41 | { | ||
42 | NBDSimpleReply reply; | ||
43 | int nbd_err = system_errno_to_nbd_errno(error); | ||
44 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, | ||
45 | stl_be_p(&chunk.length, pnum); | ||
46 | ret = nbd_co_send_iov(client, iov, 1, errp); | ||
47 | } else { | ||
48 | - ret = blk_pread(exp->common.blk, offset + progress, pnum, | ||
49 | - data + progress, 0); | ||
50 | + ret = blk_co_pread(exp->common.blk, offset + progress, pnum, | ||
51 | + data + progress, 0); | ||
52 | if (ret < 0) { | ||
53 | error_setg_errno(errp, -ret, "reading from file failed"); | ||
54 | break; | ||
55 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn blockalloc_to_extents(BlockBackend *blk, | ||
56 | * @ea is converted to BE by the function | ||
57 | * @last controls whether NBD_REPLY_FLAG_DONE is sent. | ||
30 | */ | 58 | */ |
31 | -static void qed_aio_next_io(QEDAIOCB *acb, int ret) | 59 | -static int nbd_co_send_extents(NBDClient *client, uint64_t handle, |
32 | +static void qed_aio_next_io(QEDAIOCB *acb) | 60 | - NBDExtentArray *ea, |
61 | - bool last, uint32_t context_id, Error **errp) | ||
62 | +static int coroutine_fn | ||
63 | +nbd_co_send_extents(NBDClient *client, uint64_t handle, NBDExtentArray *ea, | ||
64 | + bool last, uint32_t context_id, Error **errp) | ||
33 | { | 65 | { |
34 | BDRVQEDState *s = acb_to_s(acb); | 66 | NBDStructuredMeta chunk; |
35 | uint64_t offset; | 67 | struct iovec iov[] = { |
36 | size_t len; | 68 | @@ -XXX,XX +XXX,XX @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap, |
37 | + int ret; | 69 | bdrv_dirty_bitmap_unlock(bitmap); |
38 | 70 | } | |
39 | - trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); | 71 | |
40 | + trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + acb->cur_qiov.size); | 72 | -static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle, |
41 | 73 | - BdrvDirtyBitmap *bitmap, uint64_t offset, | |
42 | if (acb->backing_qiov) { | 74 | - uint32_t length, bool dont_fragment, bool last, |
43 | qemu_iovec_destroy(acb->backing_qiov); | 75 | - uint32_t context_id, Error **errp) |
44 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret) | 76 | +static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, uint64_t handle, |
45 | acb->backing_qiov = NULL; | 77 | + BdrvDirtyBitmap *bitmap, uint64_t offset, |
78 | + uint32_t length, bool dont_fragment, bool last, | ||
79 | + uint32_t context_id, Error **errp) | ||
80 | { | ||
81 | unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS; | ||
82 | g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents); | ||
83 | @@ -XXX,XX +XXX,XX @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle, | ||
84 | * to the client (although the caller may still need to disconnect after | ||
85 | * reporting the error). | ||
86 | */ | ||
87 | -static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, | ||
88 | - Error **errp) | ||
89 | +static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, | ||
90 | + Error **errp) | ||
91 | { | ||
92 | NBDClient *client = req->client; | ||
93 | int valid_flags; | ||
94 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, | ||
95 | data, request->len, errp); | ||
46 | } | 96 | } |
47 | 97 | ||
48 | - /* Handle I/O error */ | 98 | - ret = blk_pread(exp->common.blk, request->from, request->len, data, 0); |
49 | - if (ret) { | 99 | + ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0); |
50 | - qed_aio_complete(acb, ret); | 100 | if (ret < 0) { |
51 | - return; | 101 | return nbd_send_generic_reply(client, request->handle, ret, |
52 | - } | 102 | "reading from file failed", errp); |
53 | - | 103 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int nbd_handle_request(NBDClient *client, |
54 | acb->qiov_offset += acb->cur_qiov.size; | 104 | if (request->flags & NBD_CMD_FLAG_FUA) { |
55 | acb->cur_pos += acb->cur_qiov.size; | 105 | flags |= BDRV_REQ_FUA; |
56 | qemu_iovec_reset(&acb->cur_qiov); | ||
57 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret) | ||
58 | } | 106 | } |
59 | return; | 107 | - ret = blk_pwrite(exp->common.blk, request->from, request->len, data, |
60 | } | 108 | - flags); |
61 | - qed_aio_next_io(acb, 0); | 109 | + ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data, |
62 | + qed_aio_next_io(acb); | 110 | + flags); |
63 | } | 111 | return nbd_send_generic_reply(client, request->handle, ret, |
64 | 112 | "writing to file failed", errp); | |
65 | static BlockAIOCB *qed_aio_setup(BlockDriverState *bs, | 113 | |
114 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int nbd_handle_request(NBDClient *client, | ||
115 | if (request->flags & NBD_CMD_FLAG_FAST_ZERO) { | ||
116 | flags |= BDRV_REQ_NO_FALLBACK; | ||
117 | } | ||
118 | - ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len, | ||
119 | - flags); | ||
120 | + ret = blk_co_pwrite_zeroes(exp->common.blk, request->from, request->len, | ||
121 | + flags); | ||
122 | return nbd_send_generic_reply(client, request->handle, ret, | ||
123 | "writing to file failed", errp); | ||
124 | |||
66 | -- | 125 | -- |
67 | 1.8.3.1 | 126 | 2.40.0 |
68 | |||
69 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Instead of calling perform_cow() twice with a different COW region | 3 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
4 | each time, call it just once and make perform_cow() handle both | 4 | Message-Id: <20230309084456.304669-6-pbonzini@redhat.com> |
5 | regions. | ||
6 | |||
7 | This patch simply moves code around. The next one will do the actual | ||
8 | reordering of the COW operations. | ||
9 | |||
10 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
11 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 5 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
14 | --- | 7 | --- |
15 | block/qcow2-cluster.c | 36 ++++++++++++++++++++++-------------- | 8 | hw/9pfs/9p.h | 4 ++-- |
16 | 1 file changed, 22 insertions(+), 14 deletions(-) | 9 | hw/9pfs/codir.c | 6 +++--- |
10 | 2 files changed, 5 insertions(+), 5 deletions(-) | ||
17 | 11 | ||
18 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 12 | diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h |
19 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/qcow2-cluster.c | 14 | --- a/hw/9pfs/9p.h |
21 | +++ b/block/qcow2-cluster.c | 15 | +++ b/hw/9pfs/9p.h |
22 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs, | 16 | @@ -XXX,XX +XXX,XX @@ typedef struct V9fsDir { |
23 | struct iovec iov; | 17 | QemuMutex readdir_mutex_L; |
24 | int ret; | 18 | } V9fsDir; |
25 | 19 | ||
26 | + if (bytes == 0) { | 20 | -static inline void v9fs_readdir_lock(V9fsDir *dir) |
27 | + return 0; | 21 | +static inline void coroutine_fn v9fs_readdir_lock(V9fsDir *dir) |
28 | + } | 22 | { |
29 | + | 23 | if (dir->proto_version == V9FS_PROTO_2000U) { |
30 | iov.iov_len = bytes; | 24 | qemu_co_mutex_lock(&dir->readdir_mutex_u); |
31 | iov.iov_base = qemu_try_blockalign(bs, iov.iov_len); | 25 | @@ -XXX,XX +XXX,XX @@ static inline void v9fs_readdir_lock(V9fsDir *dir) |
32 | if (iov.iov_base == NULL) { | 26 | } |
33 | @@ -XXX,XX +XXX,XX @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, | ||
34 | return cluster_offset; | ||
35 | } | 27 | } |
36 | 28 | ||
37 | -static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) | 29 | -static inline void v9fs_readdir_unlock(V9fsDir *dir) |
38 | +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | 30 | +static inline void coroutine_fn v9fs_readdir_unlock(V9fsDir *dir) |
39 | { | 31 | { |
40 | BDRVQcow2State *s = bs->opaque; | 32 | if (dir->proto_version == V9FS_PROTO_2000U) { |
41 | + Qcow2COWRegion *start = &m->cow_start; | 33 | qemu_co_mutex_unlock(&dir->readdir_mutex_u); |
42 | + Qcow2COWRegion *end = &m->cow_end; | 34 | diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c |
43 | int ret; | 35 | index XXXXXXX..XXXXXXX 100644 |
44 | 36 | --- a/hw/9pfs/codir.c | |
45 | - if (r->nb_bytes == 0) { | 37 | +++ b/hw/9pfs/codir.c |
46 | + if (start->nb_bytes == 0 && end->nb_bytes == 0) { | 38 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp, |
47 | return 0; | 39 | * |
48 | } | 40 | * See v9fs_co_readdir_many() (as its only user) below for details. |
49 | 41 | */ | |
50 | qemu_co_mutex_unlock(&s->lock); | 42 | -static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, |
51 | - ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes); | 43 | - struct V9fsDirEnt **entries, off_t offset, |
52 | - qemu_co_mutex_lock(&s->lock); | 44 | - int32_t maxsize, bool dostat) |
53 | - | 45 | +static int coroutine_fn |
54 | + ret = do_perform_cow(bs, m->offset, m->alloc_offset, | 46 | +do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, struct V9fsDirEnt **entries, |
55 | + start->offset, start->nb_bytes); | 47 | + off_t offset, int32_t maxsize, bool dostat) |
56 | if (ret < 0) { | 48 | { |
57 | - return ret; | 49 | V9fsState *s = pdu->s; |
58 | + goto fail; | 50 | V9fsString name; |
59 | } | ||
60 | |||
61 | + ret = do_perform_cow(bs, m->offset, m->alloc_offset, | ||
62 | + end->offset, end->nb_bytes); | ||
63 | + | ||
64 | +fail: | ||
65 | + qemu_co_mutex_lock(&s->lock); | ||
66 | + | ||
67 | /* | ||
68 | * Before we update the L2 table to actually point to the new cluster, we | ||
69 | * need to be sure that the refcounts have been increased and COW was | ||
70 | * handled. | ||
71 | */ | ||
72 | - qcow2_cache_depends_on_flush(s->l2_table_cache); | ||
73 | + if (ret == 0) { | ||
74 | + qcow2_cache_depends_on_flush(s->l2_table_cache); | ||
75 | + } | ||
76 | |||
77 | - return 0; | ||
78 | + return ret; | ||
79 | } | ||
80 | |||
81 | int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) | ||
82 | @@ -XXX,XX +XXX,XX @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) | ||
83 | } | ||
84 | |||
85 | /* copy content of unmodified sectors */ | ||
86 | - ret = perform_cow(bs, m, &m->cow_start); | ||
87 | - if (ret < 0) { | ||
88 | - goto err; | ||
89 | - } | ||
90 | - | ||
91 | - ret = perform_cow(bs, m, &m->cow_end); | ||
92 | + ret = perform_cow(bs, m); | ||
93 | if (ret < 0) { | ||
94 | goto err; | ||
95 | } | ||
96 | -- | 51 | -- |
97 | 1.8.3.1 | 52 | 2.40.0 |
98 | |||
99 | diff view generated by jsdifflib |
1 | Most of the qed code is now synchronous and matches the coroutine model. | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | One notable exception is the serialisation between requests which can | ||
3 | still schedule a callback. Before we can replace this with coroutine | ||
4 | locks, let's convert the driver's external interfaces to the coroutine | ||
5 | versions. | ||
6 | 2 | ||
7 | We need to be careful to handle both requests that call the completion | 3 | do_sgio can suspend via the coroutine function thread_pool_submit_co, so it |
8 | callback directly from the calling coroutine (i.e. fully synchronous | 4 | has to be coroutine_fn as well---and the same is true of all its direct and |
9 | code) and requests that involve some callback, so that we need to yield | 5 | indirect callers. |
10 | and wait for the completion callback coming from outside the coroutine. | ||
11 | 6 | ||
7 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
8 | Message-Id: <20230309084456.304669-7-pbonzini@redhat.com> | ||
9 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr> | ||
14 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | --- | 11 | --- |
16 | block/qed.c | 97 ++++++++++++++++++++++++++----------------------------------- | 12 | scsi/qemu-pr-helper.c | 22 +++++++++++----------- |
17 | 1 file changed, 42 insertions(+), 55 deletions(-) | 13 | 1 file changed, 11 insertions(+), 11 deletions(-) |
18 | 14 | ||
19 | diff --git a/block/qed.c b/block/qed.c | 15 | diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c |
20 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/qed.c | 17 | --- a/scsi/qemu-pr-helper.c |
22 | +++ b/block/qed.c | 18 | +++ b/scsi/qemu-pr-helper.c |
23 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb) | 19 | @@ -XXX,XX +XXX,XX @@ static int do_sgio_worker(void *opaque) |
20 | return status; | ||
21 | } | ||
22 | |||
23 | -static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense, | ||
24 | - uint8_t *buf, int *sz, int dir) | ||
25 | +static int coroutine_fn do_sgio(int fd, const uint8_t *cdb, uint8_t *sense, | ||
26 | + uint8_t *buf, int *sz, int dir) | ||
27 | { | ||
28 | int r; | ||
29 | |||
30 | @@ -XXX,XX +XXX,XX @@ static SCSISense mpath_generic_sense(int r) | ||
24 | } | 31 | } |
25 | } | 32 | } |
26 | 33 | ||
27 | -static BlockAIOCB *qed_aio_setup(BlockDriverState *bs, | 34 | -static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense) |
28 | - int64_t sector_num, | 35 | +static int coroutine_fn mpath_reconstruct_sense(int fd, int r, uint8_t *sense) |
29 | - QEMUIOVector *qiov, int nb_sectors, | ||
30 | - BlockCompletionFunc *cb, | ||
31 | - void *opaque, int flags) | ||
32 | +typedef struct QEDRequestCo { | ||
33 | + Coroutine *co; | ||
34 | + bool done; | ||
35 | + int ret; | ||
36 | +} QEDRequestCo; | ||
37 | + | ||
38 | +static void qed_co_request_cb(void *opaque, int ret) | ||
39 | { | 36 | { |
40 | - QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque); | 37 | switch (r) { |
41 | + QEDRequestCo *co = opaque; | 38 | case MPATH_PR_SUCCESS: |
42 | 39 | @@ -XXX,XX +XXX,XX @@ static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense) | |
43 | - trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, | 40 | } |
44 | - opaque, flags); | ||
45 | + co->done = true; | ||
46 | + co->ret = ret; | ||
47 | + qemu_coroutine_enter_if_inactive(co->co); | ||
48 | +} | ||
49 | + | ||
50 | +static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num, | ||
51 | + QEMUIOVector *qiov, int nb_sectors, | ||
52 | + int flags) | ||
53 | +{ | ||
54 | + QEDRequestCo co = { | ||
55 | + .co = qemu_coroutine_self(), | ||
56 | + .done = false, | ||
57 | + }; | ||
58 | + QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, qed_co_request_cb, &co); | ||
59 | + | ||
60 | + trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, &co, flags); | ||
61 | |||
62 | acb->flags = flags; | ||
63 | acb->qiov = qiov; | ||
64 | @@ -XXX,XX +XXX,XX @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs, | ||
65 | |||
66 | /* Start request */ | ||
67 | qed_aio_start_io(acb); | ||
68 | - return &acb->common; | ||
69 | -} | ||
70 | |||
71 | -static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, | ||
72 | - int64_t sector_num, | ||
73 | - QEMUIOVector *qiov, int nb_sectors, | ||
74 | - BlockCompletionFunc *cb, | ||
75 | - void *opaque) | ||
76 | -{ | ||
77 | - return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); | ||
78 | + if (!co.done) { | ||
79 | + qemu_coroutine_yield(); | ||
80 | + } | ||
81 | + | ||
82 | + return co.ret; | ||
83 | } | 41 | } |
84 | 42 | ||
85 | -static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, | 43 | -static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense, |
86 | - int64_t sector_num, | 44 | - uint8_t *data, int sz) |
87 | - QEMUIOVector *qiov, int nb_sectors, | 45 | +static int coroutine_fn multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense, |
88 | - BlockCompletionFunc *cb, | 46 | + uint8_t *data, int sz) |
89 | - void *opaque) | ||
90 | +static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs, | ||
91 | + int64_t sector_num, int nb_sectors, | ||
92 | + QEMUIOVector *qiov) | ||
93 | { | 47 | { |
94 | - return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, | 48 | int rq_servact = cdb[1]; |
95 | - opaque, QED_AIOCB_WRITE); | 49 | struct prin_resp resp; |
96 | + return qed_co_request(bs, sector_num, qiov, nb_sectors, 0); | 50 | @@ -XXX,XX +XXX,XX @@ static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense, |
51 | return mpath_reconstruct_sense(fd, r, sense); | ||
97 | } | 52 | } |
98 | 53 | ||
99 | -typedef struct { | 54 | -static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, |
100 | - Coroutine *co; | 55 | - const uint8_t *param, int sz) |
101 | - int ret; | 56 | +static int coroutine_fn multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, |
102 | - bool done; | 57 | + const uint8_t *param, int sz) |
103 | -} QEDWriteZeroesCB; | ||
104 | - | ||
105 | -static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret) | ||
106 | +static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs, | ||
107 | + int64_t sector_num, int nb_sectors, | ||
108 | + QEMUIOVector *qiov) | ||
109 | { | 58 | { |
110 | - QEDWriteZeroesCB *cb = opaque; | 59 | int rq_servact = cdb[1]; |
111 | - | 60 | int rq_scope = cdb[2] >> 4; |
112 | - cb->done = true; | 61 | @@ -XXX,XX +XXX,XX @@ static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, |
113 | - cb->ret = ret; | ||
114 | - if (cb->co) { | ||
115 | - aio_co_wake(cb->co); | ||
116 | - } | ||
117 | + return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE); | ||
118 | } | 62 | } |
119 | 63 | #endif | |
120 | static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, | 64 | |
121 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, | 65 | -static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense, |
122 | int count, | 66 | - uint8_t *data, int *resp_sz) |
123 | BdrvRequestFlags flags) | 67 | +static int coroutine_fn do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense, |
68 | + uint8_t *data, int *resp_sz) | ||
124 | { | 69 | { |
125 | - BlockAIOCB *blockacb; | 70 | #ifdef CONFIG_MPATH |
126 | BDRVQEDState *s = bs->opaque; | 71 | if (is_mpath(fd)) { |
127 | - QEDWriteZeroesCB cb = { .done = false }; | 72 | @@ -XXX,XX +XXX,XX @@ static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense, |
128 | QEMUIOVector qiov; | 73 | SG_DXFER_FROM_DEV); |
129 | struct iovec iov; | ||
130 | |||
131 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, | ||
132 | iov.iov_len = count; | ||
133 | |||
134 | qemu_iovec_init_external(&qiov, &iov, 1); | ||
135 | - blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov, | ||
136 | - count >> BDRV_SECTOR_BITS, | ||
137 | - qed_co_pwrite_zeroes_cb, &cb, | ||
138 | - QED_AIOCB_WRITE | QED_AIOCB_ZERO); | ||
139 | - if (!blockacb) { | ||
140 | - return -EIO; | ||
141 | - } | ||
142 | - if (!cb.done) { | ||
143 | - cb.co = qemu_coroutine_self(); | ||
144 | - qemu_coroutine_yield(); | ||
145 | - } | ||
146 | - assert(cb.done); | ||
147 | - return cb.ret; | ||
148 | + return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov, | ||
149 | + count >> BDRV_SECTOR_BITS, | ||
150 | + QED_AIOCB_WRITE | QED_AIOCB_ZERO); | ||
151 | } | 74 | } |
152 | 75 | ||
153 | static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp) | 76 | -static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, |
154 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = { | 77 | - const uint8_t *param, int sz) |
155 | .bdrv_create = bdrv_qed_create, | 78 | +static int coroutine_fn do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, |
156 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | 79 | + const uint8_t *param, int sz) |
157 | .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, | 80 | { |
158 | - .bdrv_aio_readv = bdrv_qed_aio_readv, | 81 | int resp_sz; |
159 | - .bdrv_aio_writev = bdrv_qed_aio_writev, | 82 | |
160 | + .bdrv_co_readv = bdrv_qed_co_readv, | ||
161 | + .bdrv_co_writev = bdrv_qed_co_writev, | ||
162 | .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, | ||
163 | .bdrv_truncate = bdrv_qed_truncate, | ||
164 | .bdrv_getlength = bdrv_qed_getlength, | ||
165 | -- | 83 | -- |
166 | 1.8.3.1 | 84 | 2.40.0 |
167 | |||
168 | diff view generated by jsdifflib |
1 | From: Stephen Bates <sbates@raithlin.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Add the ability for the NVMe model to support both the RDS and WDS | 3 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
4 | modes in the Controller Memory Buffer. | 4 | Message-Id: <20230309084456.304669-8-pbonzini@redhat.com> |
5 | 5 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | |
6 | Although not currently supported in the upstreamed Linux kernel, a fork | ||
7 | with support exists [1] and user-space test programs that build on | ||
8 | this also exist [2]. | ||
9 | |||
10 | Useful for testing CMB functionality in preparation for real CMB | ||
11 | enabled NVMe devices (coming soon). | ||
12 | |||
13 | [1] https://github.com/sbates130272/linux-p2pmem | ||
14 | [2] https://github.com/sbates130272/p2pmem-test | ||
15 | |||
16 | Signed-off-by: Stephen Bates <sbates@raithlin.com> | ||
17 | Reviewed-by: Logan Gunthorpe <logang@deltatee.com> | ||
18 | Reviewed-by: Keith Busch <keith.busch@intel.com> | ||
19 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 6 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
20 | --- | 7 | --- |
21 | hw/block/nvme.c | 83 +++++++++++++++++++++++++++++++++++++++------------------ | 8 | tests/unit/test-thread-pool.c | 2 +- |
22 | hw/block/nvme.h | 1 + | 9 | 1 file changed, 1 insertion(+), 1 deletion(-) |
23 | 2 files changed, 58 insertions(+), 26 deletions(-) | ||
24 | 10 | ||
25 | diff --git a/hw/block/nvme.c b/hw/block/nvme.c | 11 | diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c |
26 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/hw/block/nvme.c | 13 | --- a/tests/unit/test-thread-pool.c |
28 | +++ b/hw/block/nvme.c | 14 | +++ b/tests/unit/test-thread-pool.c |
29 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ static void test_submit_aio(void) |
30 | * cmb_size_mb=<cmb_size_mb[optional]> | 16 | g_assert_cmpint(data.ret, ==, 0); |
31 | * | ||
32 | * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at | ||
33 | - * offset 0 in BAR2 and supports SQS only for now. | ||
34 | + * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. | ||
35 | */ | ||
36 | |||
37 | #include "qemu/osdep.h" | ||
38 | @@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) | ||
39 | } | ||
40 | } | 17 | } |
41 | 18 | ||
42 | -static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, | 19 | -static void co_test_cb(void *opaque) |
43 | - uint32_t len, NvmeCtrl *n) | 20 | +static void coroutine_fn co_test_cb(void *opaque) |
44 | +static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, | ||
45 | + uint64_t prp2, uint32_t len, NvmeCtrl *n) | ||
46 | { | 21 | { |
47 | hwaddr trans_len = n->page_size - (prp1 % n->page_size); | 22 | WorkerTestData *data = opaque; |
48 | trans_len = MIN(len, trans_len); | ||
49 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, | ||
50 | |||
51 | if (!prp1) { | ||
52 | return NVME_INVALID_FIELD | NVME_DNR; | ||
53 | + } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && | ||
54 | + prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { | ||
55 | + qsg->nsg = 0; | ||
56 | + qemu_iovec_init(iov, num_prps); | ||
57 | + qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len); | ||
58 | + } else { | ||
59 | + pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); | ||
60 | + qemu_sglist_add(qsg, prp1, trans_len); | ||
61 | } | ||
62 | - | ||
63 | - pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); | ||
64 | - qemu_sglist_add(qsg, prp1, trans_len); | ||
65 | len -= trans_len; | ||
66 | if (len) { | ||
67 | if (!prp2) { | ||
68 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, | ||
69 | |||
70 | nents = (len + n->page_size - 1) >> n->page_bits; | ||
71 | prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); | ||
72 | - pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans); | ||
73 | + nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); | ||
74 | while (len != 0) { | ||
75 | uint64_t prp_ent = le64_to_cpu(prp_list[i]); | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, | ||
78 | i = 0; | ||
79 | nents = (len + n->page_size - 1) >> n->page_bits; | ||
80 | prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); | ||
81 | - pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list, | ||
82 | + nvme_addr_read(n, prp_ent, (void *)prp_list, | ||
83 | prp_trans); | ||
84 | prp_ent = le64_to_cpu(prp_list[i]); | ||
85 | } | ||
86 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, | ||
87 | } | ||
88 | |||
89 | trans_len = MIN(len, n->page_size); | ||
90 | - qemu_sglist_add(qsg, prp_ent, trans_len); | ||
91 | + if (qsg->nsg){ | ||
92 | + qemu_sglist_add(qsg, prp_ent, trans_len); | ||
93 | + } else { | ||
94 | + qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len); | ||
95 | + } | ||
96 | len -= trans_len; | ||
97 | i++; | ||
98 | } | ||
99 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, | ||
100 | if (prp2 & (n->page_size - 1)) { | ||
101 | goto unmap; | ||
102 | } | ||
103 | - qemu_sglist_add(qsg, prp2, len); | ||
104 | + if (qsg->nsg) { | ||
105 | + qemu_sglist_add(qsg, prp2, len); | ||
106 | + } else { | ||
107 | + qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len); | ||
108 | + } | ||
109 | } | ||
110 | } | ||
111 | return NVME_SUCCESS; | ||
112 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, | ||
113 | uint64_t prp1, uint64_t prp2) | ||
114 | { | ||
115 | QEMUSGList qsg; | ||
116 | + QEMUIOVector iov; | ||
117 | + uint16_t status = NVME_SUCCESS; | ||
118 | |||
119 | - if (nvme_map_prp(&qsg, prp1, prp2, len, n)) { | ||
120 | + if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { | ||
121 | return NVME_INVALID_FIELD | NVME_DNR; | ||
122 | } | ||
123 | - if (dma_buf_read(ptr, len, &qsg)) { | ||
124 | + if (qsg.nsg > 0) { | ||
125 | + if (dma_buf_read(ptr, len, &qsg)) { | ||
126 | + status = NVME_INVALID_FIELD | NVME_DNR; | ||
127 | + } | ||
128 | qemu_sglist_destroy(&qsg); | ||
129 | - return NVME_INVALID_FIELD | NVME_DNR; | ||
130 | + } else { | ||
131 | + if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { | ||
132 | + status = NVME_INVALID_FIELD | NVME_DNR; | ||
133 | + } | ||
134 | + qemu_iovec_destroy(&iov); | ||
135 | } | ||
136 | - qemu_sglist_destroy(&qsg); | ||
137 | - return NVME_SUCCESS; | ||
138 | + return status; | ||
139 | } | ||
140 | |||
141 | static void nvme_post_cqes(void *opaque) | ||
142 | @@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, | ||
143 | return NVME_LBA_RANGE | NVME_DNR; | ||
144 | } | ||
145 | |||
146 | - if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) { | ||
147 | + if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) { | ||
148 | block_acct_invalid(blk_get_stats(n->conf.blk), acct); | ||
149 | return NVME_INVALID_FIELD | NVME_DNR; | ||
150 | } | ||
151 | |||
152 | - assert((nlb << data_shift) == req->qsg.size); | ||
153 | - | ||
154 | - req->has_sg = true; | ||
155 | dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); | ||
156 | - req->aiocb = is_write ? | ||
157 | - dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, | ||
158 | - nvme_rw_cb, req) : | ||
159 | - dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, | ||
160 | - nvme_rw_cb, req); | ||
161 | + if (req->qsg.nsg > 0) { | ||
162 | + req->has_sg = true; | ||
163 | + req->aiocb = is_write ? | ||
164 | + dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, | ||
165 | + nvme_rw_cb, req) : | ||
166 | + dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, | ||
167 | + nvme_rw_cb, req); | ||
168 | + } else { | ||
169 | + req->has_sg = false; | ||
170 | + req->aiocb = is_write ? | ||
171 | + blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, | ||
172 | + req) : | ||
173 | + blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, | ||
174 | + req); | ||
175 | + } | ||
176 | |||
177 | return NVME_NO_COMPLETE; | ||
178 | } | ||
179 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev) | ||
180 | NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); | ||
181 | NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); | ||
182 | NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); | ||
183 | - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 0); | ||
184 | - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 0); | ||
185 | + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); | ||
186 | + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); | ||
187 | NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ | ||
188 | NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); | ||
189 | |||
190 | + n->cmbloc = n->bar.cmbloc; | ||
191 | + n->cmbsz = n->bar.cmbsz; | ||
192 | + | ||
193 | n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); | ||
194 | memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, | ||
195 | "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); | ||
196 | diff --git a/hw/block/nvme.h b/hw/block/nvme.h | ||
197 | index XXXXXXX..XXXXXXX 100644 | ||
198 | --- a/hw/block/nvme.h | ||
199 | +++ b/hw/block/nvme.h | ||
200 | @@ -XXX,XX +XXX,XX @@ typedef struct NvmeRequest { | ||
201 | NvmeCqe cqe; | ||
202 | BlockAcctCookie acct; | ||
203 | QEMUSGList qsg; | ||
204 | + QEMUIOVector iov; | ||
205 | QTAILQ_ENTRY(NvmeRequest)entry; | ||
206 | } NvmeRequest; | ||
207 | 23 | ||
208 | -- | 24 | -- |
209 | 1.8.3.1 | 25 | 2.40.0 |
210 | |||
211 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | If the guest tries to write data that results in the allocation of a | 3 | Functions that can do I/O (including calling bdrv_is_allocated |
4 | new cluster, instead of writing the guest data first and then the data | 4 | and bdrv_block_status functions) are prime candidates for being |
5 | from the COW regions, write everything together using one single I/O | 5 | coroutine_fns. Make the change for those that are themselves called |
6 | operation. | 6 | only from coroutine_fns. Also annotate that they are called with the |
7 | graph rdlock taken, thus allowing them to call bdrv_co_*() functions | ||
8 | for I/O. | ||
7 | 9 | ||
8 | This can improve the write performance by 25% or more, depending on | 10 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
9 | several factors such as the media type, the cluster size and the I/O | 11 | Message-Id: <20230309084456.304669-9-pbonzini@redhat.com> |
10 | request size. | ||
11 | |||
12 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
13 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
15 | --- | 14 | --- |
16 | block/qcow2-cluster.c | 40 ++++++++++++++++++++++++-------- | 15 | block/qcow2.h | 15 ++++++++------- |
17 | block/qcow2.c | 64 +++++++++++++++++++++++++++++++++++++++++++-------- | 16 | block/qcow2-bitmap.c | 2 +- |
18 | block/qcow2.h | 7 ++++++ | 17 | block/qcow2-cluster.c | 21 +++++++++++++-------- |
19 | 3 files changed, 91 insertions(+), 20 deletions(-) | 18 | block/qcow2-refcount.c | 8 ++++---- |
19 | block/qcow2-snapshot.c | 25 +++++++++++++------------ | ||
20 | block/qcow2.c | 27 ++++++++++++++------------- | ||
21 | 6 files changed, 53 insertions(+), 45 deletions(-) | ||
20 | 22 | ||
23 | diff --git a/block/qcow2.h b/block/qcow2.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/block/qcow2.h | ||
26 | +++ b/block/qcow2.h | ||
27 | @@ -XXX,XX +XXX,XX @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset, | ||
28 | uint64_t new_refblock_offset); | ||
29 | |||
30 | int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); | ||
31 | -int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, | ||
32 | - int64_t nb_clusters); | ||
33 | -int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); | ||
34 | +int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, | ||
35 | + int64_t nb_clusters); | ||
36 | +int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size); | ||
37 | void qcow2_free_clusters(BlockDriverState *bs, | ||
38 | int64_t offset, int64_t size, | ||
39 | enum qcow2_discard_type type); | ||
40 | @@ -XXX,XX +XXX,XX @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, | ||
41 | BlockDriverAmendStatusCB *status_cb, | ||
42 | void *cb_opaque, Error **errp); | ||
43 | int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs); | ||
44 | -int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); | ||
45 | +int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); | ||
46 | int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs); | ||
47 | |||
48 | /* qcow2-cluster.c functions */ | ||
49 | @@ -XXX,XX +XXX,XX @@ void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, | ||
50 | int coroutine_fn GRAPH_RDLOCK | ||
51 | qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); | ||
52 | |||
53 | -void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); | ||
54 | +void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); | ||
55 | int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, | ||
56 | uint64_t bytes, enum qcow2_discard_type type, | ||
57 | bool full_discard); | ||
58 | @@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, | ||
59 | Error **errp); | ||
60 | |||
61 | void qcow2_free_snapshots(BlockDriverState *bs); | ||
62 | -int qcow2_read_snapshots(BlockDriverState *bs, Error **errp); | ||
63 | +int coroutine_fn GRAPH_RDLOCK | ||
64 | +qcow2_read_snapshots(BlockDriverState *bs, Error **errp); | ||
65 | int qcow2_write_snapshots(BlockDriverState *bs); | ||
66 | |||
67 | int coroutine_fn GRAPH_RDLOCK | ||
68 | @@ -XXX,XX +XXX,XX @@ bool coroutine_fn qcow2_load_dirty_bitmaps(BlockDriverState *bs, | ||
69 | bool qcow2_get_bitmap_info_list(BlockDriverState *bs, | ||
70 | Qcow2BitmapInfoList **info_list, Error **errp); | ||
71 | int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); | ||
72 | -int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp); | ||
73 | +int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp); | ||
74 | bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, | ||
75 | bool release_stored, Error **errp); | ||
76 | int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); | ||
77 | diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/block/qcow2-bitmap.c | ||
80 | +++ b/block/qcow2-bitmap.c | ||
81 | @@ -XXX,XX +XXX,XX @@ out: | ||
82 | } | ||
83 | |||
84 | /* Checks to see if it's safe to resize bitmaps */ | ||
85 | -int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp) | ||
86 | +int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp) | ||
87 | { | ||
88 | BDRVQcow2State *s = bs->opaque; | ||
89 | Qcow2BitmapList *bm_list; | ||
21 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 90 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c |
22 | index XXXXXXX..XXXXXXX 100644 | 91 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/block/qcow2-cluster.c | 92 | --- a/block/qcow2-cluster.c |
24 | +++ b/block/qcow2-cluster.c | 93 | +++ b/block/qcow2-cluster.c |
25 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | 94 | @@ -XXX,XX +XXX,XX @@ err: |
26 | assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); | 95 | * Frees the allocated clusters because the request failed and they won't |
27 | assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes); | 96 | * actually be linked. |
28 | assert(start->offset + start->nb_bytes <= end->offset); | 97 | */ |
29 | + assert(!m->data_qiov || m->data_qiov->size == data_bytes); | 98 | -void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) |
30 | 99 | +void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) | |
31 | if (start->nb_bytes == 0 && end->nb_bytes == 0) { | 100 | { |
32 | return 0; | 101 | BDRVQcow2State *s = bs->opaque; |
33 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | 102 | if (!has_data_file(bs) && !m->keep_old_clusters) { |
34 | /* The part of the buffer where the end region is located */ | 103 | @@ -XXX,XX +XXX,XX @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) |
35 | end_buffer = start_buffer + buffer_size - end->nb_bytes; | 104 | * |
36 | 105 | * Returns 0 on success, -errno on failure. | |
37 | - qemu_iovec_init(&qiov, 1); | 106 | */ |
38 | + qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0)); | 107 | -static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset, |
39 | 108 | - uint64_t guest_offset, unsigned bytes, | |
40 | qemu_co_mutex_unlock(&s->lock); | 109 | - uint64_t *l2_slice, QCowL2Meta **m, bool keep_old) |
41 | /* First we read the existing data from both COW regions. We | 110 | +static int coroutine_fn calculate_l2_meta(BlockDriverState *bs, |
42 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | 111 | + uint64_t host_cluster_offset, |
112 | + uint64_t guest_offset, unsigned bytes, | ||
113 | + uint64_t *l2_slice, QCowL2Meta **m, | ||
114 | + bool keep_old) | ||
115 | { | ||
116 | BDRVQcow2State *s = bs->opaque; | ||
117 | int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset); | ||
118 | @@ -XXX,XX +XXX,XX @@ out: | ||
119 | * function has been waiting for another request and the allocation must be | ||
120 | * restarted, but the whole request should not be failed. | ||
121 | */ | ||
122 | -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, | ||
123 | - uint64_t *host_offset, uint64_t *nb_clusters) | ||
124 | +static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs, | ||
125 | + uint64_t guest_offset, | ||
126 | + uint64_t *host_offset, | ||
127 | + uint64_t *nb_clusters) | ||
128 | { | ||
129 | BDRVQcow2State *s = bs->opaque; | ||
130 | |||
131 | @@ -XXX,XX +XXX,XX @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, | ||
132 | return nb_clusters; | ||
133 | } | ||
134 | |||
135 | -static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, | ||
136 | - unsigned nb_subclusters) | ||
137 | +static int coroutine_fn | ||
138 | +zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, | ||
139 | + unsigned nb_subclusters) | ||
140 | { | ||
141 | BDRVQcow2State *s = bs->opaque; | ||
142 | uint64_t *l2_slice; | ||
143 | diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/block/qcow2-refcount.c | ||
146 | +++ b/block/qcow2-refcount.c | ||
147 | @@ -XXX,XX +XXX,XX @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size) | ||
148 | return offset; | ||
149 | } | ||
150 | |||
151 | -int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, | ||
152 | - int64_t nb_clusters) | ||
153 | +int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, | ||
154 | + int64_t nb_clusters) | ||
155 | { | ||
156 | BDRVQcow2State *s = bs->opaque; | ||
157 | uint64_t cluster_index, refcount; | ||
158 | @@ -XXX,XX +XXX,XX @@ int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, | ||
159 | |||
160 | /* only used to allocate compressed sectors. We try to allocate | ||
161 | contiguous sectors. size must be <= cluster_size */ | ||
162 | -int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) | ||
163 | +int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size) | ||
164 | { | ||
165 | BDRVQcow2State *s = bs->opaque; | ||
166 | int64_t offset; | ||
167 | @@ -XXX,XX +XXX,XX @@ out: | ||
168 | return ret; | ||
169 | } | ||
170 | |||
171 | -int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) | ||
172 | +int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) | ||
173 | { | ||
174 | BDRVQcow2State *s = bs->opaque; | ||
175 | int64_t i; | ||
176 | diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c | ||
177 | index XXXXXXX..XXXXXXX 100644 | ||
178 | --- a/block/qcow2-snapshot.c | ||
179 | +++ b/block/qcow2-snapshot.c | ||
180 | @@ -XXX,XX +XXX,XX @@ void qcow2_free_snapshots(BlockDriverState *bs) | ||
181 | * qcow2_check_refcounts() does not do anything with snapshots' | ||
182 | * extra data.) | ||
183 | */ | ||
184 | -static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, | ||
185 | - int *nb_clusters_reduced, | ||
186 | - int *extra_data_dropped, | ||
187 | - Error **errp) | ||
188 | +static coroutine_fn GRAPH_RDLOCK | ||
189 | +int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, | ||
190 | + int *nb_clusters_reduced, | ||
191 | + int *extra_data_dropped, | ||
192 | + Error **errp) | ||
193 | { | ||
194 | BDRVQcow2State *s = bs->opaque; | ||
195 | QCowSnapshotHeader h; | ||
196 | @@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, | ||
197 | |||
198 | /* Read statically sized part of the snapshot header */ | ||
199 | offset = ROUND_UP(offset, 8); | ||
200 | - ret = bdrv_pread(bs->file, offset, sizeof(h), &h, 0); | ||
201 | + ret = bdrv_co_pread(bs->file, offset, sizeof(h), &h, 0); | ||
202 | if (ret < 0) { | ||
203 | error_setg_errno(errp, -ret, "Failed to read snapshot table"); | ||
204 | goto fail; | ||
205 | @@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, | ||
43 | } | 206 | } |
44 | } | 207 | |
45 | 208 | /* Read known extra data */ | |
46 | - /* And now we can write everything */ | 209 | - ret = bdrv_pread(bs->file, offset, |
47 | - qemu_iovec_reset(&qiov); | 210 | - MIN(sizeof(extra), sn->extra_data_size), &extra, 0); |
48 | - qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); | 211 | + ret = bdrv_co_pread(bs->file, offset, |
49 | - ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); | 212 | + MIN(sizeof(extra), sn->extra_data_size), &extra, 0); |
50 | - if (ret < 0) { | 213 | if (ret < 0) { |
51 | - goto fail; | 214 | error_setg_errno(errp, -ret, "Failed to read snapshot table"); |
52 | + /* And now we can write everything. If we have the guest data we | 215 | goto fail; |
53 | + * can write everything in one single operation */ | 216 | @@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, |
54 | + if (m->data_qiov) { | 217 | /* Store unknown extra data */ |
55 | + qemu_iovec_reset(&qiov); | 218 | unknown_extra_data_size = sn->extra_data_size - sizeof(extra); |
56 | + if (start->nb_bytes) { | 219 | sn->unknown_extra_data = g_malloc(unknown_extra_data_size); |
57 | + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); | 220 | - ret = bdrv_pread(bs->file, offset, unknown_extra_data_size, |
58 | + } | 221 | - sn->unknown_extra_data, 0); |
59 | + qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes); | 222 | + ret = bdrv_co_pread(bs->file, offset, unknown_extra_data_size, |
60 | + if (end->nb_bytes) { | 223 | + sn->unknown_extra_data, 0); |
61 | + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); | 224 | if (ret < 0) { |
62 | + } | 225 | error_setg_errno(errp, -ret, |
63 | + /* NOTE: we have a write_aio blkdebug event here followed by | 226 | "Failed to read snapshot table"); |
64 | + * a cow_write one in do_perform_cow_write(), but there's only | 227 | @@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, |
65 | + * one single I/O operation */ | 228 | |
66 | + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | 229 | /* Read snapshot ID */ |
67 | + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); | 230 | sn->id_str = g_malloc(id_str_size + 1); |
68 | + } else { | 231 | - ret = bdrv_pread(bs->file, offset, id_str_size, sn->id_str, 0); |
69 | + /* If there's no guest data then write both COW regions separately */ | 232 | + ret = bdrv_co_pread(bs->file, offset, id_str_size, sn->id_str, 0); |
70 | + qemu_iovec_reset(&qiov); | 233 | if (ret < 0) { |
71 | + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); | 234 | error_setg_errno(errp, -ret, "Failed to read snapshot table"); |
72 | + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); | 235 | goto fail; |
73 | + if (ret < 0) { | 236 | @@ -XXX,XX +XXX,XX @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, |
74 | + goto fail; | 237 | |
75 | + } | 238 | /* Read snapshot name */ |
76 | + | 239 | sn->name = g_malloc(name_size + 1); |
77 | + qemu_iovec_reset(&qiov); | 240 | - ret = bdrv_pread(bs->file, offset, name_size, sn->name, 0); |
78 | + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); | 241 | + ret = bdrv_co_pread(bs->file, offset, name_size, sn->name, 0); |
79 | + ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov); | 242 | if (ret < 0) { |
80 | } | 243 | error_setg_errno(errp, -ret, "Failed to read snapshot table"); |
81 | 244 | goto fail; | |
82 | - qemu_iovec_reset(&qiov); | 245 | @@ -XXX,XX +XXX,XX @@ fail: |
83 | - qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); | 246 | return ret; |
84 | - ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov); | 247 | } |
85 | fail: | 248 | |
86 | qemu_co_mutex_lock(&s->lock); | 249 | -int qcow2_read_snapshots(BlockDriverState *bs, Error **errp) |
87 | 250 | +int coroutine_fn qcow2_read_snapshots(BlockDriverState *bs, Error **errp) | |
251 | { | ||
252 | return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp); | ||
253 | } | ||
88 | diff --git a/block/qcow2.c b/block/qcow2.c | 254 | diff --git a/block/qcow2.c b/block/qcow2.c |
89 | index XXXXXXX..XXXXXXX 100644 | 255 | index XXXXXXX..XXXXXXX 100644 |
90 | --- a/block/qcow2.c | 256 | --- a/block/qcow2.c |
91 | +++ b/block/qcow2.c | 257 | +++ b/block/qcow2.c |
92 | @@ -XXX,XX +XXX,XX @@ fail: | 258 | @@ -XXX,XX +XXX,XX @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp) |
93 | return ret; | 259 | * unknown magic is skipped (future extension this version knows nothing about) |
94 | } | 260 | * return 0 upon success, non-0 otherwise |
95 | 261 | */ | |
96 | +/* Check if it's possible to merge a write request with the writing of | 262 | -static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
97 | + * the data from the COW regions */ | 263 | - uint64_t end_offset, void **p_feature_table, |
98 | +static bool merge_cow(uint64_t offset, unsigned bytes, | 264 | - int flags, bool *need_update_header, |
99 | + QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) | 265 | - Error **errp) |
100 | +{ | 266 | +static int coroutine_fn GRAPH_RDLOCK |
101 | + QCowL2Meta *m; | 267 | +qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
102 | + | 268 | + uint64_t end_offset, void **p_feature_table, |
103 | + for (m = l2meta; m != NULL; m = m->next) { | 269 | + int flags, bool *need_update_header, Error **errp) |
104 | + /* If both COW regions are empty then there's nothing to merge */ | 270 | { |
105 | + if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { | 271 | BDRVQcow2State *s = bs->opaque; |
106 | + continue; | 272 | QCowExtension ext; |
107 | + } | 273 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
108 | + | 274 | printf("attempting to read extended header in offset %lu\n", offset); |
109 | + /* The data (middle) region must be immediately after the | 275 | #endif |
110 | + * start region */ | 276 | |
111 | + if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { | 277 | - ret = bdrv_pread(bs->file, offset, sizeof(ext), &ext, 0); |
112 | + continue; | 278 | + ret = bdrv_co_pread(bs->file, offset, sizeof(ext), &ext, 0); |
113 | + } | 279 | if (ret < 0) { |
114 | + | 280 | error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " |
115 | + /* The end region must be immediately after the data (middle) | 281 | "pread fail from offset %" PRIu64, offset); |
116 | + * region */ | 282 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
117 | + if (m->offset + m->cow_end.offset != offset + bytes) { | 283 | sizeof(bs->backing_format)); |
118 | + continue; | 284 | return 2; |
119 | + } | 285 | } |
120 | + | 286 | - ret = bdrv_pread(bs->file, offset, ext.len, bs->backing_format, 0); |
121 | + /* Make sure that adding both COW regions to the QEMUIOVector | 287 | + ret = bdrv_co_pread(bs->file, offset, ext.len, bs->backing_format, 0); |
122 | + * does not exceed IOV_MAX */ | 288 | if (ret < 0) { |
123 | + if (hd_qiov->niov > IOV_MAX - 2) { | 289 | error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " |
124 | + continue; | 290 | "Could not read format name"); |
125 | + } | 291 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
126 | + | 292 | case QCOW2_EXT_MAGIC_FEATURE_TABLE: |
127 | + m->data_qiov = hd_qiov; | 293 | if (p_feature_table != NULL) { |
128 | + return true; | 294 | void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); |
129 | + } | 295 | - ret = bdrv_pread(bs->file, offset, ext.len, feature_table, 0); |
130 | + | 296 | + ret = bdrv_co_pread(bs->file, offset, ext.len, feature_table, 0); |
131 | + return false; | 297 | if (ret < 0) { |
132 | +} | 298 | error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " |
133 | + | 299 | "Could not read table"); |
134 | static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | 300 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
135 | uint64_t bytes, QEMUIOVector *qiov, | 301 | return -EINVAL; |
136 | int flags) | 302 | } |
137 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | 303 | |
138 | goto fail; | 304 | - ret = bdrv_pread(bs->file, offset, ext.len, &s->crypto_header, 0); |
139 | } | 305 | + ret = bdrv_co_pread(bs->file, offset, ext.len, &s->crypto_header, 0); |
140 | 306 | if (ret < 0) { | |
141 | - qemu_co_mutex_unlock(&s->lock); | 307 | error_setg_errno(errp, -ret, |
142 | - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | 308 | "Unable to read CRYPTO header extension"); |
143 | - trace_qcow2_writev_data(qemu_coroutine_self(), | 309 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
144 | - cluster_offset + offset_in_cluster); | 310 | break; |
145 | - ret = bdrv_co_pwritev(bs->file, | 311 | } |
146 | - cluster_offset + offset_in_cluster, | 312 | |
147 | - cur_bytes, &hd_qiov, 0); | 313 | - ret = bdrv_pread(bs->file, offset, ext.len, &bitmaps_ext, 0); |
148 | - qemu_co_mutex_lock(&s->lock); | 314 | + ret = bdrv_co_pread(bs->file, offset, ext.len, &bitmaps_ext, 0); |
149 | - if (ret < 0) { | 315 | if (ret < 0) { |
150 | - goto fail; | 316 | error_setg_errno(errp, -ret, "bitmaps_ext: " |
151 | + /* If we need to do COW, check if it's possible to merge the | 317 | "Could not read ext header"); |
152 | + * writing of the guest data together with that of the COW regions. | 318 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
153 | + * If it's not possible (or not necessary) then write the | 319 | case QCOW2_EXT_MAGIC_DATA_FILE: |
154 | + * guest data now. */ | 320 | { |
155 | + if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { | 321 | s->image_data_file = g_malloc0(ext.len + 1); |
156 | + qemu_co_mutex_unlock(&s->lock); | 322 | - ret = bdrv_pread(bs->file, offset, ext.len, s->image_data_file, 0); |
157 | + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | 323 | + ret = bdrv_co_pread(bs->file, offset, ext.len, s->image_data_file, 0); |
158 | + trace_qcow2_writev_data(qemu_coroutine_self(), | 324 | if (ret < 0) { |
159 | + cluster_offset + offset_in_cluster); | 325 | error_setg_errno(errp, -ret, |
160 | + ret = bdrv_co_pwritev(bs->file, | 326 | "ERROR: Could not read data file name"); |
161 | + cluster_offset + offset_in_cluster, | 327 | @@ -XXX,XX +XXX,XX @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, |
162 | + cur_bytes, &hd_qiov, 0); | 328 | uext->len = ext.len; |
163 | + qemu_co_mutex_lock(&s->lock); | 329 | QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); |
164 | + if (ret < 0) { | 330 | |
165 | + goto fail; | 331 | - ret = bdrv_pread(bs->file, offset, uext->len, uext->data, 0); |
166 | + } | 332 | + ret = bdrv_co_pread(bs->file, offset, uext->len, uext->data, 0); |
167 | } | 333 | if (ret < 0) { |
168 | 334 | error_setg_errno(errp, -ret, "ERROR: unknown extension: " | |
169 | while (l2meta != NULL) { | 335 | "Could not read data"); |
170 | diff --git a/block/qcow2.h b/block/qcow2.h | 336 | @@ -XXX,XX +XXX,XX @@ static void qcow2_update_options_abort(BlockDriverState *bs, |
171 | index XXXXXXX..XXXXXXX 100644 | 337 | qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); |
172 | --- a/block/qcow2.h | 338 | } |
173 | +++ b/block/qcow2.h | 339 | |
174 | @@ -XXX,XX +XXX,XX @@ typedef struct QCowL2Meta | 340 | -static int qcow2_update_options(BlockDriverState *bs, QDict *options, |
175 | */ | 341 | - int flags, Error **errp) |
176 | Qcow2COWRegion cow_end; | 342 | +static int coroutine_fn |
177 | 343 | +qcow2_update_options(BlockDriverState *bs, QDict *options, int flags, | |
178 | + /** | 344 | + Error **errp) |
179 | + * The I/O vector with the data from the actual guest write request. | 345 | { |
180 | + * If non-NULL, this is meant to be merged together with the data | 346 | Qcow2ReopenState r = {}; |
181 | + * from @cow_start and @cow_end into one single write operation. | 347 | int ret; |
182 | + */ | ||
183 | + QEMUIOVector *data_qiov; | ||
184 | + | ||
185 | /** Pointer to next L2Meta of the same write request */ | ||
186 | struct QCowL2Meta *next; | ||
187 | |||
188 | -- | 348 | -- |
189 | 1.8.3.1 | 349 | 2.40.0 |
190 | |||
191 | diff view generated by jsdifflib |
1 | From: Alberto Garcia <berto@igalia.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Instead of passing a single buffer pointer to do_perform_cow_write(), | 3 | Functions that can do I/O are prime candidates for being coroutine_fns. Make the |
4 | pass a QEMUIOVector. This will allow us to merge the write requests | 4 | change for the one that is itself called only from coroutine_fns. Unfortunately |
5 | for the COW regions and the actual data into a single one. | 5 | vmdk does not use a coroutine_fn for the bulk of the open (like qcow2 does) so |
6 | vmdk_read_cid cannot have the same treatment. | ||
6 | 7 | ||
7 | Although do_perform_cow_read() does not strictly need to change its | 8 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
8 | API, we're doing it here as well for consistency. | 9 | Message-Id: <20230309084456.304669-10-pbonzini@redhat.com> |
9 | |||
10 | Signed-off-by: Alberto Garcia <berto@igalia.com> | ||
11 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | 10 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> |
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
13 | --- | 12 | --- |
14 | block/qcow2-cluster.c | 51 ++++++++++++++++++++++++--------------------------- | 13 | block/vmdk.c | 2 +- |
15 | 1 file changed, 24 insertions(+), 27 deletions(-) | 14 | 1 file changed, 1 insertion(+), 1 deletion(-) |
16 | 15 | ||
17 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 16 | diff --git a/block/vmdk.c b/block/vmdk.c |
18 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/qcow2-cluster.c | 18 | --- a/block/vmdk.c |
20 | +++ b/block/qcow2-cluster.c | 19 | +++ b/block/vmdk.c |
21 | @@ -XXX,XX +XXX,XX @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, | 20 | @@ -XXX,XX +XXX,XX @@ out: |
22 | static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, | ||
23 | uint64_t src_cluster_offset, | ||
24 | unsigned offset_in_cluster, | ||
25 | - uint8_t *buffer, | ||
26 | - unsigned bytes) | ||
27 | + QEMUIOVector *qiov) | ||
28 | { | ||
29 | - QEMUIOVector qiov; | ||
30 | - struct iovec iov = { .iov_base = buffer, .iov_len = bytes }; | ||
31 | int ret; | ||
32 | |||
33 | - if (bytes == 0) { | ||
34 | + if (qiov->size == 0) { | ||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | - qemu_iovec_init_external(&qiov, &iov, 1); | ||
39 | - | ||
40 | BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); | ||
41 | |||
42 | if (!bs->drv) { | ||
43 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, | ||
44 | * which can lead to deadlock when block layer copy-on-read is enabled. | ||
45 | */ | ||
46 | ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster, | ||
47 | - bytes, &qiov, 0); | ||
48 | + qiov->size, qiov, 0); | ||
49 | if (ret < 0) { | ||
50 | return ret; | ||
51 | } | ||
52 | @@ -XXX,XX +XXX,XX @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, | ||
53 | static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, | ||
54 | uint64_t cluster_offset, | ||
55 | unsigned offset_in_cluster, | ||
56 | - uint8_t *buffer, | ||
57 | - unsigned bytes) | ||
58 | + QEMUIOVector *qiov) | ||
59 | { | ||
60 | - QEMUIOVector qiov; | ||
61 | - struct iovec iov = { .iov_base = buffer, .iov_len = bytes }; | ||
62 | int ret; | ||
63 | |||
64 | - if (bytes == 0) { | ||
65 | + if (qiov->size == 0) { | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | - qemu_iovec_init_external(&qiov, &iov, 1); | ||
70 | - | ||
71 | ret = qcow2_pre_write_overlap_check(bs, 0, | ||
72 | - cluster_offset + offset_in_cluster, bytes); | ||
73 | + cluster_offset + offset_in_cluster, qiov->size); | ||
74 | if (ret < 0) { | ||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); | ||
79 | ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, | ||
80 | - bytes, &qiov, 0); | ||
81 | + qiov->size, qiov, 0); | ||
82 | if (ret < 0) { | ||
83 | return ret; | ||
84 | } | ||
85 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | ||
86 | unsigned data_bytes = end->offset - (start->offset + start->nb_bytes); | ||
87 | bool merge_reads; | ||
88 | uint8_t *start_buffer, *end_buffer; | ||
89 | + QEMUIOVector qiov; | ||
90 | int ret; | ||
91 | |||
92 | assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); | ||
93 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | ||
94 | /* The part of the buffer where the end region is located */ | ||
95 | end_buffer = start_buffer + buffer_size - end->nb_bytes; | ||
96 | |||
97 | + qemu_iovec_init(&qiov, 1); | ||
98 | + | ||
99 | qemu_co_mutex_unlock(&s->lock); | ||
100 | /* First we read the existing data from both COW regions. We | ||
101 | * either read the whole region in one go, or the start and end | ||
102 | * regions separately. */ | ||
103 | if (merge_reads) { | ||
104 | - ret = do_perform_cow_read(bs, m->offset, start->offset, | ||
105 | - start_buffer, buffer_size); | ||
106 | + qemu_iovec_add(&qiov, start_buffer, buffer_size); | ||
107 | + ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); | ||
108 | } else { | ||
109 | - ret = do_perform_cow_read(bs, m->offset, start->offset, | ||
110 | - start_buffer, start->nb_bytes); | ||
111 | + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); | ||
112 | + ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); | ||
113 | if (ret < 0) { | ||
114 | goto fail; | ||
115 | } | ||
116 | |||
117 | - ret = do_perform_cow_read(bs, m->offset, end->offset, | ||
118 | - end_buffer, end->nb_bytes); | ||
119 | + qemu_iovec_reset(&qiov); | ||
120 | + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); | ||
121 | + ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov); | ||
122 | } | ||
123 | if (ret < 0) { | ||
124 | goto fail; | ||
125 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | ||
126 | } | ||
127 | |||
128 | /* And now we can write everything */ | ||
129 | - ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, | ||
130 | - start_buffer, start->nb_bytes); | ||
131 | + qemu_iovec_reset(&qiov); | ||
132 | + qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); | ||
133 | + ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); | ||
134 | if (ret < 0) { | ||
135 | goto fail; | ||
136 | } | ||
137 | |||
138 | - ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, | ||
139 | - end_buffer, end->nb_bytes); | ||
140 | + qemu_iovec_reset(&qiov); | ||
141 | + qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); | ||
142 | + ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov); | ||
143 | fail: | ||
144 | qemu_co_mutex_lock(&s->lock); | ||
145 | |||
146 | @@ -XXX,XX +XXX,XX @@ fail: | ||
147 | } | ||
148 | |||
149 | qemu_vfree(start_buffer); | ||
150 | + qemu_iovec_destroy(&qiov); | ||
151 | return ret; | 21 | return ret; |
152 | } | 22 | } |
153 | 23 | ||
24 | -static int vmdk_is_cid_valid(BlockDriverState *bs) | ||
25 | +static int coroutine_fn vmdk_is_cid_valid(BlockDriverState *bs) | ||
26 | { | ||
27 | BDRVVmdkState *s = bs->opaque; | ||
28 | uint32_t cur_pcid; | ||
154 | -- | 29 | -- |
155 | 1.8.3.1 | 30 | 2.40.0 |
156 | |||
157 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
2 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
3 | --- | ||
4 | block/qed-cluster.c | 39 ++++++++++++++++++++++----------------- | ||
5 | block/qed.c | 24 +++++++++++------------- | ||
6 | block/qed.h | 4 ++-- | ||
7 | 3 files changed, 35 insertions(+), 32 deletions(-) | ||
8 | 1 | ||
9 | diff --git a/block/qed-cluster.c b/block/qed-cluster.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/block/qed-cluster.c | ||
12 | +++ b/block/qed-cluster.c | ||
13 | @@ -XXX,XX +XXX,XX @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s, | ||
14 | * @s: QED state | ||
15 | * @request: L2 cache entry | ||
16 | * @pos: Byte position in device | ||
17 | - * @len: Number of bytes | ||
18 | - * @cb: Completion function | ||
19 | - * @opaque: User data for completion function | ||
20 | + * @len: Number of bytes (may be shortened on return) | ||
21 | + * @img_offset: Contains offset in the image file on success | ||
22 | * | ||
23 | * This function translates a position in the block device to an offset in the | ||
24 | - * image file. It invokes the cb completion callback to report back the | ||
25 | - * translated offset or unallocated range in the image file. | ||
26 | + * image file. The translated offset or unallocated range in the image file is | ||
27 | + * reported back in *img_offset and *len. | ||
28 | * | ||
29 | * If the L2 table exists, request->l2_table points to the L2 table cache entry | ||
30 | * and the caller must free the reference when they are finished. The cache | ||
31 | * entry is exposed in this way to avoid callers having to read the L2 table | ||
32 | * again later during request processing. If request->l2_table is non-NULL it | ||
33 | * will be unreferenced before taking on the new cache entry. | ||
34 | + * | ||
35 | + * On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous | ||
36 | + * range in the image file. | ||
37 | + * | ||
38 | + * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1 | ||
39 | + * table offset, respectively. len is number of contiguous unallocated bytes. | ||
40 | */ | ||
41 | -void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
42 | - size_t len, QEDFindClusterFunc *cb, void *opaque) | ||
43 | +int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
44 | + size_t *len, uint64_t *img_offset) | ||
45 | { | ||
46 | uint64_t l2_offset; | ||
47 | uint64_t offset = 0; | ||
48 | @@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
49 | /* Limit length to L2 boundary. Requests are broken up at the L2 boundary | ||
50 | * so that a request acts on one L2 table at a time. | ||
51 | */ | ||
52 | - len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos); | ||
53 | + *len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos); | ||
54 | |||
55 | l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)]; | ||
56 | if (qed_offset_is_unalloc_cluster(l2_offset)) { | ||
57 | - cb(opaque, QED_CLUSTER_L1, 0, len); | ||
58 | - return; | ||
59 | + *img_offset = 0; | ||
60 | + return QED_CLUSTER_L1; | ||
61 | } | ||
62 | if (!qed_check_table_offset(s, l2_offset)) { | ||
63 | - cb(opaque, -EINVAL, 0, 0); | ||
64 | - return; | ||
65 | + *img_offset = *len = 0; | ||
66 | + return -EINVAL; | ||
67 | } | ||
68 | |||
69 | ret = qed_read_l2_table(s, request, l2_offset); | ||
70 | @@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
71 | } | ||
72 | |||
73 | index = qed_l2_index(s, pos); | ||
74 | - n = qed_bytes_to_clusters(s, | ||
75 | - qed_offset_into_cluster(s, pos) + len); | ||
76 | + n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len); | ||
77 | n = qed_count_contiguous_clusters(s, request->l2_table->table, | ||
78 | index, n, &offset); | ||
79 | |||
80 | @@ -XXX,XX +XXX,XX @@ void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
81 | ret = -EINVAL; | ||
82 | } | ||
83 | |||
84 | - len = MIN(len, | ||
85 | - n * s->header.cluster_size - qed_offset_into_cluster(s, pos)); | ||
86 | + *len = MIN(*len, | ||
87 | + n * s->header.cluster_size - qed_offset_into_cluster(s, pos)); | ||
88 | |||
89 | out: | ||
90 | - cb(opaque, ret, offset, len); | ||
91 | + *img_offset = offset; | ||
92 | qed_release(s); | ||
93 | + return ret; | ||
94 | } | ||
95 | diff --git a/block/qed.c b/block/qed.c | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/block/qed.c | ||
98 | +++ b/block/qed.c | ||
99 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, | ||
100 | .file = file, | ||
101 | }; | ||
102 | QEDRequest request = { .l2_table = NULL }; | ||
103 | + uint64_t offset; | ||
104 | + int ret; | ||
105 | |||
106 | - qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb); | ||
107 | + ret = qed_find_cluster(s, &request, cb.pos, &len, &offset); | ||
108 | + qed_is_allocated_cb(&cb, ret, offset, len); | ||
109 | |||
110 | - /* Now sleep if the callback wasn't invoked immediately */ | ||
111 | - while (cb.status == BDRV_BLOCK_OFFSET_MASK) { | ||
112 | - cb.co = qemu_coroutine_self(); | ||
113 | - qemu_coroutine_yield(); | ||
114 | - } | ||
115 | + /* The callback was invoked immediately */ | ||
116 | + assert(cb.status != BDRV_BLOCK_OFFSET_MASK); | ||
117 | |||
118 | qed_unref_l2_cache_entry(request.l2_table); | ||
119 | |||
120 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | ||
121 | * or -errno | ||
122 | * @offset: Cluster offset in bytes | ||
123 | * @len: Length in bytes | ||
124 | - * | ||
125 | - * Callback from qed_find_cluster(). | ||
126 | */ | ||
127 | static void qed_aio_write_data(void *opaque, int ret, | ||
128 | uint64_t offset, size_t len) | ||
129 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_data(void *opaque, int ret, | ||
130 | * or -errno | ||
131 | * @offset: Cluster offset in bytes | ||
132 | * @len: Length in bytes | ||
133 | - * | ||
134 | - * Callback from qed_find_cluster(). | ||
135 | */ | ||
136 | static void qed_aio_read_data(void *opaque, int ret, | ||
137 | uint64_t offset, size_t len) | ||
138 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret) | ||
139 | BDRVQEDState *s = acb_to_s(acb); | ||
140 | QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ? | ||
141 | qed_aio_write_data : qed_aio_read_data; | ||
142 | + uint64_t offset; | ||
143 | + size_t len; | ||
144 | |||
145 | trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); | ||
146 | |||
147 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb, int ret) | ||
148 | } | ||
149 | |||
150 | /* Find next cluster and start I/O */ | ||
151 | - qed_find_cluster(s, &acb->request, | ||
152 | - acb->cur_pos, acb->end_pos - acb->cur_pos, | ||
153 | - io_fn, acb); | ||
154 | + len = acb->end_pos - acb->cur_pos; | ||
155 | + ret = qed_find_cluster(s, &acb->request, acb->cur_pos, &len, &offset); | ||
156 | + io_fn(acb, ret, offset, len); | ||
157 | } | ||
158 | |||
159 | static BlockAIOCB *qed_aio_setup(BlockDriverState *bs, | ||
160 | diff --git a/block/qed.h b/block/qed.h | ||
161 | index XXXXXXX..XXXXXXX 100644 | ||
162 | --- a/block/qed.h | ||
163 | +++ b/block/qed.h | ||
164 | @@ -XXX,XX +XXX,XX @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | ||
165 | /** | ||
166 | * Cluster functions | ||
167 | */ | ||
168 | -void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
169 | - size_t len, QEDFindClusterFunc *cb, void *opaque); | ||
170 | +int qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, | ||
171 | + size_t *len, uint64_t *img_offset); | ||
172 | |||
173 | /** | ||
174 | * Consistency check | ||
175 | -- | ||
176 | 1.8.3.1 | ||
177 | |||
178 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
2 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
3 | --- | ||
4 | block/qed.c | 32 ++++++++++++-------------------- | ||
5 | 1 file changed, 12 insertions(+), 20 deletions(-) | ||
6 | 1 | ||
7 | diff --git a/block/qed.c b/block/qed.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/block/qed.c | ||
10 | +++ b/block/qed.c | ||
11 | @@ -XXX,XX +XXX,XX @@ int qed_write_header_sync(BDRVQEDState *s) | ||
12 | * This function only updates known header fields in-place and does not affect | ||
13 | * extra data after the QED header. | ||
14 | */ | ||
15 | -static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb, | ||
16 | - void *opaque) | ||
17 | +static int qed_write_header(BDRVQEDState *s) | ||
18 | { | ||
19 | /* We must write full sectors for O_DIRECT but cannot necessarily generate | ||
20 | * the data following the header if an unrecognized compat feature is | ||
21 | @@ -XXX,XX +XXX,XX @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb, | ||
22 | ret = 0; | ||
23 | out: | ||
24 | qemu_vfree(buf); | ||
25 | - cb(opaque, ret); | ||
26 | + return ret; | ||
27 | } | ||
28 | |||
29 | static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size) | ||
30 | @@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) | ||
31 | } | ||
32 | } | ||
33 | |||
34 | -static void qed_finish_clear_need_check(void *opaque, int ret) | ||
35 | -{ | ||
36 | - /* Do nothing */ | ||
37 | -} | ||
38 | - | ||
39 | -static void qed_flush_after_clear_need_check(void *opaque, int ret) | ||
40 | -{ | ||
41 | - BDRVQEDState *s = opaque; | ||
42 | - | ||
43 | - bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s); | ||
44 | - | ||
45 | - /* No need to wait until flush completes */ | ||
46 | - qed_unplug_allocating_write_reqs(s); | ||
47 | -} | ||
48 | - | ||
49 | static void qed_clear_need_check(void *opaque, int ret) | ||
50 | { | ||
51 | BDRVQEDState *s = opaque; | ||
52 | @@ -XXX,XX +XXX,XX @@ static void qed_clear_need_check(void *opaque, int ret) | ||
53 | } | ||
54 | |||
55 | s->header.features &= ~QED_F_NEED_CHECK; | ||
56 | - qed_write_header(s, qed_flush_after_clear_need_check, s); | ||
57 | + ret = qed_write_header(s); | ||
58 | + (void) ret; | ||
59 | + | ||
60 | + qed_unplug_allocating_write_reqs(s); | ||
61 | + | ||
62 | + ret = bdrv_flush(s->bs); | ||
63 | + (void) ret; | ||
64 | } | ||
65 | |||
66 | static void qed_need_check_timer_cb(void *opaque) | ||
67 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
68 | { | ||
69 | BDRVQEDState *s = acb_to_s(acb); | ||
70 | BlockCompletionFunc *cb; | ||
71 | + int ret; | ||
72 | |||
73 | /* Cancel timer when the first allocating request comes in */ | ||
74 | if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { | ||
75 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
76 | |||
77 | if (qed_should_set_need_check(s)) { | ||
78 | s->header.features |= QED_F_NEED_CHECK; | ||
79 | - qed_write_header(s, cb, acb); | ||
80 | + ret = qed_write_header(s); | ||
81 | + cb(acb, ret); | ||
82 | } else { | ||
83 | cb(acb, 0); | ||
84 | } | ||
85 | -- | ||
86 | 1.8.3.1 | ||
87 | |||
88 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Note that this code is generally not running in coroutine context, so | ||
2 | this is an actual blocking synchronous operation. We'll fix this in a | ||
3 | moment. | ||
4 | 1 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | --- | ||
8 | block/qed.c | 61 +++++++++++++++++++------------------------------------------ | ||
9 | 1 file changed, 19 insertions(+), 42 deletions(-) | ||
10 | |||
11 | diff --git a/block/qed.c b/block/qed.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/block/qed.c | ||
14 | +++ b/block/qed.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_start_io(QEDAIOCB *acb) | ||
16 | qed_aio_next_io(acb, 0); | ||
17 | } | ||
18 | |||
19 | -static void qed_aio_next_io_cb(void *opaque, int ret) | ||
20 | -{ | ||
21 | - QEDAIOCB *acb = opaque; | ||
22 | - | ||
23 | - qed_aio_next_io(acb, ret); | ||
24 | -} | ||
25 | - | ||
26 | static void qed_plug_allocating_write_reqs(BDRVQEDState *s) | ||
27 | { | ||
28 | assert(!s->allocating_write_reqs_plugged); | ||
29 | @@ -XXX,XX +XXX,XX @@ err: | ||
30 | qed_aio_complete(acb, ret); | ||
31 | } | ||
32 | |||
33 | -static void qed_aio_write_l2_update_cb(void *opaque, int ret) | ||
34 | -{ | ||
35 | - QEDAIOCB *acb = opaque; | ||
36 | - qed_aio_write_l2_update(acb, ret, acb->cur_cluster); | ||
37 | -} | ||
38 | - | ||
39 | -/** | ||
40 | - * Flush new data clusters before updating the L2 table | ||
41 | - * | ||
42 | - * This flush is necessary when a backing file is in use. A crash during an | ||
43 | - * allocating write could result in empty clusters in the image. If the write | ||
44 | - * only touched a subregion of the cluster, then backing image sectors have | ||
45 | - * been lost in the untouched region. The solution is to flush after writing a | ||
46 | - * new data cluster and before updating the L2 table. | ||
47 | - */ | ||
48 | -static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) | ||
49 | -{ | ||
50 | - QEDAIOCB *acb = opaque; | ||
51 | - BDRVQEDState *s = acb_to_s(acb); | ||
52 | - | ||
53 | - if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) { | ||
54 | - qed_aio_complete(acb, -EIO); | ||
55 | - } | ||
56 | -} | ||
57 | - | ||
58 | /** | ||
59 | * Write data to the image file | ||
60 | */ | ||
61 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret) | ||
62 | BDRVQEDState *s = acb_to_s(acb); | ||
63 | uint64_t offset = acb->cur_cluster + | ||
64 | qed_offset_into_cluster(s, acb->cur_pos); | ||
65 | - BlockCompletionFunc *next_fn; | ||
66 | |||
67 | trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size); | ||
68 | |||
69 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret) | ||
70 | return; | ||
71 | } | ||
72 | |||
73 | + BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); | ||
74 | + ret = bdrv_pwritev(s->bs->file, offset, &acb->cur_qiov); | ||
75 | + if (ret >= 0) { | ||
76 | + ret = 0; | ||
77 | + } | ||
78 | + | ||
79 | if (acb->find_cluster_ret == QED_CLUSTER_FOUND) { | ||
80 | - next_fn = qed_aio_next_io_cb; | ||
81 | + qed_aio_next_io(acb, ret); | ||
82 | } else { | ||
83 | if (s->bs->backing) { | ||
84 | - next_fn = qed_aio_write_flush_before_l2_update; | ||
85 | - } else { | ||
86 | - next_fn = qed_aio_write_l2_update_cb; | ||
87 | + /* | ||
88 | + * Flush new data clusters before updating the L2 table | ||
89 | + * | ||
90 | + * This flush is necessary when a backing file is in use. A crash | ||
91 | + * during an allocating write could result in empty clusters in the | ||
92 | + * image. If the write only touched a subregion of the cluster, | ||
93 | + * then backing image sectors have been lost in the untouched | ||
94 | + * region. The solution is to flush after writing a new data | ||
95 | + * cluster and before updating the L2 table. | ||
96 | + */ | ||
97 | + ret = bdrv_flush(s->bs->file->bs); | ||
98 | } | ||
99 | + qed_aio_write_l2_update(acb, ret, acb->cur_cluster); | ||
100 | } | ||
101 | - | ||
102 | - BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); | ||
103 | - bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, | ||
104 | - &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, | ||
105 | - next_fn, acb); | ||
106 | } | ||
107 | |||
108 | /** | ||
109 | -- | ||
110 | 1.8.3.1 | ||
111 | |||
112 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | qed_commit_l2_update() is unconditionally called at the end of | ||
2 | qed_aio_write_l1_update(). Inline it. | ||
3 | 1 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | --- | ||
7 | block/qed.c | 36 ++++++++++++++---------------------- | ||
8 | 1 file changed, 14 insertions(+), 22 deletions(-) | ||
9 | |||
10 | diff --git a/block/qed.c b/block/qed.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/block/qed.c | ||
13 | +++ b/block/qed.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) | ||
15 | } | ||
16 | |||
17 | /** | ||
18 | - * Commit the current L2 table to the cache | ||
19 | + * Update L1 table with new L2 table offset and write it out | ||
20 | */ | ||
21 | -static void qed_commit_l2_update(void *opaque, int ret) | ||
22 | +static void qed_aio_write_l1_update(void *opaque, int ret) | ||
23 | { | ||
24 | QEDAIOCB *acb = opaque; | ||
25 | BDRVQEDState *s = acb_to_s(acb); | ||
26 | CachedL2Table *l2_table = acb->request.l2_table; | ||
27 | uint64_t l2_offset = l2_table->offset; | ||
28 | + int index; | ||
29 | + | ||
30 | + if (ret) { | ||
31 | + qed_aio_complete(acb, ret); | ||
32 | + return; | ||
33 | + } | ||
34 | |||
35 | + index = qed_l1_index(s, acb->cur_pos); | ||
36 | + s->l1_table->offsets[index] = l2_table->offset; | ||
37 | + | ||
38 | + ret = qed_write_l1_table(s, index, 1); | ||
39 | + | ||
40 | + /* Commit the current L2 table to the cache */ | ||
41 | qed_commit_l2_cache_entry(&s->l2_cache, l2_table); | ||
42 | |||
43 | /* This is guaranteed to succeed because we just committed the entry to the | ||
44 | @@ -XXX,XX +XXX,XX @@ static void qed_commit_l2_update(void *opaque, int ret) | ||
45 | qed_aio_next_io(acb, ret); | ||
46 | } | ||
47 | |||
48 | -/** | ||
49 | - * Update L1 table with new L2 table offset and write it out | ||
50 | - */ | ||
51 | -static void qed_aio_write_l1_update(void *opaque, int ret) | ||
52 | -{ | ||
53 | - QEDAIOCB *acb = opaque; | ||
54 | - BDRVQEDState *s = acb_to_s(acb); | ||
55 | - int index; | ||
56 | - | ||
57 | - if (ret) { | ||
58 | - qed_aio_complete(acb, ret); | ||
59 | - return; | ||
60 | - } | ||
61 | - | ||
62 | - index = qed_l1_index(s, acb->cur_pos); | ||
63 | - s->l1_table->offsets[index] = acb->request.l2_table->offset; | ||
64 | - | ||
65 | - ret = qed_write_l1_table(s, index, 1); | ||
66 | - qed_commit_l2_update(acb, ret); | ||
67 | -} | ||
68 | |||
69 | /** | ||
70 | * Update L2 table with new cluster offsets and write them out | ||
71 | -- | ||
72 | 1.8.3.1 | ||
73 | |||
74 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but | ||
2 | just return an error code and let the caller handle it. | ||
3 | 1 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | --- | ||
7 | block/qed.c | 19 +++++++++---------- | ||
8 | 1 file changed, 9 insertions(+), 10 deletions(-) | ||
9 | |||
10 | diff --git a/block/qed.c b/block/qed.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/block/qed.c | ||
13 | +++ b/block/qed.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) | ||
15 | /** | ||
16 | * Update L1 table with new L2 table offset and write it out | ||
17 | */ | ||
18 | -static void qed_aio_write_l1_update(void *opaque, int ret) | ||
19 | +static int qed_aio_write_l1_update(QEDAIOCB *acb) | ||
20 | { | ||
21 | - QEDAIOCB *acb = opaque; | ||
22 | BDRVQEDState *s = acb_to_s(acb); | ||
23 | CachedL2Table *l2_table = acb->request.l2_table; | ||
24 | uint64_t l2_offset = l2_table->offset; | ||
25 | - int index; | ||
26 | - | ||
27 | - if (ret) { | ||
28 | - qed_aio_complete(acb, ret); | ||
29 | - return; | ||
30 | - } | ||
31 | + int index, ret; | ||
32 | |||
33 | index = qed_l1_index(s, acb->cur_pos); | ||
34 | s->l1_table->offsets[index] = l2_table->offset; | ||
35 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l1_update(void *opaque, int ret) | ||
36 | acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); | ||
37 | assert(acb->request.l2_table != NULL); | ||
38 | |||
39 | - qed_aio_next_io(acb, ret); | ||
40 | + return ret; | ||
41 | } | ||
42 | |||
43 | |||
44 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) | ||
45 | if (need_alloc) { | ||
46 | /* Write out the whole new L2 table */ | ||
47 | ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true); | ||
48 | - qed_aio_write_l1_update(acb, ret); | ||
49 | + if (ret) { | ||
50 | + goto err; | ||
51 | + } | ||
52 | + ret = qed_aio_write_l1_update(acb); | ||
53 | + qed_aio_next_io(acb, ret); | ||
54 | + | ||
55 | } else { | ||
56 | /* Write out only the updated part of the L2 table */ | ||
57 | ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, | ||
58 | -- | ||
59 | 1.8.3.1 | ||
60 | |||
61 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but | ||
2 | just return an error code and let the caller handle it. | ||
3 | 1 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | --- | ||
7 | block/qed.c | 43 ++++++++++++++++++++++++++----------------- | ||
8 | 1 file changed, 26 insertions(+), 17 deletions(-) | ||
9 | |||
10 | diff --git a/block/qed.c b/block/qed.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/block/qed.c | ||
13 | +++ b/block/qed.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_l1_update(QEDAIOCB *acb) | ||
15 | /** | ||
16 | * Update L2 table with new cluster offsets and write them out | ||
17 | */ | ||
18 | -static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) | ||
19 | +static int qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset) | ||
20 | { | ||
21 | BDRVQEDState *s = acb_to_s(acb); | ||
22 | bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; | ||
23 | - int index; | ||
24 | - | ||
25 | - if (ret) { | ||
26 | - goto err; | ||
27 | - } | ||
28 | + int index, ret; | ||
29 | |||
30 | if (need_alloc) { | ||
31 | qed_unref_l2_cache_entry(acb->request.l2_table); | ||
32 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) | ||
33 | /* Write out the whole new L2 table */ | ||
34 | ret = qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true); | ||
35 | if (ret) { | ||
36 | - goto err; | ||
37 | + return ret; | ||
38 | } | ||
39 | - ret = qed_aio_write_l1_update(acb); | ||
40 | - qed_aio_next_io(acb, ret); | ||
41 | - | ||
42 | + return qed_aio_write_l1_update(acb); | ||
43 | } else { | ||
44 | /* Write out only the updated part of the L2 table */ | ||
45 | ret = qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, | ||
46 | false); | ||
47 | - qed_aio_next_io(acb, ret); | ||
48 | + if (ret) { | ||
49 | + return ret; | ||
50 | + } | ||
51 | } | ||
52 | - return; | ||
53 | - | ||
54 | -err: | ||
55 | - qed_aio_complete(acb, ret); | ||
56 | + return 0; | ||
57 | } | ||
58 | |||
59 | /** | ||
60 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret) | ||
61 | */ | ||
62 | ret = bdrv_flush(s->bs->file->bs); | ||
63 | } | ||
64 | - qed_aio_write_l2_update(acb, ret, acb->cur_cluster); | ||
65 | + if (ret) { | ||
66 | + goto err; | ||
67 | + } | ||
68 | + ret = qed_aio_write_l2_update(acb, acb->cur_cluster); | ||
69 | + if (ret) { | ||
70 | + goto err; | ||
71 | + } | ||
72 | + qed_aio_next_io(acb, 0); | ||
73 | } | ||
74 | + return; | ||
75 | + | ||
76 | +err: | ||
77 | + qed_aio_complete(acb, ret); | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_zero_cluster(void *opaque, int ret) | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | - qed_aio_write_l2_update(acb, 0, 1); | ||
86 | + ret = qed_aio_write_l2_update(acb, 1); | ||
87 | + if (ret < 0) { | ||
88 | + qed_aio_complete(acb, ret); | ||
89 | + return; | ||
90 | + } | ||
91 | + qed_aio_next_io(acb, 0); | ||
92 | } | ||
93 | |||
94 | /** | ||
95 | -- | ||
96 | 1.8.3.1 | ||
97 | |||
98 | diff view generated by jsdifflib |
1 | Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but | 1 | From: Wang Liang <wangliangzz@inspur.com> |
---|---|---|---|
2 | just return an error code and let the caller handle it. | ||
3 | 2 | ||
4 | While refactoring qed_aio_write_alloc() to accommodate the change, | 3 | hmp_commit() calls blk_is_available() from a non-coroutine context (and in |
5 | qed_aio_write_zero_cluster() ended up with a single line, so I chose to | 4 | the main loop). blk_is_available() is a co_wrapper_mixed_bdrv_rdlock |
6 | inline that line and remove the function completely. | 5 | function, and in the non-coroutine context it calls AIO_WAIT_WHILE(), |
6 | which crashes if the aio_context lock is not taken before. | ||
7 | 7 | ||
8 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1615 | ||
9 | Signed-off-by: Wang Liang <wangliangzz@inspur.com> | ||
10 | Message-Id: <20230424103902.45265-1-wangliangzz@126.com> | ||
11 | Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> | ||
12 | Reviewed-by: Kevin Wolf <kwolf@redhat.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | 14 | --- |
11 | block/qed.c | 58 +++++++++++++++++++++------------------------------------- | 15 | block/monitor/block-hmp-cmds.c | 10 ++++++---- |
12 | 1 file changed, 21 insertions(+), 37 deletions(-) | 16 | 1 file changed, 6 insertions(+), 4 deletions(-) |
13 | 17 | ||
14 | diff --git a/block/qed.c b/block/qed.c | 18 | diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c |
15 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/qed.c | 20 | --- a/block/monitor/block-hmp-cmds.c |
17 | +++ b/block/qed.c | 21 | +++ b/block/monitor/block-hmp-cmds.c |
18 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_main(QEDAIOCB *acb) | 22 | @@ -XXX,XX +XXX,XX @@ void hmp_commit(Monitor *mon, const QDict *qdict) |
19 | /** | 23 | error_report("Device '%s' not found", device); |
20 | * Populate untouched regions of new data cluster | ||
21 | */ | ||
22 | -static void qed_aio_write_cow(void *opaque, int ret) | ||
23 | +static int qed_aio_write_cow(QEDAIOCB *acb) | ||
24 | { | ||
25 | - QEDAIOCB *acb = opaque; | ||
26 | BDRVQEDState *s = acb_to_s(acb); | ||
27 | uint64_t start, len, offset; | ||
28 | + int ret; | ||
29 | |||
30 | /* Populate front untouched region of new data cluster */ | ||
31 | start = qed_start_of_cluster(s, acb->cur_pos); | ||
32 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_cow(void *opaque, int ret) | ||
33 | |||
34 | trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); | ||
35 | ret = qed_copy_from_backing_file(s, start, len, acb->cur_cluster); | ||
36 | - if (ret) { | ||
37 | - qed_aio_complete(acb, ret); | ||
38 | - return; | ||
39 | + if (ret < 0) { | ||
40 | + return ret; | ||
41 | } | ||
42 | |||
43 | /* Populate back untouched region of new data cluster */ | ||
44 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_cow(void *opaque, int ret) | ||
45 | |||
46 | trace_qed_aio_write_postfill(s, acb, start, len, offset); | ||
47 | ret = qed_copy_from_backing_file(s, start, len, offset); | ||
48 | - if (ret) { | ||
49 | - qed_aio_complete(acb, ret); | ||
50 | - return; | ||
51 | - } | ||
52 | - | ||
53 | - ret = qed_aio_write_main(acb); | ||
54 | if (ret < 0) { | ||
55 | - qed_aio_complete(acb, ret); | ||
56 | - return; | ||
57 | + return ret; | ||
58 | } | ||
59 | - qed_aio_next_io(acb, 0); | ||
60 | + | ||
61 | + return qed_aio_write_main(acb); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | @@ -XXX,XX +XXX,XX @@ static bool qed_should_set_need_check(BDRVQEDState *s) | ||
66 | return !(s->header.features & QED_F_NEED_CHECK); | ||
67 | } | ||
68 | |||
69 | -static void qed_aio_write_zero_cluster(void *opaque, int ret) | ||
70 | -{ | ||
71 | - QEDAIOCB *acb = opaque; | ||
72 | - | ||
73 | - if (ret) { | ||
74 | - qed_aio_complete(acb, ret); | ||
75 | - return; | ||
76 | - } | ||
77 | - | ||
78 | - ret = qed_aio_write_l2_update(acb, 1); | ||
79 | - if (ret < 0) { | ||
80 | - qed_aio_complete(acb, ret); | ||
81 | - return; | ||
82 | - } | ||
83 | - qed_aio_next_io(acb, 0); | ||
84 | -} | ||
85 | - | ||
86 | /** | ||
87 | * Write new data cluster | ||
88 | * | ||
89 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_zero_cluster(void *opaque, int ret) | ||
90 | static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
91 | { | ||
92 | BDRVQEDState *s = acb_to_s(acb); | ||
93 | - BlockCompletionFunc *cb; | ||
94 | int ret; | ||
95 | |||
96 | /* Cancel timer when the first allocating request comes in */ | ||
97 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
98 | qed_aio_start_io(acb); | ||
99 | return; | 24 | return; |
100 | } | 25 | } |
101 | - | 26 | - if (!blk_is_available(blk)) { |
102 | - cb = qed_aio_write_zero_cluster; | 27 | - error_report("Device '%s' has no medium", device); |
103 | } else { | 28 | - return; |
104 | - cb = qed_aio_write_cow; | 29 | - } |
105 | acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); | 30 | |
106 | } | 31 | bs = bdrv_skip_implicit_filters(blk_bs(blk)); |
107 | 32 | aio_context = bdrv_get_aio_context(bs); | |
108 | if (qed_should_set_need_check(s)) { | 33 | aio_context_acquire(aio_context); |
109 | s->header.features |= QED_F_NEED_CHECK; | 34 | |
110 | ret = qed_write_header(s); | 35 | + if (!blk_is_available(blk)) { |
111 | - cb(acb, ret); | 36 | + error_report("Device '%s' has no medium", device); |
112 | + if (ret < 0) { | 37 | + aio_context_release(aio_context); |
113 | + qed_aio_complete(acb, ret); | ||
114 | + return; | 38 | + return; |
115 | + } | 39 | + } |
116 | + } | ||
117 | + | 40 | + |
118 | + if (acb->flags & QED_AIOCB_ZERO) { | 41 | ret = bdrv_commit(bs); |
119 | + ret = qed_aio_write_l2_update(acb, 1); | 42 | |
120 | } else { | 43 | aio_context_release(aio_context); |
121 | - cb(acb, 0); | ||
122 | + ret = qed_aio_write_cow(acb); | ||
123 | } | ||
124 | + if (ret < 0) { | ||
125 | + qed_aio_complete(acb, ret); | ||
126 | + return; | ||
127 | + } | ||
128 | + qed_aio_next_io(acb, 0); | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | -- | 44 | -- |
133 | 1.8.3.1 | 45 | 2.40.0 |
134 | |||
135 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Don't recurse into qed_aio_next_io() and qed_aio_complete() here, but | ||
2 | just return an error code and let the caller handle it. | ||
3 | 1 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | --- | ||
7 | block/qed.c | 43 ++++++++++++++++++++----------------------- | ||
8 | 1 file changed, 20 insertions(+), 23 deletions(-) | ||
9 | |||
10 | diff --git a/block/qed.c b/block/qed.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/block/qed.c | ||
13 | +++ b/block/qed.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static bool qed_should_set_need_check(BDRVQEDState *s) | ||
15 | * | ||
16 | * This path is taken when writing to previously unallocated clusters. | ||
17 | */ | ||
18 | -static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
19 | +static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
20 | { | ||
21 | BDRVQEDState *s = acb_to_s(acb); | ||
22 | int ret; | ||
23 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
24 | } | ||
25 | if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) || | ||
26 | s->allocating_write_reqs_plugged) { | ||
27 | - return; /* wait for existing request to finish */ | ||
28 | + return -EINPROGRESS; /* wait for existing request to finish */ | ||
29 | } | ||
30 | |||
31 | acb->cur_nclusters = qed_bytes_to_clusters(s, | ||
32 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
33 | if (acb->flags & QED_AIOCB_ZERO) { | ||
34 | /* Skip ahead if the clusters are already zero */ | ||
35 | if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { | ||
36 | - qed_aio_start_io(acb); | ||
37 | - return; | ||
38 | + return 0; | ||
39 | } | ||
40 | } else { | ||
41 | acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); | ||
42 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
43 | s->header.features |= QED_F_NEED_CHECK; | ||
44 | ret = qed_write_header(s); | ||
45 | if (ret < 0) { | ||
46 | - qed_aio_complete(acb, ret); | ||
47 | - return; | ||
48 | + return ret; | ||
49 | } | ||
50 | } | ||
51 | |||
52 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
53 | ret = qed_aio_write_cow(acb); | ||
54 | } | ||
55 | if (ret < 0) { | ||
56 | - qed_aio_complete(acb, ret); | ||
57 | - return; | ||
58 | + return ret; | ||
59 | } | ||
60 | - qed_aio_next_io(acb, 0); | ||
61 | + return 0; | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
66 | * | ||
67 | * This path is taken when writing to already allocated clusters. | ||
68 | */ | ||
69 | -static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | ||
70 | +static int qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | ||
71 | { | ||
72 | - int ret; | ||
73 | - | ||
74 | /* Allocate buffer for zero writes */ | ||
75 | if (acb->flags & QED_AIOCB_ZERO) { | ||
76 | struct iovec *iov = acb->qiov->iov; | ||
77 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | ||
78 | if (!iov->iov_base) { | ||
79 | iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len); | ||
80 | if (iov->iov_base == NULL) { | ||
81 | - qed_aio_complete(acb, -ENOMEM); | ||
82 | - return; | ||
83 | + return -ENOMEM; | ||
84 | } | ||
85 | memset(iov->iov_base, 0, iov->iov_len); | ||
86 | } | ||
87 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) | ||
88 | qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); | ||
89 | |||
90 | /* Do the actual write */ | ||
91 | - ret = qed_aio_write_main(acb); | ||
92 | - if (ret < 0) { | ||
93 | - qed_aio_complete(acb, ret); | ||
94 | - return; | ||
95 | - } | ||
96 | - qed_aio_next_io(acb, 0); | ||
97 | + return qed_aio_write_main(acb); | ||
98 | } | ||
99 | |||
100 | /** | ||
101 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_write_data(void *opaque, int ret, | ||
102 | |||
103 | switch (ret) { | ||
104 | case QED_CLUSTER_FOUND: | ||
105 | - qed_aio_write_inplace(acb, offset, len); | ||
106 | + ret = qed_aio_write_inplace(acb, offset, len); | ||
107 | break; | ||
108 | |||
109 | case QED_CLUSTER_L2: | ||
110 | case QED_CLUSTER_L1: | ||
111 | case QED_CLUSTER_ZERO: | ||
112 | - qed_aio_write_alloc(acb, len); | ||
113 | + ret = qed_aio_write_alloc(acb, len); | ||
114 | break; | ||
115 | |||
116 | default: | ||
117 | - qed_aio_complete(acb, ret); | ||
118 | + assert(ret < 0); | ||
119 | break; | ||
120 | } | ||
121 | + | ||
122 | + if (ret < 0) { | ||
123 | + if (ret != -EINPROGRESS) { | ||
124 | + qed_aio_complete(acb, ret); | ||
125 | + } | ||
126 | + return; | ||
127 | + } | ||
128 | + qed_aio_next_io(acb, 0); | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | -- | ||
133 | 1.8.3.1 | ||
134 | |||
135 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Now that we're running in coroutine context, the ad-hoc serialisation | ||
2 | code (which drops a request that has to wait out of coroutine context) | ||
3 | can be replaced by a CoQueue. | ||
4 | 1 | ||
5 | This means that when we resume a serialised request, it is running in | ||
6 | coroutine context again and its I/O isn't blocking any more. | ||
7 | |||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | block/qed.c | 49 +++++++++++++++++-------------------------------- | ||
12 | block/qed.h | 3 ++- | ||
13 | 2 files changed, 19 insertions(+), 33 deletions(-) | ||
14 | |||
15 | diff --git a/block/qed.c b/block/qed.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block/qed.c | ||
18 | +++ b/block/qed.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void qed_plug_allocating_write_reqs(BDRVQEDState *s) | ||
20 | |||
21 | static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) | ||
22 | { | ||
23 | - QEDAIOCB *acb; | ||
24 | - | ||
25 | assert(s->allocating_write_reqs_plugged); | ||
26 | |||
27 | s->allocating_write_reqs_plugged = false; | ||
28 | - | ||
29 | - acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); | ||
30 | - if (acb) { | ||
31 | - qed_aio_start_io(acb); | ||
32 | - } | ||
33 | + qemu_co_enter_next(&s->allocating_write_reqs); | ||
34 | } | ||
35 | |||
36 | static void qed_clear_need_check(void *opaque, int ret) | ||
37 | @@ -XXX,XX +XXX,XX @@ static void qed_need_check_timer_cb(void *opaque) | ||
38 | BDRVQEDState *s = opaque; | ||
39 | |||
40 | /* The timer should only fire when allocating writes have drained */ | ||
41 | - assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs)); | ||
42 | + assert(!s->allocating_acb); | ||
43 | |||
44 | trace_qed_need_check_timer_cb(s); | ||
45 | |||
46 | @@ -XXX,XX +XXX,XX @@ static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags, | ||
47 | int ret; | ||
48 | |||
49 | s->bs = bs; | ||
50 | - QSIMPLEQ_INIT(&s->allocating_write_reqs); | ||
51 | + qemu_co_queue_init(&s->allocating_write_reqs); | ||
52 | |||
53 | ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); | ||
54 | if (ret < 0) { | ||
55 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_complete_bh(void *opaque) | ||
56 | qed_release(s); | ||
57 | } | ||
58 | |||
59 | -static void qed_resume_alloc_bh(void *opaque) | ||
60 | -{ | ||
61 | - qed_aio_start_io(opaque); | ||
62 | -} | ||
63 | - | ||
64 | static void qed_aio_complete(QEDAIOCB *acb, int ret) | ||
65 | { | ||
66 | BDRVQEDState *s = acb_to_s(acb); | ||
67 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) | ||
68 | * next request in the queue. This ensures that we don't cycle through | ||
69 | * requests multiple times but rather finish one at a time completely. | ||
70 | */ | ||
71 | - if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { | ||
72 | - QEDAIOCB *next_acb; | ||
73 | - QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next); | ||
74 | - next_acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); | ||
75 | - if (next_acb) { | ||
76 | - aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), | ||
77 | - qed_resume_alloc_bh, next_acb); | ||
78 | + if (acb == s->allocating_acb) { | ||
79 | + s->allocating_acb = NULL; | ||
80 | + if (!qemu_co_queue_empty(&s->allocating_write_reqs)) { | ||
81 | + qemu_co_enter_next(&s->allocating_write_reqs); | ||
82 | } else if (s->header.features & QED_F_NEED_CHECK) { | ||
83 | qed_start_need_check_timer(s); | ||
84 | } | ||
85 | @@ -XXX,XX +XXX,XX @@ static int qed_aio_write_alloc(QEDAIOCB *acb, size_t len) | ||
86 | int ret; | ||
87 | |||
88 | /* Cancel timer when the first allocating request comes in */ | ||
89 | - if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { | ||
90 | + if (s->allocating_acb == NULL) { | ||
91 | qed_cancel_need_check_timer(s); | ||
92 | } | ||
93 | |||
94 | /* Freeze this request if another allocating write is in progress */ | ||
95 | - if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { | ||
96 | - QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next); | ||
97 | - } | ||
98 | - if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) || | ||
99 | - s->allocating_write_reqs_plugged) { | ||
100 | - return -EINPROGRESS; /* wait for existing request to finish */ | ||
101 | + if (s->allocating_acb != acb || s->allocating_write_reqs_plugged) { | ||
102 | + if (s->allocating_acb != NULL) { | ||
103 | + qemu_co_queue_wait(&s->allocating_write_reqs, NULL); | ||
104 | + assert(s->allocating_acb == NULL); | ||
105 | + } | ||
106 | + s->allocating_acb = acb; | ||
107 | + return -EAGAIN; /* start over with looking up table entries */ | ||
108 | } | ||
109 | |||
110 | acb->cur_nclusters = qed_bytes_to_clusters(s, | ||
111 | @@ -XXX,XX +XXX,XX @@ static void qed_aio_next_io(QEDAIOCB *acb) | ||
112 | ret = qed_aio_read_data(acb, ret, offset, len); | ||
113 | } | ||
114 | |||
115 | - if (ret < 0) { | ||
116 | - if (ret != -EINPROGRESS) { | ||
117 | - qed_aio_complete(acb, ret); | ||
118 | - } | ||
119 | + if (ret < 0 && ret != -EAGAIN) { | ||
120 | + qed_aio_complete(acb, ret); | ||
121 | return; | ||
122 | } | ||
123 | } | ||
124 | diff --git a/block/qed.h b/block/qed.h | ||
125 | index XXXXXXX..XXXXXXX 100644 | ||
126 | --- a/block/qed.h | ||
127 | +++ b/block/qed.h | ||
128 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
129 | uint32_t l2_mask; | ||
130 | |||
131 | /* Allocating write request queue */ | ||
132 | - QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs; | ||
133 | + QEDAIOCB *allocating_acb; | ||
134 | + CoQueue allocating_write_reqs; | ||
135 | bool allocating_write_reqs_plugged; | ||
136 | |||
137 | /* Periodic flush and clear need check flag */ | ||
138 | -- | ||
139 | 1.8.3.1 | ||
140 | |||
141 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This fixes the last place where we degraded from AIO to actual blocking | ||
2 | synchronous I/O requests. Putting it into a coroutine means that instead | ||
3 | of blocking, the coroutine simply yields while doing I/O. | ||
4 | 1 | ||
5 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | --- | ||
8 | block/qed.c | 33 +++++++++++++++++---------------- | ||
9 | 1 file changed, 17 insertions(+), 16 deletions(-) | ||
10 | |||
11 | diff --git a/block/qed.c b/block/qed.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/block/qed.c | ||
14 | +++ b/block/qed.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) | ||
16 | qemu_co_enter_next(&s->allocating_write_reqs); | ||
17 | } | ||
18 | |||
19 | -static void qed_clear_need_check(void *opaque, int ret) | ||
20 | +static void qed_need_check_timer_entry(void *opaque) | ||
21 | { | ||
22 | BDRVQEDState *s = opaque; | ||
23 | + int ret; | ||
24 | |||
25 | - if (ret) { | ||
26 | + /* The timer should only fire when allocating writes have drained */ | ||
27 | + assert(!s->allocating_acb); | ||
28 | + | ||
29 | + trace_qed_need_check_timer_cb(s); | ||
30 | + | ||
31 | + qed_acquire(s); | ||
32 | + qed_plug_allocating_write_reqs(s); | ||
33 | + | ||
34 | + /* Ensure writes are on disk before clearing flag */ | ||
35 | + ret = bdrv_co_flush(s->bs->file->bs); | ||
36 | + qed_release(s); | ||
37 | + if (ret < 0) { | ||
38 | qed_unplug_allocating_write_reqs(s); | ||
39 | return; | ||
40 | } | ||
41 | @@ -XXX,XX +XXX,XX @@ static void qed_clear_need_check(void *opaque, int ret) | ||
42 | |||
43 | qed_unplug_allocating_write_reqs(s); | ||
44 | |||
45 | - ret = bdrv_flush(s->bs); | ||
46 | + ret = bdrv_co_flush(s->bs); | ||
47 | (void) ret; | ||
48 | } | ||
49 | |||
50 | static void qed_need_check_timer_cb(void *opaque) | ||
51 | { | ||
52 | - BDRVQEDState *s = opaque; | ||
53 | - | ||
54 | - /* The timer should only fire when allocating writes have drained */ | ||
55 | - assert(!s->allocating_acb); | ||
56 | - | ||
57 | - trace_qed_need_check_timer_cb(s); | ||
58 | - | ||
59 | - qed_acquire(s); | ||
60 | - qed_plug_allocating_write_reqs(s); | ||
61 | - | ||
62 | - /* Ensure writes are on disk before clearing flag */ | ||
63 | - bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s); | ||
64 | - qed_release(s); | ||
65 | + Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); | ||
66 | + qemu_coroutine_enter(co); | ||
67 | } | ||
68 | |||
69 | void qed_acquire(BDRVQEDState *s) | ||
70 | -- | ||
71 | 1.8.3.1 | ||
72 | |||
73 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | All functions that are marked coroutine_fn can directly call the | ||
2 | bdrv_co_* version of functions instead of going through the wrapper. | ||
3 | 1 | ||
4 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
5 | Reviewed-by: Manos Pitsidianakis <el13635@mail.ntua.gr> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | --- | ||
8 | block/qed.c | 16 +++++++++------- | ||
9 | 1 file changed, 9 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/block/qed.c b/block/qed.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/block/qed.c | ||
14 | +++ b/block/qed.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_write_header(BDRVQEDState *s) | ||
16 | }; | ||
17 | qemu_iovec_init_external(&qiov, &iov, 1); | ||
18 | |||
19 | - ret = bdrv_preadv(s->bs->file, 0, &qiov); | ||
20 | + ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0); | ||
21 | if (ret < 0) { | ||
22 | goto out; | ||
23 | } | ||
24 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_write_header(BDRVQEDState *s) | ||
25 | /* Update header */ | ||
26 | qed_header_cpu_to_le(&s->header, (QEDHeader *) buf); | ||
27 | |||
28 | - ret = bdrv_pwritev(s->bs->file, 0, &qiov); | ||
29 | + ret = bdrv_co_pwritev(s->bs->file, 0, qiov.size, &qiov, 0); | ||
30 | if (ret < 0) { | ||
31 | goto out; | ||
32 | } | ||
33 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos, | ||
34 | qemu_iovec_concat(*backing_qiov, qiov, 0, size); | ||
35 | |||
36 | BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); | ||
37 | - ret = bdrv_preadv(s->bs->backing, pos, *backing_qiov); | ||
38 | + ret = bdrv_co_preadv(s->bs->backing, pos, size, *backing_qiov, 0); | ||
39 | if (ret < 0) { | ||
40 | return ret; | ||
41 | } | ||
42 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, | ||
43 | } | ||
44 | |||
45 | BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); | ||
46 | - ret = bdrv_pwritev(s->bs->file, offset, &qiov); | ||
47 | + ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0); | ||
48 | if (ret < 0) { | ||
49 | goto out; | ||
50 | } | ||
51 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) | ||
52 | trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size); | ||
53 | |||
54 | BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); | ||
55 | - ret = bdrv_pwritev(s->bs->file, offset, &acb->cur_qiov); | ||
56 | + ret = bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size, | ||
57 | + &acb->cur_qiov, 0); | ||
58 | if (ret < 0) { | ||
59 | return ret; | ||
60 | } | ||
61 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb) | ||
62 | * region. The solution is to flush after writing a new data | ||
63 | * cluster and before updating the L2 table. | ||
64 | */ | ||
65 | - ret = bdrv_flush(s->bs->file->bs); | ||
66 | + ret = bdrv_co_flush(s->bs->file->bs); | ||
67 | if (ret < 0) { | ||
68 | return ret; | ||
69 | } | ||
70 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret, | ||
71 | } | ||
72 | |||
73 | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | ||
74 | - ret = bdrv_preadv(bs->file, offset, &acb->cur_qiov); | ||
75 | + ret = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size, | ||
76 | + &acb->cur_qiov, 0); | ||
77 | if (ret < 0) { | ||
78 | return ret; | ||
79 | } | ||
80 | -- | ||
81 | 1.8.3.1 | ||
82 | |||
83 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: "sochin.jiang" <sochin.jiang@huawei.com> | ||
2 | 1 | ||
3 | img_commit could fall into an infinite loop calling run_block_job() if | ||
4 | its blockjob fails on any I/O error. Fix this already known problem. | ||
5 | |||
6 | Signed-off-by: sochin.jiang <sochin.jiang@huawei.com> | ||
7 | Message-id: 1497509253-28941-1-git-send-email-sochin.jiang@huawei.com | ||
8 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
9 | --- | ||
10 | blockjob.c | 4 ++-- | ||
11 | include/block/blockjob.h | 18 ++++++++++++++++++ | ||
12 | qemu-img.c | 20 +++++++++++++------- | ||
13 | 3 files changed, 33 insertions(+), 9 deletions(-) | ||
14 | |||
15 | diff --git a/blockjob.c b/blockjob.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/blockjob.c | ||
18 | +++ b/blockjob.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void block_job_resume(BlockJob *job) | ||
20 | block_job_enter(job); | ||
21 | } | ||
22 | |||
23 | -static void block_job_ref(BlockJob *job) | ||
24 | +void block_job_ref(BlockJob *job) | ||
25 | { | ||
26 | ++job->refcnt; | ||
27 | } | ||
28 | @@ -XXX,XX +XXX,XX @@ static void block_job_attached_aio_context(AioContext *new_context, | ||
29 | void *opaque); | ||
30 | static void block_job_detach_aio_context(void *opaque); | ||
31 | |||
32 | -static void block_job_unref(BlockJob *job) | ||
33 | +void block_job_unref(BlockJob *job) | ||
34 | { | ||
35 | if (--job->refcnt == 0) { | ||
36 | BlockDriverState *bs = blk_bs(job->blk); | ||
37 | diff --git a/include/block/blockjob.h b/include/block/blockjob.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/include/block/blockjob.h | ||
40 | +++ b/include/block/blockjob.h | ||
41 | @@ -XXX,XX +XXX,XX @@ void block_job_iostatus_reset(BlockJob *job); | ||
42 | BlockJobTxn *block_job_txn_new(void); | ||
43 | |||
44 | /** | ||
45 | + * block_job_ref: | ||
46 | + * | ||
47 | + * Add a reference to the BlockJob. The reference is dropped again | ||
48 | + * with block_job_unref, which frees the job when the last | ||
49 | + * reference is released. | ||
50 | + */ | ||
51 | +void block_job_ref(BlockJob *job); | ||
52 | + | ||
53 | +/** | ||
54 | + * block_job_unref: | ||
55 | + * | ||
56 | + * Release a reference that was previously acquired with block_job_ref | ||
57 | + * or block_job_create. If it's the last reference to the object, it will be | ||
58 | + * freed. | ||
59 | + */ | ||
60 | +void block_job_unref(BlockJob *job); | ||
61 | + | ||
62 | +/** | ||
63 | * block_job_txn_unref: | ||
64 | * | ||
65 | * Release a reference that was previously acquired with block_job_txn_add_job | ||
66 | diff --git a/qemu-img.c b/qemu-img.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/qemu-img.c | ||
69 | +++ b/qemu-img.c | ||
70 | @@ -XXX,XX +XXX,XX @@ static void common_block_job_cb(void *opaque, int ret) | ||
71 | static void run_block_job(BlockJob *job, Error **errp) | ||
72 | { | ||
73 | AioContext *aio_context = blk_get_aio_context(job->blk); | ||
74 | + int ret = 0; | ||
75 | |||
76 | - /* FIXME In error cases, the job simply goes away and we access a dangling | ||
77 | - * pointer below. */ | ||
78 | aio_context_acquire(aio_context); | ||
79 | + block_job_ref(job); | ||
80 | do { | ||
81 | aio_poll(aio_context, true); | ||
82 | qemu_progress_print(job->len ? | ||
83 | ((float)job->offset / job->len * 100.f) : 0.0f, 0); | ||
84 | - } while (!job->ready); | ||
85 | + } while (!job->ready && !job->completed); | ||
86 | |||
87 | - block_job_complete_sync(job, errp); | ||
88 | + if (!job->completed) { | ||
89 | + ret = block_job_complete_sync(job, errp); | ||
90 | + } else { | ||
91 | + ret = job->ret; | ||
92 | + } | ||
93 | + block_job_unref(job); | ||
94 | aio_context_release(aio_context); | ||
95 | |||
96 | - /* A block job may finish instantaneously without publishing any progress, | ||
97 | - * so just signal completion here */ | ||
98 | - qemu_progress_print(100.f, 0); | ||
99 | + /* publish completion progress only on success */ | ||
100 | + if (!ret) { | ||
101 | + qemu_progress_print(100.f, 0); | ||
102 | + } | ||
103 | } | ||
104 | |||
105 | static int img_commit(int argc, char **argv) | ||
106 | -- | ||
107 | 1.8.3.1 | ||
108 | |||
109 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Max Reitz <mreitz@redhat.com> | ||
2 | 1 | ||
3 | uri_parse(...)->scheme may be NULL. In fact, probably every field may be | ||
4 | NULL, and the callers do test this for all of the other fields but not | ||
5 | for scheme (except for block/gluster.c; block/vxhs.c does not access | ||
6 | that field at all). | ||
7 | |||
8 | We can easily fix this by using g_strcmp0() instead of strcmp(). | ||
9 | |||
10 | Cc: qemu-stable@nongnu.org | ||
11 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
12 | Message-id: 20170613205726.13544-1-mreitz@redhat.com | ||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Signed-off-by: Max Reitz <mreitz@redhat.com> | ||
15 | --- | ||
16 | block/nbd.c | 6 +++--- | ||
17 | block/nfs.c | 2 +- | ||
18 | block/sheepdog.c | 6 +++--- | ||
19 | block/ssh.c | 2 +- | ||
20 | 4 files changed, 8 insertions(+), 8 deletions(-) | ||
21 | |||
22 | diff --git a/block/nbd.c b/block/nbd.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/block/nbd.c | ||
25 | +++ b/block/nbd.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static int nbd_parse_uri(const char *filename, QDict *options) | ||
27 | } | ||
28 | |||
29 | /* transport */ | ||
30 | - if (!strcmp(uri->scheme, "nbd")) { | ||
31 | + if (!g_strcmp0(uri->scheme, "nbd")) { | ||
32 | is_unix = false; | ||
33 | - } else if (!strcmp(uri->scheme, "nbd+tcp")) { | ||
34 | + } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) { | ||
35 | is_unix = false; | ||
36 | - } else if (!strcmp(uri->scheme, "nbd+unix")) { | ||
37 | + } else if (!g_strcmp0(uri->scheme, "nbd+unix")) { | ||
38 | is_unix = true; | ||
39 | } else { | ||
40 | ret = -EINVAL; | ||
41 | diff --git a/block/nfs.c b/block/nfs.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/block/nfs.c | ||
44 | +++ b/block/nfs.c | ||
45 | @@ -XXX,XX +XXX,XX @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) | ||
46 | error_setg(errp, "Invalid URI specified"); | ||
47 | goto out; | ||
48 | } | ||
49 | - if (strcmp(uri->scheme, "nfs") != 0) { | ||
50 | + if (g_strcmp0(uri->scheme, "nfs") != 0) { | ||
51 | error_setg(errp, "URI scheme must be 'nfs'"); | ||
52 | goto out; | ||
53 | } | ||
54 | diff --git a/block/sheepdog.c b/block/sheepdog.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/block/sheepdog.c | ||
57 | +++ b/block/sheepdog.c | ||
58 | @@ -XXX,XX +XXX,XX @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename, | ||
59 | } | ||
60 | |||
61 | /* transport */ | ||
62 | - if (!strcmp(uri->scheme, "sheepdog")) { | ||
63 | + if (!g_strcmp0(uri->scheme, "sheepdog")) { | ||
64 | is_unix = false; | ||
65 | - } else if (!strcmp(uri->scheme, "sheepdog+tcp")) { | ||
66 | + } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) { | ||
67 | is_unix = false; | ||
68 | - } else if (!strcmp(uri->scheme, "sheepdog+unix")) { | ||
69 | + } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) { | ||
70 | is_unix = true; | ||
71 | } else { | ||
72 | error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp'," | ||
73 | diff --git a/block/ssh.c b/block/ssh.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/block/ssh.c | ||
76 | +++ b/block/ssh.c | ||
77 | @@ -XXX,XX +XXX,XX @@ static int parse_uri(const char *filename, QDict *options, Error **errp) | ||
78 | return -EINVAL; | ||
79 | } | ||
80 | |||
81 | - if (strcmp(uri->scheme, "ssh") != 0) { | ||
82 | + if (g_strcmp0(uri->scheme, "ssh") != 0) { | ||
83 | error_setg(errp, "URI scheme must be 'ssh'"); | ||
84 | goto err; | ||
85 | } | ||
86 | -- | ||
87 | 1.8.3.1 | ||
88 | |||
89 | diff view generated by jsdifflib |