--- New series (2021-09-15 block pull request) ---

The following changes since commit 0b6206b9c6825619cd721085fe082d7a0abc9af4:

  Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210914-4' into staging (2021-09-15 13:27:49 +0100)

are available in the Git repository at:

  https://github.com/XanClic/qemu.git tags/pull-block-2021-09-15

for you to fetch changes up to 1899bf47375ad40555dcdff12ba49b4b8b82df38:

  qemu-img: Add -F shorthand to convert (2021-09-15 18:42:38 +0200)

----------------------------------------------------------------
Block patches:
- Block-status cache for data regions
- qcow2 optimization (when using subclusters)
- iotests delinting, and let 297 (lint checker) cover named iotests
- qcow2 check improvements
- Added -F (target backing file format) option to qemu-img convert
- Mirror job fix
- Fix for when a migration is initiated while a backup job runs
- Fix for uncached qemu-img convert to a volume with 4k sectors (for an
  unaligned image)
- Minor gluster driver fix

----------------------------------------------------------------
Eric Blake (1):
  qemu-img: Add -F shorthand to convert

Hanna Reitz (15):
  gluster: Align block-status tail
  block: Drop BDS comment regarding bdrv_append()
  block: block-status cache for data regions
  block: Clarify that @bytes is no limit on *pnum
  block/file-posix: Do not force-cap *pnum
  block/gluster: Do not force-cap *pnum
  block/iscsi: Do not force-cap *pnum
  iotests: Fix unspecified-encoding pylint warnings
  iotests: Fix use-{list,dict}-literal warnings
  iotests/297: Drop 169 and 199 from the skip list
  migrate-bitmaps-postcopy-test: Fix pylint warnings
  migrate-bitmaps-test: Fix pylint warnings
  mirror-top-perms: Fix AbnormalShutdown path
  iotests/297: Cover tests/
  qemu-img: Allow target be aligned to sector size

Stefano Garzarella (1):
  block/mirror: fix NULL pointer dereference in mirror_wait_on_conflicts()

Vladimir Sementsov-Ogievskiy (15):
  tests: add migrate-during-backup
  block: bdrv_inactivate_recurse(): check for permissions and fix crash
  simplebench: add img_bench_templater.py
  qcow2: refactor handle_dependencies() loop body
  qcow2: handle_dependencies(): relax conflict detection
  qcow2-refcount: improve style of check_refcounts_l2()
  qcow2: compressed read: simplify cluster descriptor passing
  qcow2: introduce qcow2_parse_compressed_l2_entry() helper
  qcow2-refcount: introduce fix_l2_entry_by_zero()
  qcow2-refcount: fix_l2_entry_by_zero(): also zero L2 entry bitmap
  qcow2-refcount: check_refcounts_l2(): check l2_bitmap
  qcow2-refcount: check_refcounts_l2(): check reserved bits
  qcow2-refcount: improve style of check_refcounts_l1()
  qcow2-refcount: check_refcounts_l1(): check reserved bits
  qcow2-refcount: check_refblocks(): add separate message for reserved

 docs/tools/qemu-img.rst | 4 +-
 block/qcow2.h | 7 +-
 include/block/block_int.h | 61 +++-
 block.c | 88 +++++
 block/file-posix.c | 7 +-
 block/gluster.c | 23 +-
 block/io.c | 68 +++-
 block/iscsi.c | 3 -
 block/mirror.c | 25 +-
 block/qcow2-cluster.c | 78 +++--
 block/qcow2-refcount.c | 326 ++++++++++++------
 block/qcow2.c | 13 +-
 qemu-img.c | 18 +-
 qemu-img-cmds.hx | 2 +-
 scripts/simplebench/img_bench_templater.py | 95 +++++
 scripts/simplebench/table_templater.py | 62 ++++
 tests/qemu-iotests/122 | 2 +-
 tests/qemu-iotests/271 | 5 +-
 tests/qemu-iotests/271.out | 4 +-
 tests/qemu-iotests/297 | 9 +-
 tests/qemu-iotests/iotests.py | 12 +-
 .../tests/migrate-bitmaps-postcopy-test | 13 +-
 tests/qemu-iotests/tests/migrate-bitmaps-test | 43 ++-
 .../qemu-iotests/tests/migrate-during-backup | 97 ++++++
 .../tests/migrate-during-backup.out | 5 +
 tests/qemu-iotests/tests/mirror-top-perms | 2 +-
 26 files changed, 855 insertions(+), 217 deletions(-)
 create mode 100755 scripts/simplebench/img_bench_templater.py
 create mode 100644 scripts/simplebench/table_templater.py
 create mode 100755 tests/qemu-iotests/tests/migrate-during-backup
 create mode 100644 tests/qemu-iotests/tests/migrate-during-backup.out

--
2.31.1

--- Old series (2017-02-21 block pull request) ---

The following changes since commit 56f9e46b841c7be478ca038d8d4085d776ab4b0d:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-20' into staging (2017-02-20 17:42:47 +0000)

are available in the git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to a7b91d35bab97a2d3e779d0c64c9b837b52a6cf7:

  coroutine-lock: make CoRwlock thread-safe and fair (2017-02-21 11:39:40 +0000)

----------------------------------------------------------------
Pull request

v2:
 * Rebased to resolve scsi conflicts

----------------------------------------------------------------
Paolo Bonzini (24):
  block: move AioContext, QEMUTimer, main-loop to libqemuutil
  aio: introduce aio_co_schedule and aio_co_wake
  block-backend: allow blk_prw from coroutine context
  test-thread-pool: use generic AioContext infrastructure
  io: add methods to set I/O handlers on AioContext
  io: make qio_channel_yield aware of AioContexts
  nbd: convert to use qio_channel_yield
  coroutine-lock: reschedule coroutine on the AioContext it was running on
  blkdebug: reschedule coroutine on the AioContext it is running on
  qed: introduce qed_aio_start_io and qed_aio_next_io_cb
  aio: push aio_context_acquire/release down to dispatching
  block: explicitly acquire aiocontext in timers that need it
  block: explicitly acquire aiocontext in callbacks that need it
  block: explicitly acquire aiocontext in bottom halves that need it
  block: explicitly acquire aiocontext in aio callbacks that need it
  aio-posix: partially inline aio_dispatch into aio_poll
  async: remove unnecessary inc/dec pairs
  block: document fields protected by AioContext lock
  coroutine-lock: make CoMutex thread-safe
  coroutine-lock: add limited spinning to CoMutex
  test-aio-multithread: add performance comparison with thread-based mutexes
  coroutine-lock: place CoMutex before CoQueue in header
  coroutine-lock: add mutex argument to CoQueue APIs
  coroutine-lock: make CoRwlock thread-safe and fair

 Makefile.objs | 4 -
 stubs/Makefile.objs | 1 +
 tests/Makefile.include | 19 +-
 util/Makefile.objs | 6 +-
 block/nbd-client.h | 2 +-
 block/qed.h | 3 +
 include/block/aio.h | 38 ++-
 include/block/block_int.h | 64 +++--
 include/io/channel.h | 72 +++++-
 include/qemu/coroutine.h | 84 ++++---
 include/qemu/coroutine_int.h | 11 +-
 include/sysemu/block-backend.h | 14 +-
 tests/iothread.h | 25 ++
 block/backup.c | 2 +-
 block/blkdebug.c | 9 +-
 block/blkreplay.c | 2 +-
 block/block-backend.c | 13 +-
 block/curl.c | 44 +++-
 block/gluster.c | 9 +-
 block/io.c | 42 +---
 block/iscsi.c | 15 +-
 block/linux-aio.c | 10 +-
 block/mirror.c | 12 +-
 block/nbd-client.c | 119 +++++----
 block/nfs.c | 9 +-
 block/qcow2-cluster.c | 4 +-
 block/qed-cluster.c | 2 +
 block/qed-table.c | 12 +-
 block/qed.c | 58 +++--
 block/sheepdog.c | 31 +--
 block/ssh.c | 29 +--
 block/throttle-groups.c | 4 +-
 block/win32-aio.c | 9 +-
 dma-helpers.c | 2 +
 hw/9pfs/9p.c | 2 +-
 hw/block/virtio-blk.c | 19 +-
 hw/scsi/scsi-bus.c | 2 +
 hw/scsi/scsi-disk.c | 15 ++
 hw/scsi/scsi-generic.c | 20 +-
 hw/scsi/virtio-scsi.c | 7 +
 io/channel-command.c | 13 +
 io/channel-file.c | 11 +
 io/channel-socket.c | 16 +-
 io/channel-tls.c | 12 +
 io/channel-watch.c | 6 +
 io/channel.c | 97 ++++++--
 nbd/client.c | 2 +-
 nbd/common.c | 9 +-
 nbd/server.c | 94 +++-----
 stubs/linux-aio.c | 32 +++
 stubs/set-fd-handler.c | 11 -
 tests/iothread.c | 91 +++++++
 tests/test-aio-multithread.c | 463 ++++++++++++++++++++++++++++++++++++
 tests/test-thread-pool.c | 12 +-
 aio-posix.c => util/aio-posix.c | 62 ++---
 aio-win32.c => util/aio-win32.c | 30 +--
 util/aiocb.c | 55 +++++
 async.c => util/async.c | 84 ++++++-
 iohandler.c => util/iohandler.c | 0
 main-loop.c => util/main-loop.c | 0
 util/qemu-coroutine-lock.c | 254 ++++++++++++--
 util/qemu-coroutine-sleep.c | 2 +-
 util/qemu-coroutine.c | 8 +
 qemu-timer.c => util/qemu-timer.c | 0
 thread-pool.c => util/thread-pool.c | 8 +-
 trace-events | 11 -
 util/trace-events | 17 +-
 67 files changed, 1712 insertions(+), 533 deletions(-)
 create mode 100644 tests/iothread.h
 create mode 100644 stubs/linux-aio.c
 create mode 100644 tests/iothread.c
 create mode 100644 tests/test-aio-multithread.c
 rename aio-posix.c => util/aio-posix.c (94%)
 rename aio-win32.c => util/aio-win32.c (95%)
 create mode 100644 util/aiocb.c
 rename async.c => util/async.c (82%)
 rename iohandler.c => util/iohandler.c (100%)
 rename main-loop.c => util/main-loop.c (100%)
 rename qemu-timer.c => util/qemu-timer.c (100%)
 rename thread-pool.c => util/thread-pool.c (97%)

--
2.9.3
--- New series: "simplebench: add img_bench_templater.py" ---

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Add a simple grammar-parsing template benchmark. The new tool consumes a
test template written in bash with some special grammar injections,
produces multiple tests from it, runs them, and finally prints a
performance comparison table of the different tests produced from the
one template.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210824101517.59802-2-vsementsov@virtuozzo.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 scripts/simplebench/img_bench_templater.py | 95 ++++++++++++++++++++++
 scripts/simplebench/table_templater.py | 62 ++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100755 scripts/simplebench/img_bench_templater.py
 create mode 100644 scripts/simplebench/table_templater.py

diff --git a/scripts/simplebench/img_bench_templater.py b/scripts/simplebench/img_bench_templater.py
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/scripts/simplebench/img_bench_templater.py
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env python3
+#
+# Process img-bench test templates
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+import sys
+import subprocess
+import re
+import json
+
+import simplebench
+from results_to_text import results_to_text
+from table_templater import Templater
+
+
+def bench_func(env, case):
+    test = templater.gen(env['data'], case['data'])
+
+    p = subprocess.run(test, shell=True, stdout=subprocess.PIPE,
+                       stderr=subprocess.STDOUT, universal_newlines=True)
+
+    if p.returncode == 0:
+        try:
+            m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
+            return {'seconds': float(m.group(1))}
+        except Exception:
+            return {'error': f'failed to parse qemu-img output: {p.stdout}'}
+    else:
+        return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        print("""
+Usage: img_bench_templater.py < path/to/test-template.sh
+
+This script generates performance tests from a test template (example below),
+runs them, and displays the results in a table. The template is read from
+stdin. It must be written in bash and end with a `qemu-img bench` invocation
+(whose result is parsed to get the test instance's result).
+
+Use the following syntax in the template to create the various different test
+instances:
+
+  column templating: {var1|var2|...} - test will use different values in
+  different columns. You may use several {} constructions in the test, in this
+  case product of all choice-sets will be used.
+
+  row templating: [var1|var2|...] - similar thing to define rows (test-cases)
+
+Test template example:
+
+Assume you want to compare two qemu-img binaries, called qemu-img-old and
+qemu-img-new in your build directory in two test-cases with 4K writes and 64K
+writes. The template may look like this:
+
+qemu_img=/path/to/qemu/build/qemu-img-{old|new}
+$qemu_img create -f qcow2 /ssd/x.qcow2 1G
+$qemu_img bench -c 100 -d 8 [-s 4K|-s 64K] -w -t none -n /ssd/x.qcow2
+
+When passing this to stdin of img_bench_templater.py, the resulting comparison
+table will contain two columns (for two binaries) and two rows (for two
+test-cases).
+
+In addition to displaying the results, script also stores results in JSON
+format into results.json file in current directory.
+""")
+        sys.exit()
+
+    templater = Templater(sys.stdin.read())
+
+    envs = [{'id': ' / '.join(x), 'data': x} for x in templater.columns]
+    cases = [{'id': ' / '.join(x), 'data': x} for x in templater.rows]
+
+    result = simplebench.bench(bench_func, envs, cases, count=5,
+                               initial_run=False)
+    print(results_to_text(result))
+    with open('results.json', 'w') as f:
+        json.dump(result, f, indent=4)
diff --git a/scripts/simplebench/table_templater.py b/scripts/simplebench/table_templater.py
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/scripts/simplebench/table_templater.py
@@ -XXX,XX +XXX,XX @@
+# Parser for test templates
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import itertools
+from lark import Lark
+
+grammar = """
+start: ( text | column_switch | row_switch )+
+
+column_switch: "{" text ["|" text]+ "}"
+row_switch: "[" text ["|" text]+ "]"
+text: /[^|{}\[\]]+/
+"""
+
+parser = Lark(grammar)
+
+class Templater:
+    def __init__(self, template):
+        self.tree = parser.parse(template)
+
+        c_switches = []
+        r_switches = []
+        for x in self.tree.children:
+            if x.data == 'column_switch':
+                c_switches.append([el.children[0].value for el in x.children])
+            elif x.data == 'row_switch':
+                r_switches.append([el.children[0].value for el in x.children])
+
+        self.columns = list(itertools.product(*c_switches))
+        self.rows = list(itertools.product(*r_switches))
+
+    def gen(self, column, row):
+        i = 0
+        j = 0
+        result = []
+
+        for x in self.tree.children:
+            if x.data == 'text':
+                result.append(x.children[0].value)
+            elif x.data == 'column_switch':
+                result.append(column[i])
+                i += 1
+            elif x.data == 'row_switch':
+                result.append(row[j])
+                j += 1
+
+        return ''.join(result)
--
2.31.1

--- Old series: "block: move AioContext, QEMUTimer, main-loop to libqemuutil" ---

From: Paolo Bonzini <pbonzini@redhat.com>

AioContext is fairly self contained, the only dependency is QEMUTimer but
that in turn doesn't need anything else. So move them out of block-obj-y
to avoid introducing a dependency from io/ to block-obj-y.

main-loop and its dependency iohandler also need to be moved, because
later in this series io/ will call iohandler_get_aio_context.

[Changed copyright "the QEMU team" to "other QEMU contributors" as
suggested by Daniel Berrange and agreed by Paolo.
--Stefan]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 Makefile.objs | 4 ---
 stubs/Makefile.objs | 1 +
 tests/Makefile.include | 11 ++++----
 util/Makefile.objs | 6 +++-
 block/io.c | 29 -------------------
 stubs/linux-aio.c | 32 +++++++++++++++++++++
 stubs/set-fd-handler.c | 11 --------
 aio-posix.c => util/aio-posix.c | 2 +-
 aio-win32.c => util/aio-win32.c | 0
 util/aiocb.c | 55 +++++++++++++++++++++++++++++++++++++
 async.c => util/async.c | 3 +-
 iohandler.c => util/iohandler.c | 0
 main-loop.c => util/main-loop.c | 0
 qemu-timer.c => util/qemu-timer.c | 0
 thread-pool.c => util/thread-pool.c | 2 +-
 trace-events | 11 --------
 util/trace-events | 11 ++++++++
 17 files changed, 114 insertions(+), 64 deletions(-)
 create mode 100644 stubs/linux-aio.c
 rename aio-posix.c => util/aio-posix.c (99%)
 rename aio-win32.c => util/aio-win32.c (100%)
 create mode 100644 util/aiocb.c
 rename async.c => util/async.c (99%)
 rename iohandler.c => util/iohandler.c (100%)
 rename main-loop.c => util/main-loop.c (100%)
 rename qemu-timer.c => util/qemu-timer.c (100%)
 rename thread-pool.c => util/thread-pool.c (99%)

diff --git a/Makefile.objs b/Makefile.objs
index XXXXXXX..XXXXXXX 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -XXX,XX +XXX,XX @@ chardev-obj-y = chardev/
 #######################################################################
 # block-obj-y is code used by both qemu system emulation and qemu-img

-block-obj-y = async.o thread-pool.o
 block-obj-y += nbd/
 block-obj-y += block.o blockjob.o
-block-obj-y += main-loop.o iohandler.o qemu-timer.o
-block-obj-$(CONFIG_POSIX) += aio-posix.o
-block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index XXXXXXX..XXXXXXX 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -XXX,XX +XXX,XX @@ stub-obj-y += get-vm-name.o
 stub-obj-y += iothread.o
 stub-obj-y += iothread-lock.o
 stub-obj-y += is-daemonized.o
+stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 stub-obj-y += machine-init-done.o
 stub-obj-y += migr-blocker.o
 stub-obj-y += monitor.o
diff --git a/tests/Makefile.include b/tests/Makefile.include
index XXXXXXX..XXXXXXX 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -XXX,XX +XXX,XX @@ check-unit-y += tests/test-visitor-serialization$(EXESUF)
 check-unit-y += tests/test-iov$(EXESUF)
 gcov-files-test-iov-y = util/iov.c
 check-unit-y += tests/test-aio$(EXESUF)
+gcov-files-test-aio-y = util/async.c util/qemu-timer.o
+gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
+gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
 check-unit-y += tests/test-throttle$(EXESUF)
 gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
@@ -XXX,XX +XXX,XX @@ tests/check-qjson$(EXESUF): tests/check-qjson.o $(test-util-obj-y)
 tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(test-qom-obj-y)
 tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y)

-tests/test-char$(EXESUF): tests/test-char.o qemu-timer.o \
-	$(test-util-obj-y) $(qtest-obj-y) $(test-block-obj-y) $(chardev-obj-y)
+tests/test-char$(EXESUF): tests/test-char.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) $(chardev-obj-y)
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
 tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
@@ -XXX,XX +XXX,XX @@ tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
 	migration/vmstate.o migration/qemu-file.o \
 	migration/qemu-file-channel.o migration/qjson.o \
 	$(test-io-obj-y)
-tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \
-	$(test-util-obj-y)
+tests/test-timed-average$(EXESUF): tests/test-timed-average.o $(test-util-obj-y)
 tests/test-base64$(EXESUF): tests/test-base64.o \
 	libqemuutil.a libqemustub.a
 tests/ptimer-test$(EXESUF): tests/ptimer-test.o tests/ptimer-test-stubs.o hw/core/ptimer.o libqemustub.a
@@ -XXX,XX +XXX,XX @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
 tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
 tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
 tests/postcopy-test$(EXESUF): tests/postcopy-test.o
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-timer.o \
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o $(test-util-obj-y) \
 	$(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) $(libqos-pc-obj-y) \
 	$(chardev-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
diff --git a/util/Makefile.objs b/util/Makefile.objs
index XXXXXXX..XXXXXXX 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -XXX,XX +XXX,XX @@
 util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
 util-obj-y += bufferiszero.o
 util-obj-y += lockcnt.o
+util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o
+util-obj-y += main-loop.o iohandler.o
+util-obj-$(CONFIG_POSIX) += aio-posix.o
 util-obj-$(CONFIG_POSIX) += compatfd.o
 util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
 util-obj-$(CONFIG_POSIX) += mmap-alloc.o
 util-obj-$(CONFIG_POSIX) += oslib-posix.o
 util-obj-$(CONFIG_POSIX) += qemu-openpty.o
 util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
-util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
 util-obj-$(CONFIG_POSIX) += memfd.o
+util-obj-$(CONFIG_WIN32) += aio-win32.o
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
 util-obj-$(CONFIG_WIN32) += oslib-win32.o
 util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
 util-obj-y += envlist.o path.o module.o
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     return &acb->common;
 }

-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
-                   BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCB *acb;
-
-    acb = g_malloc(aiocb_info->aiocb_size);
-    acb->aiocb_info = aiocb_info;
-    acb->bs = bs;
-    acb->cb = cb;
-    acb->opaque = opaque;
-    acb->refcnt = 1;
-    return acb;
-}
-
-void qemu_aio_ref(void *p)
-{
-    BlockAIOCB *acb = p;
-    acb->refcnt++;
-}
-
-void qemu_aio_unref(void *p)
-{
-    BlockAIOCB *acb = p;
-    assert(acb->refcnt > 0);
-    if (--acb->refcnt == 0) {
-        g_free(acb);
-    }
-}
-
 /**************************************************************/
 /* Coroutine block device emulation */

diff --git a/stubs/linux-aio.c b/stubs/linux-aio.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/stubs/linux-aio.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Linux native AIO support.
+ *
+ * Copyright (C) 2009 IBM, Corp.
+ * Copyright (C) 2009 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "block/raw-aio.h"
+
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
+{
+    abort();
+}
+
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
+{
+    abort();
+}
+
+LinuxAioState *laio_init(void)
+{
+    abort();
+}
+
+void laio_cleanup(LinuxAioState *s)
+{
+    abort();
+}
diff --git a/stubs/set-fd-handler.c b/stubs/set-fd-handler.c
index XXXXXXX..XXXXXXX 100644
--- a/stubs/set-fd-handler.c
+++ b/stubs/set-fd-handler.c
@@ -XXX,XX +XXX,XX @@ void qemu_set_fd_handler(int fd,
 {
     abort();
 }
-
-void aio_set_fd_handler(AioContext *ctx,
-                        int fd,
-                        bool is_external,
-                        IOHandler *io_read,
-                        IOHandler *io_write,
-                        AioPollFn *io_poll,
-                        void *opaque)
-{
-    abort();
-}
diff --git a/aio-posix.c b/util/aio-posix.c
similarity index 99%
rename from aio-posix.c
rename to util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/rcu_queue.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
-#include "trace-root.h"
+#include "trace.h"
 #ifdef CONFIG_EPOLL_CREATE1
 #include <sys/epoll.h>
 #endif
diff --git a/aio-win32.c b/util/aio-win32.c
similarity index 100%
rename from aio-win32.c
rename to util/aio-win32.c
diff --git a/util/aiocb.c b/util/aiocb.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/util/aiocb.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * BlockAIOCB allocation
+ *
+ * Copyright (c) 2003-2017 Fabrice Bellard and other QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/aio.h"
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = g_malloc(aiocb_info->aiocb_size);
+    acb->aiocb_info = aiocb_info;
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+    acb->refcnt = 1;
+    return acb;
+}
+
+void qemu_aio_ref(void *p)
+{
+    BlockAIOCB *acb = p;
+    acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
+{
+    BlockAIOCB *acb = p;
+    assert(acb->refcnt > 0);
+    if (--acb->refcnt == 0) {
+        g_free(acb);
+    }
+}
diff --git a/async.c b/util/async.c
similarity index 99%
rename from async.c
rename to util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@
 /*
- * QEMU System Emulator
+ * Data plane event loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
diff --git a/iohandler.c b/util/iohandler.c
similarity index 100%
rename from iohandler.c
rename to util/iohandler.c
diff --git a/main-loop.c b/util/main-loop.c
similarity index 100%
rename from main-loop.c
rename to util/main-loop.c
diff --git a/qemu-timer.c b/util/qemu-timer.c
similarity index 100%
rename from qemu-timer.c
rename to util/qemu-timer.c
diff --git a/thread-pool.c b/util/thread-pool.c
similarity index 99%
rename from thread-pool.c
rename to util/thread-pool.c
index XXXXXXX..XXXXXXX 100644
--- a/thread-pool.c
+++ b/util/thread-pool.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/coroutine.h"
-#include "trace-root.h"
+#include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"

diff --git a/trace-events b/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/trace-events
+++ b/trace-events
@@ -XXX,XX +XXX,XX @@
 #
 # The <format-string> should be a sprintf()-compatible format string.

-# aio-posix.c
-run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
-run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
-poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
-poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
-
-# thread-pool.c
-thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
-thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
-thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
-
 # ioport.c
 cpu_in(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
 cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
diff --git a/util/trace-events b/util/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -XXX,XX +XXX,XX @@
 # See docs/tracing.txt for syntax documentation.

+# util/aio-posix.c
+run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
+run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
+poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
+poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
+
+# util/thread-pool.c
+thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
+thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
+thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
+
 # util/buffer.c
 buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"
 buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s"
--
2.9.3
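
[Editor's sketch] For readers new to the AIOCB machinery that the old
series consolidates into util/aiocb.c above: a block driver embeds
BlockAIOCB as the first member of its own request structure and
advertises the total size through AIOCBInfo.aiocb_size, so that
qemu_aio_get() can allocate the whole request in one allocation. A
minimal sketch of that usage follows; DummyAIOCB and dummy_aio_start()
are hypothetical names for illustration, not QEMU code:

    /* Hypothetical driver-side use of qemu_aio_get()/qemu_aio_unref(). */
    typedef struct DummyAIOCB {
        BlockAIOCB common;    /* must be first, so casts to BlockAIOCB work */
        int my_driver_state;  /* driver-specific fields follow */
    } DummyAIOCB;

    static const AIOCBInfo dummy_aiocb_info = {
        .aiocb_size = sizeof(DummyAIOCB),
    };

    static BlockAIOCB *dummy_aio_start(BlockDriverState *bs,
                                       BlockCompletionFunc *cb, void *opaque)
    {
        DummyAIOCB *acb = qemu_aio_get(&dummy_aiocb_info, bs, cb, opaque);

        acb->my_driver_state = 0;
        /* ...kick off the actual I/O here.  On completion the driver calls
         * acb->common.cb(acb->common.opaque, ret) and then drops its
         * reference with qemu_aio_unref(acb). */
        return &acb->common;
    }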
--- New series: "tests: add migrate-during-backup" ---

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Add a simple test which tries to run migration during backup.
bdrv_inactivate_all() should fail. But due to a bug (see the next commit
with the fix) it doesn't: nodes are inactivated, and the still-running
backup crashes on the assertion
"assert(!(bs->open_flags & BDRV_O_INACTIVE));" in
bdrv_co_write_req_prepare().

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210911120027.8063-2-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 .../qemu-iotests/tests/migrate-during-backup | 97 +++++++++++++++++++
 .../tests/migrate-during-backup.out | 5 +
 2 files changed, 102 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/migrate-during-backup
 create mode 100644 tests/qemu-iotests/tests/migrate-during-backup.out

diff --git a/tests/qemu-iotests/tests/migrate-during-backup b/tests/qemu-iotests/tests/migrate-during-backup
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-during-backup
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env python3
+# group: migration disabled
+#
+# Copyright (c) 2021 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img_create, qemu_io
+
+
+disk_a = os.path.join(iotests.test_dir, 'disk_a')
+disk_b = os.path.join(iotests.test_dir, 'disk_b')
+size = '1M'
+mig_file = os.path.join(iotests.test_dir, 'mig_file')
+mig_cmd = 'exec: cat > ' + mig_file
+
+
+class TestMigrateDuringBackup(iotests.QMPTestCase):
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(disk_a)
+        os.remove(disk_b)
+        os.remove(mig_file)
+
+    def setUp(self):
+        qemu_img_create('-f', iotests.imgfmt, disk_a, size)
+        qemu_img_create('-f', iotests.imgfmt, disk_b, size)
+        qemu_io('-c', f'write 0 {size}', disk_a)
+
+        self.vm = iotests.VM().add_drive(disk_a)
+        self.vm.launch()
+        result = self.vm.qmp('blockdev-add', {
+            'node-name': 'target',
+            'driver': iotests.imgfmt,
+            'file': {
+                'driver': 'file',
+                'filename': disk_b
+            }
+        })
+        self.assert_qmp(result, 'return', {})
+
+    def test_migrate(self):
+        result = self.vm.qmp('blockdev-backup', device='drive0',
+                             target='target', sync='full',
+                             speed=1, x_perf={
+                                 'max-workers': 1,
+                                 'max-chunk': 64 * 1024
+                             })
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('job-pause', id='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('migrate-set-capabilities',
+                             capabilities=[{'capability': 'events',
+                                            'state': True}])
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp('migrate', uri=mig_cmd)
+        self.assert_qmp(result, 'return', {})
+
+        e = self.vm.events_wait((('MIGRATION',
+                                  {'data': {'status': 'completed'}}),
+                                 ('MIGRATION',
+                                  {'data': {'status': 'failed'}})))
+
+        # Don't assert that e is 'failed' now: this way we'll miss
+        # possible crash when backup continues :)
+
+        result = self.vm.qmp('block-job-set-speed', device='drive0',
+                             speed=0)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp('job-resume', id='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        # For future: if something changes so that both migration
+        # and backup pass, let's not miss that moment, as it may
+        # be a bug as well as improvement.
+        self.assert_qmp(e, 'data/status', 'failed')
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'],
+                 supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/migrate-during-backup.out b/tests/qemu-iotests/tests/migrate-during-backup.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-during-backup.out
@@ -XXX,XX +XXX,XX @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
--
2.31.1

--- Old series: "aio: introduce aio_co_schedule and aio_co_wake" ---

From: Paolo Bonzini <pbonzini@redhat.com>

aio_co_wake provides the infrastructure to start a coroutine on a "home"
AioContext. It will be used by CoMutex and CoQueue, so that coroutines
don't jump from one context to another when they go to sleep on a
mutex or waitqueue. However, it can also be used as a more efficient
alternative to one-shot bottom halves, and saves the effort of tracking
which AioContext a coroutine is running on.

aio_co_schedule is the part of aio_co_wake that starts a coroutine
on a remote AioContext, but it is also useful to implement e.g.
bdrv_set_aio_context callbacks.

The implementation of aio_co_schedule is based on a lock-free
multiple-producer, single-consumer queue. The multiple producers use
cmpxchg to add to a LIFO stack. The consumer (a per-AioContext bottom
half) grabs all items added so far, inverts the list to make it FIFO,
and goes through it one item at a time until it's empty. The data
structure was inspired by OSv, which uses it in the very code we'll
"port" to QEMU for the thread-safe CoMutex.

Most of the new code is really tests.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/Makefile.include | 8 +-
 include/block/aio.h | 32 +++++++
 include/qemu/coroutine_int.h | 11 ++-
 tests/iothread.h | 25 +++++
 tests/iothread.c | 91 ++++++++++++++++++
 tests/test-aio-multithread.c | 213 +++++++++++++++++++++++++++++++++++++++++++
 util/async.c | 65 +++++++++++++
 util/qemu-coroutine.c | 8 ++
 util/trace-events | 4 +
 9 files changed, 453 insertions(+), 4 deletions(-)
 create mode 100644 tests/iothread.h
 create mode 100644 tests/iothread.c
 create mode 100644 tests/test-aio-multithread.c

diff --git a/tests/Makefile.include b/tests/Makefile.include
index XXXXXXX..XXXXXXX 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -XXX,XX +XXX,XX @@ check-unit-y += tests/test-aio$(EXESUF)
 gcov-files-test-aio-y = util/async.c util/qemu-timer.o
 gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
+check-unit-y += tests/test-aio-multithread$(EXESUF)
+gcov-files-test-aio-multithread-y = $(gcov-files-test-aio-y)
+gcov-files-test-aio-multithread-y += util/qemu-coroutine.c tests/iothread.c
 check-unit-y += tests/test-throttle$(EXESUF)
-gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
-gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
 check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
 gcov-files-test-hbitmap-y = util/hbitmap.c
@@ -XXX,XX +XXX,XX @@ test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \
 	$(test-qom-obj-y)
 test-crypto-obj-y = $(crypto-obj-y) $(test-qom-obj-y)
 test-io-obj-y = $(io-obj-y) $(test-crypto-obj-y)
-test-block-obj-y = $(block-obj-y) $(test-io-obj-y)
+test-block-obj-y = $(block-obj-y) $(test-io-obj-y) tests/iothread.o

 tests/check-qint$(EXESUF): tests/check-qint.o $(test-util-obj-y)
 tests/check-qstring$(EXESUF): tests/check-qstring.o $(test-util-obj-y)
@@ -XXX,XX +XXX,XX @@ tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y)
 tests/test-char$(EXESUF): tests/test-char.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) $(chardev-obj-y)
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
 tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
+tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
 tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
 tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ typedef void QEMUBHFunc(void *opaque);
 typedef bool AioPollFn(void *opaque);
 typedef void IOHandler(void *opaque);

+struct Coroutine;
 struct ThreadPool;
 struct LinuxAioState;

@@ -XXX,XX +XXX,XX @@ struct AioContext {
     bool notified;
     EventNotifier notifier;

+    QSLIST_HEAD(, Coroutine) scheduled_coroutines;
+    QEMUBH *co_schedule_bh;
+
     /* Thread pool for performing work and receiving completion callbacks.
      * Has its own locking.
      */
@@ -XXX,XX +XXX,XX @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
 }

 /**
+ * aio_co_schedule:
+ * @ctx: the aio context
+ * @co: the coroutine
+ *
+ * Start a coroutine on a remote AioContext.
+ *
+ * The coroutine must not be entered by anyone else while aio_co_schedule()
+ * is active.  In addition the coroutine must have yielded unless ctx
+ * is the context in which the coroutine is running (i.e. the value of
+ * qemu_get_current_aio_context() from the coroutine itself).
+ */
+void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+
+/**
+ * aio_co_wake:
+ * @co: the coroutine
+ *
+ * Restart a coroutine on the AioContext where it was running last, thus
+ * preventing coroutines from jumping from one context to another when they
+ * go to sleep.
+ *
+ * aio_co_wake may be executed either in coroutine or non-coroutine
+ * context.  The coroutine must not be entered by anyone else while
+ * aio_co_wake() is active.
+ */
+void aio_co_wake(struct Coroutine *co);
+
+/**
 * Return the AioContext whose event loop runs in the current thread.
 *
 * If called from an IOThread this will be the IOThread's AioContext.  If
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/coroutine_int.h
+++ b/include/qemu/coroutine_int.h
@@ -XXX,XX +XXX,XX @@ struct Coroutine {
     CoroutineEntry *entry;
     void *entry_arg;
     Coroutine *caller;
+
+    /* Only used when the coroutine has terminated.  */
     QSLIST_ENTRY(Coroutine) pool_next;
+
     size_t locks_held;

-    /* Coroutines that should be woken up when we yield or terminate */
+    /* Coroutines that should be woken up when we yield or terminate.
+     * Only used when the coroutine is running.
+     */
     QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
+
+    /* Only used when the coroutine has yielded.  */
+    AioContext *ctx;
     QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
+    QSLIST_ENTRY(Coroutine) co_scheduled_next;
 };

 Coroutine *qemu_coroutine_new(void);
diff --git a/tests/iothread.h b/tests/iothread.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/iothread.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * Event loop thread implementation for unit tests
+ *
+ * Copyright Red Hat Inc., 2013, 2016
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ *   Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef TEST_IOTHREAD_H
+#define TEST_IOTHREAD_H
+
+#include "block/aio.h"
+#include "qemu/thread.h"
+
+typedef struct IOThread IOThread;
+
+IOThread *iothread_new(void);
+void iothread_join(IOThread *iothread);
+AioContext *iothread_get_aio_context(IOThread *iothread);
+
+#endif
diff --git a/tests/iothread.c b/tests/iothread.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/iothread.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Event loop thread implementation for unit tests
+ *
+ * Copyright Red Hat Inc., 2013, 2016
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ *   Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/aio.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "iothread.h"
+
+struct IOThread {
+    AioContext *ctx;
+
+    QemuThread thread;
+    QemuMutex init_done_lock;
+    QemuCond init_done_cond;    /* is thread initialization done? */
+    bool stopping;
+};
+
+static __thread IOThread *my_iothread;
+
+AioContext *qemu_get_current_aio_context(void)
+{
+    return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
+}
+
+static void *iothread_run(void *opaque)
+{
+    IOThread *iothread = opaque;
+
+    rcu_register_thread();
+
+    my_iothread = iothread;
+    qemu_mutex_lock(&iothread->init_done_lock);
+    iothread->ctx = aio_context_new(&error_abort);
+    qemu_cond_signal(&iothread->init_done_cond);
+    qemu_mutex_unlock(&iothread->init_done_lock);
+
+    while (!atomic_read(&iothread->stopping)) {
+        aio_poll(iothread->ctx, true);
+    }
+
+    rcu_unregister_thread();
+    return NULL;
+}
+
+void iothread_join(IOThread *iothread)
+{
+    iothread->stopping = true;
+    aio_notify(iothread->ctx);
+    qemu_thread_join(&iothread->thread);
+    qemu_cond_destroy(&iothread->init_done_cond);
+    qemu_mutex_destroy(&iothread->init_done_lock);
+    aio_context_unref(iothread->ctx);
+    g_free(iothread);
+}
+
+IOThread *iothread_new(void)
+{
+    IOThread *iothread = g_new0(IOThread, 1);
+
+    qemu_mutex_init(&iothread->init_done_lock);
+    qemu_cond_init(&iothread->init_done_cond);
+    qemu_thread_create(&iothread->thread, NULL, iothread_run,
+                       iothread, QEMU_THREAD_JOINABLE);
+
+    /* Wait for initialization to complete */
+    qemu_mutex_lock(&iothread->init_done_lock);
+    while (iothread->ctx == NULL) {
+        qemu_cond_wait(&iothread->init_done_cond,
+                       &iothread->init_done_lock);
+    }
+    qemu_mutex_unlock(&iothread->init_done_lock);
+    return iothread;
+}
+
+AioContext *iothread_get_aio_context(IOThread *iothread)
+{
+    return iothread->ctx;
+}
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/test-aio-multithread.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * AioContext multithreading tests
+ *
+ * Copyright Red Hat, Inc. 2016
+ *
+ * Authors:
+ *   Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <glib.h>
+#include "block/aio.h"
+#include "qapi/error.h"
+#include "qemu/coroutine.h"
+#include "qemu/thread.h"
+#include "qemu/error-report.h"
+#include "iothread.h"
+
+/* AioContext management */
+
+#define NUM_CONTEXTS 5
+
+static IOThread *threads[NUM_CONTEXTS];
+static AioContext *ctx[NUM_CONTEXTS];
+static __thread int id = -1;
+
+static QemuEvent done_event;
+
+/* Run a function synchronously on a remote iothread. */
+
+typedef struct CtxRunData {
+    QEMUBHFunc *cb;
+    void *arg;
+} CtxRunData;
+
+static void ctx_run_bh_cb(void *opaque)
+{
+    CtxRunData *data = opaque;
+
+    data->cb(data->arg);
+    qemu_event_set(&done_event);
+}
+
+static void ctx_run(int i, QEMUBHFunc *cb, void *opaque)
+{
+    CtxRunData data = {
+        .cb = cb,
+        .arg = opaque
+    };
+
+    qemu_event_reset(&done_event);
+    aio_bh_schedule_oneshot(ctx[i], ctx_run_bh_cb, &data);
+    qemu_event_wait(&done_event);
+}
+
+/* Starting the iothreads. */
+
+static void set_id_cb(void *opaque)
+{
+    int *i = opaque;
+
+    id = *i;
+}
+
+static void create_aio_contexts(void)
+{
+    int i;
+
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        threads[i] = iothread_new();
+        ctx[i] = iothread_get_aio_context(threads[i]);
+    }
+
+    qemu_event_init(&done_event, false);
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        ctx_run(i, set_id_cb, &i);
+    }
+}
+
+/* Stopping the iothreads. */
+
+static void join_aio_contexts(void)
+{
+    int i;
+
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        aio_context_ref(ctx[i]);
+    }
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        iothread_join(threads[i]);
+    }
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        aio_context_unref(ctx[i]);
+    }
+    qemu_event_destroy(&done_event);
+}
+
+/* Basic test for the stuff above. */
+
+static void test_lifecycle(void)
+{
+    create_aio_contexts();
+    join_aio_contexts();
+}
+
+/* aio_co_schedule test. */
+
+static Coroutine *to_schedule[NUM_CONTEXTS];
+
+static bool now_stopping;
+
+static int count_retry;
+static int count_here;
+static int count_other;
+
+static bool schedule_next(int n)
+{
+    Coroutine *co;
+
+    co = atomic_xchg(&to_schedule[n], NULL);
+    if (!co) {
+        atomic_inc(&count_retry);
+        return false;
+    }
+
+    if (n == id) {
+        atomic_inc(&count_here);
+    } else {
+        atomic_inc(&count_other);
+    }
+
+    aio_co_schedule(ctx[n], co);
+    return true;
+}
+
+static void finish_cb(void *opaque)
+{
+    schedule_next(id);
+}
+
+static coroutine_fn void test_multi_co_schedule_entry(void *opaque)
+{
+    g_assert(to_schedule[id] == NULL);
+    atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
+
+    while (!atomic_mb_read(&now_stopping)) {
+        int n;
+
+        n = g_test_rand_int_range(0, NUM_CONTEXTS);
+        schedule_next(n);
+        qemu_coroutine_yield();
+
+        g_assert(to_schedule[id] == NULL);
+        atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
+    }
+}
+
+
+static void test_multi_co_schedule(int seconds)
+{
+    int i;
+
+    count_here = count_other = count_retry = 0;
+    now_stopping = false;
+
+    create_aio_contexts();
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        Coroutine *co1 = qemu_coroutine_create(test_multi_co_schedule_entry, NULL);
+        aio_co_schedule(ctx[i], co1);
+    }
+
+    g_usleep(seconds * 1000000);
+
+    atomic_mb_set(&now_stopping, true);
+    for (i = 0; i < NUM_CONTEXTS; i++) {
+        ctx_run(i, finish_cb, NULL);
+        to_schedule[i] = NULL;
+    }
+
+    join_aio_contexts();
+    g_test_message("scheduled %d, queued %d, retry %d, total %d\n",
+                   count_other, count_here, count_retry,
+                   count_here + count_other + count_retry);
+}
+
+static void test_multi_co_schedule_1(void)
+{
+    test_multi_co_schedule(1);
+}
+
+static void test_multi_co_schedule_10(void)
+{
+    test_multi_co_schedule(10);
+}
+
+/* End of tests.  */
+
+int main(int argc, char **argv)
+{
+    init_clocks();
+
+    g_test_init(&argc, &argv, NULL);
+    g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
+    if (g_test_quick()) {
+        g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
+    } else {
+        g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
+    }
+    return g_test_run();
+}
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 #include "block/raw-aio.h"
+#include "qemu/coroutine_int.h"
+#include "trace.h"

 /***********************************************************/
 /* bottom halves (can be seen as timers which expire ASAP) */
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
     }
 #endif

+    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
+    qemu_bh_delete(ctx->co_schedule_bh);
+
     qemu_lockcnt_lock(&ctx->list_lock);
     assert(!qemu_lockcnt_count(&ctx->list_lock));
     while (ctx->first_bh) {
@@ -XXX,XX +XXX,XX @@ static bool event_notifier_poll(void *opaque)
     return atomic_read(&ctx->notified);
 }

+static void co_schedule_bh_cb(void *opaque)
+{
+    AioContext *ctx = opaque;
+    QSLIST_HEAD(, Coroutine) straight, reversed;
+
+    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
+    QSLIST_INIT(&straight);
+
+    while (!QSLIST_EMPTY(&reversed)) {
+        Coroutine *co = QSLIST_FIRST(&reversed);
+        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
+        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
+    }
+
+    while (!QSLIST_EMPTY(&straight)) {
+        Coroutine *co = QSLIST_FIRST(&straight);
+        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
+        trace_aio_co_schedule_bh_cb(ctx, co);
+        qemu_coroutine_enter(co);
+    }
+}
+
 AioContext *aio_context_new(Error **errp)
 {
     int ret;
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     }
     g_source_set_can_recurse(&ctx->source, true);
     qemu_lockcnt_init(&ctx->list_lock);
+
+    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
+    QSLIST_INIT(&ctx->scheduled_coroutines);
+
     aio_set_event_notifier(ctx, &ctx->notifier,
                            false,
                            (EventNotifierHandler *)
@@ -XXX,XX +XXX,XX @@ fail:
     return NULL;
 }

+void aio_co_schedule(AioContext *ctx, Coroutine *co)
+{
+    trace_aio_co_schedule(ctx, co);
+    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
+                              co, co_scheduled_next);
+    qemu_bh_schedule(ctx->co_schedule_bh);
+}
+
+void aio_co_wake(struct Coroutine *co)
+{
+    AioContext *ctx;
+
+    /* Read coroutine before co->ctx.  Matches smp_wmb in
+     * qemu_coroutine_enter.
+     */
+    smp_read_barrier_depends();
+    ctx = atomic_read(&co->ctx);
+
+    if (ctx != qemu_get_current_aio_context()) {
+        aio_co_schedule(ctx, co);
+        return;
+    }
+
+    if (qemu_in_coroutine()) {
+        Coroutine *self = qemu_coroutine_self();
+        assert(self != co);
+        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
+    } else {
+        aio_context_acquire(ctx);
+        qemu_coroutine_enter(co);
+        aio_context_release(ctx);
+    }
+}
+
 void aio_context_ref(AioContext *ctx)
 {
     g_source_ref(&ctx->source);
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/atomic.h"
 #include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
+#include "block/aio.h"

 enum {
     POOL_BATCH_SIZE = 64,
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_enter(Coroutine *co)
     }

     co->caller = self;
+    co->ctx = qemu_get_current_aio_context();
+
+    /* Store co->ctx before anything that stores co.  Matches
+     * barrier in aio_co_wake.
+     */
+    smp_wmb();
+
     ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);

     qemu_co_queue_run_restart(co);
diff --git a/util/trace-events b/util/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -XXX,XX +XXX,XX @@ run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
 poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
 poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64

+# util/async.c
+aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
+aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+
 # util/thread-pool.c
 thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
 thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
--
2.9.3
135
655
136
656
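An aside on the two loops in co_schedule_bh_cb above: QSLIST_INSERT_HEAD_ATOMIC is the only lock-free insertion primitive available, so coroutines end up on scheduled_coroutines in LIFO order, and the bottom half reverses the list before entering them so that they run in the order they were scheduled. The following standalone sketch (plain C, not QEMU code; all names are made up for illustration) shows the effect:

#include <stdio.h>
#include <stddef.h>

struct node { int id; struct node *next; };

/* Models QSLIST_INSERT_HEAD_ATOMIC: new entries always go to the front. */
static struct node *push_head(struct node *head, struct node *n)
{
    n->next = head;
    return n;
}

/* Models the reversed -> straight loop in co_schedule_bh_cb. */
static struct node *reverse(struct node *head)
{
    struct node *out = NULL;
    while (head) {
        struct node *n = head;
        head = head->next;
        n->next = out;
        out = n;
    }
    return out;
}

int main(void)
{
    struct node n1 = {1, NULL}, n2 = {2, NULL}, n3 = {3, NULL};
    struct node *list = NULL;

    /* Coroutines scheduled in order 1, 2, 3... */
    list = push_head(list, &n1);
    list = push_head(list, &n2);
    list = push_head(list, &n3);

    /* ...sit on the list as 3, 2, 1; reversing restores FIFO order. */
    for (struct node *n = reverse(list); n; n = n->next) {
        printf("enter coroutine %d\n", n->id);
    }
    return 0;
}
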
From: Eric Blake <eblake@redhat.com>

Although we have long supported 'qemu-img convert -o
backing_file=foo,backing_fmt=bar', the fact that we have a shortcut -B
for backing_file but none for backing_fmt has made it more likely that
users accidentally run into:

qemu-img: warning: Deprecated use of backing file without explicit backing format

when using -B instead of -o. For similarity with other qemu-img
commands, such as create and compare, add '-F $fmt' as the shorthand
for '-o backing_fmt=$fmt'. Update iotest 122 for coverage of both
spellings.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20210913131735.1948339-1-eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 docs/tools/qemu-img.rst | 4 ++--
 qemu-img.c | 10 +++++++---
 qemu-img-cmds.hx | 2 +-
 tests/qemu-iotests/122 | 2 +-
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index XXXXXXX..XXXXXXX 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -XXX,XX +XXX,XX @@ Command description:
   4
     Error on reading data
 
-.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE [-F backing_fmt]] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
 
   Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM*
   to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can
@@ -XXX,XX +XXX,XX @@ Command description:
   You can use the *BACKING_FILE* option to force the output image to be
   created as a copy on write image of the specified base image; the
   *BACKING_FILE* should have the same content as the input's base image,
-  however the path, image format, etc may differ.
+  however the path, image format (as given by *BACKING_FMT*), etc may differ.
 
   If a relative path name is given, the backing file is looked up relative to
   the directory containing *OUTPUT_FILENAME*.
diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
-               *out_filename, *out_baseimg_param, *snapshot_name = NULL;
+               *out_filename, *out_baseimg_param, *snapshot_name = NULL,
+               *backing_fmt = NULL;
     BlockDriver *drv = NULL, *proto_drv = NULL;
     BlockDriverInfo bdi;
     BlockDriverState *out_bs;
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
         {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
         {0, 0, 0, 0}
     };
-    c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:",
+    c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
                     long_options, NULL);
     if (c == -1) {
         break;
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
     case 'c':
         s.compressed = true;
         break;
+    case 'F':
+        backing_fmt = optarg;
+        break;
     case 'o':
         if (accumulate_options(&options, optarg) < 0) {
             goto fail_getopt;
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
 
     qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
                         s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
-    ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
+    ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
     if (ret < 0) {
         goto out;
     }
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -XXX,XX +XXX,XX @@ SRST
 ERST
 
 DEF("convert", img_convert,
-    "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
+    "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file [-F backing_fmt]] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
 SRST
 .. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
 ERST
diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/122
+++ b/tests/qemu-iotests/122
@@ -XXX,XX +XXX,XX @@ echo
 _make_test_img -b "$TEST_IMG".base -F $IMGFMT
 
 $QEMU_IO -c "write -P 0 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base -o backing_fmt=$IMGFMT \
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base -F $IMGFMT \
    "$TEST_IMG" "$TEST_IMG".orig
$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base -o backing_fmt=$IMGFMT \
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

qcow2_create2 calls this. Do not run a nested event loop, as that
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
list of the currently running one.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/block-backend.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
 {
     QEMUIOVector qiov;
     struct iovec iov;
-    Coroutine *co;
     BlkRwCo rwco;
 
     iov = (struct iovec) {
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
         .ret = NOT_DONE,
     };
 
-    co = qemu_coroutine_create(co_entry, &rwco);
-    qemu_coroutine_enter(co);
-    BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        co_entry(&rwco);
+    } else {
+        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
+        qemu_coroutine_enter(co);
+        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
+    }
 
     return rwco.ret;
 }
--
2.9.3

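As an aside to the qemu-img -F patch above: the whole change hinges on one character pair in the getopt string ("F:"), which makes -F an option with a mandatory argument. A standalone sketch (plain C, not QEMU code; option names match the patch, everything else is made up):

#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *backing_file = NULL, *backing_fmt = NULL;
    int c;

    /* "B:F:" means both -B and -F consume the following argument. */
    while ((c = getopt(argc, argv, "B:F:")) != -1) {
        switch (c) {
        case 'B':
            backing_file = optarg;
            break;
        case 'F':
            backing_fmt = optarg;   /* shorthand for -o backing_fmt=... */
            break;
        }
    }
    printf("backing file: %s, format: %s\n",
           backing_file ? backing_file : "(none)",
           backing_fmt ? backing_fmt : "(none)");
    return 0;
}

With this in place, `-B base.img -F qcow2` (hypothetical file names) parses to the same result as `-B base.img -o backing_fmt=qcow2`.
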
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Split checking for reserved bits out of aligned offset check.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-11-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2.h | 1 +
 block/qcow2-refcount.c | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
 #define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
 
 #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+#define REFT_RESERVED_MASK 0x1ffULL
 
 #define INV_OFFSET (-1ULL)
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
 
     for(i = 0; i < s->refcount_table_size; i++) {
         uint64_t offset, cluster;
-        offset = s->refcount_table[i];
+        offset = s->refcount_table[i] & REFT_OFFSET_MASK;
         cluster = offset >> s->cluster_bits;
 
+        if (s->refcount_table[i] & REFT_RESERVED_MASK) {
+            fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
+                    "reserved bits set\n", i);
+            res->corruptions++;
+            *rebuild = true;
+            continue;
+        }
+
         /* Refcount blocks are cluster aligned */
         if (offset_into_cluster(s, offset)) {
             fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

Once the thread pool starts using aio_co_wake, it will also need
qemu_get_current_aio_context(). Make test-thread-pool create
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
and tests/iothread.c can provide the rest.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/test-thread-pool.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -XXX,XX +XXX,XX @@
 #include "qapi/error.h"
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
 
 static AioContext *ctx;
 static ThreadPool *pool;
@@ -XXX,XX +XXX,XX @@ static void test_cancel_async(void)
 int main(int argc, char **argv)
 {
     int ret;
-    Error *local_error = NULL;
 
-    init_clocks();
-
-    ctx = aio_context_new(&local_error);
-    if (!ctx) {
-        error_reportf_err(local_error, "Failed to create AIO Context: ");
-        exit(1);
-    }
+    qemu_init_main_loop(&error_abort);
+    ctx = qemu_get_current_aio_context();
     pool = aio_get_thread_pool(ctx);
 
     g_test_init(&argc, &argv, NULL);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
 
     ret = g_test_run();
 
-    aio_context_unref(ctx);
     return ret;
 }
--
2.9.3

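To make the new REFT_RESERVED_MASK check above concrete: a refcount table entry stores the refcount block offset in its upper bits, and the low 9 bits are reserved and must be zero. A standalone sketch (plain C, not QEMU code; the entry value is made up):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define REFT_OFFSET_MASK   0xfffffffffffffe00ULL
#define REFT_RESERVED_MASK 0x1ffULL

int main(void)
{
    uint64_t entry = 0x10043ULL;   /* hypothetical on-disk entry */

    printf("refcount block offset: 0x%" PRIx64 "\n",
           entry & REFT_OFFSET_MASK);          /* 0x10000 */
    if (entry & REFT_RESERVED_MASK) {
        /* 0x43 here: the image would be flagged for rebuild */
        printf("reserved bits set: 0x%" PRIx64 "\n",
               entry & REFT_RESERVED_MASK);
    }
    return 0;
}
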
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

We'll reuse the function to fix a wrong L2 entry bitmap. Support it now.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-6-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2-refcount.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ enum {
 };
 
 /*
- * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN.
+ * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN (or making all its present
+ * subclusters QCOW2_SUBCLUSTER_ZERO_PLAIN).
  *
  * This function decrements res->corruptions on success, so the caller is
  * responsible to increment res->corruptions prior to the call.
@@ -XXX,XX +XXX,XX @@ static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
     int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
     uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
     int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
-    uint64_t l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
 
-    set_l2_entry(s, l2_table, l2_index, l2_entry);
+    if (has_subclusters(s)) {
+        uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);
+
+        /* Allocated subclusters become zero */
+        l2_bitmap |= l2_bitmap << 32;
+        l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;
+
+        set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
+        set_l2_entry(s, l2_table, l2_index, 0);
+    } else {
+        set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
+    }
+
     ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
                                         false);
     if (metadata_overlap) {
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

This is in preparation for making qio_channel_yield work on
AioContexts other than the main one.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/io/channel.h | 25 +++++++++++++++++++++++++
 io/channel-command.c | 13 +++++++++++++
 io/channel-file.c | 11 +++++++++++
 io/channel-socket.c | 16 +++++++++++-----
 io/channel-tls.c | 12 ++++++++++++
 io/channel-watch.c | 6 ++++++
 io/channel.c | 11 +++++++++++
 7 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index XXXXXXX..XXXXXXX 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -XXX,XX +XXX,XX @@
 
 #include "qemu-common.h"
 #include "qom/object.h"
+#include "block/aio.h"
 
 #define TYPE_QIO_CHANNEL "qio-channel"
 #define QIO_CHANNEL(obj) \
@@ -XXX,XX +XXX,XX @@ struct QIOChannelClass {
                        off_t offset,
                        int whence,
                        Error **errp);
+    void (*io_set_aio_fd_handler)(QIOChannel *ioc,
+                                  AioContext *ctx,
+                                  IOHandler *io_read,
+                                  IOHandler *io_write,
+                                  void *opaque);
 };
 
 /* General I/O handling functions */
@@ -XXX,XX +XXX,XX @@ void qio_channel_yield(QIOChannel *ioc,
 void qio_channel_wait(QIOChannel *ioc,
                       GIOCondition condition);
 
+/**
+ * qio_channel_set_aio_fd_handler:
+ * @ioc: the channel object
+ * @ctx: the AioContext to set the handlers on
+ * @io_read: the read handler
+ * @io_write: the write handler
+ * @opaque: the opaque value passed to the handler
+ *
+ * This is used internally by qio_channel_yield(). It can
+ * be used by channel implementations to forward the handlers
+ * to another channel (e.g. from #QIOChannelTLS to the
+ * underlying socket).
+ */
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
+                                    AioContext *ctx,
+                                    IOHandler *io_read,
+                                    IOHandler *io_write,
+                                    void *opaque);
+
 #endif /* QIO_CHANNEL_H */
diff --git a/io/channel-command.c b/io/channel-command.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -XXX,XX +XXX,XX @@ static int qio_channel_command_close(QIOChannel *ioc,
 }
 
 
+static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
+                                                   AioContext *ctx,
+                                                   IOHandler *io_read,
+                                                   IOHandler *io_write,
+                                                   void *opaque)
+{
+    QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
+    aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque);
+    aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, opaque);
+}
+
+
 static GSource *qio_channel_command_create_watch(QIOChannel *ioc,
                                                  GIOCondition condition)
 {
@@ -XXX,XX +XXX,XX @@ static void qio_channel_command_class_init(ObjectClass *klass,
     ioc_klass->io_set_blocking = qio_channel_command_set_blocking;
     ioc_klass->io_close = qio_channel_command_close;
     ioc_klass->io_create_watch = qio_channel_command_create_watch;
+    ioc_klass->io_set_aio_fd_handler = qio_channel_command_set_aio_fd_handler;
 }
 
 static const TypeInfo qio_channel_command_info = {
diff --git a/io/channel-file.c b/io/channel-file.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -XXX,XX +XXX,XX @@ static int qio_channel_file_close(QIOChannel *ioc,
 }
 
 
+static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
+                                                AioContext *ctx,
+                                                IOHandler *io_read,
+                                                IOHandler *io_write,
+                                                void *opaque)
+{
+    QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
+    aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque);
+}
+
 static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
                                               GIOCondition condition)
 {
@@ -XXX,XX +XXX,XX @@ static void qio_channel_file_class_init(ObjectClass *klass,
     ioc_klass->io_seek = qio_channel_file_seek;
     ioc_klass->io_close = qio_channel_file_close;
     ioc_klass->io_create_watch = qio_channel_file_create_watch;
+    ioc_klass->io_set_aio_fd_handler = qio_channel_file_set_aio_fd_handler;
 }
 
 static const TypeInfo qio_channel_file_info = {
diff --git a/io/channel-socket.c b/io/channel-socket.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -XXX,XX +XXX,XX @@ qio_channel_socket_set_blocking(QIOChannel *ioc,
         qemu_set_block(sioc->fd);
     } else {
         qemu_set_nonblock(sioc->fd);
-#ifdef WIN32
-        WSAEventSelect(sioc->fd, ioc->event,
-                       FD_READ | FD_ACCEPT | FD_CLOSE |
-                       FD_CONNECT | FD_WRITE | FD_OOB);
-#endif
     }
     return 0;
 }
@@ -XXX,XX +XXX,XX @@ qio_channel_socket_shutdown(QIOChannel *ioc,
     return 0;
 }
 
+static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
+                                                  AioContext *ctx,
+                                                  IOHandler *io_read,
+                                                  IOHandler *io_write,
+                                                  void *opaque)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    aio_set_fd_handler(ctx, sioc->fd, false, io_read, io_write, NULL, opaque);
+}
+
 static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
                                                 GIOCondition condition)
 {
@@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_class_init(ObjectClass *klass,
     ioc_klass->io_set_cork = qio_channel_socket_set_cork;
     ioc_klass->io_set_delay = qio_channel_socket_set_delay;
     ioc_klass->io_create_watch = qio_channel_socket_create_watch;
+    ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
 }
 
 static const TypeInfo qio_channel_socket_info = {
diff --git a/io/channel-tls.c b/io/channel-tls.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -XXX,XX +XXX,XX @@ static int qio_channel_tls_close(QIOChannel *ioc,
     return qio_channel_close(tioc->master, errp);
 }
 
+static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc,
+                                               AioContext *ctx,
+                                               IOHandler *io_read,
+                                               IOHandler *io_write,
+                                               void *opaque)
+{
+    QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
+
+    qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
+}
+
 static GSource *qio_channel_tls_create_watch(QIOChannel *ioc,
                                              GIOCondition condition)
 {
@@ -XXX,XX +XXX,XX @@ static void qio_channel_tls_class_init(ObjectClass *klass,
     ioc_klass->io_close = qio_channel_tls_close;
     ioc_klass->io_shutdown = qio_channel_tls_shutdown;
     ioc_klass->io_create_watch = qio_channel_tls_create_watch;
+    ioc_klass->io_set_aio_fd_handler = qio_channel_tls_set_aio_fd_handler;
 }
 
 static const TypeInfo qio_channel_tls_info = {
diff --git a/io/channel-watch.c b/io/channel-watch.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel-watch.c
+++ b/io/channel-watch.c
@@ -XXX,XX +XXX,XX @@ GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
     GSource *source;
     QIOChannelSocketSource *ssource;
 
+#ifdef WIN32
+    WSAEventSelect(socket, ioc->event,
+                   FD_READ | FD_ACCEPT | FD_CLOSE |
+                   FD_CONNECT | FD_WRITE | FD_OOB);
+#endif
+
     source = g_source_new(&qio_channel_socket_source_funcs,
                           sizeof(QIOChannelSocketSource));
     ssource = (QIOChannelSocketSource *)source;
diff --git a/io/channel.c b/io/channel.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -XXX,XX +XXX,XX @@ GSource *qio_channel_create_watch(QIOChannel *ioc,
 }
 
 
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
+                                    AioContext *ctx,
+                                    IOHandler *io_read,
+                                    IOHandler *io_write,
+                                    void *opaque)
+{
+    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+    klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque);
+}
+
 guint qio_channel_add_watch(QIOChannel *ioc,
                             GIOCondition condition,
                             QIOChannelFunc func,
--
2.9.3

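The subcluster-bitmap manipulation in fix_l2_entry_by_zero above is terse, so here is a standalone sketch of the same arithmetic (plain C, not QEMU code; the bitmap value is made up). In an extended L2 bitmap, the low 32 bits say which subclusters are allocated and the high 32 bits say which read as zero; shifting the allocation bits up by 32 and masking keeps exactly the previously allocated subclusters, now flagged zero and deallocated:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* High 32 bits, corresponding to QCOW_L2_BITMAP_ALL_ZEROES. */
#define ALL_ZERO_BITS 0xffffffff00000000ULL

int main(void)
{
    uint64_t l2_bitmap = 0x00000000000000f5ULL; /* some subclusters allocated */

    l2_bitmap |= l2_bitmap << 32;  /* each allocated subcluster -> also zero */
    l2_bitmap &= ALL_ZERO_BITS;    /* drop the allocation bits */

    /* Prints 0x000000f500000000: same subclusters, now zero-flagged. */
    printf("resulting bitmap: 0x%016" PRIx64 "\n", l2_bitmap);
    return 0;
}
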
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Add helper to parse compressed l2_entry and use it everywhere instead
of open-coding.

Note that in most places we move to a precise coffset/csize instead of
sector-aligned values. Still, it should work well enough for updating
refcounts.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-4-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2.h | 3 ++-
 block/qcow2-cluster.c | 15 +++++++++++++++
 block/qcow2-refcount.c | 36 +++++++++++++++++-------------------
 block/qcow2.c | 9 ++-------
 4 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@
 
 /* Defined in the qcow2 spec (compressed cluster descriptor) */
 #define QCOW2_COMPRESSED_SECTOR_SIZE 512U
-#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL))
 
 /* Must be at least 2 to cover COW */
 #define MIN_L2_CACHE_SIZE 2 /* cache entries */
@@ -XXX,XX +XXX,XX @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                           uint64_t offset,
                                           int compressed_size,
                                           uint64_t *host_offset);
+void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
+                                     uint64_t *coffset, int *csize);
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
 void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ fail:
     g_free(l1_table);
     return ret;
 }
+
+void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
+                                     uint64_t *coffset, int *csize)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int nb_csectors;
+
+    assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
+
+    *coffset = l2_entry & s->cluster_offset_mask;
+
+    nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
+    *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
+        (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1));
+}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
     switch (ctype) {
     case QCOW2_CLUSTER_COMPRESSED:
         {
-            int64_t offset = (l2_entry & s->cluster_offset_mask)
-                & QCOW2_COMPRESSED_SECTOR_MASK;
-            int size = QCOW2_COMPRESSED_SECTOR_SIZE *
-                (((l2_entry >> s->csize_shift) & s->csize_mask) + 1);
-            qcow2_free_clusters(bs, offset, size, type);
+            uint64_t coffset;
+            int csize;
+
+            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
+            qcow2_free_clusters(bs, coffset, csize, type);
         }
         break;
     case QCOW2_CLUSTER_NORMAL:
@@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     bool l1_allocated = false;
     int64_t old_entry, old_l2_offset;
     unsigned slice, slice_size2, n_slices;
-    int i, j, l1_modified = 0, nb_csectors;
+    int i, j, l1_modified = 0;
     int ret;
 
     assert(addend >= -1 && addend <= 1);
@@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
                 switch (qcow2_get_cluster_type(bs, entry)) {
                 case QCOW2_CLUSTER_COMPRESSED:
-                    nb_csectors = ((entry >> s->csize_shift) &
-                                   s->csize_mask) + 1;
                     if (addend != 0) {
-                        uint64_t coffset = (entry & s->cluster_offset_mask)
-                            & QCOW2_COMPRESSED_SECTOR_MASK;
+                        uint64_t coffset;
+                        int csize;
+
+                        qcow2_parse_compressed_l2_entry(bs, entry,
+                                                        &coffset, &csize);
                         ret = update_refcount(
-                            bs, coffset,
-                            nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE,
+                            bs, coffset, csize,
                             abs(addend), addend < 0,
                             QCOW2_DISCARD_SNAPSHOT);
                         if (ret < 0) {
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     BDRVQcow2State *s = bs->opaque;
     uint64_t l2_entry;
     uint64_t next_contiguous_offset = 0;
-    int i, nb_csectors, ret;
+    int i, ret;
     size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
     g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
 
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
 
     /* Do the actual checks */
     for (i = 0; i < s->l2_size; i++) {
+        uint64_t coffset;
+        int csize;
         l2_entry = get_l2_entry(s, l2_table, i);
 
         switch (qcow2_get_cluster_type(bs, l2_entry)) {
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
             }
 
             /* Mark cluster as used */
-            nb_csectors = ((l2_entry >> s->csize_shift) &
-                           s->csize_mask) + 1;
-            l2_entry &= s->cluster_offset_mask;
+            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
             ret = qcow2_inc_refcounts_imrt(
-                bs, res, refcount_table, refcount_table_size,
-                l2_entry & QCOW2_COMPRESSED_SECTOR_MASK,
-                nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE);
+                bs, res, refcount_table, refcount_table_size, coffset, csize);
             if (ret < 0) {
                 return ret;
             }
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
                            size_t qiov_offset)
 {
     BDRVQcow2State *s = bs->opaque;
-    int ret = 0, csize, nb_csectors;
+    int ret = 0, csize;
     uint64_t coffset;
     uint8_t *buf, *out_buf;
     int offset_in_cluster = offset_into_cluster(s, offset);
 
-    assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
-
-    coffset = l2_entry & s->cluster_offset_mask;
-    nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
-    csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
-        (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
+    qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
 
     buf = g_try_malloc(csize);
     if (!buf) {
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

Support separate coroutines for reading and writing, and place the
read/write handlers on the AioContext that the QIOChannel is registered
with.

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/io/channel.h | 47 ++++++++++++++++++++++++++--
 io/channel.c | 86 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 109 insertions(+), 24 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index XXXXXXX..XXXXXXX 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -XXX,XX +XXX,XX @@
 
 #include "qemu-common.h"
 #include "qom/object.h"
+#include "qemu/coroutine.h"
 #include "block/aio.h"
 
 #define TYPE_QIO_CHANNEL "qio-channel"
@@ -XXX,XX +XXX,XX @@ struct QIOChannel {
     Object parent;
     unsigned int features; /* bitmask of QIOChannelFeatures */
     char *name;
+    AioContext *ctx;
+    Coroutine *read_coroutine;
+    Coroutine *write_coroutine;
 #ifdef _WIN32
     HANDLE event; /* For use with GSource on Win32 */
 #endif
@@ -XXX,XX +XXX,XX @@ guint qio_channel_add_watch(QIOChannel *ioc,
 
 
 /**
+ * qio_channel_attach_aio_context:
+ * @ioc: the channel object
+ * @ctx: the #AioContext to set the handlers on
+ *
+ * Request that qio_channel_yield() sets I/O handlers on
+ * the given #AioContext. If @ctx is %NULL, qio_channel_yield()
+ * uses QEMU's main thread event loop.
+ *
+ * You can move a #QIOChannel from one #AioContext to another even if
+ * I/O handlers are set for a coroutine. However, #QIOChannel provides
+ * no synchronization between the calls to qio_channel_yield() and
+ * qio_channel_attach_aio_context().
+ *
+ * Therefore you should first call qio_channel_detach_aio_context()
+ * to ensure that the coroutine is not entered concurrently. Then,
+ * while the coroutine has yielded, call qio_channel_attach_aio_context(),
+ * and then aio_co_schedule() to place the coroutine on the new
+ * #AioContext. The calls to qio_channel_detach_aio_context()
+ * and qio_channel_attach_aio_context() should be protected with
+ * aio_context_acquire() and aio_context_release().
+ */
+void qio_channel_attach_aio_context(QIOChannel *ioc,
+                                    AioContext *ctx);
+
+/**
+ * qio_channel_detach_aio_context:
+ * @ioc: the channel object
+ *
+ * Disable any I/O handlers set by qio_channel_yield(). With the
+ * help of aio_co_schedule(), this allows moving a coroutine that was
+ * paused by qio_channel_yield() to another context.
+ */
+void qio_channel_detach_aio_context(QIOChannel *ioc);
+
+/**
  * qio_channel_yield:
  * @ioc: the channel object
  * @condition: the I/O condition to wait for
  *
- * Yields execution from the current coroutine until
- * the condition indicated by @condition becomes
- * available.
+ * Yields execution from the current coroutine until the condition
+ * indicated by @condition becomes available. @condition must
+ * be either %G_IO_IN or %G_IO_OUT; it cannot contain both. In
+ * addition, no two coroutines can be waiting on the same condition
+ * and channel at the same time.
  *
  * This must only be called from coroutine context
  */
diff --git a/io/channel.c b/io/channel.c
index XXXXXXX..XXXXXXX 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/osdep.h"
 #include "io/channel.h"
 #include "qapi/error.h"
-#include "qemu/coroutine.h"
+#include "qemu/main-loop.h"
 
 bool qio_channel_has_feature(QIOChannel *ioc,
                              QIOChannelFeature feature)
@@ -XXX,XX +XXX,XX @@ off_t qio_channel_io_seek(QIOChannel *ioc,
 }
 
 
-typedef struct QIOChannelYieldData QIOChannelYieldData;
-struct QIOChannelYieldData {
-    QIOChannel *ioc;
-    Coroutine *co;
-};
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc);
 
+static void qio_channel_restart_read(void *opaque)
+{
+    QIOChannel *ioc = opaque;
+    Coroutine *co = ioc->read_coroutine;
+
+    ioc->read_coroutine = NULL;
+    qio_channel_set_aio_fd_handlers(ioc);
+    aio_co_wake(co);
+}
 
-static gboolean qio_channel_yield_enter(QIOChannel *ioc,
-                                        GIOCondition condition,
-                                        gpointer opaque)
+static void qio_channel_restart_write(void *opaque)
 {
-    QIOChannelYieldData *data = opaque;
-    qemu_coroutine_enter(data->co);
-    return FALSE;
+    QIOChannel *ioc = opaque;
+    Coroutine *co = ioc->write_coroutine;
+
+    ioc->write_coroutine = NULL;
+    qio_channel_set_aio_fd_handlers(ioc);
+    aio_co_wake(co);
 }
 
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
+{
+    IOHandler *rd_handler = NULL, *wr_handler = NULL;
+    AioContext *ctx;
+
+    if (ioc->read_coroutine) {
+        rd_handler = qio_channel_restart_read;
+    }
+    if (ioc->write_coroutine) {
+        wr_handler = qio_channel_restart_write;
+    }
+
+    ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
+    qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
+}
+
+void qio_channel_attach_aio_context(QIOChannel *ioc,
+                                    AioContext *ctx)
+{
+    AioContext *old_ctx;
+    if (ioc->ctx == ctx) {
+        return;
+    }
+
+    old_ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
+    qio_channel_set_aio_fd_handler(ioc, old_ctx, NULL, NULL, NULL);
+    ioc->ctx = ctx;
+    qio_channel_set_aio_fd_handlers(ioc);
+}
+
+void qio_channel_detach_aio_context(QIOChannel *ioc)
+{
+    ioc->read_coroutine = NULL;
+    ioc->write_coroutine = NULL;
+    qio_channel_set_aio_fd_handlers(ioc);
+    ioc->ctx = NULL;
+}
 
 void coroutine_fn qio_channel_yield(QIOChannel *ioc,
                                     GIOCondition condition)
 {
-    QIOChannelYieldData data;
-
     assert(qemu_in_coroutine());
-    data.ioc = ioc;
-    data.co = qemu_coroutine_self();
-    qio_channel_add_watch(ioc,
-                          condition,
-                          qio_channel_yield_enter,
-                          &data,
-                          NULL);
+    if (condition == G_IO_IN) {
+        assert(!ioc->read_coroutine);
+        ioc->read_coroutine = qemu_coroutine_self();
+    } else if (condition == G_IO_OUT) {
+        assert(!ioc->write_coroutine);
+        ioc->write_coroutine = qemu_coroutine_self();
+    } else {
+        abort();
+    }
+    qio_channel_set_aio_fd_handlers(ioc);
     qemu_coroutine_yield();
 }
--
2.9.3

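For readers less familiar with the compressed cluster descriptor that qcow2_parse_compressed_l2_entry above decodes, here is a standalone sketch of the same arithmetic (plain C, not QEMU code; the field layout is shown for a hypothetical 64 KiB-cluster image and the entry value is made up):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define SECTOR_SIZE 512U

int main(void)
{
    unsigned cluster_bits = 16;                              /* 64 KiB clusters */
    unsigned csize_shift = 62 - (cluster_bits - 8);          /* 54 */
    uint64_t csize_mask = (1ULL << (cluster_bits - 8)) - 1;  /* 0xff */
    uint64_t offset_mask = (1ULL << csize_shift) - 1;

    /* Hypothetical descriptor: 4 sectors starting at host offset 0x50123. */
    uint64_t l2_entry = (3ULL << csize_shift) | 0x50123ULL;

    uint64_t coffset = l2_entry & offset_mask;
    int nb_csectors = ((l2_entry >> csize_shift) & csize_mask) + 1;
    int csize = nb_csectors * SECTOR_SIZE - (coffset & (SECTOR_SIZE - 1));

    /* 4 * 512 bytes, minus the 0x123 bytes preceding the data in sector 0 */
    printf("coffset=0x%" PRIx64 " csize=%d\n", coffset, csize);
    return 0;
}
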
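And a usage sketch for the reworked qio_channel_yield() (assumes QEMU's coroutine and I/O-channel APIs, so it is not compilable standalone; the function name is made up): a coroutine that hits QIO_CHANNEL_ERR_BLOCK simply parks itself as the channel's read_coroutine and is woken on whichever AioContext the channel is attached to.

/* Hypothetical helper, for illustration only. */
static ssize_t coroutine_fn read_with_yield(QIOChannel *ioc,
                                            char *buf, size_t len)
{
    ssize_t ret;

    for (;;) {
        ret = qio_channel_read(ioc, buf, len, NULL);
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            /* Sleep until readable; restart handler calls aio_co_wake(). */
            qio_channel_yield(ioc, G_IO_IN);
            continue;
        }
        return ret;
    }
}
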
From: Max Reitz <mreitz@redhat.com>

gluster's block-status implementation is basically a copy of that in
block/file-posix.c; the only thing missing is aligning trailing data
extents to the request alignment (as added by commit 9c3db310ff0).

Note that 9c3db310ff0 mentions that "there seems to be no other block
driver that sets request_alignment and [...]", but while block/gluster.c
does indeed not set request_alignment, block/io.c's
bdrv_refresh_limits() will still default to an alignment of 512 because
block/gluster.c does not provide a byte-aligned read function.
Therefore, unaligned tails can conceivably occur, and so we should apply
the change from 9c3db310ff0 to gluster's block-status implementation.

Reported-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20210805143603.59503-1-mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/gluster.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/block/gluster.c b/block/gluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
     off_t data = 0, hole = 0;
     int ret = -EINVAL;
 
+    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
+
     if (!s->fd) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
         /* On a data extent, compute bytes to the end of the extent,
          * possibly including a partial sector at EOF. */
         *pnum = MIN(bytes, hole - offset);
+
+        /*
+         * We are not allowed to return partial sectors, though, so
+         * round up if necessary.
+         */
+        if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
+            int64_t file_length = qemu_gluster_getlength(bs);
+            if (file_length > 0) {
+                /* Ignore errors, this is just a safeguard */
+                assert(hole == file_length);
+                *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
+            }
+        }
+
         ret = BDRV_BLOCK_DATA;
     } else {
         /* On a hole, compute bytes to the beginning of the next extent. */
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

In the client, read the reply headers from a coroutine, switching the
read side between the "read header" coroutine and the I/O coroutine that
reads the body of the reply.

In the server, if the server can read more requests it will create a new
"read request" coroutine as soon as a request has been read. Otherwise,
the new coroutine is created in nbd_request_put.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/nbd-client.h | 2 +-
 block/nbd-client.c | 117 ++++++++++++++++++++++++-----------------------
 nbd/client.c | 2 +-
 nbd/common.c | 9 +----
 nbd/server.c | 94 +++++++++++++-----------------------
 5 files changed, 83 insertions(+), 141 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index XXXXXXX..XXXXXXX 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -XXX,XX +XXX,XX @@ typedef struct NBDClientSession {
 
     CoMutex send_mutex;
     CoQueue free_sema;
-    Coroutine *send_coroutine;
+    Coroutine *read_reply_co;
     int in_flight;
 
     Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
diff --git a/block/nbd-client.c b/block/nbd-client.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -XXX,XX +XXX,XX @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
 
-static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
+static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
 {
+    NBDClientSession *s = nbd_get_client_session(bs);
     int i;
 
     for (i = 0; i < MAX_NBD_REQUESTS; i++) {
@@ -XXX,XX +XXX,XX @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
             qemu_coroutine_enter(s->recv_coroutine[i]);
         }
     }
+    BDRV_POLL_WHILE(bs, s->read_reply_co);
 }
 
 static void nbd_teardown_connection(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
     qio_channel_shutdown(client->ioc,
                          QIO_CHANNEL_SHUTDOWN_BOTH,
                          NULL);
-    nbd_recv_coroutines_enter_all(client);
+    nbd_recv_coroutines_enter_all(bs);
 
     nbd_client_detach_aio_context(bs);
     object_unref(OBJECT(client->sioc));
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
     client->ioc = NULL;
 }
 
-static void nbd_reply_ready(void *opaque)
+static coroutine_fn void nbd_read_reply_entry(void *opaque)
 {
-    BlockDriverState *bs = opaque;
-    NBDClientSession *s = nbd_get_client_session(bs);
+    NBDClientSession *s = opaque;
     uint64_t i;
     int ret;
 
-    if (!s->ioc) { /* Already closed */
-        return;
-    }
-
-    if (s->reply.handle == 0) {
-        /* No reply already in flight. Fetch a header. It is possible
-         * that another thread has done the same thing in parallel, so
-         * the socket is not readable anymore.
-         */
+    for (;;) {
+        assert(s->reply.handle == 0);
         ret = nbd_receive_reply(s->ioc, &s->reply);
-        if (ret == -EAGAIN) {
-            return;
-        }
         if (ret < 0) {
-            s->reply.handle = 0;
-            goto fail;
+            break;
         }
-    }
 
-    /* There's no need for a mutex on the receive side, because the
-     * handler acts as a synchronization point and ensures that only
-     * one coroutine is called until the reply finishes. */
-    i = HANDLE_TO_INDEX(s, s->reply.handle);
-    if (i >= MAX_NBD_REQUESTS) {
-        goto fail;
-    }
+        /* There's no need for a mutex on the receive side, because the
+         * handler acts as a synchronization point and ensures that only
+         * one coroutine is called until the reply finishes.
+         */
+        i = HANDLE_TO_INDEX(s, s->reply.handle);
+        if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
+            break;
+        }
 
-    if (s->recv_coroutine[i]) {
-        qemu_coroutine_enter(s->recv_coroutine[i]);
-        return;
+        /* We're woken up by the recv_coroutine itself. Note that there
+         * is no race between yielding and reentering read_reply_co. This
+         * is because:
+         *
+         * - if recv_coroutine[i] runs on the same AioContext, it is only
+         *   entered after we yield
+         *
+         * - if recv_coroutine[i] runs on a different AioContext, reentering
+         *   read_reply_co happens through a bottom half, which can only
+         *   run after we yield.
+         */
+        aio_co_wake(s->recv_coroutine[i]);
+        qemu_coroutine_yield();
     }
-
-fail:
-    nbd_teardown_connection(bs);
-}
-
-static void nbd_restart_write(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-
-    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
+    s->read_reply_co = NULL;
 }
 
 static int nbd_co_send_request(BlockDriverState *bs,
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
                                QEMUIOVector *qiov)
 {
     NBDClientSession *s = nbd_get_client_session(bs);
-    AioContext *aio_context;
     int rc, ret, i;
 
     qemu_co_mutex_lock(&s->send_mutex);
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
         return -EPIPE;
     }
 
-    s->send_coroutine = qemu_coroutine_self();
-    aio_context = bdrv_get_aio_context(bs);
-
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, nbd_restart_write, NULL, bs);
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
     } else {
         rc = nbd_send_request(s->ioc, request);
     }
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, NULL, NULL, bs);
-    s->send_coroutine = NULL;
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
 }
@@ -XXX,XX +XXX,XX @@ static void nbd_co_receive_reply(NBDClientSession *s,
 {
     int ret;
 
-    /* Wait until we're woken up by the read handler. TODO: perhaps
-     * peek at the next reply and avoid yielding if it's ours? */
+    /* Wait until we're woken up by nbd_read_reply_entry. */
     qemu_coroutine_yield();
     *reply = s->reply;
     if (reply->handle != request->handle ||
@@ -XXX,XX +XXX,XX @@ static void nbd_coroutine_start(NBDClientSession *s,
     /* s->recv_coroutine[i] is set as soon as we get the send_lock. */
 }
 
-static void nbd_coroutine_end(NBDClientSession *s,
+static void nbd_coroutine_end(BlockDriverState *bs,
                               NBDRequest *request)
 {
+    NBDClientSession *s = nbd_get_client_session(bs);
     int i = HANDLE_TO_INDEX(s, request->handle);
+
     s->recv_coroutine[i] = NULL;
-    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_queue_next(&s->free_sema);
+    s->in_flight--;
+    qemu_co_queue_next(&s->free_sema);
+
+    /* Kick the read_reply_co to get the next reply. */
+    if (s->read_reply_co) {
+        aio_co_wake(s->read_reply_co);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, qiov);
     }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_flush(BlockDriverState *bs)
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(client, &request);
+    nbd_coroutine_end(bs, &request);
     return -reply.error;
 
 }
 
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
-    aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sioc->fd,
-                       false, NULL, NULL, NULL, NULL);
+    NBDClientSession *client = nbd_get_client_session(bs);
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
 }
 
 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context)
 {
-    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
-                       false, nbd_reply_ready, NULL, NULL, bs);
+    NBDClientSession *client = nbd_get_client_session(bs);
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
+    aio_co_schedule(new_context, client->read_reply_co);
 }
 
 void nbd_client_close(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ int nbd_client_init(BlockDriverState *bs,
     /* Now that we're connected, set the socket to be non-blocking and
      * kick the reply mechanism. */
     qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-
+    client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
     nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
     logout("Established connection with NBD server\n");
diff --git a/nbd/client.c b/nbd/client.c
index XXXXXXX..XXXXXXX 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -XXX,XX +XXX,XX @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
     ssize_t ret;
 
     ret = read_sync(ioc, buf, sizeof(buf));
-    if (ret < 0) {
+    if (ret <= 0) {
         return ret;
     }
diff --git a/nbd/common.c b/nbd/common.c
index XXXXXXX..XXXXXXX 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -XXX,XX +XXX,XX @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
         }
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
-                /* XXX figure out if we can create a variant on
-                 * qio_channel_yield() that works with AIO contexts
-                 * and consider using that in this branch */
-                qemu_coroutine_yield();
-            } else if (done) {
-                /* XXX this is needed by nbd_reply_ready. */
-                qio_channel_wait(ioc,
-                                 do_read ? G_IO_IN : G_IO_OUT);
+                qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT);
             } else {
                 return -EAGAIN;
             }
diff --git a/nbd/server.c b/nbd/server.c
index XXXXXXX..XXXXXXX 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -XXX,XX +XXX,XX @@ struct NBDClient {
     CoMutex send_lock;
     Coroutine *send_coroutine;
 
-    bool can_read;
-
     QTAILQ_ENTRY(NBDClient) next;
     int nb_requests;
     bool closing;
@@ -XXX,XX +XXX,XX @@ struct NBDClient {
 
 /* That's all folks */
 
-static void nbd_set_handlers(NBDClient *client);
-static void nbd_unset_handlers(NBDClient *client);
-static void nbd_update_can_read(NBDClient *client);
+static void nbd_client_receive_next_request(NBDClient *client);
 
 static gboolean nbd_negotiate_continue(QIOChannel *ioc,
                                        GIOCondition condition,
@@ -XXX,XX +XXX,XX @@ void nbd_client_put(NBDClient *client)
          */
         assert(client->closing);
 
-        nbd_unset_handlers(client);
+        qio_channel_detach_aio_context(client->ioc);
         object_unref(OBJECT(client->sioc));
         object_unref(OBJECT(client->ioc));
         if (client->tlscreds) {
@@ -XXX,XX +XXX,XX @@ static NBDRequestData *nbd_request_get(NBDClient *client)
 
     assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
     client->nb_requests++;
-    nbd_update_can_read(client);
 
     req = g_new0(NBDRequestData, 1);
     nbd_client_get(client);
@@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req)
     g_free(req);
 
     client->nb_requests--;
-    nbd_update_can_read(client);
+    nbd_client_receive_next_request(client);
+
     nbd_client_put(client);
 }
 
@@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
     exp->ctx = ctx;
 
     QTAILQ_FOREACH(client, &exp->clients, next) {
-        nbd_set_handlers(client);
+        qio_channel_attach_aio_context(client->ioc, ctx);
+        if (client->recv_coroutine) {
+            aio_co_schedule(ctx, client->recv_coroutine);
+        }
+        if (client->send_coroutine) {
+            aio_co_schedule(ctx, client->send_coroutine);
+        }
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void blk_aio_detach(void *opaque)
     TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
 
     QTAILQ_FOREACH(client, &exp->clients, next) {
-        nbd_unset_handlers(client);
+        qio_channel_detach_aio_context(client->ioc);
     }
 
     exp->ctx = NULL;
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
     g_assert(qemu_in_coroutine());
     qemu_co_mutex_lock(&client->send_lock);
     client->send_coroutine = qemu_coroutine_self();
-    nbd_set_handlers(client);
 
     if (!len) {
         rc = nbd_send_reply(client->ioc, reply);
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
     }
 
     client->send_coroutine = NULL;
-    nbd_set_handlers(client);
     qemu_co_mutex_unlock(&client->send_lock);
     return rc;
 }
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
     ssize_t rc;
 
     g_assert(qemu_in_coroutine());
-    client->recv_coroutine = qemu_coroutine_self();
-    nbd_update_can_read(client);
-
+    assert(client->recv_coroutine == qemu_coroutine_self());
     rc = nbd_receive_request(client->ioc, request);
     if (rc < 0) {
         if (rc != -EAGAIN) {
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
 
 out:
     client->recv_coroutine = NULL;
-    nbd_update_can_read(client);
+    nbd_client_receive_next_request(client);
 
     return rc;
 }
 
-static void nbd_trip(void *opaque)
+/* Owns a reference to the NBDClient passed as opaque. */
+static coroutine_fn void nbd_trip(void *opaque)
 {
     NBDClient *client = opaque;
     NBDExport *exp = client->exp;
     NBDRequestData *req;
-    NBDRequest request;
+    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
     NBDReply reply;
     ssize_t ret;
     int flags;
 
     TRACE("Reading request.");
     if (client->closing) {
+        nbd_client_put(client);
         return;
     }
 
@@ -XXX,XX +XXX,XX @@ static void nbd_trip(void *opaque)
 
 done:
     nbd_request_put(req);
+    nbd_client_put(client);
     return;
 
 out:
     nbd_request_put(req);
     client_close(client);
+    nbd_client_put(client);
 }
 
-static void nbd_read(void *opaque)
+static void nbd_client_receive_next_request(NBDClient *client)
 {
-    NBDClient *client = opaque;
-
-    if (client->recv_coroutine) {
-        qemu_coroutine_enter(client->recv_coroutine);
-    } else {
-        qemu_coroutine_enter(qemu_coroutine_create(nbd_trip, client));
-    }
-}
-
-static void nbd_restart_write(void *opaque)
-{
-    NBDClient *client = opaque;
-
-    qemu_coroutine_enter(client->send_coroutine);
-}
-
-static void nbd_set_handlers(NBDClient *client)
-{
-    if (client->exp && client->exp->ctx) {
-        aio_set_fd_handler(client->exp->ctx, client->sioc->fd, true,
-                           client->can_read ? nbd_read : NULL,
-                           client->send_coroutine ? nbd_restart_write : NULL,
-                           NULL, client);
-    }
-}
-
-static void nbd_unset_handlers(NBDClient *client)
-{
-    if (client->exp && client->exp->ctx) {
-        aio_set_fd_handler(client->exp->ctx, client->sioc->fd, true, NULL,
-                           NULL, NULL, NULL);
-    }
-}
-
-static void nbd_update_can_read(NBDClient *client)
-{
-    bool can_read = client->recv_coroutine ||
-                    client->nb_requests < MAX_NBD_REQUESTS;
-
-    if (can_read != client->can_read) {
-        client->can_read = can_read;
-        nbd_set_handlers(client);
-
-        /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
-         * in nbd_set_handlers() will have taken care of that */
+    if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
+        nbd_client_get(client);
+        client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
+        aio_co_schedule(client->exp->ctx, client->recv_coroutine);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void nbd_co_client_start(void *opaque)
         goto out;
     }
     qemu_co_mutex_init(&client->send_lock);
-    nbd_set_handlers(client);
 
     if (exp) {
         QTAILQ_INSERT_TAIL(&exp->clients, client, next);
     }
+
+    nbd_client_receive_next_request(client);
+
 out:
     g_free(data);
 }
@@ -XXX,XX +XXX,XX @@ void nbd_client_new(NBDExport *exp,
     object_ref(OBJECT(client->sioc));
     client->ioc = QIO_CHANNEL(sioc);
     object_ref(OBJECT(client->ioc));
-    client->can_read = true;
     client->close = close_fn;
 
     data->client = client;
--
2.9.3

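A quick illustration of the rounding that the gluster block-status patch above performs on an unaligned data tail (standalone sketch, plain C, not QEMU code; the values are made up):

#include <stdio.h>
#include <stdint.h>

#define ROUND_UP(n, d) ((((n) + (d) - 1) / (d)) * (d))

int main(void)
{
    uint64_t request_alignment = 512;  /* the default from bdrv_refresh_limits */
    uint64_t pnum = 1000;              /* hypothetical: data extent ends at EOF
                                          in the middle of a sector */

    /* Block-status must not report a partial sector, so round up. */
    if (pnum % request_alignment) {
        pnum = ROUND_UP(pnum, request_alignment);
    }
    printf("reported *pnum: %" PRIu64 "\n", pnum);   /* 1024 */
    return 0;
}
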
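And one more aside on the NBD client patch: the HANDLE_TO_INDEX/INDEX_TO_HANDLE macros kept in the diff are a plain XOR, so the same operation maps in both directions and lets the reply reader route each reply to recv_coroutine[i]. A standalone sketch (plain C, not QEMU code; the cookie value is made up):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
    uint64_t bs_cookie = 0x7f1234560000ULL; /* models (uint64_t)(intptr_t)bs */
    uint64_t index = 5;

    uint64_t handle = index ^ bs_cookie;    /* INDEX_TO_HANDLE */
    uint64_t back   = handle ^ bs_cookie;   /* HANDLE_TO_INDEX: XOR twice */

    printf("index=%" PRIu64 " handle=0x%" PRIx64 " back=%" PRIu64 "\n",
           index, handle, back);
    return 0;
}
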
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-8-vsementsov@virtuozzo.com>
[hreitz: Separated `type` declaration from statements]
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2.h | 1 +
 block/qcow2-refcount.c | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
 
 #define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
 #define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
 
 #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     for (i = 0; i < s->l2_size; i++) {
         uint64_t coffset;
         int csize;
+        QCow2ClusterType type;
+
         l2_entry = get_l2_entry(s, l2_table, i);
         l2_bitmap = get_l2_bitmap(s, l2_table, i);
+        type = qcow2_get_cluster_type(bs, l2_entry);
+
+        if (type != QCOW2_CLUSTER_COMPRESSED) {
+            /* Check reserved bits of Standard Cluster Descriptor */
+            if (l2_entry & L2E_STD_RESERVED_MASK) {
+                fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
+                        "%" PRIx64 "\n", l2_entry);
+                res->corruptions++;
+            }
+        }
 
-        switch (qcow2_get_cluster_type(bs, l2_entry)) {
+        switch (type) {
         case QCOW2_CLUSTER_COMPRESSED:
             /* Compressed clusters don't have QCOW_OFLAG_COPIED */
             if (l2_entry & QCOW_OFLAG_COPIED) {
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

As a small step towards the introduction of multiqueue, we want
coroutines to remain on the same AioContext that started them,
unless they are moved explicitly with e.g. aio_co_schedule. This patch
avoids that coroutines switch AioContext when they use a CoMutex.
For now it does not make much of a difference, because the CoMutex
is not thread-safe and the AioContext itself is used to protect the
CoMutex from concurrent access. However, this is going to change.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/qemu-coroutine-lock.c | 5 ++---
 util/trace-events | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
 #include "qemu/queue.h"
+#include "block/aio.h"
 #include "trace.h"
 
 void qemu_co_queue_init(CoQueue *queue)
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_run_restart(Coroutine *co)
 
 static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
 {
-    Coroutine *self = qemu_coroutine_self();
     Coroutine *next;
 
     if (QSIMPLEQ_EMPTY(&queue->entries)) {
@@ -XXX,XX +XXX,XX @@ static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
 
     while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
-        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
-        trace_qemu_co_queue_next(next);
+        aio_co_wake(next);
         if (single) {
             break;
         }
diff --git a/util/trace-events b/util/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -XXX,XX +XXX,XX @@ qemu_coroutine_terminate(void *co) "self %p"
 
 # util/qemu-coroutine-lock.c
 qemu_co_queue_run_restart(void *co) "co %p"
-qemu_co_queue_next(void *nxt) "next %p"
 qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
 qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
 qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
--
2.9.3

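The key difference between the old and the new wakeup path above: inserting
into self->co_queue_wakeup ran the woken coroutine in whatever context the
waker happened to be in, while aio_co_wake() re-enters it in the AioContext
it was last running in. A rough sketch of the semantics (co->ctx stands for
the coroutine's home context, which the real implementation tracks
internally; this is illustration, not the literal code):

    /* aio_co_wake(co) behaves roughly like: */
    if (co_home_ctx == qemu_get_current_aio_context()) {
        qemu_coroutine_enter(co);        /* same event loop: enter directly */
    } else {
        aio_co_schedule(co_home_ctx, co); /* otherwise: defer to its loop */
    }
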
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Let's pass the whole L2 entry and not bother with
L2E_COMPRESSED_OFFSET_SIZE_MASK.

It also helps further refactoring that adds a generic
qcow2_parse_compressed_l2_entry() helper.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-3-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2.h         |  1 -
 block/qcow2-cluster.c |  5 ++---
 block/qcow2.c         | 12 +++++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
 
 #define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
 #define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
 
 #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
 
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
  * offset needs to be aligned to a cluster boundary.
  *
  * If the cluster is unallocated then *host_offset will be 0.
- * If the cluster is compressed then *host_offset will contain the
- * complete compressed cluster descriptor.
+ * If the cluster is compressed then *host_offset will contain the l2 entry.
  *
  * On entry, *bytes is the maximum number of contiguous bytes starting at
  * offset that we are interested in.
@@ -XXX,XX +XXX,XX @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
             ret = -EIO;
             goto fail;
         }
-        *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK;
+        *host_offset = l2_entry;
         break;
     case QCOW2_SUBCLUSTER_ZERO_PLAIN:
     case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ typedef struct {
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
-                           uint64_t cluster_descriptor,
+                           uint64_t l2_entry,
                            uint64_t offset,
                            uint64_t bytes,
                            QEMUIOVector *qiov,
@@ -XXX,XX +XXX,XX @@ typedef struct Qcow2AioTask {
 
     BlockDriverState *bs;
     QCow2SubclusterType subcluster_type; /* only for read */
-    uint64_t host_offset; /* or full descriptor in compressed clusters */
+    uint64_t host_offset; /* or l2_entry for compressed read */
     uint64_t offset;
     uint64_t bytes;
     QEMUIOVector *qiov;
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
-                           uint64_t cluster_descriptor,
+                           uint64_t l2_entry,
                            uint64_t offset,
                            uint64_t bytes,
                            QEMUIOVector *qiov,
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
     uint8_t *buf, *out_buf;
     int offset_in_cluster = offset_into_cluster(s, offset);
 
-    coffset = cluster_descriptor & s->cluster_offset_mask;
-    nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1;
+    assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
+
+    coffset = l2_entry & s->cluster_offset_mask;
+    nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
     csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
             (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
 
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

Keep the coroutine on the same AioContext. Without this change,
there would be a race between yielding the coroutine and reentering it.
While the race cannot happen now, because the code only runs from a single
AioContext, this will change with multiqueue support in the block layer.

While doing the change, replace the custom bottom half with aio_co_schedule.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkdebug.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -XXX,XX +XXX,XX @@ out:
     return ret;
 }
 
-static void error_callback_bh(void *opaque)
-{
-    Coroutine *co = opaque;
-    qemu_coroutine_enter(co);
-}
-
 static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
 {
     BDRVBlkdebugState *s = bs->opaque;
@@ -XXX,XX +XXX,XX @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
     }
 
     if (!immediately) {
-        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
-                                qemu_coroutine_self());
+        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
         qemu_coroutine_yield();
     }
--
2.9.3
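The blkdebug change is the canonical way for a coroutine to defer itself
without leaving its AioContext: schedule yourself, then yield. The pattern in
isolation, as a minimal sketch (the function name is illustrative; the API
calls are real):

    /* Delay the current coroutine by one trip through its own event loop. */
    static coroutine_fn void my_defer(void)
    {
        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
        qemu_coroutine_yield();
        /* resumed here, from the event loop of the same AioContext */
    }
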
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Split fix_l2_entry_by_zero() out of check_refcounts_l2() to be
reused in a further patch.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-5-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2-refcount.c | 87 +++++++++++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 27 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ enum {
     CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
 };
 
+/*
+ * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN.
+ *
+ * This function decrements res->corruptions on success, so the caller is
+ * responsible to increment res->corruptions prior to the call.
+ *
+ * On failure in-memory @l2_table may be modified.
+ */
+static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
+                                uint64_t l2_offset,
+                                uint64_t *l2_table, int l2_index, bool active,
+                                bool *metadata_overlap)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int ret;
+    int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
+    uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
+    int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
+    uint64_t l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
+
+    set_l2_entry(s, l2_table, l2_index, l2_entry);
+    ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
+                                        false);
+    if (metadata_overlap) {
+        *metadata_overlap = ret < 0;
+    }
+    if (ret < 0) {
+        fprintf(stderr, "ERROR: Overlap check failed\n");
+        goto fail;
+    }
+
+    ret = bdrv_pwrite_sync(bs->file, l2e_offset, &l2_table[idx],
+                           l2_entry_size(s));
+    if (ret < 0) {
+        fprintf(stderr, "ERROR: Failed to overwrite L2 "
+                "table entry: %s\n", strerror(-ret));
+        goto fail;
+    }
+
+    res->corruptions--;
+    res->corruptions_fixed++;
+    return 0;
+
+fail:
+    res->check_errors++;
+    return ret;
+}
+
 /*
  * Increases the refcount in the given refcount table for the all clusters
  * referenced in the L2 table. While doing so, performs some checks on L2
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     int i, ret;
     size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
     g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
+    bool metadata_overlap;
 
     /* Read L2 table from disk */
     ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes);
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                     fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
                     offset);
             if (fix & BDRV_FIX_ERRORS) {
-                int idx = i * (l2_entry_size(s) / sizeof(uint64_t));
-                uint64_t l2e_offset =
-                    l2_offset + (uint64_t)i * l2_entry_size(s);
-                int ign = active ? QCOW2_OL_ACTIVE_L2 :
-                    QCOW2_OL_INACTIVE_L2;
-
-                l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
-                set_l2_entry(s, l2_table, i, l2_entry);
-                ret = qcow2_pre_write_overlap_check(bs, ign,
-                        l2e_offset, l2_entry_size(s), false);
-                if (ret < 0) {
-                    fprintf(stderr, "ERROR: Overlap check failed\n");
-                    res->check_errors++;
+                ret = fix_l2_entry_by_zero(bs, res, l2_offset,
+                                           l2_table, i, active,
+                                           &metadata_overlap);
+                if (metadata_overlap) {
                     /*
                      * Something is seriously wrong, so abort checking
                      * this L2 table.
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                     return ret;
                 }
 
-                ret = bdrv_pwrite_sync(bs->file, l2e_offset,
-                                       &l2_table[idx],
-                                       l2_entry_size(s));
-                if (ret < 0) {
-                    fprintf(stderr, "ERROR: Failed to overwrite L2 "
-                            "table entry: %s\n", strerror(-ret));
-                    res->check_errors++;
-                    /*
-                     * Do not abort, continue checking the rest of this
-                     * L2 table's entries.
-                     */
-                } else {
-                    res->corruptions--;
-                    res->corruptions_fixed++;
+                if (ret == 0) {
                     /*
                      * Skip marking the cluster as used
                      * (it is unused now).
                      */
                     continue;
                 }
+
+                /*
+                 * Failed to fix.
+                 * Do not abort, continue checking the rest of this
+                 * L2 table's entries.
+                 */
             }
         } else {
             fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

qed_aio_start_io and qed_aio_next_io will not have to acquire/release
the AioContext, while qed_aio_next_io_cb will. Split the functionality
and gain a little type-safety in the process.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
     return l2_table;
 }
 
-static void qed_aio_next_io(void *opaque, int ret);
+static void qed_aio_next_io(QEDAIOCB *acb, int ret);
+
+static void qed_aio_start_io(QEDAIOCB *acb)
+{
+    qed_aio_next_io(acb, 0);
+}
+
+static void qed_aio_next_io_cb(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+
+    qed_aio_next_io(acb, ret);
+}
 
 static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
@@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 
     acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
     if (acb) {
-        qed_aio_next_io(acb, 0);
+        qed_aio_start_io(acb);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
         QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
         acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
         if (acb) {
-            qed_aio_next_io(acb, 0);
+            qed_aio_start_io(acb);
         } else if (s->header.features & QED_F_NEED_CHECK) {
             qed_start_need_check_timer(s);
         }
@@ -XXX,XX +XXX,XX @@ static void qed_commit_l2_update(void *opaque, int ret)
     acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
     assert(acb->request.l2_table != NULL);
 
-    qed_aio_next_io(opaque, ret);
+    qed_aio_next_io(acb, ret);
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
     if (need_alloc) {
         /* Write out the whole new L2 table */
         qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
-                            qed_aio_write_l1_update, acb);
+                           qed_aio_write_l1_update, acb);
     } else {
         /* Write out only the updated part of the L2 table */
         qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
-                            qed_aio_next_io, acb);
+                           qed_aio_next_io_cb, acb);
     }
     return;
 
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
     }
 
     if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
-        next_fn = qed_aio_next_io;
+        next_fn = qed_aio_next_io_cb;
     } else {
         if (s->bs->backing) {
             next_fn = qed_aio_write_flush_before_l2_update;
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     if (acb->flags & QED_AIOCB_ZERO) {
         /* Skip ahead if the clusters are already zero */
         if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
-            qed_aio_next_io(acb, 0);
+            qed_aio_start_io(acb);
             return;
         }
 
@@ -XXX,XX +XXX,XX @@ static void qed_aio_read_data(void *opaque, int ret,
     /* Handle zero cluster and backing file reads */
     if (ret == QED_CLUSTER_ZERO) {
         qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        qed_aio_next_io(acb, 0);
+        qed_aio_start_io(acb);
         return;
     } else if (ret != QED_CLUSTER_FOUND) {
         qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              &acb->backing_qiov, qed_aio_next_io, acb);
+                              &acb->backing_qiov, qed_aio_next_io_cb, acb);
         return;
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
     bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-                   qed_aio_next_io, acb);
+                   qed_aio_next_io_cb, acb);
     return;
 
 err:
@@ -XXX,XX +XXX,XX @@ err:
 /**
  * Begin next I/O or complete the request
  */
-static void qed_aio_next_io(void *opaque, int ret)
+static void qed_aio_next_io(QEDAIOCB *acb, int ret)
 {
-    QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
                                 qed_aio_write_data : qed_aio_read_data;
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
     qemu_iovec_init(&acb->cur_qiov, qiov->niov);
 
     /* Start request */
-    qed_aio_next_io(acb, 0);
+    qed_aio_start_io(acb);
     return &acb->common;
 }
--
2.9.3
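The qed split is a small type-safety pattern worth naming: keep the worker
function typed, and add a thin void*-taking adapter only where a generic
completion callback signature is required. A sketch (MyAIOCB and the
function names are illustrative):

    static void my_next_io(MyAIOCB *acb, int ret);   /* typed worker */

    static void my_start_io(MyAIOCB *acb)            /* internal callers */
    {
        my_next_io(acb, 0);                          /* no cast needed */
    }

    static void my_next_io_cb(void *opaque, int ret) /* callback contexts */
    {
        MyAIOCB *acb = opaque;

        my_next_io(acb, ret);
    }

It also gives the upcoming locking work a single place to live: per the
commit message, only the _cb variant will need to acquire/release the
AioContext.
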
We cannot write to images opened with O_DIRECT unless we allow them to
be resized so they are aligned to the sector size: since 9c60a5d1978,
bdrv_node_refresh_perm() ensures that for nodes whose length is not
aligned to the request alignment and where someone has taken a WRITE
permission, the RESIZE permission is taken, too.

Let qemu-img convert pass the BDRV_O_RESIZE flag (which causes
blk_new_open() to take the RESIZE permission) when using cache=none for
the target, so that when writing to it, it can be aligned to the target
sector size.

Without this patch, an error is returned:

$ qemu-img convert -f raw -O raw -t none foo.img /mnt/tmp/foo.img
qemu-img: Could not open '/mnt/tmp/foo.img': Cannot get 'write'
permission without 'resize': Image size is not a multiple of request
alignment

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1994266
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210819101200.64235-1-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 qemu-img.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
         goto out;
     }
 
+    if (flags & BDRV_O_NOCACHE) {
+        /*
+         * If we open the target with O_DIRECT, it may be necessary to
+         * extend its size to align to the physical sector size.
+         */
+        flags |= BDRV_O_RESIZE;
+    }
+
     if (skip_create) {
         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
                             flags, writethrough, s.quiet, false);
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

The AioContext data structures are now protected by list_lock and/or
they are walked with FOREACH_RCU primitives. There is no need anymore
to acquire the AioContext for the entire duration of aio_dispatch.
Instead, just acquire it before and after invoking the callbacks.
The next step is then to push it further down.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/aio-posix.c | 25 +++++++++++--------------
 util/aio-win32.c | 15 +++++++--------
 util/async.c     |  2 ++
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
             (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
             aio_node_check(ctx, node->is_external) &&
             node->io_read) {
+            aio_context_acquire(ctx);
             node->io_read(node->opaque);
+            aio_context_release(ctx);
 
             /* aio_notify() does not count as progress */
             if (node->opaque != &ctx->notifier) {
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
             (revents & (G_IO_OUT | G_IO_ERR)) &&
             aio_node_check(ctx, node->is_external) &&
             node->io_write) {
+            aio_context_acquire(ctx);
             node->io_write(node->opaque);
+            aio_context_release(ctx);
             progress = true;
         }
 
@@ -XXX,XX +XXX,XX @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
     }
 
     /* Run our timers */
+    aio_context_acquire(ctx);
     progress |= timerlistgroup_run_timers(&ctx->tlg);
+    aio_context_release(ctx);
 
     return progress;
 }
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     int64_t timeout;
     int64_t start = 0;
 
-    aio_context_acquire(ctx);
-    progress = false;
-
     /* aio_notify can avoid the expensive event_notifier_set if
      * everything (file descriptors, bottom halves, timers) will
      * be re-evaluated before the next blocking poll(). This is
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     }
 
-    if (try_poll_mode(ctx, blocking)) {
-        progress = true;
-    } else {
+    aio_context_acquire(ctx);
+    progress = try_poll_mode(ctx, blocking);
+    aio_context_release(ctx);
+
+    if (!progress) {
         assert(npfd == 0);
 
         /* fill pollfds */
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         timeout = blocking ? aio_compute_timeout(ctx) : 0;
 
         /* wait until next event */
-        if (timeout) {
-            aio_context_release(ctx);
-        }
         if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
             AioHandler epoll_handler;
 
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         } else {
             ret = qemu_poll_ns(pollfds, npfd, timeout);
         }
-        if (timeout) {
-            aio_context_acquire(ctx);
-        }
     }
 
     if (blocking) {
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress = true;
     }
 
-    aio_context_release(ctx);
-
     return progress;
 }
 
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
             (revents || event_notifier_get_handle(node->e) == event) &&
             node->io_notify) {
             node->pfd.revents = 0;
+            aio_context_acquire(ctx);
             node->io_notify(node->e);
+            aio_context_release(ctx);
 
             /* aio_notify() does not count as progress */
             if (node->e != &ctx->notifier) {
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
             (node->io_read || node->io_write)) {
             node->pfd.revents = 0;
             if ((revents & G_IO_IN) && node->io_read) {
+                aio_context_acquire(ctx);
                 node->io_read(node->opaque);
+                aio_context_release(ctx);
                 progress = true;
             }
             if ((revents & G_IO_OUT) && node->io_write) {
+                aio_context_acquire(ctx);
                 node->io_write(node->opaque);
+                aio_context_release(ctx);
                 progress = true;
             }
 
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;
 
-    aio_context_acquire(ctx);
     progress = false;
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
 
         timeout = blocking && !have_select_revents
             ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
-        if (timeout) {
-            aio_context_release(ctx);
-        }
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
         if (blocking) {
             assert(first);
             atomic_sub(&ctx->notify_me, 2);
         }
-        if (timeout) {
-            aio_context_acquire(ctx);
-        }
 
         if (first) {
             aio_notify_accept(ctx);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress |= aio_dispatch_handlers(ctx, event);
     } while (count > 0);
 
+    aio_context_acquire(ctx);
     progress |= timerlistgroup_run_timers(&ctx->tlg);
-
     aio_context_release(ctx);
     return progress;
 }
 
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
                 ret = 1;
             }
             bh->idle = 0;
+            aio_context_acquire(ctx);
             aio_bh_call(bh);
+            aio_context_release(ctx);
         }
         if (bh->deleted) {
             deleted = true;
--
2.9.3
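The qemu-img fix is small, but the permission logic behind it is worth
spelling out: with an unaligned image opened O_DIRECT, writes must be allowed
to grow the file up to the request alignment, and since 9c60a5d1978 that
requires the RESIZE permission up front. The same adjustment would apply to
any other blk_new_open() caller; a sketch under those assumptions (the
BDRV_O_* flags are real, the surrounding variable is illustrative):

    int flags = BDRV_O_RDWR;
    if (use_o_direct) {
        flags |= BDRV_O_NOCACHE;
        /* O_DIRECT writes may need to pad the image tail up to the
         * request alignment, which counts as a resize: */
        flags |= BDRV_O_RESIZE;
    }
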
bdrv_co_block_status() does it for us; we do not need to do it here.

The advantage of not capping *pnum is that bdrv_co_block_status() can
cache larger data regions than requested by its caller.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20210812084148.14458-7-hreitz@redhat.com>
---
 block/iscsi.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ retry:
         iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
     }
 
-    if (*pnum > bytes) {
-        *pnum = bytes;
-    }
 out_unlock:
     qemu_mutex_unlock(&iscsilun->mutex);
     g_free(iTask.err_str);
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/qed.h                 |  3 +++
 block/curl.c                |  2 ++
 block/io.c                  |  5 +++++
 block/iscsi.c               |  8 ++++++--
 block/null.c                |  4 ++++
 block/qed.c                 | 12 ++++++++++++
 block/throttle-groups.c     |  2 ++
 util/aio-posix.c            |  2 --
 util/aio-win32.c            |  2 --
 util/qemu-coroutine-sleep.c |  2 +-
 10 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/block/qed.h b/block/qed.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -XXX,XX +XXX,XX @@ enum {
 */
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
 
+void qed_acquire(BDRVQEDState *s);
+void qed_release(BDRVQEDState *s);
+
 /**
  * Generic callback for chaining async callbacks
  */
diff --git a/block/curl.c b/block/curl.c
index XXXXXXX..XXXXXXX 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -XXX,XX +XXX,XX @@ static void curl_multi_timeout_do(void *arg)
         return;
     }
 
+    aio_context_acquire(s->aio_context);
     curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 
     curl_multi_check_completion(s);
+    aio_context_release(s->aio_context);
 #else
     abort();
 #endif
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ void bdrv_aio_cancel(BlockAIOCB *acb)
     if (acb->aiocb_info->get_aio_context) {
         aio_poll(acb->aiocb_info->get_aio_context(acb), true);
     } else if (acb->bs) {
+        /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
+         * assert that we're not using an I/O thread.  Thread-safe
+         * code should use bdrv_aio_cancel_async exclusively.
+         */
+        assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
         aio_poll(bdrv_get_aio_context(acb->bs), true);
     } else {
         abort();
diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ static void iscsi_retry_timer_expired(void *opaque)
     struct IscsiTask *iTask = opaque;
     iTask->complete = 1;
     if (iTask->co) {
-        qemu_coroutine_enter(iTask->co);
+        aio_co_wake(iTask->co);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void iscsi_nop_timed_event(void *opaque)
 {
     IscsiLun *iscsilun = opaque;
 
+    aio_context_acquire(iscsilun->aio_context);
     if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
         error_report("iSCSI: NOP timeout. Reconnecting...");
         iscsilun->request_timed_out = true;
     } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
         error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
-        return;
+        goto out;
     }
 
     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
     iscsi_set_events(iscsilun);
+
+out:
+    aio_context_release(iscsilun->aio_context);
 }
 
 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
diff --git a/block/null.c b/block/null.c
index XXXXXXX..XXXXXXX 100644
--- a/block/null.c
+++ b/block/null.c
@@ -XXX,XX +XXX,XX @@ static void null_bh_cb(void *opaque)
 static void null_timer_cb(void *opaque)
 {
     NullAIOCB *acb = opaque;
+    AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
+
+    aio_context_acquire(ctx);
     acb->common.cb(acb->common.opaque, 0);
+    aio_context_release(ctx);
     timer_deinit(&acb->timer);
     qemu_aio_unref(acb);
 }
diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static void qed_need_check_timer_cb(void *opaque)
 
     trace_qed_need_check_timer_cb(s);
 
+    qed_acquire(s);
     qed_plug_allocating_write_reqs(s);
 
     /* Ensure writes are on disk before clearing flag */
     bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
+    qed_release(s);
+}
+
+void qed_acquire(BDRVQEDState *s)
+{
+    aio_context_acquire(bdrv_get_aio_context(s->bs));
+}
+
+void qed_release(BDRVQEDState *s)
+{
+    aio_context_release(bdrv_get_aio_context(s->bs));
 }
 
 static void qed_start_need_check_timer(BDRVQEDState *s)
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index XXXXXXX..XXXXXXX 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -XXX,XX +XXX,XX @@ static void timer_cb(BlockBackend *blk, bool is_write)
     qemu_mutex_unlock(&tg->lock);
 
     /* Run the request that was waiting for this timer */
+    aio_context_acquire(blk_get_aio_context(blk));
     empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
+    aio_context_release(blk_get_aio_context(blk));
 
     /* If the request queue was empty then we have to take care of
      * scheduling the next one */
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
     }
 
     /* Run our timers */
-    aio_context_acquire(ctx);
     progress |= timerlistgroup_run_timers(&ctx->tlg);
-    aio_context_release(ctx);
 
     return progress;
 }
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         progress |= aio_dispatch_handlers(ctx, event);
     } while (count > 0);
 
-    aio_context_acquire(ctx);
     progress |= timerlistgroup_run_timers(&ctx->tlg);
-    aio_context_release(ctx);
     return progress;
 }
 
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -XXX,XX +XXX,XX @@ static void co_sleep_cb(void *opaque)
 {
     CoSleepCB *sleep_cb = opaque;
 
-    qemu_coroutine_enter(sleep_cb->co);
+    aio_co_wake(sleep_cb->co);
 }
 
 void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
--
2.9.3
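Once timers run outside the AioContext lock, every timer callback that
touches driver state has to take the lock itself; the qed_acquire()/
qed_release() pair above is just a named wrapper for exactly that. The bare
pattern, as a sketch (my_driver_state and my_periodic_work are illustrative;
the locking calls are the real API):

    static void my_timer_cb(void *opaque)
    {
        my_driver_state *s = opaque;

        aio_context_acquire(bdrv_get_aio_context(s->bs));
        my_periodic_work(s);    /* safe: lock held around driver state */
        aio_context_release(bdrv_get_aio_context(s->bs));
    }
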
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-10-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2.h          | 1 +
 block/qcow2-refcount.c | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/block/qcow2.h b/block/qcow2.h
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -XXX,XX +XXX,XX @@ typedef enum QCow2MetadataOverlap {
     (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
 
 #define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L1E_RESERVED_MASK 0x7f000000000001ffULL
 #define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
 #define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l1(BlockDriverState *bs,
                 continue;
             }
 
+            if (l1_table[i] & L1E_RESERVED_MASK) {
+                fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
+                        "%" PRIx64 "\n", l1_table[i]);
+                res->corruptions++;
+            }
+
             l2_offset = l1_table[i] & L1E_OFFSET_MASK;
 
             /* Mark L2 table as used */
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

This covers both file descriptor callbacks and polling callbacks,
since they execute related code.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/curl.c          | 16 +++++++++++++---
 block/iscsi.c         |  4 ++++
 block/linux-aio.c     |  4 ++++
 block/nfs.c           |  6 ++++++
 block/sheepdog.c      | 29 +++++++++++++++--------------
 block/ssh.c           | 29 +++++++++--------------------
 block/win32-aio.c     | 10 ++++++----
 hw/block/virtio-blk.c |  5 ++++-
 hw/scsi/virtio-scsi.c |  7 +++++++
 util/aio-posix.c      |  7 -------
 util/aio-win32.c      |  6 ------
 11 files changed, 68 insertions(+), 55 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index XXXXXXX..XXXXXXX 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -XXX,XX +XXX,XX @@ static void curl_multi_check_completion(BDRVCURLState *s)
     }
 }
 
-static void curl_multi_do(void *arg)
+static void curl_multi_do_locked(CURLState *s)
 {
-    CURLState *s = (CURLState *)arg;
     CURLSocket *socket, *next_socket;
     int running;
     int r;
@@ -XXX,XX +XXX,XX @@ static void curl_multi_do(void *arg)
     }
 }
 
+static void curl_multi_do(void *arg)
+{
+    CURLState *s = (CURLState *)arg;
+
+    aio_context_acquire(s->s->aio_context);
+    curl_multi_do_locked(s);
+    aio_context_release(s->s->aio_context);
+}
+
 static void curl_multi_read(void *arg)
 {
     CURLState *s = (CURLState *)arg;
 
-    curl_multi_do(arg);
+    aio_context_acquire(s->s->aio_context);
+    curl_multi_do_locked(s);
     curl_multi_check_completion(s->s);
+    aio_context_release(s->s->aio_context);
 }
 
 static void curl_multi_timeout_do(void *arg)
diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ iscsi_process_read(void *arg)
     IscsiLun *iscsilun = arg;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
+    aio_context_acquire(iscsilun->aio_context);
     iscsi_service(iscsi, POLLIN);
     iscsi_set_events(iscsilun);
+    aio_context_release(iscsilun->aio_context);
 }
 
 static void
@@ -XXX,XX +XXX,XX @@ iscsi_process_write(void *arg)
     IscsiLun *iscsilun = arg;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
+    aio_context_acquire(iscsilun->aio_context);
     iscsi_service(iscsi, POLLOUT);
     iscsi_set_events(iscsilun);
+    aio_context_release(iscsilun->aio_context);
 }
 
 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_completion_cb(EventNotifier *e)
     LinuxAioState *s = container_of(e, LinuxAioState, e);
 
     if (event_notifier_test_and_clear(&s->e)) {
+        aio_context_acquire(s->aio_context);
         qemu_laio_process_completions_and_submit(s);
+        aio_context_release(s->aio_context);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static bool qemu_laio_poll_cb(void *opaque)
         return false;
     }
 
+    aio_context_acquire(s->aio_context);
     qemu_laio_process_completions_and_submit(s);
+    aio_context_release(s->aio_context);
     return true;
 }
 
diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ static void nfs_set_events(NFSClient *client)
 static void nfs_process_read(void *arg)
 {
     NFSClient *client = arg;
+
+    aio_context_acquire(client->aio_context);
     nfs_service(client->context, POLLIN);
     nfs_set_events(client);
+    aio_context_release(client->aio_context);
 }
 
 static void nfs_process_write(void *arg)
 {
     NFSClient *client = arg;
+
+    aio_context_acquire(client->aio_context);
     nfs_service(client->context, POLLOUT);
     nfs_set_events(client);
+    aio_context_release(client->aio_context);
 }
 
 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index XXXXXXX..XXXXXXX 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
     return ret;
 }
 
-static void restart_co_req(void *opaque)
-{
-    Coroutine *co = opaque;
-
-    qemu_coroutine_enter(co);
-}
-
 typedef struct SheepdogReqCo {
     int sockfd;
     BlockDriverState *bs;
@@ -XXX,XX +XXX,XX @@ typedef struct SheepdogReqCo {
     unsigned int *rlen;
     int ret;
     bool finished;
+    Coroutine *co;
 } SheepdogReqCo;
 
+static void restart_co_req(void *opaque)
+{
+    SheepdogReqCo *srco = opaque;
+
+    aio_co_wake(srco->co);
+}
+
 static coroutine_fn void do_co_req(void *opaque)
 {
     int ret;
-    Coroutine *co;
     SheepdogReqCo *srco = opaque;
     int sockfd = srco->sockfd;
     SheepdogReq *hdr = srco->hdr;
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void do_co_req(void *opaque)
     unsigned int *wlen = srco->wlen;
     unsigned int *rlen = srco->rlen;
 
-    co = qemu_coroutine_self();
+    srco->co = qemu_coroutine_self();
     aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, restart_co_req, NULL, co);
+                       NULL, restart_co_req, NULL, srco);
 
     ret = send_co_req(sockfd, hdr, data, wlen);
     if (ret < 0) {
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void do_co_req(void *opaque)
     }
 
     aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       restart_co_req, NULL, NULL, co);
+                       restart_co_req, NULL, NULL, srco);
 
     ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
     if (ret != sizeof(*hdr)) {
@@ -XXX,XX +XXX,XX @@ out:
     aio_set_fd_handler(srco->aio_context, sockfd, false,
                        NULL, NULL, NULL, NULL);
 
+    srco->co = NULL;
     srco->ret = ret;
     srco->finished = true;
     if (srco->bs) {
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
          * We've finished all requests which belong to the AIOCB, so
          * we can switch back to sd_co_readv/writev now.
          */
-        qemu_coroutine_enter(acb->coroutine);
+        aio_co_wake(acb->coroutine);
     }
 
     return;
@@ -XXX,XX +XXX,XX @@ static void co_read_response(void *opaque)
         s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
     }
 
-    qemu_coroutine_enter(s->co_recv);
+    aio_co_wake(s->co_recv);
 }
 
 static void co_write_request(void *opaque)
 {
     BDRVSheepdogState *s = opaque;
 
-    qemu_coroutine_enter(s->co_send);
+    aio_co_wake(s->co_send);
 }
 
 /*
diff --git a/block/ssh.c b/block/ssh.c
index XXXXXXX..XXXXXXX 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -XXX,XX +XXX,XX @@ static void restart_coroutine(void *opaque)
 
     DPRINTF("co=%p", co);
 
-    qemu_coroutine_enter(co);
+    aio_co_wake(co);
 }
 
-static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
+/* A non-blocking call returned EAGAIN, so yield, ensuring the
+ * handlers are set up so that we'll be rescheduled when there is an
+ * interesting event on the socket.
+ */
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
     IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
                        false, rd_handler, wr_handler, NULL, co);
-}
-
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
-                                          BlockDriverState *bs)
-{
-    DPRINTF("s->sock=%d", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, NULL, NULL, NULL, NULL);
-}
-
-/* A non-blocking call returned EAGAIN, so yield, ensuring the
- * handlers are set up so that we'll be rescheduled when there is an
- * interesting event on the socket.
- */
-static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
-{
-    set_fd_handler(s, bs);
     qemu_coroutine_yield();
-    clear_fd_handler(s, bs);
+    DPRINTF("s->sock=%d - back", s->sock);
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
+                       NULL, NULL, NULL, NULL);
 }
 
 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
diff --git a/block/win32-aio.c b/block/win32-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -XXX,XX +XXX,XX @@ struct QEMUWin32AIOState {
     HANDLE hIOCP;
     EventNotifier e;
     int count;
-    bool is_aio_context_attached;
+    AioContext *aio_ctx;
 };
 
 typedef struct QEMUWin32AIOCB {
@@ -XXX,XX +XXX,XX @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
     }
 
 
+    aio_context_acquire(s->aio_ctx);
     waiocb->common.cb(waiocb->common.opaque, ret);
+    aio_context_release(s->aio_ctx);
     qemu_aio_unref(waiocb);
 }
 
@@ -XXX,XX +XXX,XX @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *old_context)
 {
     aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
-    aio->is_aio_context_attached = false;
+    aio->aio_ctx = NULL;
 }
 
 void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *new_context)
 {
-    aio->is_aio_context_attached = true;
+    aio->aio_ctx = new_context;
     aio_set_event_notifier(new_context, &aio->e, false,
                            win32_aio_completion_cb, NULL);
 }
@@ -XXX,XX +XXX,XX @@ out_free_state:
 
 void win32_aio_cleanup(QEMUWin32AIOState *aio)
 {
-    assert(!aio->is_aio_context_attached);
+    assert(!aio->aio_ctx);
     CloseHandle(aio->hIOCP);
     event_notifier_cleanup(&aio->e);
     g_free(aio);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
 {
     VirtIOBlockIoctlReq *ioctl_req = opaque;
     VirtIOBlockReq *req = ioctl_req->req;
-    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
+    VirtIOBlock *s = req->dev;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
     struct virtio_scsi_inhdr *scsi;
     struct sg_io_hdr *hdr;
 
@@ -XXX,XX +XXX,XX @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
     MultiReqBuffer mrb = {};
     bool progress = false;
 
+    aio_context_acquire(blk_get_aio_context(s->blk));
     blk_io_plug(s->blk);
 
     do {
@@ -XXX,XX +XXX,XX @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
     }
 
     blk_io_unplug(s->blk);
+    aio_context_release(blk_get_aio_context(s->blk));
     return progress;
 }
 
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
     VirtIOSCSIReq *req;
     bool progress = false;
 
+    virtio_scsi_acquire(s);
     while ((req = virtio_scsi_pop_req(s, vq))) {
         progress = true;
         virtio_scsi_handle_ctrl_req(s, req);
     }
+    virtio_scsi_release(s);
     return progress;
 }
 
@@ -XXX,XX +XXX,XX @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
 
     QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
 
+    virtio_scsi_acquire(s);
     do {
         virtio_queue_set_notification(vq, 0);
 
@@ -XXX,XX +XXX,XX @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
     QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
         virtio_scsi_handle_cmd_req_submit(s, req);
     }
+    virtio_scsi_release(s);
     return progress;
 }
 
@@ -XXX,XX +XXX,XX @@ out:
 
 bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
+    virtio_scsi_acquire(s);
     if (s->events_dropped) {
         virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+        virtio_scsi_release(s);
         return true;
     }
+    virtio_scsi_release(s);
     return false;
 }
 
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
             (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
             aio_node_check(ctx, node->is_external) &&
             node->io_read) {
-            aio_context_acquire(ctx);
             node->io_read(node->opaque);
-            aio_context_release(ctx);
 
             /* aio_notify() does not count as progress */
             if (node->opaque != &ctx->notifier) {
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
             (revents & (G_IO_OUT | G_IO_ERR)) &&
             aio_node_check(ctx, node->is_external) &&
             node->io_write) {
-            aio_context_acquire(ctx);
             node->io_write(node->opaque);
-            aio_context_release(ctx);
             progress = true;
         }
 
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
         start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     }
 
-    aio_context_acquire(ctx);
     progress = try_poll_mode(ctx, blocking);
-    aio_context_release(ctx);
-
     if (!progress) {
         assert(npfd == 0);
 
diff --git a/util/aio-win32.c b/util/aio-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
             (revents || event_notifier_get_handle(node->e) == event) &&
             node->io_notify) {
             node->pfd.revents = 0;
-            aio_context_acquire(ctx);
             node->io_notify(node->e);
-            aio_context_release(ctx);
 
             /* aio_notify() does not count as progress */
             if (node->e != &ctx->notifier) {
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
             (node->io_read || node->io_write)) {
             node->pfd.revents = 0;
             if ((revents & G_IO_IN) && node->io_read) {
-                aio_context_acquire(ctx);
                 node->io_read(node->opaque);
-                aio_context_release(ctx);
                 progress = true;
             }
             if ((revents & G_IO_OUT) && node->io_write) {
-                aio_context_acquire(ctx);
                 node->io_write(node->opaque);
-                aio_context_release(ctx);
                 progress = true;
             }
 
--
2.31.1
--
2.9.3
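For the qcow2 reserved-bit checks it may help to see how the masks partition
an entry: bits 9-55 are the host offset, bit 63 is QCOW_OFLAG_COPIED, and
everything else in an L1 entry is reserved. A consistency check one could add
(not part of the patch; QEMU_BUILD_BUG_ON is the real macro, and the masks
are the ones defined above):

    /* offset bits, reserved bits and the COPIED flag must tile all 64 bits,
     * without overlapping: */
    QEMU_BUILD_BUG_ON((L1E_OFFSET_MASK | L1E_RESERVED_MASK |
                       QCOW_OFLAG_COPIED) != ~0ULL);
    QEMU_BUILD_BUG_ON(L1E_OFFSET_MASK & L1E_RESERVED_MASK);
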
bdrv_co_block_status() does it for us; we do not need to do it here.

The advantage of not capping *pnum is that bdrv_co_block_status() can
cache larger data regions than requested by its caller.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20210812084148.14458-6-hreitz@redhat.com>
---
 block/gluster.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -XXX,XX +XXX,XX @@ exit:
  * the specified offset) that are known to be in the same
  * allocated/unallocated state.
  *
- * 'bytes' is the max value 'pnum' should be set to.
+ * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may
+ * well exceed it.
  *
  * (Based on raw_co_block_status() from file-posix.c.)
  */
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
     } else if (data == offset) {
         /* On a data extent, compute bytes to the end of the extent,
          * possibly including a partial sector at EOF. */
-        *pnum = MIN(bytes, hole - offset);
+        *pnum = hole - offset;
 
         /*
          * We are not allowed to return partial sectors, though, so
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
     } else {
         /* On a hole, compute bytes to the beginning of the next extent. */
         assert(hole == offset);
-        *pnum = MIN(bytes, data - offset);
+        *pnum = data - offset;
         ret = BDRV_BLOCK_ZERO;
     }
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/archipelago.c   |  3 +++
 block/blkreplay.c     |  2 +-
 block/block-backend.c |  6 ++++++
 block/curl.c          | 26 ++++++++++++++++++--------
 block/gluster.c       |  9 +--------
 block/io.c            |  6 +++++-
 block/iscsi.c         |  6 +++++-
 block/linux-aio.c     | 15 +++++++++------
 block/nfs.c           |  3 ++-
 block/null.c          |  4 ++++
 block/qed.c           |  3 +++
 block/rbd.c           |  4 ++++
 dma-helpers.c         |  2 ++
 hw/block/virtio-blk.c |  2 ++
 hw/scsi/scsi-bus.c    |  2 ++
 util/async.c          |  4 ++--
 util/thread-pool.c    |  2 ++
 17 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/block/archipelago.c b/block/archipelago.c
index XXXXXXX..XXXXXXX 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -XXX,XX +XXX,XX @@ static void qemu_archipelago_complete_aio(void *opaque)
 {
     AIORequestData *reqdata = (AIORequestData *) opaque;
     ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
+    AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
 
+    aio_context_acquire(ctx);
     aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
+    aio_context_release(ctx);
     aio_cb->status = 0;
 
     qemu_aio_unref(aio_cb);
diff --git a/block/blkreplay.c b/block/blkreplay.c
index XXXXXXX..XXXXXXX 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -XXX,XX +XXX,XX @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
 static void blkreplay_bh_cb(void *opaque)
 {
     Request *req = opaque;
-    qemu_coroutine_enter(req->co);
+    aio_co_wake(req->co);
     qemu_bh_delete(req->bh);
     g_free(req);
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
 static void error_callback_bh(void *opaque)
 {
     struct BlockBackendAIOCB *acb = opaque;
+    AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
 
     bdrv_dec_in_flight(acb->common.bs);
+    aio_context_acquire(ctx);
     acb->common.cb(acb->common.opaque, acb->ret);
+    aio_context_release(ctx);
     qemu_aio_unref(acb);
 }
 
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
 static void blk_aio_complete_bh(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
+    AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
 
     assert(acb->has_returned);
+    aio_context_acquire(ctx);
     blk_aio_complete(acb);
+    aio_context_release(ctx);
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
diff --git a/block/curl.c b/block/curl.c
index XXXXXXX..XXXXXXX 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
 {
     CURLState *state;
     int running;
+    int ret = -EINPROGRESS;
 
     CURLAIOCB *acb = p;
-    BDRVCURLState *s = acb->common.bs->opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVCURLState *s = bs->opaque;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
     size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
     size_t end;
 
+    aio_context_acquire(ctx);
+
     // In case we have the requested data already (e.g. read-ahead),
     // we can just call the callback and be done.
     switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
         qemu_aio_unref(acb);
         // fall through
     case FIND_RET_WAIT:
-        return;
+        goto out;
     default:
         break;
     }
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
     // No cache found, so let's start a new request
     state = curl_init_state(acb->common.bs, s);
     if (!state) {
-        acb->common.cb(acb->common.opaque, -EIO);
-        qemu_aio_unref(acb);
-        return;
+        ret = -EIO;
+        goto out;
     }
 
     acb->start = 0;
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
     state->orig_buf = g_try_malloc(state->buf_len);
     if (state->buf_len && state->orig_buf == NULL) {
         curl_clean_state(state);
-        acb->common.cb(acb->common.opaque, -ENOMEM);
-        qemu_aio_unref(acb);
-        return;
+        ret = -ENOMEM;
+        goto out;
     }
     state->acb[0] = acb;
 
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
 
     /* Tell curl it needs to kick things off */
     curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
+
+out:
+    if (ret != -EINPROGRESS) {
+        acb->common.cb(acb->common.opaque, ret);
+        qemu_aio_unref(acb);
+    }
+    aio_context_release(ctx);
 }
 
 static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
diff --git a/block/gluster.c b/block/gluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -XXX,XX +XXX,XX @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
     return qemu_gluster_glfs_init(gconf, errp);
 }
 
-static void qemu_gluster_complete_aio(void *opaque)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
-
-    qemu_coroutine_enter(acb->coroutine);
-}
-
 /*
  * AIO callback routine called from GlusterFS thread.
  */
@@ -XXX,XX +XXX,XX @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
         acb->ret = -EIO; /* Partial read/write - fail it */
     }
 
-    aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
+    aio_co_schedule(acb->aio_context, acb->coroutine);
 }
 
 static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
     bdrv_dec_in_flight(bs);
     bdrv_drained_begin(bs);
     data->done = true;
-    qemu_coroutine_enter(co);
+    aio_co_wake(co);
 }
 
 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
 static void bdrv_co_em_bh(void *opaque)
 {
     BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
     assert(!acb->need_bh);
+    aio_context_acquire(ctx);
     bdrv_co_complete(acb);
+    aio_context_release(ctx);
 }
 
 static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
diff --git a/block/iscsi.c b/block/iscsi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -XXX,XX +XXX,XX @@ static void
 iscsi_bh_cb(void *p)
 {
     IscsiAIOCB *acb = p;
+    AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
 
     qemu_bh_delete(acb->bh);
 
     g_free(acb->buf);
     acb->buf = NULL;
 
+    aio_context_acquire(ctx);
     acb->common.cb(acb->common.opaque, acb->status);
+    aio_context_release(ctx);
 
     if (acb->task != NULL) {
         scsi_free_scsi_task(acb->task);
@@ -XXX,XX +XXX,XX @@ iscsi_schedule_bh(IscsiAIOCB *acb)
 static void iscsi_co_generic_bh_cb(void *opaque)
 {
     struct IscsiTask *iTask = opaque;
+
     iTask->complete = 1;
-    qemu_coroutine_enter(iTask->co);
+    aio_co_wake(iTask->co);
 }
 
 static void iscsi_retry_timer_expired(void *opaque)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@ struct LinuxAioState {
     io_context_t ctx;
     EventNotifier e;
 
-    /* io queue for submit at batch */
+    /* io queue for submit at batch.  Protected by AioContext lock. */
     LaioQueue io_q;
 
-    /* I/O completion processing */
+    /* I/O completion processing.  Only runs in I/O thread. */
     QEMUBH *completion_bh;
     int event_idx;
     int event_max;
@@ -XXX,XX +XXX,XX @@ static inline ssize_t io_event_ret(struct io_event *ev)
  */
 static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
 {
+    LinuxAioState *s = laiocb->ctx;
     int ret;
 
     ret = laiocb->ret;
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
     }
 
     laiocb->ret = ret;
+    aio_context_acquire(s->aio_context);
     if (laiocb->co) {
         /* If the coroutine is already entered it must be in ioq_submit() and
          * will notice laio->ret has been filled in when it eventually runs
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
         laiocb->common.cb(laiocb->common.opaque, ret);
         qemu_aio_unref(laiocb);
     }
+    aio_context_release(s->aio_context);
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions(LinuxAioState *s)
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
     qemu_laio_process_completions(s);
+
+    aio_context_acquire(s->aio_context);
     if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
     }
+    aio_context_release(s->aio_context);
 }
 
 static void qemu_laio_completion_bh(void *opaque)
299
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_completion_cb(EventNotifier *e)
300
LinuxAioState *s = container_of(e, LinuxAioState, e);
301
302
if (event_notifier_test_and_clear(&s->e)) {
303
- aio_context_acquire(s->aio_context);
304
qemu_laio_process_completions_and_submit(s);
305
- aio_context_release(s->aio_context);
306
}
307
}
308
309
@@ -XXX,XX +XXX,XX @@ static bool qemu_laio_poll_cb(void *opaque)
310
return false;
311
}
312
313
- aio_context_acquire(s->aio_context);
314
qemu_laio_process_completions_and_submit(s);
315
- aio_context_release(s->aio_context);
316
return true;
317
}
318
319
@@ -XXX,XX +XXX,XX @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
320
{
321
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
322
qemu_bh_delete(s->completion_bh);
323
+ s->aio_context = NULL;
324
}
325
326
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
327
diff --git a/block/nfs.c b/block/nfs.c
328
index XXXXXXX..XXXXXXX 100644
329
--- a/block/nfs.c
330
+++ b/block/nfs.c
331
@@ -XXX,XX +XXX,XX @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
332
static void nfs_co_generic_bh_cb(void *opaque)
333
{
334
NFSRPC *task = opaque;
335
+
336
task->complete = 1;
337
- qemu_coroutine_enter(task->co);
338
+ aio_co_wake(task->co);
339
}
340
341
static void
342
diff --git a/block/null.c b/block/null.c
343
index XXXXXXX..XXXXXXX 100644
344
--- a/block/null.c
345
+++ b/block/null.c
346
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo null_aiocb_info = {
347
static void null_bh_cb(void *opaque)
348
{
349
NullAIOCB *acb = opaque;
350
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
351
+
352
+ aio_context_acquire(ctx);
353
acb->common.cb(acb->common.opaque, 0);
354
+ aio_context_release(ctx);
355
qemu_aio_unref(acb);
356
}
357
358
diff --git a/block/qed.c b/block/qed.c
359
index XXXXXXX..XXXXXXX 100644
360
--- a/block/qed.c
361
+++ b/block/qed.c
362
@@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
363
static void qed_aio_complete_bh(void *opaque)
364
{
365
QEDAIOCB *acb = opaque;
366
+ BDRVQEDState *s = acb_to_s(acb);
367
BlockCompletionFunc *cb = acb->common.cb;
368
void *user_opaque = acb->common.opaque;
369
int ret = acb->bh_ret;
370
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete_bh(void *opaque)
371
qemu_aio_unref(acb);
372
373
/* Invoke callback */
374
+ qed_acquire(s);
375
cb(user_opaque, ret);
376
+ qed_release(s);
377
}
378
379
static void qed_aio_complete(QEDAIOCB *acb, int ret)
380
diff --git a/block/rbd.c b/block/rbd.c
381
index XXXXXXX..XXXXXXX 100644
382
--- a/block/rbd.c
383
+++ b/block/rbd.c
384
@@ -XXX,XX +XXX,XX @@ shutdown:
385
static void qemu_rbd_complete_aio(RADOSCB *rcb)
386
{
387
RBDAIOCB *acb = rcb->acb;
388
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
389
int64_t r;
390
391
r = rcb->ret;
392
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
393
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
394
}
395
qemu_vfree(acb->bounce);
396
+
397
+ aio_context_acquire(ctx);
398
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
399
+ aio_context_release(ctx);
400
401
qemu_aio_unref(acb);
402
}
403
diff --git a/dma-helpers.c b/dma-helpers.c
404
index XXXXXXX..XXXXXXX 100644
405
--- a/dma-helpers.c
406
+++ b/dma-helpers.c
407
@@ -XXX,XX +XXX,XX @@ static void dma_blk_cb(void *opaque, int ret)
408
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
409
}
410
411
+ aio_context_acquire(dbs->ctx);
412
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
413
dma_blk_cb, dbs, dbs->io_func_opaque);
414
+ aio_context_release(dbs->ctx);
415
assert(dbs->acb);
416
}
417
418
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
419
index XXXXXXX..XXXXXXX 100644
420
--- a/hw/block/virtio-blk.c
421
+++ b/hw/block/virtio-blk.c
422
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_bh(void *opaque)
423
424
s->rq = NULL;
425
426
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
427
while (req) {
428
VirtIOBlockReq *next = req->next;
429
if (virtio_blk_handle_request(req, &mrb)) {
430
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_bh(void *opaque)
431
if (mrb.num_reqs) {
432
virtio_blk_submit_multireq(s->blk, &mrb);
433
}
434
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
435
}
436
437
static void virtio_blk_dma_restart_cb(void *opaque, int running,
438
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
439
index XXXXXXX..XXXXXXX 100644
440
--- a/hw/scsi/scsi-bus.c
441
+++ b/hw/scsi/scsi-bus.c
442
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_restart_bh(void *opaque)
443
qemu_bh_delete(s->bh);
444
s->bh = NULL;
445
446
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
447
QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
448
scsi_req_ref(req);
449
if (req->retry) {
450
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_restart_bh(void *opaque)
451
}
452
scsi_req_unref(req);
453
}
454
+ aio_context_release(blk_get_aio_context(s->conf.blk));
455
}
456
457
void scsi_req_retry(SCSIRequest *req)
458
diff --git a/util/async.c b/util/async.c
459
index XXXXXXX..XXXXXXX 100644
460
--- a/util/async.c
461
+++ b/util/async.c
462
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
463
ret = 1;
464
}
465
bh->idle = 0;
466
- aio_context_acquire(ctx);
467
aio_bh_call(bh);
468
- aio_context_release(ctx);
469
}
470
if (bh->deleted) {
471
deleted = true;
472
@@ -XXX,XX +XXX,XX @@ static void co_schedule_bh_cb(void *opaque)
473
Coroutine *co = QSLIST_FIRST(&straight);
474
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
475
trace_aio_co_schedule_bh_cb(ctx, co);
476
+ aio_context_acquire(ctx);
477
qemu_coroutine_enter(co);
478
+ aio_context_release(ctx);
479
}
480
}
481
482
diff --git a/util/thread-pool.c b/util/thread-pool.c
483
index XXXXXXX..XXXXXXX 100644
484
--- a/util/thread-pool.c
485
+++ b/util/thread-pool.c
486
@@ -XXX,XX +XXX,XX @@ static void thread_pool_completion_bh(void *opaque)
487
ThreadPool *pool = opaque;
488
ThreadPoolElement *elem, *next;
489
490
+ aio_context_acquire(pool->ctx);
491
restart:
492
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
493
if (elem->state != THREAD_DONE) {
494
@@ -XXX,XX +XXX,XX @@ restart:
495
qemu_aio_unref(elem);
496
}
497
}
498
+ aio_context_release(pool->ctx);
499
}
500
501
static void thread_pool_cancel(BlockAIOCB *acb)
47
--
502
--
48
2.31.1
503
2.9.3
49
504
50
505
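
The hunks above repeat one shape: completion callbacks and bottom
halves now take the BlockDriverState's AioContext around the
guest-visible callback. A minimal sketch of that pattern, with the
hypothetical MyAIOCB standing in for the driver-specific ACBs (curl,
iscsi, null, rbd) touched above:

    /* Minimal sketch only; MyAIOCB and my_bh_cb are hypothetical
     * stand-ins for the driver-specific ACBs patched above. */
    static void my_bh_cb(void *opaque)
    {
        MyAIOCB *acb = opaque;
        AioContext *ctx = bdrv_get_aio_context(acb->common.bs);

        aio_context_acquire(ctx);   /* the callback may touch the BDS */
        acb->common.cb(acb->common.opaque, acb->ret);
        aio_context_release(ctx);

        qemu_aio_unref(acb);        /* drop the request's reference */
    }

The dispatch-side counterpart is visible in the util/async.c hunk:
aio_bh_poll() no longer wraps every bottom half in acquire/release, so
each callback that actually needs the context must take it itself.
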
diff view generated by jsdifflib
1
From: Stefano Garzarella <sgarzare@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
In mirror_iteration() we call mirror_wait_on_conflicts() with
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
the `self` parameter set to NULL.
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/archipelago.c | 3 ---
11
block/block-backend.c | 7 -------
12
block/curl.c | 2 +-
13
block/io.c | 6 +-----
14
block/iscsi.c | 3 ---
15
block/linux-aio.c | 5 +----
16
block/mirror.c | 12 +++++++++---
17
block/null.c | 8 --------
18
block/qed-cluster.c | 2 ++
19
block/qed-table.c | 12 ++++++++++--
20
block/qed.c | 4 ++--
21
block/rbd.c | 4 ----
22
block/win32-aio.c | 3 ---
23
hw/block/virtio-blk.c | 12 +++++++++++-
24
hw/scsi/scsi-disk.c | 15 +++++++++++++++
25
hw/scsi/scsi-generic.c | 20 +++++++++++++++++---
26
util/thread-pool.c | 4 +++-
27
17 files changed, 72 insertions(+), 50 deletions(-)
5
28
6
Starting from commit d44dae1a7c we dereference the `self` pointer in
29
diff --git a/block/archipelago.c b/block/archipelago.c
7
mirror_wait_on_conflicts() without checking that it is not NULL.
30
index XXXXXXX..XXXXXXX 100644
8
31
--- a/block/archipelago.c
9
Backtrace:
32
+++ b/block/archipelago.c
10
Program terminated with signal SIGSEGV, Segmentation fault.
33
@@ -XXX,XX +XXX,XX @@ static void qemu_archipelago_complete_aio(void *opaque)
11
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
34
{
12
at ../block/mirror.c:172
35
AIORequestData *reqdata = (AIORequestData *) opaque;
13
172     self->waiting_for_op = op;
36
ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
14
[Current thread is 1 (Thread 0x7f0908931ec0 (LWP 380249))]
37
- AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
15
(gdb) bt
38
16
#0 mirror_wait_on_conflicts (self=0x0, s=<optimized out>, offset=<optimized out>, bytes=<optimized out>)
39
- aio_context_acquire(ctx);
17
at ../block/mirror.c:172
40
aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
18
#1 0x00005610c5d9d631 in mirror_run (job=0x5610c76a2c00, errp=<optimized out>) at ../block/mirror.c:491
41
- aio_context_release(ctx);
19
#2 0x00005610c5d58726 in job_co_entry (opaque=0x5610c76a2c00) at ../job.c:917
42
aio_cb->status = 0;
20
#3 0x00005610c5f046c6 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>)
43
21
at ../util/coroutine-ucontext.c:173
44
qemu_aio_unref(aio_cb);
22
#4 0x00007f0909975820 in ?? () at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91
45
diff --git a/block/block-backend.c b/block/block-backend.c
23
from /usr/lib64/libc.so.6
46
index XXXXXXX..XXXXXXX 100644
24
47
--- a/block/block-backend.c
25
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001404
48
+++ b/block/block-backend.c
26
Fixes: d44dae1a7c ("block/mirror: fix active mirror dead-lock in mirror_wait_on_conflicts")
49
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
27
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
50
static void error_callback_bh(void *opaque)
28
Message-Id: <20210910124533.288318-1-sgarzare@redhat.com>
51
{
29
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
52
struct BlockBackendAIOCB *acb = opaque;
30
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
53
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
31
---
54
32
block/mirror.c | 25 ++++++++++++++++---------
55
bdrv_dec_in_flight(acb->common.bs);
33
1 file changed, 16 insertions(+), 9 deletions(-)
56
- aio_context_acquire(ctx);
34
57
acb->common.cb(acb->common.opaque, acb->ret);
58
- aio_context_release(ctx);
59
qemu_aio_unref(acb);
60
}
61
62
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
63
static void blk_aio_complete_bh(void *opaque)
64
{
65
BlkAioEmAIOCB *acb = opaque;
66
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
67
-
68
assert(acb->has_returned);
69
- aio_context_acquire(ctx);
70
blk_aio_complete(acb);
71
- aio_context_release(ctx);
72
}
73
74
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
75
diff --git a/block/curl.c b/block/curl.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/block/curl.c
78
+++ b/block/curl.c
79
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
80
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
81
82
out:
83
+ aio_context_release(ctx);
84
if (ret != -EINPROGRESS) {
85
acb->common.cb(acb->common.opaque, ret);
86
qemu_aio_unref(acb);
87
}
88
- aio_context_release(ctx);
89
}
90
91
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
92
diff --git a/block/io.c b/block/io.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/block/io.c
95
+++ b/block/io.c
96
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
97
CoroutineIOCompletion *co = opaque;
98
99
co->ret = ret;
100
- qemu_coroutine_enter(co->coroutine);
101
+ aio_co_wake(co->coroutine);
102
}
103
104
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
105
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
106
static void bdrv_co_em_bh(void *opaque)
107
{
108
BlockAIOCBCoroutine *acb = opaque;
109
- BlockDriverState *bs = acb->common.bs;
110
- AioContext *ctx = bdrv_get_aio_context(bs);
111
112
assert(!acb->need_bh);
113
- aio_context_acquire(ctx);
114
bdrv_co_complete(acb);
115
- aio_context_release(ctx);
116
}
117
118
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
119
diff --git a/block/iscsi.c b/block/iscsi.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/block/iscsi.c
122
+++ b/block/iscsi.c
123
@@ -XXX,XX +XXX,XX @@ static void
124
iscsi_bh_cb(void *p)
125
{
126
IscsiAIOCB *acb = p;
127
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
128
129
qemu_bh_delete(acb->bh);
130
131
g_free(acb->buf);
132
acb->buf = NULL;
133
134
- aio_context_acquire(ctx);
135
acb->common.cb(acb->common.opaque, acb->status);
136
- aio_context_release(ctx);
137
138
if (acb->task != NULL) {
139
scsi_free_scsi_task(acb->task);
140
diff --git a/block/linux-aio.c b/block/linux-aio.c
141
index XXXXXXX..XXXXXXX 100644
142
--- a/block/linux-aio.c
143
+++ b/block/linux-aio.c
144
@@ -XXX,XX +XXX,XX @@ static inline ssize_t io_event_ret(struct io_event *ev)
145
*/
146
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
147
{
148
- LinuxAioState *s = laiocb->ctx;
149
int ret;
150
151
ret = laiocb->ret;
152
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
153
}
154
155
laiocb->ret = ret;
156
- aio_context_acquire(s->aio_context);
157
if (laiocb->co) {
158
/* If the coroutine is already entered it must be in ioq_submit() and
159
* will notice laio->ret has been filled in when it eventually runs
160
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
161
* that!
162
*/
163
if (!qemu_coroutine_entered(laiocb->co)) {
164
- qemu_coroutine_enter(laiocb->co);
165
+ aio_co_wake(laiocb->co);
166
}
167
} else {
168
laiocb->common.cb(laiocb->common.opaque, ret);
169
qemu_aio_unref(laiocb);
170
}
171
- aio_context_release(s->aio_context);
172
}
173
174
/**
35
diff --git a/block/mirror.c b/block/mirror.c
175
diff --git a/block/mirror.c b/block/mirror.c
36
index XXXXXXX..XXXXXXX 100644
176
index XXXXXXX..XXXXXXX 100644
37
--- a/block/mirror.c
177
--- a/block/mirror.c
38
+++ b/block/mirror.c
178
+++ b/block/mirror.c
39
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
179
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
40
if (ranges_overlap(self_start_chunk, self_nb_chunks,
180
{
41
op_start_chunk, op_nb_chunks))
181
MirrorOp *op = opaque;
42
{
182
MirrorBlockJob *s = op->s;
43
- /*
183
+
44
- * If the operation is already (indirectly) waiting for us, or
184
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
45
- * will wait for us as soon as it wakes up, then just go on
185
if (ret < 0) {
46
- * (instead of producing a deadlock in the former case).
186
BlockErrorAction action;
47
- */
187
48
- if (op->waiting_for_op) {
188
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
49
- continue;
50
+ if (self) {
51
+ /*
52
+ * If the operation is already (indirectly) waiting for us,
53
+ * or will wait for us as soon as it wakes up, then just go
54
+ * on (instead of producing a deadlock in the former case).
55
+ */
56
+ if (op->waiting_for_op) {
57
+ continue;
58
+ }
59
+
60
+ self->waiting_for_op = op;
61
}
62
63
- self->waiting_for_op = op;
64
qemu_co_queue_wait(&op->waiting_requests, NULL);
65
- self->waiting_for_op = NULL;
66
+
67
+ if (self) {
68
+ self->waiting_for_op = NULL;
69
+ }
70
+
71
break;
72
}
73
}
189
}
190
}
191
mirror_iteration_done(op, ret);
192
+ aio_context_release(blk_get_aio_context(s->common.blk));
193
}
194
195
static void mirror_read_complete(void *opaque, int ret)
196
{
197
MirrorOp *op = opaque;
198
MirrorBlockJob *s = op->s;
199
+
200
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
201
if (ret < 0) {
202
BlockErrorAction action;
203
204
@@ -XXX,XX +XXX,XX @@ static void mirror_read_complete(void *opaque, int ret)
205
}
206
207
mirror_iteration_done(op, ret);
208
- return;
209
+ } else {
210
+ blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
211
+ 0, mirror_write_complete, op);
212
}
213
- blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
214
- 0, mirror_write_complete, op);
215
+ aio_context_release(blk_get_aio_context(s->common.blk));
216
}
217
218
static inline void mirror_clip_sectors(MirrorBlockJob *s,
219
diff --git a/block/null.c b/block/null.c
220
index XXXXXXX..XXXXXXX 100644
221
--- a/block/null.c
222
+++ b/block/null.c
223
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo null_aiocb_info = {
224
static void null_bh_cb(void *opaque)
225
{
226
NullAIOCB *acb = opaque;
227
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
228
-
229
- aio_context_acquire(ctx);
230
acb->common.cb(acb->common.opaque, 0);
231
- aio_context_release(ctx);
232
qemu_aio_unref(acb);
233
}
234
235
static void null_timer_cb(void *opaque)
236
{
237
NullAIOCB *acb = opaque;
238
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
239
-
240
- aio_context_acquire(ctx);
241
acb->common.cb(acb->common.opaque, 0);
242
- aio_context_release(ctx);
243
timer_deinit(&acb->timer);
244
qemu_aio_unref(acb);
245
}
246
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
247
index XXXXXXX..XXXXXXX 100644
248
--- a/block/qed-cluster.c
249
+++ b/block/qed-cluster.c
250
@@ -XXX,XX +XXX,XX @@ static void qed_find_cluster_cb(void *opaque, int ret)
251
unsigned int index;
252
unsigned int n;
253
254
+ qed_acquire(s);
255
if (ret) {
256
goto out;
257
}
258
@@ -XXX,XX +XXX,XX @@ static void qed_find_cluster_cb(void *opaque, int ret)
259
260
out:
261
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
262
+ qed_release(s);
263
g_free(find_cluster_cb);
264
}
265
266
diff --git a/block/qed-table.c b/block/qed-table.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/block/qed-table.c
269
+++ b/block/qed-table.c
270
@@ -XXX,XX +XXX,XX @@ static void qed_read_table_cb(void *opaque, int ret)
271
{
272
QEDReadTableCB *read_table_cb = opaque;
273
QEDTable *table = read_table_cb->table;
274
+ BDRVQEDState *s = read_table_cb->s;
275
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
276
int i;
277
278
@@ -XXX,XX +XXX,XX @@ static void qed_read_table_cb(void *opaque, int ret)
279
}
280
281
/* Byteswap offsets */
282
+ qed_acquire(s);
283
for (i = 0; i < noffsets; i++) {
284
table->offsets[i] = le64_to_cpu(table->offsets[i]);
285
}
286
+ qed_release(s);
287
288
out:
289
/* Completion */
290
- trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
291
+ trace_qed_read_table_cb(s, read_table_cb->table, ret);
292
gencb_complete(&read_table_cb->gencb, ret);
293
}
294
295
@@ -XXX,XX +XXX,XX @@ typedef struct {
296
static void qed_write_table_cb(void *opaque, int ret)
297
{
298
QEDWriteTableCB *write_table_cb = opaque;
299
+ BDRVQEDState *s = write_table_cb->s;
300
301
- trace_qed_write_table_cb(write_table_cb->s,
302
+ trace_qed_write_table_cb(s,
303
write_table_cb->orig_table,
304
write_table_cb->flush,
305
ret);
306
@@ -XXX,XX +XXX,XX @@ static void qed_write_table_cb(void *opaque, int ret)
307
if (write_table_cb->flush) {
308
/* We still need to flush first */
309
write_table_cb->flush = false;
310
+ qed_acquire(s);
311
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
312
write_table_cb);
313
+ qed_release(s);
314
return;
315
}
316
317
@@ -XXX,XX +XXX,XX @@ static void qed_read_l2_table_cb(void *opaque, int ret)
318
CachedL2Table *l2_table = request->l2_table;
319
uint64_t l2_offset = read_l2_table_cb->l2_offset;
320
321
+ qed_acquire(s);
322
if (ret) {
323
/* can't trust loaded L2 table anymore */
324
qed_unref_l2_cache_entry(l2_table);
325
@@ -XXX,XX +XXX,XX @@ static void qed_read_l2_table_cb(void *opaque, int ret)
326
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
327
assert(request->l2_table != NULL);
328
}
329
+ qed_release(s);
330
331
gencb_complete(&read_l2_table_cb->gencb, ret);
332
}
333
diff --git a/block/qed.c b/block/qed.c
334
index XXXXXXX..XXXXXXX 100644
335
--- a/block/qed.c
336
+++ b/block/qed.c
337
@@ -XXX,XX +XXX,XX @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
338
}
339
340
if (cb->co) {
341
- qemu_coroutine_enter(cb->co);
342
+ aio_co_wake(cb->co);
343
}
344
}
345
346
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
347
cb->done = true;
348
cb->ret = ret;
349
if (cb->co) {
350
- qemu_coroutine_enter(cb->co);
351
+ aio_co_wake(cb->co);
352
}
353
}
354
355
diff --git a/block/rbd.c b/block/rbd.c
356
index XXXXXXX..XXXXXXX 100644
357
--- a/block/rbd.c
358
+++ b/block/rbd.c
359
@@ -XXX,XX +XXX,XX @@ shutdown:
360
static void qemu_rbd_complete_aio(RADOSCB *rcb)
361
{
362
RBDAIOCB *acb = rcb->acb;
363
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
364
int64_t r;
365
366
r = rcb->ret;
367
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
368
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
369
}
370
qemu_vfree(acb->bounce);
371
-
372
- aio_context_acquire(ctx);
373
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
374
- aio_context_release(ctx);
375
376
qemu_aio_unref(acb);
377
}
378
diff --git a/block/win32-aio.c b/block/win32-aio.c
379
index XXXXXXX..XXXXXXX 100644
380
--- a/block/win32-aio.c
381
+++ b/block/win32-aio.c
382
@@ -XXX,XX +XXX,XX @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
383
qemu_vfree(waiocb->buf);
384
}
385
386
-
387
- aio_context_acquire(s->aio_ctx);
388
waiocb->common.cb(waiocb->common.opaque, ret);
389
- aio_context_release(s->aio_ctx);
390
qemu_aio_unref(waiocb);
391
}
392
393
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
394
index XXXXXXX..XXXXXXX 100644
395
--- a/hw/block/virtio-blk.c
396
+++ b/hw/block/virtio-blk.c
397
@@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
398
static void virtio_blk_rw_complete(void *opaque, int ret)
399
{
400
VirtIOBlockReq *next = opaque;
401
+ VirtIOBlock *s = next->dev;
402
403
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
404
while (next) {
405
VirtIOBlockReq *req = next;
406
next = req->mr_next;
407
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_rw_complete(void *opaque, int ret)
408
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
409
virtio_blk_free_request(req);
410
}
411
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
412
}
413
414
static void virtio_blk_flush_complete(void *opaque, int ret)
415
{
416
VirtIOBlockReq *req = opaque;
417
+ VirtIOBlock *s = req->dev;
418
419
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
420
if (ret) {
421
if (virtio_blk_handle_rw_error(req, -ret, 0)) {
422
- return;
423
+ goto out;
424
}
425
}
426
427
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
428
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
429
virtio_blk_free_request(req);
430
+
431
+out:
432
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
433
}
434
435
#ifdef __linux__
436
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
437
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
438
439
out:
440
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
441
virtio_blk_req_complete(req, status);
442
virtio_blk_free_request(req);
443
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
444
g_free(ioctl_req);
445
}
446
447
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
448
index XXXXXXX..XXXXXXX 100644
449
--- a/hw/scsi/scsi-disk.c
450
+++ b/hw/scsi/scsi-disk.c
451
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
452
453
assert(r->req.aiocb != NULL);
454
r->req.aiocb = NULL;
455
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
456
if (scsi_disk_req_check_error(r, ret, true)) {
457
goto done;
458
}
459
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
460
scsi_req_complete(&r->req, GOOD);
461
462
done:
463
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
464
scsi_req_unref(&r->req);
465
}
466
467
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_complete(void *opaque, int ret)
468
assert(r->req.aiocb != NULL);
469
r->req.aiocb = NULL;
470
471
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
472
if (ret < 0) {
473
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
474
} else {
475
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
476
}
477
scsi_dma_complete_noio(r, ret);
478
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
479
}
480
481
static void scsi_read_complete(void * opaque, int ret)
482
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
483
484
assert(r->req.aiocb != NULL);
485
r->req.aiocb = NULL;
486
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
487
if (scsi_disk_req_check_error(r, ret, true)) {
488
goto done;
489
}
490
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
491
492
done:
493
scsi_req_unref(&r->req);
494
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
495
}
496
497
/* Actually issue a read to the block device. */
498
@@ -XXX,XX +XXX,XX @@ static void scsi_do_read_cb(void *opaque, int ret)
499
assert (r->req.aiocb != NULL);
500
r->req.aiocb = NULL;
501
502
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
503
if (ret < 0) {
504
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
505
} else {
506
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
507
}
508
scsi_do_read(opaque, ret);
509
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
510
}
511
512
/* Read more data from scsi device into buffer. */
513
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
514
assert (r->req.aiocb != NULL);
515
r->req.aiocb = NULL;
516
517
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
518
if (ret < 0) {
519
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
520
} else {
521
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
522
}
523
scsi_write_complete_noio(r, ret);
524
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
525
}
526
527
static void scsi_write_data(SCSIRequest *req)
528
@@ -XXX,XX +XXX,XX @@ static void scsi_unmap_complete(void *opaque, int ret)
529
{
530
UnmapCBData *data = opaque;
531
SCSIDiskReq *r = data->r;
532
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
533
534
assert(r->req.aiocb != NULL);
535
r->req.aiocb = NULL;
536
537
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
538
scsi_unmap_complete_noio(data, ret);
539
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
540
}
541
542
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
543
@@ -XXX,XX +XXX,XX @@ static void scsi_write_same_complete(void *opaque, int ret)
544
545
assert(r->req.aiocb != NULL);
546
r->req.aiocb = NULL;
547
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
548
if (scsi_disk_req_check_error(r, ret, true)) {
549
goto done;
550
}
551
@@ -XXX,XX +XXX,XX @@ done:
552
scsi_req_unref(&r->req);
553
qemu_vfree(data->iov.iov_base);
554
g_free(data);
555
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
556
}
557
558
static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
559
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
560
index XXXXXXX..XXXXXXX 100644
561
--- a/hw/scsi/scsi-generic.c
562
+++ b/hw/scsi/scsi-generic.c
563
@@ -XXX,XX +XXX,XX @@ done:
564
static void scsi_command_complete(void *opaque, int ret)
565
{
566
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
567
+ SCSIDevice *s = r->req.dev;
568
569
assert(r->req.aiocb != NULL);
570
r->req.aiocb = NULL;
571
+
572
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
573
scsi_command_complete_noio(r, ret);
574
+ aio_context_release(blk_get_aio_context(s->conf.blk));
575
}
576
577
static int execute_command(BlockBackend *blk,
578
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
579
assert(r->req.aiocb != NULL);
580
r->req.aiocb = NULL;
581
582
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
583
+
584
if (ret || r->req.io_canceled) {
585
scsi_command_complete_noio(r, ret);
586
- return;
587
+ goto done;
588
}
589
590
len = r->io_header.dxfer_len - r->io_header.resid;
591
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
592
r->len = -1;
593
if (len == 0) {
594
scsi_command_complete_noio(r, 0);
595
- return;
596
+ goto done;
597
}
598
599
/* Snoop READ CAPACITY output to set the blocksize. */
600
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
601
}
602
scsi_req_data(&r->req, len);
603
scsi_req_unref(&r->req);
604
+
605
+done:
606
+ aio_context_release(blk_get_aio_context(s->conf.blk));
607
}
608
609
/* Read more data from scsi device into buffer. */
610
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
611
assert(r->req.aiocb != NULL);
612
r->req.aiocb = NULL;
613
614
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
615
+
616
if (ret || r->req.io_canceled) {
617
scsi_command_complete_noio(r, ret);
618
- return;
619
+ goto done;
620
}
621
622
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
623
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
624
}
625
626
scsi_command_complete_noio(r, ret);
627
+
628
+done:
629
+ aio_context_release(blk_get_aio_context(s->conf.blk));
630
}
631
632
/* Write data to a scsi device. Returns nonzero on failure.
633
diff --git a/util/thread-pool.c b/util/thread-pool.c
634
index XXXXXXX..XXXXXXX 100644
635
--- a/util/thread-pool.c
636
+++ b/util/thread-pool.c
637
@@ -XXX,XX +XXX,XX @@ restart:
638
*/
639
qemu_bh_schedule(pool->completion_bh);
640
641
+ aio_context_release(pool->ctx);
642
elem->common.cb(elem->common.opaque, elem->ret);
643
+ aio_context_acquire(pool->ctx);
644
qemu_aio_unref(elem);
645
goto restart;
646
} else {
647
@@ -XXX,XX +XXX,XX @@ static void thread_pool_co_cb(void *opaque, int ret)
648
ThreadPoolCo *co = opaque;
649
650
co->ret = ret;
651
- qemu_coroutine_enter(co->co);
652
+ aio_co_wake(co->co);
653
}
654
655
int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
74
--
656
--
75
2.31.1
657
2.9.3
76
658
77
659
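
Two independent idioms meet in the patch above. On the mirror side,
every use of self is guarded because mirror_iteration() legitimately
passes NULL. On the AioContext side, qemu_coroutine_enter(co) becomes
aio_co_wake(co), which first looks up the AioContext the coroutine was
last running in and reschedules the coroutine there if the caller is
in a different one. A hedged before/after sketch (MyTask is a
hypothetical stand-in for the nfs/iscsi task structs above):

    typedef struct MyTask {
        Coroutine *co;
        int complete;
    } MyTask;

    /* Before: correct only when this bottom half runs in the same
     * AioContext as the coroutine it wakes. */
    static void my_bh_cb_old(void *opaque)
    {
        MyTask *task = opaque;
        task->complete = 1;
        qemu_coroutine_enter(task->co);
    }

    /* After: safe from any thread; aio_co_wake() enters the coroutine
     * directly if we are already in its AioContext and schedules it
     * there otherwise. */
    static void my_bh_cb_new(void *opaque)
    {
        MyTask *task = opaque;
        task->complete = 1;
        aio_co_wake(task->co);
    }
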
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
No logic change, just preparation for the following commit. While
3
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
4
here, also make a small grammar fix in a comment.
4
Extract the dispatching loop for file descriptor handlers into a new
5
function aio_dispatch_handlers, and then inline aio_dispatch into
6
aio_poll.
5
7
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
aio_dispatch can now become void.
7
Reviewed-by: Eric Blake <eblake@redhat.com>
9
8
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-Id: <20210824101517.59802-3-vsementsov@virtuozzo.com>
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
14
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
16
---
12
block/qcow2-cluster.c | 49 ++++++++++++++++++++++++-------------------
17
include/block/aio.h | 6 +-----
13
1 file changed, 28 insertions(+), 21 deletions(-)
18
util/aio-posix.c | 44 ++++++++++++++------------------------------
19
util/aio-win32.c | 13 ++++---------
20
util/async.c | 2 +-
21
4 files changed, 20 insertions(+), 45 deletions(-)
14
22
15
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
23
diff --git a/include/block/aio.h b/include/block/aio.h
16
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
17
--- a/block/qcow2-cluster.c
25
--- a/include/block/aio.h
18
+++ b/block/qcow2-cluster.c
26
+++ b/include/block/aio.h
19
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
27
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx);
20
28
/* Dispatch any pending callbacks from the GSource attached to the AioContext.
21
if (end <= old_start || start >= old_end) {
29
*
22
/* No intersection */
30
* This is used internally in the implementation of the GSource.
23
- } else {
31
- *
24
- if (start < old_start) {
32
- * @dispatch_fds: true to process fds, false to skip them
25
- /* Stop at the start of a running allocation */
33
- * (can be used as an optimization by callers that know there
26
- bytes = old_start - start;
34
- * are no fds ready)
27
- } else {
35
*/
28
- bytes = 0;
36
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds);
29
- }
37
+void aio_dispatch(AioContext *ctx);
30
+ continue;
38
31
+ }
39
/* Progress in completing AIO work to occur. This can issue new pending
32
40
* aio as a result of executing I/O completion or bh callbacks.
33
- /* Stop if already an l2meta exists. After yielding, it wouldn't
41
diff --git a/util/aio-posix.c b/util/aio-posix.c
34
- * be valid any more, so we'd have to clean up the old L2Metas
42
index XXXXXXX..XXXXXXX 100644
35
- * and deal with requests depending on them before starting to
43
--- a/util/aio-posix.c
36
- * gather new ones. Not worth the trouble. */
44
+++ b/util/aio-posix.c
37
- if (bytes == 0 && *m) {
45
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
38
- *cur_bytes = 0;
46
AioHandler *node, *tmp;
39
- return 0;
47
bool progress = false;
40
- }
48
41
+ /* Conflict */
49
- /*
42
50
- * We have to walk very carefully in case aio_set_fd_handler is
43
- if (bytes == 0) {
51
- * called while we're walking.
44
- /* Wait for the dependency to complete. We need to recheck
52
- */
45
- * the free/allocated clusters when we continue. */
53
- qemu_lockcnt_inc(&ctx->list_lock);
46
- qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
54
-
47
- return -EAGAIN;
55
QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
48
- }
56
int revents;
49
+ if (start < old_start) {
57
50
+ /* Stop at the start of a running allocation */
58
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
51
+ bytes = old_start - start;
52
+ } else {
53
+ bytes = 0;
54
+ }
55
+
56
+ /*
57
+ * Stop if an l2meta already exists. After yielding, it wouldn't
58
+ * be valid any more, so we'd have to clean up the old L2Metas
59
+ * and deal with requests depending on them before starting to
60
+ * gather new ones. Not worth the trouble.
61
+ */
62
+ if (bytes == 0 && *m) {
63
+ *cur_bytes = 0;
64
+ return 0;
65
+ }
66
+
67
+ if (bytes == 0) {
68
+ /*
69
+ * Wait for the dependency to complete. We need to recheck
70
+ * the free/allocated clusters when we continue.
71
+ */
72
+ qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
73
+ return -EAGAIN;
74
}
59
}
75
}
60
}
76
61
62
- qemu_lockcnt_dec(&ctx->list_lock);
63
return progress;
64
}
65
66
-/*
67
- * Note that dispatch_fds == false has the side-effect of post-poning the
68
- * freeing of deleted handlers.
69
- */
70
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
71
+void aio_dispatch(AioContext *ctx)
72
{
73
- bool progress;
74
+ aio_bh_poll(ctx);
75
76
- /*
77
- * If there are callbacks left that have been queued, we need to call them.
78
- * Do not call select in this case, because it is possible that the caller
79
- * does not need a complete flush (as is the case for aio_poll loops).
80
- */
81
- progress = aio_bh_poll(ctx);
82
+ qemu_lockcnt_inc(&ctx->list_lock);
83
+ aio_dispatch_handlers(ctx);
84
+ qemu_lockcnt_dec(&ctx->list_lock);
85
86
- if (dispatch_fds) {
87
- progress |= aio_dispatch_handlers(ctx);
88
- }
89
-
90
- /* Run our timers */
91
- progress |= timerlistgroup_run_timers(&ctx->tlg);
92
-
93
- return progress;
94
+ timerlistgroup_run_timers(&ctx->tlg);
95
}
96
97
/* These thread-local variables are used only in a small part of aio_poll
98
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
99
npfd = 0;
100
qemu_lockcnt_dec(&ctx->list_lock);
101
102
- /* Run dispatch even if there were no readable fds to run timers */
103
- if (aio_dispatch(ctx, ret > 0)) {
104
- progress = true;
105
+ progress |= aio_bh_poll(ctx);
106
+
107
+ if (ret > 0) {
108
+ qemu_lockcnt_inc(&ctx->list_lock);
109
+ progress |= aio_dispatch_handlers(ctx);
110
+ qemu_lockcnt_dec(&ctx->list_lock);
111
}
112
113
+ progress |= timerlistgroup_run_timers(&ctx->tlg);
114
+
115
return progress;
116
}
117
118
diff --git a/util/aio-win32.c b/util/aio-win32.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/util/aio-win32.c
121
+++ b/util/aio-win32.c
122
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
123
return progress;
124
}
125
126
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
127
+void aio_dispatch(AioContext *ctx)
128
{
129
- bool progress;
130
-
131
- progress = aio_bh_poll(ctx);
132
- if (dispatch_fds) {
133
- progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
134
- }
135
- progress |= timerlistgroup_run_timers(&ctx->tlg);
136
- return progress;
137
+ aio_bh_poll(ctx);
138
+ aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
139
+ timerlistgroup_run_timers(&ctx->tlg);
140
}
141
142
bool aio_poll(AioContext *ctx, bool blocking)
143
diff --git a/util/async.c b/util/async.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/util/async.c
146
+++ b/util/async.c
147
@@ -XXX,XX +XXX,XX @@ aio_ctx_dispatch(GSource *source,
148
AioContext *ctx = (AioContext *) source;
149
150
assert(callback == NULL);
151
- aio_dispatch(ctx, true);
152
+ aio_dispatch(ctx);
153
return true;
154
}
155
77
--
156
--
78
2.31.1
157
2.9.3
79
158
80
159
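
After the patch above, aio_dispatch() is a plain void helper for the
GSource path, and aio_poll() performs the same three steps inline. The
qemu_lockcnt pair around the handler walk is what keeps a concurrent
aio_set_fd_handler() from freeing list nodes under the walker. A
sketch of the resulting shape (walk_handlers() is a hypothetical
stand-in for aio_dispatch_handlers()):

    /* Sketch of the dispatch shape after this patch; walk_handlers()
     * is a hypothetical stand-in for aio_dispatch_handlers(). */
    void my_dispatch(AioContext *ctx)
    {
        aio_bh_poll(ctx);

        qemu_lockcnt_inc(&ctx->list_lock);  /* pin the handler list */
        walk_handlers(ctx);                 /* nodes cannot be freed here */
        qemu_lockcnt_dec(&ctx->list_lock);  /* deferred frees may run now */

        timerlistgroup_run_timers(&ctx->tlg);
    }
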
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
- don't use the same name for the size in bytes and the size in entries
3
Pull the increment/decrement pair out of aio_bh_poll and into the
4
- use g_autofree for l2_table
4
callers.
5
- add whitespace
6
- fix block comment style
7
5
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Message-Id: <20210914122454.141075-2-vsementsov@virtuozzo.com>
9
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
12
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
10
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
12
---
14
block/qcow2-refcount.c | 47 +++++++++++++++++++++---------------------
13
util/aio-posix.c | 8 +++-----
15
1 file changed, 24 insertions(+), 23 deletions(-)
14
util/aio-win32.c | 8 ++++----
15
util/async.c | 12 ++++++------
16
3 files changed, 13 insertions(+), 15 deletions(-)
16
17
17
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
18
diff --git a/util/aio-posix.c b/util/aio-posix.c
18
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
19
--- a/block/qcow2-refcount.c
20
--- a/util/aio-posix.c
20
+++ b/block/qcow2-refcount.c
21
+++ b/util/aio-posix.c
21
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
22
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
22
int flags, BdrvCheckMode fix, bool active)
23
24
void aio_dispatch(AioContext *ctx)
23
{
25
{
24
BDRVQcow2State *s = bs->opaque;
26
+ qemu_lockcnt_inc(&ctx->list_lock);
25
- uint64_t *l2_table, l2_entry;
27
aio_bh_poll(ctx);
26
+ uint64_t l2_entry;
27
uint64_t next_contiguous_offset = 0;
28
- int i, l2_size, nb_csectors, ret;
29
+ int i, nb_csectors, ret;
30
+ size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
31
+ g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
32
33
/* Read L2 table from disk */
34
- l2_size = s->l2_size * l2_entry_size(s);
35
- l2_table = g_malloc(l2_size);
36
-
28
-
37
- ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
29
- qemu_lockcnt_inc(&ctx->list_lock);
38
+ ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes);
30
aio_dispatch_handlers(ctx);
39
if (ret < 0) {
31
qemu_lockcnt_dec(&ctx->list_lock);
40
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
32
41
res->check_errors++;
33
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
42
- goto fail;
43
+ return ret;
44
}
34
}
45
35
46
/* Do the actual checks */
36
npfd = 0;
47
- for(i = 0; i < s->l2_size; i++) {
37
- qemu_lockcnt_dec(&ctx->list_lock);
48
+ for (i = 0; i < s->l2_size; i++) {
38
49
l2_entry = get_l2_entry(s, l2_table, i);
39
progress |= aio_bh_poll(ctx);
50
40
51
switch (qcow2_get_cluster_type(bs, l2_entry)) {
41
if (ret > 0) {
52
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
42
- qemu_lockcnt_inc(&ctx->list_lock);
53
l2_entry & QCOW2_COMPRESSED_SECTOR_MASK,
43
progress |= aio_dispatch_handlers(ctx);
54
nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE);
44
- qemu_lockcnt_dec(&ctx->list_lock);
55
if (ret < 0) {
45
}
56
- goto fail;
46
57
+ return ret;
47
+ qemu_lockcnt_dec(&ctx->list_lock);
58
}
48
+
59
49
progress |= timerlistgroup_run_timers(&ctx->tlg);
60
if (flags & CHECK_FRAG_INFO) {
50
61
res->bfi.allocated_clusters++;
51
return progress;
62
res->bfi.compressed_clusters++;
52
diff --git a/util/aio-win32.c b/util/aio-win32.c
63
53
index XXXXXXX..XXXXXXX 100644
64
- /* Compressed clusters are fragmented by nature. Since they
54
--- a/util/aio-win32.c
65
+ /*
55
+++ b/util/aio-win32.c
66
+ * Compressed clusters are fragmented by nature. Since they
56
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
67
* take up sub-sector space but we only have sector granularity
57
bool progress = false;
68
* I/O we need to re-read the same sectors even for adjacent
58
AioHandler *tmp;
69
* compressed clusters.
59
70
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
60
- qemu_lockcnt_inc(&ctx->list_lock);
71
if (ret < 0) {
61
-
72
fprintf(stderr, "ERROR: Overlap check failed\n");
62
/*
73
res->check_errors++;
63
* We have to walk very carefully in case aio_set_fd_handler is
74
- /* Something is seriously wrong, so abort checking
64
* called while we're walking.
75
- * this L2 table */
65
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
76
- goto fail;
77
+ /*
78
+ * Something is seriously wrong, so abort checking
79
+ * this L2 table.
80
+ */
81
+ return ret;
82
}
83
84
ret = bdrv_pwrite_sync(bs->file, l2e_offset,
85
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
86
fprintf(stderr, "ERROR: Failed to overwrite L2 "
87
"table entry: %s\n", strerror(-ret));
88
res->check_errors++;
89
- /* Do not abort, continue checking the rest of this
90
- * L2 table's entries */
91
+ /*
92
+ * Do not abort, continue checking the rest of this
93
+ * L2 table's entries.
94
+ */
95
} else {
96
res->corruptions--;
97
res->corruptions_fixed++;
98
- /* Skip marking the cluster as used
99
- * (it is unused now) */
100
+ /*
101
+ * Skip marking the cluster as used
102
+ * (it is unused now).
103
+ */
104
continue;
105
}
106
}
107
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
108
refcount_table_size,
109
offset, s->cluster_size);
110
if (ret < 0) {
111
- goto fail;
112
+ return ret;
113
}
114
}
115
break;
116
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
117
}
66
}
118
}
67
}
119
68
120
- g_free(l2_table);
69
- qemu_lockcnt_dec(&ctx->list_lock);
121
return 0;
70
return progress;
71
}
72
73
void aio_dispatch(AioContext *ctx)
74
{
75
+ qemu_lockcnt_inc(&ctx->list_lock);
76
aio_bh_poll(ctx);
77
aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
78
+ qemu_lockcnt_dec(&ctx->list_lock);
79
timerlistgroup_run_timers(&ctx->tlg);
80
}
81
82
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
83
}
84
}
85
86
- qemu_lockcnt_dec(&ctx->list_lock);
87
first = true;
88
89
/* ctx->notifier is always registered. */
90
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
91
progress |= aio_dispatch_handlers(ctx, event);
92
} while (count > 0);
93
94
+ qemu_lockcnt_dec(&ctx->list_lock);
95
+
96
progress |= timerlistgroup_run_timers(&ctx->tlg);
97
return progress;
98
}
99
diff --git a/util/async.c b/util/async.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/util/async.c
102
+++ b/util/async.c
103
@@ -XXX,XX +XXX,XX @@ void aio_bh_call(QEMUBH *bh)
104
bh->cb(bh->opaque);
105
}
106
107
-/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
108
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
109
+ * The count in ctx->list_lock is incremented before the call, and is
110
+ * not affected by the call.
111
+ */
112
int aio_bh_poll(AioContext *ctx)
113
{
114
QEMUBH *bh, **bhp, *next;
115
int ret;
116
bool deleted = false;
117
118
- qemu_lockcnt_inc(&ctx->list_lock);
122
-
119
-
123
-fail:
120
ret = 0;
124
- g_free(l2_table);
121
for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
125
- return ret;
122
next = atomic_rcu_read(&bh->next);
123
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
124
125
/* remove deleted bhs */
126
if (!deleted) {
127
- qemu_lockcnt_dec(&ctx->list_lock);
128
return ret;
129
}
130
131
- if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
132
+ if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
133
bhp = &ctx->first_bh;
134
while (*bhp) {
135
bh = *bhp;
136
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
137
bhp = &bh->next;
138
}
139
}
140
- qemu_lockcnt_unlock(&ctx->list_lock);
141
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
142
}
143
return ret;
126
}
144
}
127
128
/*
129
--
145
--
130
2.31.1
146
2.9.3
131
147
132
148
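
On the qcow2 side of the patch above, the goto fail/g_free() cleanup
collapses into g_autofree, GLib's scope-bound cleanup attribute: the
buffer is released on every return path, so the early returns become
safe. A minimal sketch under that assumption (read_table() is a
hypothetical helper):

    /* g_autofree sketch mirroring the check_refcounts_l2() change
     * above; read_table() is a hypothetical helper. */
    static int check_table(BlockDriverState *bs, uint64_t offset,
                           size_t size)
    {
        g_autofree uint64_t *table = g_malloc(size);
        int ret;

        ret = read_table(bs, offset, table, size);
        if (ret < 0) {
            return ret;   /* table is freed automatically here */
        }

        /* ... validate each entry ... */
        return 0;         /* ... and here */
    }

The util/async.c side applies the mirror-image rule for qemu_lockcnt:
since every caller of aio_bh_poll() now holds the list_lock count, the
deleted-BH sweep can use qemu_lockcnt_dec_if_lock() and
qemu_lockcnt_inc_and_unlock() instead of a full dec/lock cycle.
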
diff view generated by jsdifflib
1
There is a comment above the BDS definition stating that care must be taken
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
to consider handling newly added fields in bdrv_append().
3
2
4
Actually, this comment should have said "bdrv_swap()" as of 4ddc07cac
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
(nine years ago), and in any case, bdrv_swap() was dropped in
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
6
8e419aefa (six years ago). So no such care is necessary anymore.
5
Reviewed-by: Fam Zheng <famz@redhat.com>
7
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
8
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
7
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
9
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
12
Message-Id: <20210812084148.14458-2-hreitz@redhat.com>
13
---
9
---
14
include/block/block_int.h | 6 ------
10
include/block/block_int.h | 64 +++++++++++++++++++++++++-----------------
15
1 file changed, 6 deletions(-)
11
include/sysemu/block-backend.h | 14 ++++++---
12
2 files changed, 49 insertions(+), 29 deletions(-)
16
13
17
diff --git a/include/block/block_int.h b/include/block/block_int.h
14
diff --git a/include/block/block_int.h b/include/block/block_int.h
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/include/block/block_int.h
16
--- a/include/block/block_int.h
20
+++ b/include/block/block_int.h
17
+++ b/include/block/block_int.h
21
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
18
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
22
QLIST_ENTRY(BdrvChild) next_parent;
19
* copied as well.
20
*/
21
struct BlockDriverState {
22
- int64_t total_sectors; /* if we are reading a disk image, give its
23
- size in sectors */
24
+ /* Protected by big QEMU lock or read-only after opening. No special
25
+ * locking needed during I/O...
26
+ */
27
int open_flags; /* flags used to open the file, re-used for re-open */
28
bool read_only; /* if true, the media is read only */
29
bool encrypted; /* if true, the media is encrypted */
30
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
31
bool sg; /* if true, the device is a /dev/sg* */
32
bool probed; /* if true, format was probed rather than specified */
33
34
- int copy_on_read; /* if nonzero, copy read backing sectors into image.
35
- note this is a reference count */
36
-
37
- CoQueue flush_queue; /* Serializing flush queue */
38
- bool active_flush_req; /* Flush request in flight? */
39
- unsigned int write_gen; /* Current data generation */
40
- unsigned int flushed_gen; /* Flushed write generation */
41
-
42
BlockDriver *drv; /* NULL means no media */
43
void *opaque;
44
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
46
BdrvChild *backing;
47
BdrvChild *file;
48
49
- /* Callback before write request is processed */
50
- NotifierWithReturnList before_write_notifiers;
51
-
52
- /* number of in-flight requests; overall and serialising */
53
- unsigned int in_flight;
54
- unsigned int serialising_in_flight;
55
-
56
- bool wakeup;
57
-
58
- /* Offset after the highest byte written to */
59
- uint64_t wr_highest_offset;
60
-
61
/* I/O Limits */
62
BlockLimits bl;
63
64
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
65
QTAILQ_ENTRY(BlockDriverState) bs_list;
66
/* element of the list of monitor-owned BDS */
67
QTAILQ_ENTRY(BlockDriverState) monitor_list;
68
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
69
int refcnt;
70
71
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
72
-
73
/* operation blockers */
74
QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
75
76
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
77
/* The error object in use for blocking operations on backing_hd */
78
Error *backing_blocker;
79
80
+ /* Protected by AioContext lock */
81
+
82
+ /* If true, copy read backing sectors into image. Can be >1 if more
83
+ * than one client has requested copy-on-read.
84
+ */
85
+ int copy_on_read;
86
+
87
+ /* If we are reading a disk image, give its size in sectors.
88
+ * Generally read-only; it is written to by load_vmstate and save_vmstate,
89
+ * but the block layer is quiescent during those.
90
+ */
91
+ int64_t total_sectors;
92
+
93
+ /* Callback before write request is processed */
94
+ NotifierWithReturnList before_write_notifiers;
95
+
96
+ /* number of in-flight requests; overall and serialising */
97
+ unsigned int in_flight;
98
+ unsigned int serialising_in_flight;
99
+
100
+ bool wakeup;
101
+
102
+ /* Offset after the highest byte written to */
103
+ uint64_t wr_highest_offset;
104
+
105
/* threshold limit for writes, in bytes. "High water mark". */
106
uint64_t write_threshold_offset;
107
NotifierWithReturn write_threshold_notifier;
108
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
109
/* counter for nested bdrv_io_plug */
110
unsigned io_plugged;
111
112
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
113
+ CoQueue flush_queue; /* Serializing flush queue */
114
+ bool active_flush_req; /* Flush request in flight? */
115
+ unsigned int write_gen; /* Current data generation */
116
+ unsigned int flushed_gen; /* Flushed write generation */
117
+
118
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
119
+
120
+ /* do we need to tell the guest if we have a volatile write cache? */
121
+ int enable_write_cache;
122
+
123
int quiesce_counter;
23
};
124
};
24
125
25
-/*
126
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
26
- * Note: the function bdrv_append() copies and swaps contents of
127
index XXXXXXX..XXXXXXX 100644
27
- * BlockDriverStates, so if you add new fields to this struct, please
128
--- a/include/sysemu/block-backend.h
28
- * inspect bdrv_append() to determine if the new fields need to be
129
+++ b/include/sysemu/block-backend.h
29
- * copied as well.
130
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
30
- */
131
* fields that must be public. This is in particular for QLIST_ENTRY() and
31
struct BlockDriverState {
132
* friends so that BlockBackends can be kept in lists outside block-backend.c */
32
/* Protected by big QEMU lock or read-only after opening. No special
133
typedef struct BlockBackendPublic {
33
* locking needed during I/O...
134
- /* I/O throttling.
135
- * throttle_state tells us if this BlockBackend has I/O limits configured.
136
- * io_limits_disabled tells us if they are currently being enforced */
137
+ /* I/O throttling has its own locking, but also some fields are
138
+ * protected by the AioContext lock.
139
+ */
140
+
141
+ /* Protected by AioContext lock. */
142
CoQueue throttled_reqs[2];
143
+
144
+ /* Nonzero if the I/O limits are currently being ignored; generally
145
+ * it is zero. */
146
unsigned int io_limits_disabled;
147
148
/* The following fields are protected by the ThrottleGroup lock.
149
- * See the ThrottleGroup documentation for details. */
150
+ * See the ThrottleGroup documentation for details.
151
+ * throttle_state tells us if I/O limits are configured. */
152
ThrottleState *throttle_state;
153
ThrottleTimers throttle_timers;
154
unsigned pending_reqs[2];
34
--
155
--
35
2.31.1
156
2.9.3
36
157
37
158
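The annotations in the patch above describe a locking discipline rather than new code. A minimal sketch of how a field marked "Protected by AioContext lock" is meant to be accessed (the helper name is hypothetical, written against the aio_context_acquire()/aio_context_release() API of this period):

    /* Hypothetical helper, for illustration only: fields annotated
     * "Protected by AioContext lock" are only touched while holding
     * the node's AioContext. */
    static void example_bump_copy_on_read(BlockDriverState *bs)
    {
        AioContext *ctx = bdrv_get_aio_context(bs);

        aio_context_acquire(ctx);
        bs->copy_on_read++;      /* "Protected by AioContext lock" */
        aio_context_release(ctx);
    }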
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
We must not inactivate a child when its parent has write permissions on
3
This uses the lock-free mutex described in the paper '"Blocking without
4
it.
4
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
5
5
Papatriantafilou. The same technique is used in OSv, and in fact
6
Calling .bdrv_inactivate() doesn't help: actually only qcow2 has this
6
the code is essentially a conversion to C of OSv's code.
7
handler and it is used to flush caches, not for permission
7
8
manipulations.
8
[Added missing coroutine_fn in tests/test-aio-multithread.c.
9
9
--Stefan]
10
So, let's simply check cumulative parent permissions before
10
11
inactivating the node.
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
This commit fixes a crash when migration is performed during backup: prior to
13
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
14
this commit, nothing prevented inactivating all nodes when migration finished,
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
and a subsequent backup write to the target crashed on the assertion
16
"assert(!(bs->open_flags & BDRV_O_INACTIVE));" in
17
bdrv_co_write_req_prepare().
18
19
After this commit, we rely on the fact that the copy-before-write filter
20
keeps write permission on the target node in order to write to it. So
21
inactivation fails and migration fails as expected.
22
23
The corresponding test now passes, so enable it.
24
25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
27
Message-Id: <20210911120027.8063-3-vsementsov@virtuozzo.com>
28
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
29
---
15
---
30
block.c | 8 ++++++++
16
include/qemu/coroutine.h | 17 ++++-
31
tests/qemu-iotests/tests/migrate-during-backup | 2 +-
17
tests/test-aio-multithread.c | 86 ++++++++++++++++++++++++
32
2 files changed, 9 insertions(+), 1 deletion(-)
18
util/qemu-coroutine-lock.c | 155 ++++++++++++++++++++++++++++++++++++++++---
33
19
util/trace-events | 1 +
34
diff --git a/block.c b/block.c
20
4 files changed, 246 insertions(+), 13 deletions(-)
21
22
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
35
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
36
--- a/block.c
24
--- a/include/qemu/coroutine.h
37
+++ b/block.c
25
+++ b/include/qemu/coroutine.h
38
@@ -XXX,XX +XXX,XX @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
26
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue);
27
/**
28
* Provides a mutex that can be used to synchronise coroutines
29
*/
30
+struct CoWaitRecord;
31
typedef struct CoMutex {
32
- bool locked;
33
+ /* Count of pending lockers; 0 for a free mutex, 1 for an
34
+ * uncontended mutex.
35
+ */
36
+ unsigned locked;
37
+
38
+ /* A queue of waiters. Elements are added atomically in front of
39
+ * from_push. to_pop is only populated, and popped from, by whoever
40
+ * is in charge of the next wakeup. This can be an unlocker or,
41
+ * through the handoff protocol, a locker that is about to go to sleep.
42
+ */
43
+ QSLIST_HEAD(, CoWaitRecord) from_push, to_pop;
44
+
45
+ unsigned handoff, sequence;
46
+
47
Coroutine *holder;
48
- CoQueue queue;
49
} CoMutex;
50
51
/**
52
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tests/test-aio-multithread.c
55
+++ b/tests/test-aio-multithread.c
56
@@ -XXX,XX +XXX,XX @@ static void test_multi_co_schedule_10(void)
57
test_multi_co_schedule(10);
58
}
59
60
+/* CoMutex thread-safety. */
61
+
62
+static uint32_t atomic_counter;
63
+static uint32_t running;
64
+static uint32_t counter;
65
+static CoMutex comutex;
66
+
67
+static void coroutine_fn test_multi_co_mutex_entry(void *opaque)
68
+{
69
+ while (!atomic_mb_read(&now_stopping)) {
70
+ qemu_co_mutex_lock(&comutex);
71
+ counter++;
72
+ qemu_co_mutex_unlock(&comutex);
73
+
74
+ /* Increase atomic_counter *after* releasing the mutex. Otherwise
75
+ * there is a chance (it happens about 1 in 3 runs) that the iothread
76
+ * exits before the coroutine is woken up, causing a spurious
77
+ * assertion failure.
78
+ */
79
+ atomic_inc(&atomic_counter);
80
+ }
81
+ atomic_dec(&running);
82
+}
83
+
84
+static void test_multi_co_mutex(int threads, int seconds)
85
+{
86
+ int i;
87
+
88
+ qemu_co_mutex_init(&comutex);
89
+ counter = 0;
90
+ atomic_counter = 0;
91
+ now_stopping = false;
92
+
93
+ create_aio_contexts();
94
+ assert(threads <= NUM_CONTEXTS);
95
+ running = threads;
96
+ for (i = 0; i < threads; i++) {
97
+ Coroutine *co1 = qemu_coroutine_create(test_multi_co_mutex_entry, NULL);
98
+ aio_co_schedule(ctx[i], co1);
99
+ }
100
+
101
+ g_usleep(seconds * 1000000);
102
+
103
+ atomic_mb_set(&now_stopping, true);
104
+ while (running > 0) {
105
+ g_usleep(100000);
106
+ }
107
+
108
+ join_aio_contexts();
109
+ g_test_message("%d iterations/second\n", counter / seconds);
110
+ g_assert_cmpint(counter, ==, atomic_counter);
111
+}
112
+
113
+/* Testing with NUM_CONTEXTS threads focuses on the queue. The mutex however
114
+ * is too contended (and the threads spend too much time in aio_poll)
115
+ * to actually stress the handoff protocol.
116
+ */
117
+static void test_multi_co_mutex_1(void)
118
+{
119
+ test_multi_co_mutex(NUM_CONTEXTS, 1);
120
+}
121
+
122
+static void test_multi_co_mutex_10(void)
123
+{
124
+ test_multi_co_mutex(NUM_CONTEXTS, 10);
125
+}
126
+
127
+/* Testing with fewer threads stresses the handoff protocol too. Still, the
128
+ * case where the locker _can_ pick up a handoff is very rare, happening
129
+ * about 10 times in 1 million, so increase the runtime a bit compared to
130
+ * other "quick" testcases that only run for 1 second.
131
+ */
132
+static void test_multi_co_mutex_2_3(void)
133
+{
134
+ test_multi_co_mutex(2, 3);
135
+}
136
+
137
+static void test_multi_co_mutex_2_30(void)
138
+{
139
+ test_multi_co_mutex(2, 30);
140
+}
141
+
142
/* End of tests. */
143
144
int main(int argc, char **argv)
145
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
146
g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
147
if (g_test_quick()) {
148
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
149
+ g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
150
+ g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
151
} else {
152
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
153
+ g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
154
+ g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
155
}
156
return g_test_run();
157
}
158
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
159
index XXXXXXX..XXXXXXX 100644
160
--- a/util/qemu-coroutine-lock.c
161
+++ b/util/qemu-coroutine-lock.c
162
@@ -XXX,XX +XXX,XX @@
163
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
164
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
165
* THE SOFTWARE.
166
+ *
167
+ * The lock-free mutex implementation is based on OSv
168
+ * (core/lfmutex.cc, include/lockfree/mutex.hh).
169
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
170
*/
171
172
#include "qemu/osdep.h"
173
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue)
174
return QSIMPLEQ_FIRST(&queue->entries) == NULL;
175
}
176
177
+/* The wait records are handled with a multiple-producer, single-consumer
178
+ * lock-free queue. There cannot be two concurrent pop_waiter() calls
179
+ * because pop_waiter() can only be called while mutex->handoff is zero.
180
+ * This can happen in three cases:
181
+ * - in qemu_co_mutex_unlock, before the hand-off protocol has started.
182
+ * In this case, qemu_co_mutex_lock will see mutex->handoff == 0 and
183
+ * not take part in the handoff.
184
+ * - in qemu_co_mutex_lock, if it steals the hand-off responsibility from
185
+ * qemu_co_mutex_unlock. In this case, qemu_co_mutex_unlock will fail
186
+ * the cmpxchg (it will see either 0 or the next sequence value) and
187
+ * exit. The next hand-off cannot begin until qemu_co_mutex_lock has
188
+ * woken up someone.
189
+ * - in qemu_co_mutex_unlock, if it takes the hand-off token itself.
190
+ * In this case another iteration starts with mutex->handoff == 0;
191
+ * a concurrent qemu_co_mutex_lock will fail the cmpxchg, and
192
+ * qemu_co_mutex_unlock will go back to case (1).
193
+ *
194
+ * The following functions manage this queue.
195
+ */
196
+typedef struct CoWaitRecord {
197
+ Coroutine *co;
198
+ QSLIST_ENTRY(CoWaitRecord) next;
199
+} CoWaitRecord;
200
+
201
+static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
202
+{
203
+ w->co = qemu_coroutine_self();
204
+ QSLIST_INSERT_HEAD_ATOMIC(&mutex->from_push, w, next);
205
+}
206
+
207
+static void move_waiters(CoMutex *mutex)
208
+{
209
+ QSLIST_HEAD(, CoWaitRecord) reversed;
210
+ QSLIST_MOVE_ATOMIC(&reversed, &mutex->from_push);
211
+ while (!QSLIST_EMPTY(&reversed)) {
212
+ CoWaitRecord *w = QSLIST_FIRST(&reversed);
213
+ QSLIST_REMOVE_HEAD(&reversed, next);
214
+ QSLIST_INSERT_HEAD(&mutex->to_pop, w, next);
215
+ }
216
+}
217
+
218
+static CoWaitRecord *pop_waiter(CoMutex *mutex)
219
+{
220
+ CoWaitRecord *w;
221
+
222
+ if (QSLIST_EMPTY(&mutex->to_pop)) {
223
+ move_waiters(mutex);
224
+ if (QSLIST_EMPTY(&mutex->to_pop)) {
225
+ return NULL;
226
+ }
227
+ }
228
+ w = QSLIST_FIRST(&mutex->to_pop);
229
+ QSLIST_REMOVE_HEAD(&mutex->to_pop, next);
230
+ return w;
231
+}
232
+
233
+static bool has_waiters(CoMutex *mutex)
234
+{
235
+ return QSLIST_EMPTY(&mutex->to_pop) || QSLIST_EMPTY(&mutex->from_push);
236
+}
237
+
238
void qemu_co_mutex_init(CoMutex *mutex)
39
{
239
{
40
BdrvChild *child, *parent;
240
memset(mutex, 0, sizeof(*mutex));
41
int ret;
241
- qemu_co_queue_init(&mutex->queue);
42
+ uint64_t cumulative_perms, cumulative_shared_perms;
242
}
43
243
44
if (!bs->drv) {
244
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
45
return -ENOMEDIUM;
245
+static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
46
@@ -XXX,XX +XXX,XX @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
246
{
47
}
247
Coroutine *self = qemu_coroutine_self();
248
+ CoWaitRecord w;
249
+ unsigned old_handoff;
250
251
trace_qemu_co_mutex_lock_entry(mutex, self);
252
+ w.co = self;
253
+ push_waiter(mutex, &w);
254
255
- while (mutex->locked) {
256
- qemu_co_queue_wait(&mutex->queue);
257
+ /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
258
+ * a concurrent unlock() the responsibility of waking somebody up.
259
+ */
260
+ old_handoff = atomic_mb_read(&mutex->handoff);
261
+ if (old_handoff &&
262
+ has_waiters(mutex) &&
263
+ atomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
264
+ /* There can be no concurrent pops, because there can be only
265
+ * one active handoff at a time.
266
+ */
267
+ CoWaitRecord *to_wake = pop_waiter(mutex);
268
+ Coroutine *co = to_wake->co;
269
+ if (co == self) {
270
+ /* We got the lock ourselves! */
271
+ assert(to_wake == &w);
272
+ return;
273
+ }
274
+
275
+ aio_co_wake(co);
48
}
276
}
49
277
50
+ bdrv_get_cumulative_perm(bs, &cumulative_perms,
278
- mutex->locked = true;
51
+ &cumulative_shared_perms);
279
- mutex->holder = self;
52
+ if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
280
- self->locks_held++;
53
+ /* Our inactive parents still need write access. Inactivation failed. */
281
-
54
+ return -EPERM;
282
+ qemu_coroutine_yield();
55
+ }
283
trace_qemu_co_mutex_lock_return(mutex, self);
56
+
284
}
57
bs->open_flags |= BDRV_O_INACTIVE;
285
58
286
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
59
/*
287
+{
60
diff --git a/tests/qemu-iotests/tests/migrate-during-backup b/tests/qemu-iotests/tests/migrate-during-backup
288
+ Coroutine *self = qemu_coroutine_self();
61
index XXXXXXX..XXXXXXX 100755
289
+
62
--- a/tests/qemu-iotests/tests/migrate-during-backup
290
+ if (atomic_fetch_inc(&mutex->locked) == 0) {
63
+++ b/tests/qemu-iotests/tests/migrate-during-backup
291
+ /* Uncontended. */
64
@@ -XXX,XX +XXX,XX @@
292
+ trace_qemu_co_mutex_lock_uncontended(mutex, self);
65
#!/usr/bin/env python3
293
+ } else {
66
-# group: migration disabled
294
+ qemu_co_mutex_lock_slowpath(mutex);
67
+# group: migration
295
+ }
68
#
296
+ mutex->holder = self;
69
# Copyright (c) 2021 Virtuozzo International GmbH
297
+ self->locks_held++;
70
#
298
+}
299
+
300
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
301
{
302
Coroutine *self = qemu_coroutine_self();
303
304
trace_qemu_co_mutex_unlock_entry(mutex, self);
305
306
- assert(mutex->locked == true);
307
+ assert(mutex->locked);
308
assert(mutex->holder == self);
309
assert(qemu_in_coroutine());
310
311
- mutex->locked = false;
312
mutex->holder = NULL;
313
self->locks_held--;
314
- qemu_co_queue_next(&mutex->queue);
315
+ if (atomic_fetch_dec(&mutex->locked) == 1) {
316
+ /* No waiting qemu_co_mutex_lock(). Pfew, that was easy! */
317
+ return;
318
+ }
319
+
320
+ for (;;) {
321
+ CoWaitRecord *to_wake = pop_waiter(mutex);
322
+ unsigned our_handoff;
323
+
324
+ if (to_wake) {
325
+ Coroutine *co = to_wake->co;
326
+ aio_co_wake(co);
327
+ break;
328
+ }
329
+
330
+ /* Some concurrent lock() is in progress (we know this because
331
+ * mutex->locked was >1) but it hasn't yet put itself on the wait
332
+ * queue. Pick a sequence number for the handoff protocol (not 0).
333
+ */
334
+ if (++mutex->sequence == 0) {
335
+ mutex->sequence = 1;
336
+ }
337
+
338
+ our_handoff = mutex->sequence;
339
+ atomic_mb_set(&mutex->handoff, our_handoff);
340
+ if (!has_waiters(mutex)) {
341
+ /* The concurrent lock has not added itself yet, so it
342
+ * will be able to pick our handoff.
343
+ */
344
+ break;
345
+ }
346
+
347
+ /* Try to do the handoff protocol ourselves; if somebody else has
348
+ * already taken it, however, we're done and they're responsible.
349
+ */
350
+ if (atomic_cmpxchg(&mutex->handoff, our_handoff, 0) != our_handoff) {
351
+ break;
352
+ }
353
+ }
354
355
trace_qemu_co_mutex_unlock_return(mutex, self);
356
}
357
diff --git a/util/trace-events b/util/trace-events
358
index XXXXXXX..XXXXXXX 100644
359
--- a/util/trace-events
360
+++ b/util/trace-events
361
@@ -XXX,XX +XXX,XX @@ qemu_coroutine_terminate(void *co) "self %p"
362
363
# util/qemu-coroutine-lock.c
364
qemu_co_queue_run_restart(void *co) "co %p"
365
+qemu_co_mutex_lock_uncontended(void *mutex, void *self) "mutex %p self %p"
366
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
367
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
368
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
71
--
369
--
72
2.31.1
370
2.9.3
73
371
74
372
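The wait-record queue in the CoMutex patch above is the heart of the design: producers push onto a lock-free LIFO, and whoever is in charge of the next wakeup drains it in one atomic exchange and reverses it, so waiters wake in FIFO order without any lock. A standalone C11 sketch of the same technique (illustrative only, not QEMU code; the real patch stores Coroutine pointers and wakes them with aio_co_wake()):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct WaitRecord {
        struct WaitRecord *next;
        int id;                        /* stand-in for the Coroutine pointer */
    } WaitRecord;

    static _Atomic(WaitRecord *) from_push;   /* lock-free LIFO, any thread */
    static WaitRecord *to_pop;                /* single consumer only */

    static void push_waiter(WaitRecord *w)    /* multiple producers */
    {
        WaitRecord *old = atomic_load(&from_push);
        do {
            w->next = old;
        } while (!atomic_compare_exchange_weak(&from_push, &old, w));
    }

    static WaitRecord *pop_waiter(void)       /* single consumer */
    {
        if (!to_pop) {
            /* Drain everything pushed so far, reversing to FIFO order. */
            WaitRecord *rev = atomic_exchange(&from_push, NULL);
            while (rev) {
                WaitRecord *next = rev->next;
                rev->next = to_pop;
                to_pop = rev;
                rev = next;
            }
        }
        WaitRecord *w = to_pop;
        if (w) {
            to_pop = w->next;
        }
        return w;
    }

The single-consumer restriction is exactly what the hand-off protocol in the patch enforces: only one party at a time may call pop_waiter(), because only one party holds the hand-off responsibility.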
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Check subcluster bitmap of the l2 entry for different types of
3
Running a very small critical section on pthread_mutex_t and CoMutex
4
clusters:
4
shows that pthread_mutex_t is much faster because it doesn't actually
5
go to sleep. What happens is that the critical section is shorter
6
than the latency of entering the kernel and thus FUTEX_WAIT always
7
fails. With CoMutex there is no such latency but you still want to
8
avoid wait and wakeup. So introduce it artificially.
5
9
6
- for compressed it must be zero
10
This only works with one waiter; because CoMutex is fair, it will
7
- for allocated check consistency of two parts of the bitmap
11
always have more waits and wakeups than a pthread_mutex_t.
8
- for unallocated all subclusters should be unallocated
9
(or zero-plain)
10
12
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
13
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Tested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
14
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Message-Id: <20210914122454.141075-7-vsementsov@virtuozzo.com>
15
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
14
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
16
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
17
---
17
---
18
block/qcow2-refcount.c | 28 ++++++++++++++++++++++++++--
18
include/qemu/coroutine.h | 5 +++++
19
1 file changed, 26 insertions(+), 2 deletions(-)
19
util/qemu-coroutine-lock.c | 51 ++++++++++++++++++++++++++++++++++++++++------
20
util/qemu-coroutine.c | 2 +-
21
3 files changed, 51 insertions(+), 7 deletions(-)
20
22
21
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
23
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
22
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
23
--- a/block/qcow2-refcount.c
25
--- a/include/qemu/coroutine.h
24
+++ b/block/qcow2-refcount.c
26
+++ b/include/qemu/coroutine.h
25
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
27
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex {
26
int flags, BdrvCheckMode fix, bool active)
28
*/
29
unsigned locked;
30
31
+ /* Context that is holding the lock. Useful to avoid spinning
32
+ * when two coroutines on the same AioContext try to get the lock. :)
33
+ */
34
+ AioContext *ctx;
35
+
36
/* A queue of waiters. Elements are added atomically in front of
37
* from_push. to_pop is only populated, and popped from, by whoever
38
* is in charge of the next wakeup. This can be an unlocker or,
39
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/util/qemu-coroutine-lock.c
42
+++ b/util/qemu-coroutine-lock.c
43
@@ -XXX,XX +XXX,XX @@
44
#include "qemu-common.h"
45
#include "qemu/coroutine.h"
46
#include "qemu/coroutine_int.h"
47
+#include "qemu/processor.h"
48
#include "qemu/queue.h"
49
#include "block/aio.h"
50
#include "trace.h"
51
@@ -XXX,XX +XXX,XX @@ void qemu_co_mutex_init(CoMutex *mutex)
52
memset(mutex, 0, sizeof(*mutex));
53
}
54
55
-static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
56
+static void coroutine_fn qemu_co_mutex_wake(CoMutex *mutex, Coroutine *co)
57
+{
58
+ /* Read co before co->ctx; pairs with smp_wmb() in
59
+ * qemu_coroutine_enter().
60
+ */
61
+ smp_read_barrier_depends();
62
+ mutex->ctx = co->ctx;
63
+ aio_co_wake(co);
64
+}
65
+
66
+static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
67
+ CoMutex *mutex)
27
{
68
{
28
BDRVQcow2State *s = bs->opaque;
69
Coroutine *self = qemu_coroutine_self();
29
- uint64_t l2_entry;
70
CoWaitRecord w;
30
+ uint64_t l2_entry, l2_bitmap;
71
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
31
uint64_t next_contiguous_offset = 0;
72
if (co == self) {
32
int i, ret;
73
/* We got the lock ourselves! */
33
size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
74
assert(to_wake == &w);
34
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
75
+ mutex->ctx = ctx;
35
uint64_t coffset;
76
return;
36
int csize;
77
}
37
l2_entry = get_l2_entry(s, l2_table, i);
78
38
+ l2_bitmap = get_l2_bitmap(s, l2_table, i);
79
- aio_co_wake(co);
39
80
+ qemu_co_mutex_wake(mutex, co);
40
switch (qcow2_get_cluster_type(bs, l2_entry)) {
81
}
41
case QCOW2_CLUSTER_COMPRESSED:
82
42
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
83
qemu_coroutine_yield();
43
break;
84
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
44
}
85
45
86
void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
46
+ if (l2_bitmap) {
87
{
47
+ fprintf(stderr, "ERROR compressed cluster %d with non-zero "
88
+ AioContext *ctx = qemu_get_current_aio_context();
48
+ "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
89
Coroutine *self = qemu_coroutine_self();
49
+ i, l2_entry);
90
+ int waiters, i;
50
+ res->corruptions++;
91
92
- if (atomic_fetch_inc(&mutex->locked) == 0) {
93
+ /* Running a very small critical section on pthread_mutex_t and CoMutex
94
+ * shows that pthread_mutex_t is much faster because it doesn't actually
95
+ * go to sleep. What happens is that the critical section is shorter
96
+ * than the latency of entering the kernel and thus FUTEX_WAIT always
97
+ * fails. With CoMutex there is no such latency but you still want to
98
+ * avoid wait and wakeup. So introduce it artificially.
99
+ */
100
+ i = 0;
101
+retry_fast_path:
102
+ waiters = atomic_cmpxchg(&mutex->locked, 0, 1);
103
+ if (waiters != 0) {
104
+ while (waiters == 1 && ++i < 1000) {
105
+ if (atomic_read(&mutex->ctx) == ctx) {
51
+ break;
106
+ break;
52
+ }
107
+ }
108
+ if (atomic_read(&mutex->locked) == 0) {
109
+ goto retry_fast_path;
110
+ }
111
+ cpu_relax();
112
+ }
113
+ waiters = atomic_fetch_inc(&mutex->locked);
114
+ }
53
+
115
+
54
/* Mark cluster as used */
116
+ if (waiters == 0) {
55
qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
117
/* Uncontended. */
56
ret = qcow2_inc_refcounts_imrt(
118
trace_qemu_co_mutex_lock_uncontended(mutex, self);
57
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
119
+ mutex->ctx = ctx;
58
{
120
} else {
59
uint64_t offset = l2_entry & L2E_OFFSET_MASK;
121
- qemu_co_mutex_lock_slowpath(mutex);
60
122
+ qemu_co_mutex_lock_slowpath(ctx, mutex);
61
+ if ((l2_bitmap >> 32) & l2_bitmap) {
123
}
62
+ res->corruptions++;
124
mutex->holder = self;
63
+ fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
125
self->locks_held++;
64
+ "cluster has corrupted subcluster allocation bitmap\n",
126
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
65
+ offset);
127
assert(mutex->holder == self);
66
+ }
128
assert(qemu_in_coroutine());
67
+
129
68
/* Correct offsets are cluster aligned */
130
+ mutex->ctx = NULL;
69
if (offset_into_cluster(s, offset)) {
131
mutex->holder = NULL;
70
bool contains_data;
132
self->locks_held--;
71
res->corruptions++;
133
if (atomic_fetch_dec(&mutex->locked) == 1) {
72
134
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
73
if (has_subclusters(s)) {
135
unsigned our_handoff;
74
- uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i);
136
75
contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
137
if (to_wake) {
76
} else {
138
- Coroutine *co = to_wake->co;
77
contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
139
- aio_co_wake(co);
78
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
140
+ qemu_co_mutex_wake(mutex, to_wake->co);
141
break;
79
}
142
}
80
143
81
case QCOW2_CLUSTER_ZERO_PLAIN:
144
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
82
+ /* Impossible when image has subclusters */
145
index XXXXXXX..XXXXXXX 100644
83
+ assert(!l2_bitmap);
146
--- a/util/qemu-coroutine.c
84
+ break;
147
+++ b/util/qemu-coroutine.c
85
+
148
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_enter(Coroutine *co)
86
case QCOW2_CLUSTER_UNALLOCATED:
149
co->ctx = qemu_get_current_aio_context();
87
+ if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
150
88
+ res->corruptions++;
151
/* Store co->ctx before anything that stores co. Matches
89
+ fprintf(stderr, "ERROR: Unallocated "
152
- * barrier in aio_co_wake.
90
+ "cluster has non-zero subcluster allocation map\n");
153
+ * barrier in aio_co_wake and qemu_co_mutex_wake.
91
+ }
154
*/
92
break;
155
smp_wmb();
93
156
94
default:
95
--
157
--
96
2.31.1
158
2.9.3
97
159
98
160
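The bounded spinning the CoMutex patch above introduces can be distilled into a standalone C11 sketch. This is illustrative only: sleep_until_unlocked() is a hypothetical stand-in for the real wait/wakeup slow path, and the real patch additionally stops spinning as soon as the holder runs on the caller's own AioContext, where the holder cannot make progress while we spin:

    #include <stdatomic.h>

    /* Hypothetical slow path: block until the lock is released. */
    extern void sleep_until_unlocked(atomic_int *locked);

    #if defined(__x86_64__) || defined(__i386__)
    #define cpu_relax() __builtin_ia32_pause()
    #else
    #define cpu_relax() ((void)0)
    #endif

    void spin_then_wait_lock(atomic_int *locked)
    {
        for (;;) {
            int expected = 0;
            if (atomic_compare_exchange_strong(locked, &expected, 1)) {
                return;                 /* got the lock */
            }
            /* Contended: spin briefly, betting that the critical section
             * is shorter than a sleep/wakeup round trip. */
            for (int i = 0; i < 1000 && atomic_load(locked) != 0; i++) {
                cpu_relax();
            }
            if (atomic_load(locked) != 0) {
                sleep_until_unlocked(locked);   /* give up and sleep */
            }
        }
    }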
1
297 so far does not check the named tests, which reside in the tests/
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
directory (i.e. full path tests/qemu-iotests/tests). Fix it.
2
3
3
Add two implementations of the same benchmark as the previous patch,
4
Thanks to the previous two commits, all named tests pass its scrutiny,
4
but using pthreads. One uses a normal QemuMutex, the other is Linux
5
so we do not have to add anything to SKIP_FILES.
5
only and implements a fair mutex based on MCS locks and futexes.
6
6
This shows that the slower performance of the 5-thread case is due to
7
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
7
the fairness of CoMutex, rather than to coroutines. If fairness does
8
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
8
not matter, as is the case with two threads, CoMutex can actually be
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
faster than pthreads.
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
10
11
Message-Id: <20210902094017.32902-6-hreitz@redhat.com>
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
15
---
13
tests/qemu-iotests/297 | 5 +++--
16
tests/test-aio-multithread.c | 164 +++++++++++++++++++++++++++++++++++++++++++
14
1 file changed, 3 insertions(+), 2 deletions(-)
17
1 file changed, 164 insertions(+)
15
18
16
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
19
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
17
index XXXXXXX..XXXXXXX 100755
20
index XXXXXXX..XXXXXXX 100644
18
--- a/tests/qemu-iotests/297
21
--- a/tests/test-aio-multithread.c
19
+++ b/tests/qemu-iotests/297
22
+++ b/tests/test-aio-multithread.c
20
@@ -XXX,XX +XXX,XX @@ def is_python_file(filename):
23
@@ -XXX,XX +XXX,XX @@ static void test_multi_co_mutex_2_30(void)
21
24
test_multi_co_mutex(2, 30);
22
25
}
23
def run_linters():
26
24
- files = [filename for filename in (set(os.listdir('.')) - set(SKIP_FILES))
27
+/* Same test with fair mutexes, for performance comparison. */
25
- if is_python_file(filename)]
28
+
26
+ named_tests = [f'tests/{entry}' for entry in os.listdir('tests')]
29
+#ifdef CONFIG_LINUX
27
+ check_tests = set(os.listdir('.') + named_tests) - set(SKIP_FILES)
30
+#include "qemu/futex.h"
28
+ files = [filename for filename in check_tests if is_python_file(filename)]
31
+
29
32
+/* The nodes for the mutex reside in this structure (on which we try to avoid
30
iotests.logger.debug('Files to be checked:')
33
+ * false sharing). The head of the mutex is in the "mutex_head" variable.
31
iotests.logger.debug(', '.join(sorted(files)))
34
+ */
35
+static struct {
36
+ int next, locked;
37
+ int padding[14];
38
+} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
39
+
40
+static int mutex_head = -1;
41
+
42
+static void mcs_mutex_lock(void)
43
+{
44
+ int prev;
45
+
46
+ nodes[id].next = -1;
47
+ nodes[id].locked = 1;
48
+ prev = atomic_xchg(&mutex_head, id);
49
+ if (prev != -1) {
50
+ atomic_set(&nodes[prev].next, id);
51
+ qemu_futex_wait(&nodes[id].locked, 1);
52
+ }
53
+}
54
+
55
+static void mcs_mutex_unlock(void)
56
+{
57
+ int next;
58
+ if (nodes[id].next == -1) {
59
+ if (atomic_read(&mutex_head) == id &&
60
+ atomic_cmpxchg(&mutex_head, id, -1) == id) {
61
+ /* Last item in the list, exit. */
62
+ return;
63
+ }
64
+ while (atomic_read(&nodes[id].next) == -1) {
65
+ /* mcs_mutex_lock did the xchg, but has not updated
66
+ * nodes[prev].next yet.
67
+ */
68
+ }
69
+ }
70
+
71
+ /* Wake up the next in line. */
72
+ next = nodes[id].next;
73
+ nodes[next].locked = 0;
74
+ qemu_futex_wake(&nodes[next].locked, 1);
75
+}
76
+
77
+static void test_multi_fair_mutex_entry(void *opaque)
78
+{
79
+ while (!atomic_mb_read(&now_stopping)) {
80
+ mcs_mutex_lock();
81
+ counter++;
82
+ mcs_mutex_unlock();
83
+ atomic_inc(&atomic_counter);
84
+ }
85
+ atomic_dec(&running);
86
+}
87
+
88
+static void test_multi_fair_mutex(int threads, int seconds)
89
+{
90
+ int i;
91
+
92
+ assert(mutex_head == -1);
93
+ counter = 0;
94
+ atomic_counter = 0;
95
+ now_stopping = false;
96
+
97
+ create_aio_contexts();
98
+ assert(threads <= NUM_CONTEXTS);
99
+ running = threads;
100
+ for (i = 0; i < threads; i++) {
101
+ Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry, NULL);
102
+ aio_co_schedule(ctx[i], co1);
103
+ }
104
+
105
+ g_usleep(seconds * 1000000);
106
+
107
+ atomic_mb_set(&now_stopping, true);
108
+ while (running > 0) {
109
+ g_usleep(100000);
110
+ }
111
+
112
+ join_aio_contexts();
113
+ g_test_message("%d iterations/second\n", counter / seconds);
114
+ g_assert_cmpint(counter, ==, atomic_counter);
115
+}
116
+
117
+static void test_multi_fair_mutex_1(void)
118
+{
119
+ test_multi_fair_mutex(NUM_CONTEXTS, 1);
120
+}
121
+
122
+static void test_multi_fair_mutex_10(void)
123
+{
124
+ test_multi_fair_mutex(NUM_CONTEXTS, 10);
125
+}
126
+#endif
127
+
128
+/* Same test with pthread mutexes, for performance comparison and
129
+ * portability. */
130
+
131
+static QemuMutex mutex;
132
+
133
+static void test_multi_mutex_entry(void *opaque)
134
+{
135
+ while (!atomic_mb_read(&now_stopping)) {
136
+ qemu_mutex_lock(&mutex);
137
+ counter++;
138
+ qemu_mutex_unlock(&mutex);
139
+ atomic_inc(&atomic_counter);
140
+ }
141
+ atomic_dec(&running);
142
+}
143
+
144
+static void test_multi_mutex(int threads, int seconds)
145
+{
146
+ int i;
147
+
148
+ qemu_mutex_init(&mutex);
149
+ counter = 0;
150
+ atomic_counter = 0;
151
+ now_stopping = false;
152
+
153
+ create_aio_contexts();
154
+ assert(threads <= NUM_CONTEXTS);
155
+ running = threads;
156
+ for (i = 0; i < threads; i++) {
157
+ Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry, NULL);
158
+ aio_co_schedule(ctx[i], co1);
159
+ }
160
+
161
+ g_usleep(seconds * 1000000);
162
+
163
+ atomic_mb_set(&now_stopping, true);
164
+ while (running > 0) {
165
+ g_usleep(100000);
166
+ }
167
+
168
+ join_aio_contexts();
169
+ g_test_message("%d iterations/second\n", counter / seconds);
170
+ g_assert_cmpint(counter, ==, atomic_counter);
171
+}
172
+
173
+static void test_multi_mutex_1(void)
174
+{
175
+ test_multi_mutex(NUM_CONTEXTS, 1);
176
+}
177
+
178
+static void test_multi_mutex_10(void)
179
+{
180
+ test_multi_mutex(NUM_CONTEXTS, 10);
181
+}
182
+
183
/* End of tests. */
184
185
int main(int argc, char **argv)
186
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
187
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
188
g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
189
g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
190
+#ifdef CONFIG_LINUX
191
+ g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
192
+#endif
193
+ g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
194
} else {
195
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
196
g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
197
g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
198
+#ifdef CONFIG_LINUX
199
+ g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
200
+#endif
201
+ g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
202
}
203
return g_test_run();
204
}
32
--
205
--
33
2.31.1
206
2.9.3
34
207
35
208
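The pthread variant of the benchmark above is easy to reproduce outside QEMU. A standalone sketch (compile with `cc -pthread`; illustrative only, so the numbers will of course differ from the patch's test harness):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    #define NTHREADS 2
    #define SECONDS  3

    static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
    static atomic_bool stopping;
    static unsigned long counter;      /* protected by mutex */

    static void *worker(void *arg)
    {
        (void)arg;
        while (!atomic_load(&stopping)) {
            pthread_mutex_lock(&mutex);
            counter++;                 /* the entire critical section */
            pthread_mutex_unlock(&mutex);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t threads[NTHREADS];

        for (int i = 0; i < NTHREADS; i++) {
            pthread_create(&threads[i], NULL, worker, NULL);
        }
        sleep(SECONDS);
        atomic_store(&stopping, true);
        for (int i = 0; i < NTHREADS; i++) {
            pthread_join(threads[i], NULL);
        }
        printf("%lu iterations/second\n", counter / SECONDS);
        return 0;
    }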
1
As we have attempted before
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
(https://lists.gnu.org/archive/html/qemu-devel/2019-01/msg06451.html,
3
"file-posix: Cache lseek result for data regions";
4
https://lists.nongnu.org/archive/html/qemu-block/2021-02/msg00934.html,
5
"file-posix: Cache next hole"), this patch seeks to reduce the number of
6
SEEK_DATA/HOLE operations the file-posix driver has to perform. The
7
main difference is that this time it is implemented as part of the
8
general block layer code.
9
2
10
The problem we face is that on some filesystems or in some
3
This will avoid forward references in the next patch. It is also
11
circumstances, SEEK_DATA/HOLE is unreasonably slow. Given the
4
more logical because CoQueue is not anymore the basic primitive.
12
implementation is outside of qemu, there is little we can do about its
13
performance.
14
5
15
We have already introduced the want_zero parameter to
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
16
bdrv_co_block_status() to reduce the number of SEEK_DATA/HOLE calls
7
Reviewed-by: Fam Zheng <famz@redhat.com>
17
unless we really want zero information; but sometimes we do want that
8
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
18
information, because for files that consist largely of zero areas,
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
special-casing those areas can give large performance boosts. So the
10
---
20
real problem is with files that consist largely of data, so that
11
include/qemu/coroutine.h | 89 ++++++++++++++++++++++++------------------------
21
inquiring the block status does not gain us much performance, but where
12
1 file changed, 44 insertions(+), 45 deletions(-)
22
such an inquiry itself takes a lot of time.
23
13
24
To address this, we want to cache data regions. Most of the time, when
14
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
25
bad performance is reported, it is in places where the image is iterated
26
over from start to end (qemu-img convert or the mirror job), so a simple
27
yet effective solution is to cache only the current data region.
28
29
(Note that only caching data regions but not zero regions means that
30
returning false information from the cache is not catastrophic: Treating
31
zeroes as data is fine. While we try to invalidate the cache on zero
32
writes and discards, such incongruences may still occur when there are
33
other processes writing to the image.)
34
35
We only use the cache for nodes without children (i.e. protocol nodes),
36
because that is where the problem is: Drivers that rely on block-status
37
implementations outside of qemu (e.g. SEEK_DATA/HOLE).
38
39
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/307
40
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
41
Message-Id: <20210812084148.14458-3-hreitz@redhat.com>
42
Reviewed-by: Eric Blake <eblake@redhat.com>
43
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
44
[hreitz: Added `local_file == bs` assertion, as suggested by Vladimir]
45
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
46
---
47
include/block/block_int.h | 50 ++++++++++++++++++++++++
48
block.c | 80 +++++++++++++++++++++++++++++++++++++++
49
block/io.c | 68 +++++++++++++++++++++++++++++++--
50
3 files changed, 195 insertions(+), 3 deletions(-)
51
52
diff --git a/include/block/block_int.h b/include/block/block_int.h
53
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
54
--- a/include/block/block_int.h
16
--- a/include/qemu/coroutine.h
55
+++ b/include/block/block_int.h
17
+++ b/include/qemu/coroutine.h
56
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ bool qemu_in_coroutine(void);
57
#include "qemu/hbitmap.h"
58
#include "block/snapshot.h"
59
#include "qemu/throttle.h"
60
+#include "qemu/rcu.h"
61
62
#define BLOCK_FLAG_LAZY_REFCOUNTS 8
63
64
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
65
QLIST_ENTRY(BdrvChild) next_parent;
66
};
67
68
+/*
69
+ * Allows bdrv_co_block_status() to cache one data region for a
70
+ * protocol node.
71
+ *
72
+ * @valid: Whether the cache is valid (should be accessed with atomic
73
+ * functions so this can be reset by RCU readers)
74
+ * @data_start: Offset where we know (or strongly assume) is data
75
+ * @data_end: Offset where the data region ends (which is not necessarily
76
+ * the start of a zeroed region)
77
+ */
78
+typedef struct BdrvBlockStatusCache {
79
+ struct rcu_head rcu;
80
+
81
+ bool valid;
82
+ int64_t data_start;
83
+ int64_t data_end;
84
+} BdrvBlockStatusCache;
85
+
86
struct BlockDriverState {
87
/* Protected by big QEMU lock or read-only after opening. No special
88
* locking needed during I/O...
89
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
90
91
/* BdrvChild links to this node may never be frozen */
92
bool never_freeze;
93
+
94
+ /* Lock for block-status cache RCU writers */
95
+ CoMutex bsc_modify_lock;
96
+ /* Always non-NULL, but must only be dereferenced under an RCU read guard */
97
+ BdrvBlockStatusCache *block_status_cache;
98
};
99
100
struct BlockBackendRootState {
101
@@ -XXX,XX +XXX,XX @@ static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
102
*/
19
*/
103
void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
20
bool qemu_coroutine_entered(Coroutine *co);
104
21
105
+/**
22
-
106
+ * Check whether the given offset is in the cached block-status data
23
-/**
107
+ * region.
24
- * CoQueues are a mechanism to queue coroutines in order to continue executing
108
+ *
25
- * them later. They provide the fundamental primitives on which coroutine locks
109
+ * If it is, and @pnum is not NULL, *pnum is set to
26
- * are built.
110
+ * `bsc.data_end - offset`, i.e. how many bytes, starting from
27
- */
111
+ * @offset, are data (according to the cache).
28
-typedef struct CoQueue {
112
+ * Otherwise, *pnum is not touched.
29
- QSIMPLEQ_HEAD(, Coroutine) entries;
113
+ */
30
-} CoQueue;
114
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
31
-
32
-/**
33
- * Initialise a CoQueue. This must be called before any other operation is used
34
- * on the CoQueue.
35
- */
36
-void qemu_co_queue_init(CoQueue *queue);
37
-
38
-/**
39
- * Adds the current coroutine to the CoQueue and transfers control to the
40
- * caller of the coroutine.
41
- */
42
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
43
-
44
-/**
45
- * Restarts the next coroutine in the CoQueue and removes it from the queue.
46
- *
47
- * Returns true if a coroutine was restarted, false if the queue is empty.
48
- */
49
-bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
50
-
51
-/**
52
- * Restarts all coroutines in the CoQueue and leaves the queue empty.
53
- */
54
-void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
55
-
56
-/**
57
- * Enter the next coroutine in the queue
58
- */
59
-bool qemu_co_enter_next(CoQueue *queue);
60
-
61
-/**
62
- * Checks if the CoQueue is empty.
63
- */
64
-bool qemu_co_queue_empty(CoQueue *queue);
65
-
66
-
67
/**
68
* Provides a mutex that can be used to synchronise coroutines
69
*/
70
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
71
*/
72
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
73
115
+
74
+
116
+/**
75
+/**
117
+ * If [offset, offset + bytes) overlaps with the currently cached
76
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
118
+ * block-status region, invalidate the cache.
77
+ * them later.
119
+ *
120
+ * (To be used by I/O paths that cause data regions to be zero or
121
+ * holes.)
122
+ */
78
+ */
123
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
79
+typedef struct CoQueue {
124
+ int64_t offset, int64_t bytes);
80
+ QSIMPLEQ_HEAD(, Coroutine) entries;
81
+} CoQueue;
125
+
82
+
126
+/**
83
+/**
127
+ * Mark the range [offset, offset + bytes) as a data region.
84
+ * Initialise a CoQueue. This must be called before any other operation is used
85
+ * on the CoQueue.
128
+ */
86
+ */
129
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
87
+void qemu_co_queue_init(CoQueue *queue);
130
+
131
#endif /* BLOCK_INT_H */
132
diff --git a/block.c b/block.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/block.c
135
+++ b/block.c
136
@@ -XXX,XX +XXX,XX @@
137
#include "qemu/timer.h"
138
#include "qemu/cutils.h"
139
#include "qemu/id.h"
140
+#include "qemu/range.h"
141
+#include "qemu/rcu.h"
142
#include "block/coroutines.h"
143
144
#ifdef CONFIG_BSD
145
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_new(void)
146
147
qemu_co_queue_init(&bs->flush_queue);
148
149
+ qemu_co_mutex_init(&bs->bsc_modify_lock);
150
+ bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
151
+
152
for (i = 0; i < bdrv_drain_all_count; i++) {
153
bdrv_drained_begin(bs);
154
}
155
@@ -XXX,XX +XXX,XX @@ static void bdrv_close(BlockDriverState *bs)
156
bs->explicit_options = NULL;
157
qobject_unref(bs->full_open_options);
158
bs->full_open_options = NULL;
159
+ g_free(bs->block_status_cache);
160
+ bs->block_status_cache = NULL;
161
162
bdrv_release_named_dirty_bitmaps(bs);
163
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
164
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
165
{
166
return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
167
}
168
+
88
+
169
+/**
89
+/**
170
+ * Check whether [offset, offset + bytes) overlaps with the cached
90
+ * Adds the current coroutine to the CoQueue and transfers control to the
171
+ * block-status data region.
91
+ * caller of the coroutine.
172
+ *
173
+ * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
174
+ * which is what bdrv_bsc_is_data()'s interface needs.
175
+ * Otherwise, *pnum is not touched.
176
+ */
92
+ */
177
+static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
93
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
178
+ int64_t offset, int64_t bytes,
179
+ int64_t *pnum)
180
+{
181
+ BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
182
+ bool overlaps;
183
+
184
+ overlaps =
185
+ qatomic_read(&bsc->valid) &&
186
+ ranges_overlap(offset, bytes, bsc->data_start,
187
+ bsc->data_end - bsc->data_start);
188
+
189
+ if (overlaps && pnum) {
190
+ *pnum = bsc->data_end - offset;
191
+ }
192
+
193
+ return overlaps;
194
+}
195
+
94
+
196
+/**
95
+/**
197
+ * See block_int.h for this function's documentation.
96
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
97
+ *
98
+ * Returns true if a coroutine was restarted, false if the queue is empty.
198
+ */
99
+ */
199
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
100
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
200
+{
201
+ RCU_READ_LOCK_GUARD();
202
+
203
+ return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
204
+}
205
+
101
+
206
+/**
102
+/**
207
+ * See block_int.h for this function's documentation.
103
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
208
+ */
104
+ */
209
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
105
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
210
+ int64_t offset, int64_t bytes)
211
+{
212
+ RCU_READ_LOCK_GUARD();
213
+
214
+ if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
215
+ qatomic_set(&bs->block_status_cache->valid, false);
216
+ }
217
+}
218
+
106
+
219
+/**
107
+/**
220
+ * See block_int.h for this function's documentation.
108
+ * Enter the next coroutine in the queue
221
+ */
109
+ */
222
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
110
+bool qemu_co_enter_next(CoQueue *queue);
223
+{
224
+ BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
225
+ BdrvBlockStatusCache *old_bsc;
226
+
111
+
227
+ *new_bsc = (BdrvBlockStatusCache) {
112
+/**
228
+ .valid = true,
113
+ * Checks if the CoQueue is empty.
229
+ .data_start = offset,
114
+ */
230
+ .data_end = offset + bytes,
115
+bool qemu_co_queue_empty(CoQueue *queue);
231
+ };
232
+
116
+
233
+ QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
234
+
117
+
235
+ old_bsc = qatomic_rcu_read(&bs->block_status_cache);
118
typedef struct CoRwlock {
236
+ qatomic_rcu_set(&bs->block_status_cache, new_bsc);
119
bool writer;
237
+ if (old_bsc) {
120
int reader;
238
+ g_free_rcu(old_bsc, rcu);
239
+ }
240
+}
241
diff --git a/block/io.c b/block/io.c
242
index XXXXXXX..XXXXXXX 100644
243
--- a/block/io.c
244
+++ b/block/io.c
245
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
246
return -ENOTSUP;
247
}
248
249
+ /* Invalidate the cached block-status data range if this write overlaps */
250
+ bdrv_bsc_invalidate_range(bs, offset, bytes);
251
+
252
assert(alignment % bs->bl.request_alignment == 0);
253
head = offset % alignment;
254
tail = (offset + bytes) % alignment;
255
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
256
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
257
258
if (bs->drv->bdrv_co_block_status) {
259
- ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
260
- aligned_bytes, pnum, &local_map,
261
- &local_file);
262
+ /*
263
+ * Use the block-status cache only for protocol nodes: Format
264
+ * drivers are generally quick to inquire the status, but protocol
265
+ * drivers often need to get information from outside of qemu, so
266
+ * we do not have control over the actual implementation. There
267
+ * have been cases where inquiring the status took an unreasonably
268
+ * long time, and we can do nothing in qemu to fix it.
269
+ * This is especially problematic for images with large data areas,
270
+ * because finding the few holes in them and giving them special
271
+ * treatment does not gain much performance. Therefore, we try to
272
+ * cache the last-identified data region.
273
+ *
274
+ * Second, limiting ourselves to protocol nodes allows us to assume
275
+ * the block status for data regions to be DATA | OFFSET_VALID, and
276
+ * that the host offset is the same as the guest offset.
277
+ *
278
+ * Note that it is possible that external writers zero parts of
279
+ * the cached regions without the cache being invalidated, and so
280
+ * we may report zeroes as data. This is not catastrophic,
281
+ * however, because reporting zeroes as data is fine.
282
+ */
283
+ if (QLIST_EMPTY(&bs->children) &&
284
+ bdrv_bsc_is_data(bs, aligned_offset, pnum))
285
+ {
286
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
287
+ local_file = bs;
288
+ local_map = aligned_offset;
289
+ } else {
290
+ ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
291
+ aligned_bytes, pnum, &local_map,
292
+ &local_file);
293
+
294
+ /*
295
+ * Note that checking QLIST_EMPTY(&bs->children) is also done when
296
+ * the cache is queried above. Technically, we do not need to check
297
+ * it here; the worst that can happen is that we fill the cache for
298
+ * non-protocol nodes, and then it is never used. However, filling
299
+ * the cache requires an RCU update, so double check here to avoid
300
+ * such an update if possible.
301
+ */
302
+ if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
303
+ QLIST_EMPTY(&bs->children))
304
+ {
305
+ /*
306
+ * When a protocol driver reports BLOCK_OFFSET_VALID, the
307
+ * returned local_map value must be the same as the offset we
308
+ * have passed (aligned_offset), and local_bs must be the node
309
+ * itself.
310
+ * Assert this, because we follow this rule when reading from
311
+ * the cache (see the `local_file = bs` and
312
+ * `local_map = aligned_offset` assignments above), and the
313
+ * result the cache delivers must be the same as the driver
314
+ * would deliver.
315
+ */
316
+ assert(local_file == bs);
317
+ assert(local_map == aligned_offset);
318
+ bdrv_bsc_fill(bs, aligned_offset, *pnum);
319
+ }
320
+ }
321
} else {
322
/* Default code for filters */
323
324
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
325
return 0;
326
}
327
328
+ /* Invalidate the cached block-status data range if this discard overlaps */
329
+ bdrv_bsc_invalidate_range(bs, offset, bytes);
330
+
331
/* Discard is advisory, but some devices track and coalesce
332
* unaligned requests, so we must pass everything down rather than
333
* round here. Still, most devices will just silently ignore
334
--
121
--
335
2.31.1
122
2.9.3
336
123
337
124
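The cache itself is small enough to sketch standalone. The patch above protects readers with RCU and serializes writers with a CoMutex; the portable sketch below (illustrative only, not QEMU code) uses a single pthread mutex instead, which keeps the same three operations recognizable:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef struct BlockStatusCache {
        pthread_mutex_t lock;   /* init with PTHREAD_MUTEX_INITIALIZER */
        bool valid;
        int64_t data_start;
        int64_t data_end;
    } BlockStatusCache;

    static bool ranges_overlap(int64_t a, int64_t a_len,
                               int64_t b, int64_t b_len)
    {
        return a < b + b_len && b < a + a_len;
    }

    /* Is @offset inside the cached data region? If so, report how many
     * bytes from @offset are known to be data. */
    bool bsc_is_data(BlockStatusCache *c, int64_t offset, int64_t *pnum)
    {
        bool hit;

        pthread_mutex_lock(&c->lock);
        hit = c->valid && offset >= c->data_start && offset < c->data_end;
        if (hit && pnum) {
            *pnum = c->data_end - offset;
        }
        pthread_mutex_unlock(&c->lock);
        return hit;
    }

    /* Zero-write/discard paths: drop the cache if the range overlaps. */
    void bsc_invalidate_range(BlockStatusCache *c, int64_t offset,
                              int64_t bytes)
    {
        pthread_mutex_lock(&c->lock);
        if (c->valid &&
            ranges_overlap(offset, bytes, c->data_start,
                           c->data_end - c->data_start)) {
            c->valid = false;
        }
        pthread_mutex_unlock(&c->lock);
    }

    /* Block status reported a data region: remember it. */
    void bsc_fill(BlockStatusCache *c, int64_t offset, int64_t bytes)
    {
        pthread_mutex_lock(&c->lock);
        c->valid = true;
        c->data_start = offset;
        c->data_end = offset + bytes;
        pthread_mutex_unlock(&c->lock);
    }

As the commit message notes, a stale hit is harmless here because the cache only ever claims "data", and reporting zeroes as data is safe; that asymmetry is what makes such a tiny cache viable.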
Deleted patch
1
.bdrv_co_block_status() implementations are free to return a *pnum that
2
exceeds @bytes, because bdrv_co_block_status() in block/io.c will clamp
3
*pnum as necessary.
4
1
5
On the other hand, if drivers' implementations return values for *pnum
6
that are as large as possible, our recently introduced block-status
7
cache will become more effective.
8
9
So, make a note in block_int.h that @bytes is no upper limit for *pnum.
10
11
Suggested-by: Eric Blake <eblake@redhat.com>
12
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
13
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
14
Message-Id: <20210812084148.14458-4-hreitz@redhat.com>
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
---
17
include/block/block_int.h | 9 +++++++++
18
1 file changed, 9 insertions(+)
19
20
diff --git a/include/block/block_int.h b/include/block/block_int.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/block/block_int.h
23
+++ b/include/block/block_int.h
24
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
25
* clamped to bdrv_getlength() and aligned to request_alignment,
26
* as well as non-NULL pnum, map, and file; in turn, the driver
27
* must return an error or set pnum to an aligned non-zero value.
28
+ *
29
+ * Note that @bytes is just a hint on how big of a region the
30
+ * caller wants to inspect. It is not a limit on *pnum.
31
+ * Implementations are free to return larger values of *pnum if
32
+ * doing so does not incur a performance penalty.
33
+ *
34
+ * block/io.c's bdrv_co_block_status() will utilize an unclamped
35
+ * *pnum value for the block-status cache on protocol nodes, prior
36
+ * to clamping *pnum for return to its caller.
37
*/
38
int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
39
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
40
--
41
2.31.1
42
43
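The contract documented above can be summarized in a hypothetical wrapper (drv_block_status() and cache_fill() are illustrative names, not QEMU APIs): the driver may set *pnum past @bytes, the unclamped value feeds the cache, and only the value handed back to the caller is clamped:

    #include <stdint.h>

    static int block_status_clamped(void *bs, int64_t offset, int64_t bytes,
                                    int64_t *pnum)
    {
        int64_t full = 0;
        int ret = drv_block_status(bs, offset, bytes, &full); /* may exceed @bytes */

        if (ret >= 0 && full > 0) {
            cache_fill(bs, offset, full);        /* cache sees the full extent */
        }
        *pnum = full < bytes ? full : bytes;     /* caller sees at most @bytes */
        return ret;
    }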
Deleted patch
1
bdrv_co_block_status() does it for us, we do not need to do it here.
2
1
3
The advantage of not capping *pnum is that bdrv_co_block_status() can
4
cache larger data regions than requested by its caller.
5
6
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
10
Message-Id: <20210812084148.14458-5-hreitz@redhat.com>
11
---
12
block/file-posix.c | 7 ++++---
13
1 file changed, 4 insertions(+), 3 deletions(-)
14
15
diff --git a/block/file-posix.c b/block/file-posix.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/file-posix.c
18
+++ b/block/file-posix.c
19
@@ -XXX,XX +XXX,XX @@ static int find_allocation(BlockDriverState *bs, off_t start,
20
* the specified offset) that are known to be in the same
21
* allocated/unallocated state.
22
*
23
- * 'bytes' is the max value 'pnum' should be set to.
24
+ * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may
25
+ * well exceed it.
26
*/
27
static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
28
bool want_zero,
29
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
30
} else if (data == offset) {
31
/* On a data extent, compute bytes to the end of the extent,
32
* possibly including a partial sector at EOF. */
33
- *pnum = MIN(bytes, hole - offset);
34
+ *pnum = hole - offset;
35
36
/*
37
* We are not allowed to return partial sectors, though, so
38
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
39
} else {
40
/* On a hole, compute bytes to the beginning of the next extent. */
41
assert(hole == offset);
42
- *pnum = MIN(bytes, data - offset);
43
+ *pnum = data - offset;
44
ret = BDRV_BLOCK_ZERO;
45
}
46
*map = offset;
47
--
48
2.31.1
49
50
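The SEEK_DATA/SEEK_HOLE probing that find_allocation() wraps can be tried from a standalone Linux program (illustrative only, not QEMU code; run as e.g. `./a.out image.raw 0`):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        if (argc != 3) {
            fprintf(stderr, "usage: %s FILE OFFSET\n", argv[0]);
            return 1;
        }

        int fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        off_t start = (off_t)atoll(argv[2]);
        off_t data = lseek(fd, start, SEEK_DATA);   /* next data at/after start */
        off_t hole = lseek(fd, start, SEEK_HOLE);   /* next hole at/after start */

        if (data < 0 || hole < 0) {
            perror("lseek");    /* e.g. ENXIO when start is past EOF or
                                 * no further data exists */
            return 1;
        }
        if (data == start) {
            /* In a data extent: it runs up to the next hole. */
            printf("data at %lld, %lld bytes to next hole\n",
                   (long long)start, (long long)(hole - start));
        } else {
            /* In a hole: it runs up to the next data extent. */
            printf("hole at %lld, %lld bytes to next data\n",
                   (long long)start, (long long)(data - start));
        }
        close(fd);
        return 0;
    }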
Deleted patch
1
Recently, pylint started to complain when `open()` calls are missing an
2
explicit `encoding=`. Everything we have should be UTF-8 (and in fact,
3
everything should be UTF-8, period (exceptions apply)), so use that.
4
1
5
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
6
Message-Id: <20210824153540.177128-2-hreitz@redhat.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Reviewed-by: John Snow <jsnow@redhat.com>
9
---
10
tests/qemu-iotests/297 | 2 +-
11
tests/qemu-iotests/iotests.py | 8 +++++---
12
2 files changed, 6 insertions(+), 4 deletions(-)
13
14
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
15
index XXXXXXX..XXXXXXX 100755
16
--- a/tests/qemu-iotests/297
17
+++ b/tests/qemu-iotests/297
18
@@ -XXX,XX +XXX,XX @@ def is_python_file(filename):
19
if filename.endswith('.py'):
20
return True
21
22
- with open(filename) as f:
23
+ with open(filename, encoding='utf-8') as f:
24
try:
25
first_line = f.readline()
26
return re.match('^#!.*python', first_line) is not None
27
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tests/qemu-iotests/iotests.py
30
+++ b/tests/qemu-iotests/iotests.py
31
@@ -XXX,XX +XXX,XX @@ def _post_shutdown(self) -> None:
32
return
33
valgrind_filename = f"{test_dir}/{self._popen.pid}.valgrind"
34
if self.exitcode() == 99:
35
- with open(valgrind_filename) as f:
36
+ with open(valgrind_filename, encoding='utf-8') as f:
37
print(f.read())
38
else:
39
os.remove(valgrind_filename)
40
@@ -XXX,XX +XXX,XX @@ def notrun(reason):
41
# Each test in qemu-iotests has a number ("seq")
42
seq = os.path.basename(sys.argv[0])
43
44
- with open('%s/%s.notrun' % (output_dir, seq), 'w') as outfile:
45
+ with open('%s/%s.notrun' % (output_dir, seq), 'w', encoding='utf-8') \
46
+ as outfile:
47
outfile.write(reason + '\n')
48
logger.warning("%s not run: %s", seq, reason)
49
sys.exit(0)
50
@@ -XXX,XX +XXX,XX @@ def case_notrun(reason):
51
# Each test in qemu-iotests has a number ("seq")
52
seq = os.path.basename(sys.argv[0])
53
54
- with open('%s/%s.casenotrun' % (output_dir, seq), 'a') as outfile:
55
+ with open('%s/%s.casenotrun' % (output_dir, seq), 'a', encoding='utf-8') \
56
+ as outfile:
57
outfile.write(' [case not run] ' + reason + '\n')
58
59
def _verify_image_format(supported_fmts: Sequence[str] = (),
60
--
61
2.31.1
62
63
Deleted patch
1
pylint proposes using `[]` instead of `list()` and `{}` instead of
2
`dict()`, because it is faster. That seems simple enough, so heed its
3
advice.
4
1
5
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
6
Message-Id: <20210824153540.177128-3-hreitz@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
---
9
tests/qemu-iotests/iotests.py | 4 ++--
10
1 file changed, 2 insertions(+), 2 deletions(-)
11
12
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tests/qemu-iotests/iotests.py
15
+++ b/tests/qemu-iotests/iotests.py
16
@@ -XXX,XX +XXX,XX @@ def hmp_qemu_io(self, drive: str, cmd: str,
17
18
def flatten_qmp_object(self, obj, output=None, basestr=''):
19
if output is None:
20
- output = dict()
21
+ output = {}
22
if isinstance(obj, list):
23
for i, item in enumerate(obj):
24
self.flatten_qmp_object(item, output, basestr + str(i) + '.')
25
@@ -XXX,XX +XXX,XX @@ def flatten_qmp_object(self, obj, output=None, basestr=''):
26
27
def qmp_to_opts(self, obj):
28
obj = self.flatten_qmp_object(obj)
29
- output_list = list()
30
+ output_list = []
31
for key in obj:
32
output_list += [key + '=' + obj[key]]
33
return ','.join(output_list)
34
--
35
2.31.1
36
37
Deleted patch
1
169 and 199 have been renamed and moved to tests/ (commit a44be0334be:
2
"iotests: rename and move 169 and 199 tests"), so we can drop them from
3
the skip list.
4
1
5
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
6
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
9
Message-Id: <20210902094017.32902-2-hreitz@redhat.com>
10
---
11
tests/qemu-iotests/297 | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
14
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
15
index XXXXXXX..XXXXXXX 100755
16
--- a/tests/qemu-iotests/297
17
+++ b/tests/qemu-iotests/297
18
@@ -XXX,XX +XXX,XX @@ import iotests
19
SKIP_FILES = (
20
'030', '040', '041', '044', '045', '055', '056', '057', '065', '093',
21
'096', '118', '124', '132', '136', '139', '147', '148', '149',
22
- '151', '152', '155', '163', '165', '169', '194', '196', '199', '202',
23
+ '151', '152', '155', '163', '165', '194', '196', '202',
24
'203', '205', '206', '207', '208', '210', '211', '212', '213', '216',
25
'218', '219', '224', '228', '234', '235', '236', '237', '238',
26
'240', '242', '245', '246', '248', '255', '256', '257', '258', '260',
27
--
28
2.31.1
29
30
Deleted patch
pylint complains that discards1_sha256 and all_discards_sha256 are first
set in non-__init__ methods.

These variables are not really class-variables anyway, so let them
instead be returned by start_postcopy(), thus silencing pylint.

Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210902094017.32902-3-hreitz@redhat.com>
---
 .../tests/migrate-bitmaps-postcopy-test | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
+++ b/tests/qemu-iotests/tests/migrate-bitmaps-postcopy-test
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):

         result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
                                node='drive0', name='bitmap0')
-        self.discards1_sha256 = result['return']['sha256']
+        discards1_sha256 = result['return']['sha256']

         # Check, that updating the bitmap by discards works
-        assert self.discards1_sha256 != empty_sha256
+        assert discards1_sha256 != empty_sha256

         # We want to calculate resulting sha256. Do it in bitmap0, so, disable
         # other bitmaps
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):

         result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
                                node='drive0', name='bitmap0')
-        self.all_discards_sha256 = result['return']['sha256']
+        all_discards_sha256 = result['return']['sha256']

         # Now, enable some bitmaps, to be updated during migration
         for i in range(2, nb_bitmaps, 2):
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):

         event_resume = self.vm_b.event_wait('RESUME')
         self.vm_b_events.append(event_resume)
-        return event_resume
+        return (event_resume, discards1_sha256, all_discards_sha256)

     def test_postcopy_success(self):
-        event_resume = self.start_postcopy()
+        event_resume, discards1_sha256, all_discards_sha256 = \
+                self.start_postcopy()

         # enabled bitmaps should be updated
         apply_discards(self.vm_b, discards2)
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase):
         for i in range(0, nb_bitmaps, 5):
             result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
                                    node='drive0', name='bitmap{}'.format(i))
-            sha = self.discards1_sha256 if i % 2 else self.all_discards_sha256
+            sha = discards1_sha256 if i % 2 else all_discards_sha256
             self.assert_qmp(result, 'return/sha256', sha)

     def test_early_shutdown_destination(self):
--
2.31.1
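The warning silenced here is pylint's attribute-defined-outside-init (W0201);
a minimal sketch of the pattern and its fix, with hypothetical names:

    class Before:
        def compute(self):
            self.result = 42      # W0201: attribute defined outside __init__


    class After:
        def compute(self):
            result = 42           # a plain local instead...
            return result         # ...handed back to the caller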
Deleted patch
There are a couple of things pylint takes issue with:
- The "time" import is unused
- The import order (iotests should come last)
- get_bitmap_hash() doesn't use @self and so should be a function
- Semicolons at the end of some lines
- Parentheses after "if"
- Some lines are too long (80 characters instead of 79)
- inject_test_case()'s @name parameter shadows a top-level @name
  variable
- "lambda self: mc(self)" were equivalent to just "mc", but in
  inject_test_case(), it is not equivalent, so add a comment and disable
  the warning locally
- Always put two empty lines after a function
- f'exec: cat > /dev/null' does not need to be an f-string

Fix them.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210902094017.32902-4-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 tests/qemu-iotests/tests/migrate-bitmaps-test | 43 +++++++++++--------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/tests/qemu-iotests/tests/migrate-bitmaps-test b/tests/qemu-iotests/tests/migrate-bitmaps-test
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/tests/migrate-bitmaps-test
+++ b/tests/qemu-iotests/tests/migrate-bitmaps-test
@@ -XXX,XX +XXX,XX @@
 #

 import os
-import iotests
-import time
 import itertools
 import operator
 import re
+import iotests
 from iotests import qemu_img, qemu_img_create, Timeout


@@ -XXX,XX +XXX,XX @@ mig_cmd = 'exec: cat > ' + mig_file
 incoming_cmd = 'exec: cat ' + mig_file


+def get_bitmap_hash(vm):
+    result = vm.qmp('x-debug-block-dirty-bitmap-sha256',
+                    node='drive0', name='bitmap0')
+    return result['return']['sha256']
+
+
 class TestDirtyBitmapMigration(iotests.QMPTestCase):
     def tearDown(self):
         self.vm_a.shutdown()
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
             params['persistent'] = True

         result = vm.qmp('block-dirty-bitmap-add', **params)
-        self.assert_qmp(result, 'return', {});
-
-    def get_bitmap_hash(self, vm):
-        result = vm.qmp('x-debug-block-dirty-bitmap-sha256',
-                        node='drive0', name='bitmap0')
-        return result['return']['sha256']
+        self.assert_qmp(result, 'return', {})

     def check_bitmap(self, vm, sha256):
         result = vm.qmp('x-debug-block-dirty-bitmap-sha256',
                         node='drive0', name='bitmap0')
         if sha256:
-            self.assert_qmp(result, 'return/sha256', sha256);
+            self.assert_qmp(result, 'return/sha256', sha256)
         else:
             self.assert_qmp(result, 'error/desc',
-                            "Dirty bitmap 'bitmap0' not found");
+                            "Dirty bitmap 'bitmap0' not found")

     def do_test_migration_resume_source(self, persistent, migrate_bitmaps):
         granularity = 512
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
         self.add_bitmap(self.vm_a, granularity, persistent)
         for r in regions:
             self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % r)
-        sha256 = self.get_bitmap_hash(self.vm_a)
+        sha256 = get_bitmap_hash(self.vm_a)

         result = self.vm_a.qmp('migrate', uri=mig_cmd)
         while True:
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
                 break
         while True:
             result = self.vm_a.qmp('query-status')
-            if (result['return']['status'] == 'postmigrate'):
+            if result['return']['status'] == 'postmigrate':
                 break

         # test that bitmap is still here
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
         self.add_bitmap(self.vm_a, granularity, persistent)
         for r in regions:
             self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % r)
-        sha256 = self.get_bitmap_hash(self.vm_a)
+        sha256 = get_bitmap_hash(self.vm_a)

         if pre_shutdown:
             self.vm_a.shutdown()
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
         self.check_bitmap(self.vm_b, sha256 if persistent else False)


-def inject_test_case(klass, name, method, *args, **kwargs):
+def inject_test_case(klass, suffix, method, *args, **kwargs):
     mc = operator.methodcaller(method, *args, **kwargs)
-    setattr(klass, 'test_' + method + name, lambda self: mc(self))
+    # We want to add a function attribute to `klass`, so that it is
+    # correctly converted to a method on instantiation.  The
+    # methodcaller object `mc` is a callable, not a function, so we
+    # need the lambda to turn it into a function.
+    # pylint: disable=unnecessary-lambda
+    setattr(klass, 'test_' + method + suffix, lambda self: mc(self))
+

 for cmb in list(itertools.product((True, False), repeat=5)):
     name = ('_' if cmb[0] else '_not_') + 'persistent_'
     name += ('_' if cmb[1] else '_not_') + 'migbitmap_'
     name += '_online' if cmb[2] else '_offline'
     name += '_shared' if cmb[3] else '_nonshared'
-    if (cmb[4]):
+    if cmb[4]:
         name += '__pre_shutdown'

     inject_test_case(TestDirtyBitmapMigration, name, 'do_test_migration',
@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapBackingMigration(iotests.QMPTestCase):
         self.assert_qmp(result, 'return', {})

         # Check that the bitmaps are there
-        for node in self.vm.qmp('query-named-block-nodes', flat=True)['return']:
+        nodes = self.vm.qmp('query-named-block-nodes', flat=True)['return']
+        for node in nodes:
             if 'node0' in node['node-name']:
                 self.assert_qmp(node, 'dirty-bitmaps[0]/name', 'bmap0')

@@ -XXX,XX +XXX,XX @@ class TestDirtyBitmapBackingMigration(iotests.QMPTestCase):
         """
         Continue the source after migration.
         """
-        result = self.vm.qmp('migrate', uri=f'exec: cat > /dev/null')
+        result = self.vm.qmp('migrate', uri='exec: cat > /dev/null')
         self.assert_qmp(result, 'return', {})

         with Timeout(10, 'Migration timeout'):
--
2.31.1
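Why the lambda above is not "unnecessary": a plain function is a descriptor
and becomes a bound method when looked up on an instance, while a
methodcaller object is merely callable and does not bind self. A runnable
sketch with hypothetical names:

    import operator

    class Greeter:
        def greet(self, name):
            return 'hello ' + name

    mc = operator.methodcaller('greet', 'world')

    # The lambda is a function, so attribute lookup binds self:
    setattr(Greeter, 'test_lambda', lambda self: mc(self))
    print(Greeter().test_lambda())        # -> hello world

    # The methodcaller is not a function; no binding happens, and the call
    # raises TypeError because methodcaller still expects its one argument:
    setattr(Greeter, 'test_mc', mc)
    try:
        Greeter().test_mc()
    except TypeError as exc:
        print('fails:', exc)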
Deleted patch
The AbnormalShutdown exception class is not in qemu.machine, but in
qemu.machine.machine.  (qemu.machine.AbnormalShutdown was enough for
Python to find it in order to run this test, but pylint complains about
it.)

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210902094017.32902-5-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 tests/qemu-iotests/tests/mirror-top-perms | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/tests/mirror-top-perms b/tests/qemu-iotests/tests/mirror-top-perms
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/tests/mirror-top-perms
+++ b/tests/qemu-iotests/tests/mirror-top-perms
@@ -XXX,XX +XXX,XX @@ class TestMirrorTopPerms(iotests.QMPTestCase):
     def tearDown(self):
         try:
             self.vm.shutdown()
-        except qemu.machine.AbnormalShutdown:
+        except qemu.machine.machine.AbnormalShutdown:
             pass

         if self.vm_b is not None:
--
2.31.1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

There is no conflict and no dependency if we have parallel writes to
different subclusters of one cluster when the cluster itself is already
allocated. So, relax extra dependency.

Measure performance:
First, prepare build/qemu-img-old and build/qemu-img-new images.

  cd scripts/simplebench
  ./img_bench_templater.py

Paste the following to stdin of running script:

  qemu_img=../../build/qemu-img-{old|new}
  $qemu_img create -f qcow2 -o extended_l2=on /ssd/x.qcow2 1G
  $qemu_img bench -c 100000 -d 8 [-s 2K|-s 2K -o 512|-s $((1024*2+512))] \
      -w -t none -n /ssd/x.qcow2

The result:

All results are in seconds

------------------  ---------  ---------
                    old        new
-s 2K               6.7 ± 15%  6.2 ± 12%
                                     -7%
-s 2K -o 512          13 ± 3%    11 ± 5%
                                    -16%
-s $((1024*2+512))   9.5 ± 4%        8.4
                                    -12%
------------------  ---------  ---------

So small writes are more independent now, and that helps to keep a deeper
I/O queue, which improves performance.

The 271 iotest output becomes racy for three allocations in one cluster:
the second and third writes may finish in different order. The second and
third requests don't depend on each other any more; still, they both
depend on the first request anyway. Filter out the second and third write
offsets to cover both possible outputs.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20210824101517.59802-4-vsementsov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
[hreitz: s/ an / and /]
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2-cluster.c      | 11 +++++++++++
 tests/qemu-iotests/271     |  5 ++++-
 tests/qemu-iotests/271.out |  4 ++--
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
             continue;
         }

+        if (old_alloc->keep_old_clusters &&
+            (end <= l2meta_cow_start(old_alloc) ||
+             start >= l2meta_cow_end(old_alloc)))
+        {
+            /*
+             * Clusters intersect but COW areas don't. And cluster itself is
+             * already allocated. So, there is no actual conflict.
+             */
+            continue;
+        }
+
         /* Conflict */

         if (start < old_start) {
diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/271
+++ b/tests/qemu-iotests/271
@@ -XXX,XX +XXX,XX @@ EOF
 }

 _make_test_img -o extended_l2=on 1M
-_concurrent_io | $QEMU_IO | _filter_qemu_io
+# Second and third writes in _concurrent_io() are independent and may finish in
+# different order. So, filter offset out to match both possible variants.
+_concurrent_io | $QEMU_IO | _filter_qemu_io | \
+    $SED -e 's/\(20480\|40960\)/OFFSET/'
 _concurrent_verify | $QEMU_IO | _filter_qemu_io

 # success, all done
diff --git a/tests/qemu-iotests/271.out b/tests/qemu-iotests/271.out
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/271.out
+++ b/tests/qemu-iotests/271.out
@@ -XXX,XX +XXX,XX @@ blkdebug: Suspended request 'A'
 blkdebug: Resuming request 'A'
 wrote 2048/2048 bytes at offset 30720
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 2048/2048 bytes at offset 20480
+wrote 2048/2048 bytes at offset OFFSET
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 2048/2048 bytes at offset 40960
+wrote 2048/2048 bytes at offset OFFSET
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

All that CoQueue needs in order to become thread-safe is help
from an external mutex. Add this to the API.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/qemu/coroutine.h   |  8 +++++---
 block/backup.c             |  2 +-
 block/io.c                 |  4 ++--
 block/nbd-client.c         |  2 +-
 block/qcow2-cluster.c      |  4 +---
 block/sheepdog.c           |  2 +-
 block/throttle-groups.c    |  2 +-
 hw/9pfs/9p.c               |  2 +-
 util/qemu-coroutine-lock.c | 24 +++++++++++++++++++++---
 9 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);

 /**
  * CoQueues are a mechanism to queue coroutines in order to continue executing
- * them later.
+ * them later. They are similar to condition variables, but they need help
+ * from an external mutex in order to maintain thread-safety.
  */
 typedef struct CoQueue {
     QSIMPLEQ_HEAD(, Coroutine) entries;
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue);

 /**
  * Adds the current coroutine to the CoQueue and transfers control to the
- * caller of the coroutine.
+ * caller of the coroutine. The mutex is unlocked during the wait and
+ * locked again afterwards.
  */
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex);

 /**
  * Restarts the next coroutine in the CoQueue and removes it from the queue.
diff --git a/block/backup.c b/block/backup.c
index XXXXXXX..XXXXXXX 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
         retry = false;
         QLIST_FOREACH(req, &job->inflight_reqs, list) {
             if (end > req->start && start < req->end) {
-                qemu_co_queue_wait(&req->wait_queue);
+                qemu_co_queue_wait(&req->wait_queue, NULL);
                 retry = true;
                 break;
             }
diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                  * (instead of producing a deadlock in the former case). */
                 if (!req->waiting_for) {
                     self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue);
+                    qemu_co_queue_wait(&req->wait_queue, NULL);
                     self->waiting_for = NULL;
                     retry = true;
                     waited = true;
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)

     /* Wait until any previous flushes are completed */
     while (bs->active_flush_req) {
-        qemu_co_queue_wait(&bs->flush_queue);
+        qemu_co_queue_wait(&bs->flush_queue, NULL);
     }

     bs->active_flush_req = true;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -XXX,XX +XXX,XX @@ static void nbd_coroutine_start(NBDClientSession *s,
     /* Poor man semaphore. The free_sema is locked when no other request
      * can be accepted, and unlocked after receiving one reply. */
     if (s->in_flight == MAX_NBD_REQUESTS) {
-        qemu_co_queue_wait(&s->free_sema);
+        qemu_co_queue_wait(&s->free_sema, NULL);
         assert(s->in_flight < MAX_NBD_REQUESTS);
     }
     s->in_flight++;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
             if (bytes == 0) {
                 /* Wait for the dependency to complete. We need to recheck
                  * the free/allocated clusters when we continue. */
-                qemu_co_mutex_unlock(&s->lock);
-                qemu_co_queue_wait(&old_alloc->dependent_requests);
-                qemu_co_mutex_lock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
                 return -EAGAIN;
             }
         }

diff --git a/block/sheepdog.c b/block/sheepdog.c
index XXXXXXX..XXXXXXX 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -XXX,XX +XXX,XX @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
 retry:
     QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
         if (AIOCBOverlapping(acb, cb)) {
-            qemu_co_queue_wait(&s->overlapping_queue);
+            qemu_co_queue_wait(&s->overlapping_queue, NULL);
             goto retry;
         }
     }
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index XXXXXXX..XXXXXXX 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -XXX,XX +XXX,XX @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
     if (must_wait || blkp->pending_reqs[is_write]) {
         blkp->pending_reqs[is_write]++;
         qemu_mutex_unlock(&tg->lock);
-        qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
+        qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
         qemu_mutex_lock(&tg->lock);
         blkp->pending_reqs[is_write]--;
     }
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn v9fs_flush(void *opaque)
         /*
          * Wait for pdu to complete.
          */
-        qemu_co_queue_wait(&cancel_pdu->complete);
+        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
         cancel_pdu->cancelled = 0;
         pdu_free(cancel_pdu);
     }
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue)
     QSIMPLEQ_INIT(&queue->entries);
 }

-void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex)
 {
     Coroutine *self = qemu_coroutine_self();
     QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+
+    if (mutex) {
+        qemu_co_mutex_unlock(mutex);
+    }
+
+    /* There is no race condition here.  Other threads will call
+     * aio_co_schedule on our AioContext, which can reenter this
+     * coroutine but only after this yield and after the main loop
+     * has gone through the next iteration.
+     */
     qemu_coroutine_yield();
     assert(qemu_in_coroutine());
+
+    /* TODO: OSv implements wait morphing here, where the wakeup
+     * primitive automatically places the woken coroutine on the
+     * mutex's queue.  This avoids the thundering herd effect.
+     */
+    if (mutex) {
+        qemu_co_mutex_lock(mutex);
+    }
 }

 /**
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock)
     Coroutine *self = qemu_coroutine_self();

     while (lock->writer) {
-        qemu_co_queue_wait(&lock->queue);
+        qemu_co_queue_wait(&lock->queue, NULL);
     }
     lock->reader++;
     self->locks_held++;
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_wrlock(CoRwlock *lock)
     Coroutine *self = qemu_coroutine_self();

     while (lock->writer || lock->reader) {
-        qemu_co_queue_wait(&lock->queue);
+        qemu_co_queue_wait(&lock->queue, NULL);
     }
     lock->writer = true;
     self->locks_held++;
--
2.9.3
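The queue-plus-mutex pairing introduced above is the classic
condition-variable protocol: the lock is dropped while waiting, re-taken on
wakeup, and the predicate is re-checked in a loop. As an analogy only
(Python asyncio, not QEMU code), the same shape looks like this:

    import asyncio

    async def consumer(cond, items):
        async with cond:              # like qemu_co_mutex_lock()
            while not items:          # re-check the predicate after wakeup
                await cond.wait()     # drops the lock, re-acquires afterwards
            return items.pop()

    async def producer(cond, items):
        async with cond:
            items.append('req')
            cond.notify()             # like qemu_co_queue_next()

    async def main():
        cond, items = asyncio.Condition(), []
        get = asyncio.create_task(consumer(cond, items))
        await producer(cond, items)
        print(await get)              # -> req

    asyncio.run(main())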
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

- use g_autofree for l1_table
- better name for size in bytes variable
- reduce code blocks nesting
- whitespaces, braces, newlines

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20210914122454.141075-9-vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/qcow2-refcount.c | 98 +++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l1(BlockDriverState *bs,
                               int flags, BdrvCheckMode fix, bool active)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table = NULL, l2_offset, l1_size2;
+    size_t l1_size_bytes = l1_size * L1E_SIZE;
+    g_autofree uint64_t *l1_table = NULL;
+    uint64_t l2_offset;
     int i, ret;

-    l1_size2 = l1_size * L1E_SIZE;
+    if (!l1_size) {
+        return 0;
+    }

     /* Mark L1 table as used */
     ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
-                                   l1_table_offset, l1_size2);
+                                   l1_table_offset, l1_size_bytes);
     if (ret < 0) {
-        goto fail;
+        return ret;
+    }
+
+    l1_table = g_try_malloc(l1_size_bytes);
+    if (l1_table == NULL) {
+        res->check_errors++;
+        return -ENOMEM;
     }

     /* Read L1 table entries from disk */
-    if (l1_size2 > 0) {
-        l1_table = g_try_malloc(l1_size2);
-        if (l1_table == NULL) {
-            ret = -ENOMEM;
-            res->check_errors++;
-            goto fail;
-        }
-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
-            res->check_errors++;
-            goto fail;
-        }
-        for(i = 0;i < l1_size; i++)
-            be64_to_cpus(&l1_table[i]);
+    ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size_bytes);
+    if (ret < 0) {
+        fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+        res->check_errors++;
+        return ret;
+    }
+
+    for (i = 0; i < l1_size; i++) {
+        be64_to_cpus(&l1_table[i]);
     }

     /* Do the actual checks */
-    for(i = 0; i < l1_size; i++) {
-        l2_offset = l1_table[i];
-        if (l2_offset) {
-            /* Mark L2 table as used */
-            l2_offset &= L1E_OFFSET_MASK;
-            ret = qcow2_inc_refcounts_imrt(bs, res,
-                                           refcount_table, refcount_table_size,
-                                           l2_offset, s->cluster_size);
-            if (ret < 0) {
-                goto fail;
-            }
+    for (i = 0; i < l1_size; i++) {
+        if (!l1_table[i]) {
+            continue;
+        }

-            /* L2 tables are cluster aligned */
-            if (offset_into_cluster(s, l2_offset)) {
-                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
-                        "cluster aligned; L1 entry corrupted\n", l2_offset);
-                res->corruptions++;
-            }
+        l2_offset = l1_table[i] & L1E_OFFSET_MASK;

-            /* Process and check L2 entries */
-            ret = check_refcounts_l2(bs, res, refcount_table,
-                                     refcount_table_size, l2_offset, flags,
-                                     fix, active);
-            if (ret < 0) {
-                goto fail;
-            }
+        /* Mark L2 table as used */
+        ret = qcow2_inc_refcounts_imrt(bs, res,
+                                       refcount_table, refcount_table_size,
+                                       l2_offset, s->cluster_size);
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* L2 tables are cluster aligned */
+        if (offset_into_cluster(s, l2_offset)) {
+            fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+                    "cluster aligned; L1 entry corrupted\n", l2_offset);
+            res->corruptions++;
+        }
+
+        /* Process and check L2 entries */
+        ret = check_refcounts_l2(bs, res, refcount_table,
+                                 refcount_table_size, l2_offset, flags,
+                                 fix, active);
+        if (ret < 0) {
+            return ret;
         }
     }
-    g_free(l1_table);
-    return 0;

-fail:
-    g_free(l1_table);
-    return ret;
+    return 0;
 }

 /*
--
2.31.1

From: Paolo Bonzini <pbonzini@redhat.com>

This adds a CoMutex around the existing CoQueue. Because the write-side
can just take CoMutex, the old "writer" field is not necessary anymore.
Instead of removing it altogether, count the number of pending writers
during a read-side critical section and forbid further readers from
entering.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/qemu/coroutine.h   |  3 ++-
 util/qemu-coroutine-lock.c | 35 ++++++++++++++++++++++-----------
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue);


 typedef struct CoRwlock {
-    bool writer;
+    int pending_writer;
     int reader;
+    CoMutex mutex;
     CoQueue queue;
 } CoRwlock;

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_init(CoRwlock *lock)
 {
     memset(lock, 0, sizeof(*lock));
     qemu_co_queue_init(&lock->queue);
+    qemu_co_mutex_init(&lock->mutex);
 }

 void qemu_co_rwlock_rdlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();

-    while (lock->writer) {
-        qemu_co_queue_wait(&lock->queue, NULL);
+    qemu_co_mutex_lock(&lock->mutex);
+    /* For fairness, wait if a writer is in line. */
+    while (lock->pending_writer) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
     }
     lock->reader++;
+    qemu_co_mutex_unlock(&lock->mutex);
+
+    /* The rest of the read-side critical section is run without the mutex. */
     self->locks_held++;
 }

@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
     Coroutine *self = qemu_coroutine_self();

     assert(qemu_in_coroutine());
-    if (lock->writer) {
-        lock->writer = false;
+    if (!lock->reader) {
+        /* The critical section started in qemu_co_rwlock_wrlock. */
         qemu_co_queue_restart_all(&lock->queue);
     } else {
+        self->locks_held--;
+
+        qemu_co_mutex_lock(&lock->mutex);
         lock->reader--;
         assert(lock->reader >= 0);
         /* Wakeup only one waiting writer */
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
             qemu_co_queue_next(&lock->queue);
         }
     }
-    self->locks_held--;
+    qemu_co_mutex_unlock(&lock->mutex);
 }

 void qemu_co_rwlock_wrlock(CoRwlock *lock)
 {
-    Coroutine *self = qemu_coroutine_self();
-
-    while (lock->writer || lock->reader) {
-        qemu_co_queue_wait(&lock->queue, NULL);
+    qemu_co_mutex_lock(&lock->mutex);
+    lock->pending_writer++;
+    while (lock->reader) {
+        qemu_co_queue_wait(&lock->queue, &lock->mutex);
     }
-    lock->writer = true;
-    self->locks_held++;
+    lock->pending_writer--;
+
+    /* The rest of the write-side critical section is run with
+     * the mutex taken, so that lock->reader remains zero.
+     * There is no need to update self->locks_held.
+     */
 }
--
2.9.3
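The fairness scheme above (readers back off while a writer is queued, and
the write-side critical section runs with the mutex held) can be summarized
in a toy re-implementation; this is an analogy in Python asyncio, not QEMU
code, and it relies on asyncio.Condition waking waiters in FIFO order, much
like the CoQueue:

    import asyncio

    class FairRWLock:
        def __init__(self):
            self.readers = 0
            self.pending_writers = 0
            self.cond = asyncio.Condition()

        async def rdlock(self):
            async with self.cond:
                while self.pending_writers:   # for fairness, queue behind writers
                    await self.cond.wait()
                self.readers += 1             # read section then runs unlocked

        async def rdunlock(self):
            async with self.cond:
                self.readers -= 1
                if not self.readers:
                    self.cond.notify()        # FIFO: the queued writer wakes first

        async def wrlock(self):
            await self.cond.acquire()         # write section runs with the lock held
            self.pending_writers += 1
            while self.readers:
                await self.cond.wait()
            self.pending_writers -= 1

        def wrunlock(self):
            self.cond.notify_all()            # blocked readers re-evaluate
            self.cond.release()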