1
The following changes since commit ba29883206d92a29ad5a466e679ccfc2ee6132ef:
1
The following changes since commit 56f9e46b841c7be478ca038d8d4085d776ab4b0d:
2
2
3
Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20200310' into staging (2020-03-10 16:50:28 +0000)
3
Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-20' into staging (2017-02-20 17:42:47 +0000)
4
4
5
are available in the Git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/XanClic/qemu.git tags/pull-block-2020-03-11
7
git://github.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 397f4e9d83e9c0000905f0a988ba1aeda162571c:
9
for you to fetch changes up to a7b91d35bab97a2d3e779d0c64c9b837b52a6cf7:
10
10
11
block/block-copy: hide structure definitions (2020-03-11 12:42:30 +0100)
11
coroutine-lock: make CoRwlock thread-safe and fair (2017-02-21 11:39:40 +0000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches for the 5.0 softfreeze:
14
Pull request
15
- qemu-img measure for LUKS
15
16
- Improve block-copy's performance by reducing inter-request
16
v2:
17
dependencies
17
* Rebased to resolve scsi conflicts
18
- Make curl's detection of accept-ranges more robust
19
- Memleak fixes
20
- iotest fix
21
18
22
----------------------------------------------------------------
19
----------------------------------------------------------------
23
David Edmondson (2):
24
block/curl: HTTP header fields allow whitespace around values
25
block/curl: HTTP header field names are case insensitive
26
20
27
Eric Blake (1):
21
Paolo Bonzini (24):
28
iotests: Fix nonportable use of od --endian
22
block: move AioContext, QEMUTimer, main-loop to libqemuutil
23
aio: introduce aio_co_schedule and aio_co_wake
24
block-backend: allow blk_prw from coroutine context
25
test-thread-pool: use generic AioContext infrastructure
26
io: add methods to set I/O handlers on AioContext
27
io: make qio_channel_yield aware of AioContexts
28
nbd: convert to use qio_channel_yield
29
coroutine-lock: reschedule coroutine on the AioContext it was running
30
on
31
blkdebug: reschedule coroutine on the AioContext it is running on
32
qed: introduce qed_aio_start_io and qed_aio_next_io_cb
33
aio: push aio_context_acquire/release down to dispatching
34
block: explicitly acquire aiocontext in timers that need it
35
block: explicitly acquire aiocontext in callbacks that need it
36
block: explicitly acquire aiocontext in bottom halves that need it
37
block: explicitly acquire aiocontext in aio callbacks that need it
38
aio-posix: partially inline aio_dispatch into aio_poll
39
async: remove unnecessary inc/dec pairs
40
block: document fields protected by AioContext lock
41
coroutine-lock: make CoMutex thread-safe
42
coroutine-lock: add limited spinning to CoMutex
43
test-aio-multithread: add performance comparison with thread-based
44
mutexes
45
coroutine-lock: place CoMutex before CoQueue in header
46
coroutine-lock: add mutex argument to CoQueue APIs
47
coroutine-lock: make CoRwlock thread-safe and fair
29
48
30
Pan Nengyuan (2):
49
Makefile.objs | 4 -
31
block/qcow2: do free crypto_opts in qcow2_close()
50
stubs/Makefile.objs | 1 +
32
qemu-img: free memory before re-assign
51
tests/Makefile.include | 19 +-
33
52
util/Makefile.objs | 6 +-
34
Stefan Hajnoczi (4):
53
block/nbd-client.h | 2 +-
35
luks: extract qcrypto_block_calculate_payload_offset()
54
block/qed.h | 3 +
36
luks: implement .bdrv_measure()
55
include/block/aio.h | 38 ++-
37
qemu-img: allow qemu-img measure --object without a filename
56
include/block/block_int.h | 64 +++--
38
iotests: add 288 luks qemu-img measure test
57
include/io/channel.h | 72 +++++-
39
58
include/qemu/coroutine.h | 84 ++++---
40
Vladimir Sementsov-Ogievskiy (10):
59
include/qemu/coroutine_int.h | 11 +-
41
block/qcow2-threads: fix qcow2_decompress
60
include/sysemu/block-backend.h | 14 +-
42
job: refactor progress to separate object
61
tests/iothread.h | 25 ++
43
block/block-copy: fix progress calculation
62
block/backup.c | 2 +-
44
block/block-copy: specialcase first copy_range request
63
block/blkdebug.c | 9 +-
45
block/block-copy: use block_status
64
block/blkreplay.c | 2 +-
46
block/block-copy: factor out find_conflicting_inflight_req
65
block/block-backend.c | 13 +-
47
block/block-copy: refactor interfaces to use bytes instead of end
66
block/curl.c | 44 +++-
48
block/block-copy: rename start to offset in interfaces
67
block/gluster.c | 9 +-
49
block/block-copy: reduce intersecting request lock
68
block/io.c | 42 +---
50
block/block-copy: hide structure definitions
69
block/iscsi.c | 15 +-
51
70
block/linux-aio.c | 10 +-
52
block/backup-top.c | 6 +-
71
block/mirror.c | 12 +-
53
block/backup.c | 38 ++-
72
block/nbd-client.c | 119 +++++----
54
block/block-copy.c | 405 ++++++++++++++++++++++++-------
73
block/nfs.c | 9 +-
55
block/crypto.c | 62 +++++
74
block/qcow2-cluster.c | 4 +-
56
block/curl.c | 32 ++-
75
block/qed-cluster.c | 2 +
57
block/qcow2-threads.c | 12 +-
76
block/qed-table.c | 12 +-
58
block/qcow2.c | 75 ++----
77
block/qed.c | 58 +++--
59
block/trace-events | 1 +
78
block/sheepdog.c | 31 +--
60
blockjob.c | 16 +-
79
block/ssh.c | 29 +--
61
crypto/block.c | 36 +++
80
block/throttle-groups.c | 4 +-
62
include/block/block-copy.h | 65 +----
81
block/win32-aio.c | 9 +-
63
include/crypto/block.h | 22 ++
82
dma-helpers.c | 2 +
64
include/qemu/job.h | 11 +-
83
hw/9pfs/9p.c | 2 +-
65
include/qemu/progress_meter.h | 58 +++++
84
hw/block/virtio-blk.c | 19 +-
66
job-qmp.c | 4 +-
85
hw/scsi/scsi-bus.c | 2 +
67
job.c | 6 +-
86
hw/scsi/scsi-disk.c | 15 ++
68
qemu-img.c | 14 +-
87
hw/scsi/scsi-generic.c | 20 +-
69
tests/qemu-iotests/178 | 2 +-
88
hw/scsi/virtio-scsi.c | 7 +
70
tests/qemu-iotests/178.out.qcow2 | 8 +-
89
io/channel-command.c | 13 +
71
tests/qemu-iotests/178.out.raw | 8 +-
90
io/channel-file.c | 11 +
72
tests/qemu-iotests/288 | 93 +++++++
91
io/channel-socket.c | 16 +-
73
tests/qemu-iotests/288.out | 30 +++
92
io/channel-tls.c | 12 +
74
tests/qemu-iotests/common.rc | 22 +-
93
io/channel-watch.c | 6 +
75
tests/qemu-iotests/group | 1 +
94
io/channel.c | 97 ++++++--
76
24 files changed, 749 insertions(+), 278 deletions(-)
95
nbd/client.c | 2 +-
77
create mode 100644 include/qemu/progress_meter.h
96
nbd/common.c | 9 +-
78
create mode 100755 tests/qemu-iotests/288
97
nbd/server.c | 94 +++-----
79
create mode 100644 tests/qemu-iotests/288.out
98
stubs/linux-aio.c | 32 +++
99
stubs/set-fd-handler.c | 11 -
100
tests/iothread.c | 91 +++++++
101
tests/test-aio-multithread.c | 463 ++++++++++++++++++++++++++++++++++++
102
tests/test-thread-pool.c | 12 +-
103
aio-posix.c => util/aio-posix.c | 62 ++---
104
aio-win32.c => util/aio-win32.c | 30 +--
105
util/aiocb.c | 55 +++++
106
async.c => util/async.c | 84 ++++++-
107
iohandler.c => util/iohandler.c | 0
108
main-loop.c => util/main-loop.c | 0
109
util/qemu-coroutine-lock.c | 254 ++++++++++++++++++--
110
util/qemu-coroutine-sleep.c | 2 +-
111
util/qemu-coroutine.c | 8 +
112
qemu-timer.c => util/qemu-timer.c | 0
113
thread-pool.c => util/thread-pool.c | 8 +-
114
trace-events | 11 -
115
util/trace-events | 17 +-
116
67 files changed, 1712 insertions(+), 533 deletions(-)
117
create mode 100644 tests/iothread.h
118
create mode 100644 stubs/linux-aio.c
119
create mode 100644 tests/iothread.c
120
create mode 100644 tests/test-aio-multithread.c
121
rename aio-posix.c => util/aio-posix.c (94%)
122
rename aio-win32.c => util/aio-win32.c (95%)
123
create mode 100644 util/aiocb.c
124
rename async.c => util/async.c (82%)
125
rename iohandler.c => util/iohandler.c (100%)
126
rename main-loop.c => util/main-loop.c (100%)
127
rename qemu-timer.c => util/qemu-timer.c (100%)
128
rename thread-pool.c => util/thread-pool.c (97%)
80
129
81
--
130
--
82
2.24.1
131
2.9.3
83
132
84
133
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
We need it as a separate object to pass to the block-copy object in the next
3
AioContext is fairly self contained, the only dependency is QEMUTimer but
4
commit.
4
that in turn doesn't need anything else. So move them out of block-obj-y
5
5
to avoid introducing a dependency from io/ to block-obj-y.
6
Cc: qemu-stable@nongnu.org
6
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
main-loop and its dependency iohandler also need to be moved, because
8
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
8
later in this series io/ will call iohandler_get_aio_context.
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
10
Message-Id: <20200311103004.7649-2-vsementsov@virtuozzo.com>
10
[Changed copyright "the QEMU team" to "other QEMU contributors" as
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
suggested by Daniel Berrange and agreed by Paolo.
12
--Stefan]
13
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
15
Reviewed-by: Fam Zheng <famz@redhat.com>
16
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
18
---
13
blockjob.c | 16 +++++-----
19
Makefile.objs | 4 ---
14
include/qemu/job.h | 11 ++-----
20
stubs/Makefile.objs | 1 +
15
include/qemu/progress_meter.h | 58 +++++++++++++++++++++++++++++++++++
21
tests/Makefile.include | 11 ++++----
16
job-qmp.c | 4 +--
22
util/Makefile.objs | 6 +++-
17
job.c | 6 ++--
23
block/io.c | 29 -------------------
18
qemu-img.c | 6 ++--
24
stubs/linux-aio.c | 32 +++++++++++++++++++++
19
6 files changed, 76 insertions(+), 25 deletions(-)
25
stubs/set-fd-handler.c | 11 --------
20
create mode 100644 include/qemu/progress_meter.h
26
aio-posix.c => util/aio-posix.c | 2 +-
21
27
aio-win32.c => util/aio-win32.c | 0
22
diff --git a/blockjob.c b/blockjob.c
28
util/aiocb.c | 55 +++++++++++++++++++++++++++++++++++++
23
index XXXXXXX..XXXXXXX 100644
29
async.c => util/async.c | 3 +-
24
--- a/blockjob.c
30
iohandler.c => util/iohandler.c | 0
25
+++ b/blockjob.c
31
main-loop.c => util/main-loop.c | 0
26
@@ -XXX,XX +XXX,XX @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
32
qemu-timer.c => util/qemu-timer.c | 0
27
info->device = g_strdup(job->job.id);
33
thread-pool.c => util/thread-pool.c | 2 +-
28
info->busy = atomic_read(&job->job.busy);
34
trace-events | 11 --------
29
info->paused = job->job.pause_count > 0;
35
util/trace-events | 11 ++++++++
30
- info->offset = job->job.progress_current;
36
17 files changed, 114 insertions(+), 64 deletions(-)
31
- info->len = job->job.progress_total;
37
create mode 100644 stubs/linux-aio.c
32
+ info->offset = job->job.progress.current;
38
rename aio-posix.c => util/aio-posix.c (99%)
33
+ info->len = job->job.progress.total;
39
rename aio-win32.c => util/aio-win32.c (100%)
34
info->speed = job->speed;
40
create mode 100644 util/aiocb.c
35
info->io_status = job->iostatus;
41
rename async.c => util/async.c (99%)
36
info->ready = job_is_ready(&job->job),
42
rename iohandler.c => util/iohandler.c (100%)
37
@@ -XXX,XX +XXX,XX @@ static void block_job_event_cancelled(Notifier *n, void *opaque)
43
rename main-loop.c => util/main-loop.c (100%)
38
44
rename qemu-timer.c => util/qemu-timer.c (100%)
39
qapi_event_send_block_job_cancelled(job_type(&job->job),
45
rename thread-pool.c => util/thread-pool.c (99%)
40
job->job.id,
46
41
- job->job.progress_total,
47
diff --git a/Makefile.objs b/Makefile.objs
42
- job->job.progress_current,
48
index XXXXXXX..XXXXXXX 100644
43
+ job->job.progress.total,
49
--- a/Makefile.objs
44
+ job->job.progress.current,
50
+++ b/Makefile.objs
45
job->speed);
51
@@ -XXX,XX +XXX,XX @@ chardev-obj-y = chardev/
52
#######################################################################
53
# block-obj-y is code used by both qemu system emulation and qemu-img
54
55
-block-obj-y = async.o thread-pool.o
56
block-obj-y += nbd/
57
block-obj-y += block.o blockjob.o
58
-block-obj-y += main-loop.o iohandler.o qemu-timer.o
59
-block-obj-$(CONFIG_POSIX) += aio-posix.o
60
-block-obj-$(CONFIG_WIN32) += aio-win32.o
61
block-obj-y += block/
62
block-obj-y += qemu-io-cmds.o
63
block-obj-$(CONFIG_REPLICATION) += replication.o
64
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
65
index XXXXXXX..XXXXXXX 100644
66
--- a/stubs/Makefile.objs
67
+++ b/stubs/Makefile.objs
68
@@ -XXX,XX +XXX,XX @@ stub-obj-y += get-vm-name.o
69
stub-obj-y += iothread.o
70
stub-obj-y += iothread-lock.o
71
stub-obj-y += is-daemonized.o
72
+stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
73
stub-obj-y += machine-init-done.o
74
stub-obj-y += migr-blocker.o
75
stub-obj-y += monitor.o
76
diff --git a/tests/Makefile.include b/tests/Makefile.include
77
index XXXXXXX..XXXXXXX 100644
78
--- a/tests/Makefile.include
79
+++ b/tests/Makefile.include
80
@@ -XXX,XX +XXX,XX @@ check-unit-y += tests/test-visitor-serialization$(EXESUF)
81
check-unit-y += tests/test-iov$(EXESUF)
82
gcov-files-test-iov-y = util/iov.c
83
check-unit-y += tests/test-aio$(EXESUF)
84
+gcov-files-test-aio-y = util/async.c util/qemu-timer.o
85
+gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
86
+gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
87
check-unit-y += tests/test-throttle$(EXESUF)
88
gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
89
gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
90
@@ -XXX,XX +XXX,XX @@ tests/check-qjson$(EXESUF): tests/check-qjson.o $(test-util-obj-y)
91
tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(test-qom-obj-y)
92
tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y)
93
94
-tests/test-char$(EXESUF): tests/test-char.o qemu-timer.o \
95
-    $(test-util-obj-y) $(qtest-obj-y) $(test-block-obj-y) $(chardev-obj-y)
96
+tests/test-char$(EXESUF): tests/test-char.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) $(chardev-obj-y)
97
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
98
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
99
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
100
@@ -XXX,XX +XXX,XX @@ tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
101
    migration/vmstate.o migration/qemu-file.o \
102
migration/qemu-file-channel.o migration/qjson.o \
103
    $(test-io-obj-y)
104
-tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \
105
-    $(test-util-obj-y)
106
+tests/test-timed-average$(EXESUF): tests/test-timed-average.o $(test-util-obj-y)
107
tests/test-base64$(EXESUF): tests/test-base64.o \
108
    libqemuutil.a libqemustub.a
109
tests/ptimer-test$(EXESUF): tests/ptimer-test.o tests/ptimer-test-stubs.o hw/core/ptimer.o libqemustub.a
110
@@ -XXX,XX +XXX,XX @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
111
tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
112
tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
113
tests/postcopy-test$(EXESUF): tests/postcopy-test.o
114
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-timer.o \
115
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o $(test-util-obj-y) \
116
    $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) $(libqos-pc-obj-y) \
117
    $(chardev-obj-y)
118
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
119
diff --git a/util/Makefile.objs b/util/Makefile.objs
120
index XXXXXXX..XXXXXXX 100644
121
--- a/util/Makefile.objs
122
+++ b/util/Makefile.objs
123
@@ -XXX,XX +XXX,XX @@
124
util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
125
util-obj-y += bufferiszero.o
126
util-obj-y += lockcnt.o
127
+util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o
128
+util-obj-y += main-loop.o iohandler.o
129
+util-obj-$(CONFIG_POSIX) += aio-posix.o
130
util-obj-$(CONFIG_POSIX) += compatfd.o
131
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
132
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
133
util-obj-$(CONFIG_POSIX) += oslib-posix.o
134
util-obj-$(CONFIG_POSIX) += qemu-openpty.o
135
util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
136
-util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
137
util-obj-$(CONFIG_POSIX) += memfd.o
138
+util-obj-$(CONFIG_WIN32) += aio-win32.o
139
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
140
util-obj-$(CONFIG_WIN32) += oslib-win32.o
141
util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
142
util-obj-y += envlist.o path.o module.o
143
diff --git a/block/io.c b/block/io.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/block/io.c
146
+++ b/block/io.c
147
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
148
return &acb->common;
46
}
149
}
47
150
48
@@ -XXX,XX +XXX,XX @@ static void block_job_event_completed(Notifier *n, void *opaque)
151
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
49
152
- BlockCompletionFunc *cb, void *opaque)
50
qapi_event_send_block_job_completed(job_type(&job->job),
153
-{
51
job->job.id,
154
- BlockAIOCB *acb;
52
- job->job.progress_total,
155
-
53
- job->job.progress_current,
156
- acb = g_malloc(aiocb_info->aiocb_size);
54
+ job->job.progress.total,
157
- acb->aiocb_info = aiocb_info;
55
+ job->job.progress.current,
158
- acb->bs = bs;
56
job->speed,
159
- acb->cb = cb;
57
!!msg,
160
- acb->opaque = opaque;
58
msg);
161
- acb->refcnt = 1;
59
@@ -XXX,XX +XXX,XX @@ static void block_job_event_ready(Notifier *n, void *opaque)
162
- return acb;
60
163
-}
61
qapi_event_send_block_job_ready(job_type(&job->job),
164
-
62
job->job.id,
165
-void qemu_aio_ref(void *p)
63
- job->job.progress_total,
166
-{
64
- job->job.progress_current,
167
- BlockAIOCB *acb = p;
65
+ job->job.progress.total,
168
- acb->refcnt++;
66
+ job->job.progress.current,
169
-}
67
job->speed);
170
-
68
}
171
-void qemu_aio_unref(void *p)
69
172
-{
70
diff --git a/include/qemu/job.h b/include/qemu/job.h
173
- BlockAIOCB *acb = p;
71
index XXXXXXX..XXXXXXX 100644
174
- assert(acb->refcnt > 0);
72
--- a/include/qemu/job.h
175
- if (--acb->refcnt == 0) {
73
+++ b/include/qemu/job.h
176
- g_free(acb);
74
@@ -XXX,XX +XXX,XX @@
177
- }
75
178
-}
76
#include "qapi/qapi-types-job.h"
179
-
77
#include "qemu/queue.h"
180
/**************************************************************/
78
+#include "qemu/progress_meter.h"
181
/* Coroutine block device emulation */
79
#include "qemu/coroutine.h"
182
80
#include "block/aio.h"
183
diff --git a/stubs/linux-aio.c b/stubs/linux-aio.c
81
82
@@ -XXX,XX +XXX,XX @@ typedef struct Job {
83
/** True if this job should automatically dismiss itself */
84
bool auto_dismiss;
85
86
- /**
87
- * Current progress. The unit is arbitrary as long as the ratio between
88
- * progress_current and progress_total represents the estimated percentage
89
- * of work already done.
90
- */
91
- int64_t progress_current;
92
-
93
- /** Estimated progress_current value at the completion of the job */
94
- int64_t progress_total;
95
+ ProgressMeter progress;
96
97
/**
98
* Return code from @run and/or @prepare callback(s).
99
diff --git a/include/qemu/progress_meter.h b/include/qemu/progress_meter.h
100
new file mode 100644
184
new file mode 100644
101
index XXXXXXX..XXXXXXX
185
index XXXXXXX..XXXXXXX
102
--- /dev/null
186
--- /dev/null
103
+++ b/include/qemu/progress_meter.h
187
+++ b/stubs/linux-aio.c
104
@@ -XXX,XX +XXX,XX @@
188
@@ -XXX,XX +XXX,XX @@
105
+/*
189
+/*
106
+ * Helper functionality for some process progress tracking.
190
+ * Linux native AIO support.
107
+ *
191
+ *
108
+ * Copyright (c) 2011 IBM Corp.
192
+ * Copyright (C) 2009 IBM, Corp.
109
+ * Copyright (c) 2012, 2018 Red Hat, Inc.
193
+ * Copyright (C) 2009 Red Hat, Inc.
110
+ * Copyright (c) 2020 Virtuozzo International GmbH
194
+ *
195
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
196
+ * See the COPYING file in the top-level directory.
197
+ */
198
+#include "qemu/osdep.h"
199
+#include "block/aio.h"
200
+#include "block/raw-aio.h"
201
+
202
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
203
+{
204
+ abort();
205
+}
206
+
207
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
208
+{
209
+ abort();
210
+}
211
+
212
+LinuxAioState *laio_init(void)
213
+{
214
+ abort();
215
+}
216
+
217
+void laio_cleanup(LinuxAioState *s)
218
+{
219
+ abort();
220
+}
221
diff --git a/stubs/set-fd-handler.c b/stubs/set-fd-handler.c
222
index XXXXXXX..XXXXXXX 100644
223
--- a/stubs/set-fd-handler.c
224
+++ b/stubs/set-fd-handler.c
225
@@ -XXX,XX +XXX,XX @@ void qemu_set_fd_handler(int fd,
226
{
227
abort();
228
}
229
-
230
-void aio_set_fd_handler(AioContext *ctx,
231
- int fd,
232
- bool is_external,
233
- IOHandler *io_read,
234
- IOHandler *io_write,
235
- AioPollFn *io_poll,
236
- void *opaque)
237
-{
238
- abort();
239
-}
240
diff --git a/aio-posix.c b/util/aio-posix.c
241
similarity index 99%
242
rename from aio-posix.c
243
rename to util/aio-posix.c
244
index XXXXXXX..XXXXXXX 100644
245
--- a/aio-posix.c
246
+++ b/util/aio-posix.c
247
@@ -XXX,XX +XXX,XX @@
248
#include "qemu/rcu_queue.h"
249
#include "qemu/sockets.h"
250
#include "qemu/cutils.h"
251
-#include "trace-root.h"
252
+#include "trace.h"
253
#ifdef CONFIG_EPOLL_CREATE1
254
#include <sys/epoll.h>
255
#endif
256
diff --git a/aio-win32.c b/util/aio-win32.c
257
similarity index 100%
258
rename from aio-win32.c
259
rename to util/aio-win32.c
260
diff --git a/util/aiocb.c b/util/aiocb.c
261
new file mode 100644
262
index XXXXXXX..XXXXXXX
263
--- /dev/null
264
+++ b/util/aiocb.c
265
@@ -XXX,XX +XXX,XX @@
266
+/*
267
+ * BlockAIOCB allocation
268
+ *
269
+ * Copyright (c) 2003-2017 Fabrice Bellard and other QEMU contributors
111
+ *
270
+ *
112
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
271
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
113
+ * of this software and associated documentation files (the "Software"), to deal
272
+ * of this software and associated documentation files (the "Software"), to deal
114
+ * in the Software without restriction, including without limitation the rights
273
+ * in the Software without restriction, including without limitation the rights
115
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
274
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
...
...
126
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
285
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
127
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
286
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
128
+ * THE SOFTWARE.
287
+ * THE SOFTWARE.
129
+ */
288
+ */
130
+
289
+
131
+#ifndef QEMU_PROGRESS_METER_H
290
+#include "qemu/osdep.h"
132
+#define QEMU_PROGRESS_METER_H
291
+#include "block/aio.h"
133
+
292
+
134
+typedef struct ProgressMeter {
293
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
135
+ /**
294
+ BlockCompletionFunc *cb, void *opaque)
136
+ * Current progress. The unit is arbitrary as long as the ratio between
295
+{
137
+ * current and total represents the estimated percentage
296
+ BlockAIOCB *acb;
138
+ * of work already done.
297
+
139
+ */
298
+ acb = g_malloc(aiocb_info->aiocb_size);
140
+ uint64_t current;
299
+ acb->aiocb_info = aiocb_info;
141
+
300
+ acb->bs = bs;
142
+ /** Estimated current value at the completion of the process */
301
+ acb->cb = cb;
143
+ uint64_t total;
302
+ acb->opaque = opaque;
144
+} ProgressMeter;
303
+ acb->refcnt = 1;
145
+
304
+ return acb;
146
+static inline void progress_work_done(ProgressMeter *pm, uint64_t done)
305
+}
147
+{
306
+
148
+ pm->current += done;
307
+void qemu_aio_ref(void *p)
149
+}
308
+{
150
+
309
+ BlockAIOCB *acb = p;
151
+static inline void progress_set_remaining(ProgressMeter *pm, uint64_t remaining)
310
+ acb->refcnt++;
152
+{
311
+}
153
+ pm->total = pm->current + remaining;
312
+
154
+}
313
+void qemu_aio_unref(void *p)
155
+
314
+{
156
+static inline void progress_increase_remaining(ProgressMeter *pm,
315
+ BlockAIOCB *acb = p;
157
+ uint64_t delta)
316
+ assert(acb->refcnt > 0);
158
+{
317
+ if (--acb->refcnt == 0) {
159
+ pm->total += delta;
318
+ g_free(acb);
160
+}
319
+ }
161
+
320
+}
162
+#endif /* QEMU_PROGRESS_METER_H */
321
diff --git a/async.c b/util/async.c
163
diff --git a/job-qmp.c b/job-qmp.c
322
similarity index 99%
164
index XXXXXXX..XXXXXXX 100644
323
rename from async.c
165
--- a/job-qmp.c
324
rename to util/async.c
166
+++ b/job-qmp.c
325
index XXXXXXX..XXXXXXX 100644
167
@@ -XXX,XX +XXX,XX @@ static JobInfo *job_query_single(Job *job, Error **errp)
326
--- a/async.c
168
.id = g_strdup(job->id),
327
+++ b/util/async.c
169
.type = job_type(job),
328
@@ -XXX,XX +XXX,XX @@
170
.status = job->status,
329
/*
171
- .current_progress = job->progress_current,
330
- * QEMU System Emulator
172
- .total_progress = job->progress_total,
331
+ * Data plane event loop
173
+ .current_progress = job->progress.current,
332
*
174
+ .total_progress = job->progress.total,
333
* Copyright (c) 2003-2008 Fabrice Bellard
175
.has_error = !!job->err,
334
+ * Copyright (c) 2009-2017 QEMU contributors
176
.error = job->err ? \
335
*
177
g_strdup(error_get_pretty(job->err)) : NULL,
336
* Permission is hereby granted, free of charge, to any person obtaining a copy
178
diff --git a/job.c b/job.c
337
* of this software and associated documentation files (the "Software"), to deal
179
index XXXXXXX..XXXXXXX 100644
338
diff --git a/iohandler.c b/util/iohandler.c
180
--- a/job.c
339
similarity index 100%
181
+++ b/job.c
340
rename from iohandler.c
182
@@ -XXX,XX +XXX,XX @@ void job_unref(Job *job)
341
rename to util/iohandler.c
183
342
diff --git a/main-loop.c b/util/main-loop.c
184
void job_progress_update(Job *job, uint64_t done)
343
similarity index 100%
185
{
344
rename from main-loop.c
186
- job->progress_current += done;
345
rename to util/main-loop.c
187
+ progress_work_done(&job->progress, done);
346
diff --git a/qemu-timer.c b/util/qemu-timer.c
188
}
347
similarity index 100%
189
348
rename from qemu-timer.c
190
void job_progress_set_remaining(Job *job, uint64_t remaining)
349
rename to util/qemu-timer.c
191
{
350
diff --git a/thread-pool.c b/util/thread-pool.c
192
- job->progress_total = job->progress_current + remaining;
351
similarity index 99%
193
+ progress_set_remaining(&job->progress, remaining);
352
rename from thread-pool.c
194
}
353
rename to util/thread-pool.c
195
354
index XXXXXXX..XXXXXXX 100644
196
void job_progress_increase_remaining(Job *job, uint64_t delta)
355
--- a/thread-pool.c
197
{
356
+++ b/util/thread-pool.c
198
- job->progress_total += delta;
357
@@ -XXX,XX +XXX,XX @@
199
+ progress_increase_remaining(&job->progress, delta);
358
#include "qemu/queue.h"
200
}
359
#include "qemu/thread.h"
201
360
#include "qemu/coroutine.h"
202
void job_event_cancelled(Job *job)
361
-#include "trace-root.h"
203
diff --git a/qemu-img.c b/qemu-img.c
362
+#include "trace.h"
204
index XXXXXXX..XXXXXXX 100644
363
#include "block/thread-pool.h"
205
--- a/qemu-img.c
364
#include "qemu/main-loop.h"
206
+++ b/qemu-img.c
365
207
@@ -XXX,XX +XXX,XX @@ static void run_block_job(BlockJob *job, Error **errp)
366
diff --git a/trace-events b/trace-events
208
do {
367
index XXXXXXX..XXXXXXX 100644
209
float progress = 0.0f;
368
--- a/trace-events
210
aio_poll(aio_context, true);
369
+++ b/trace-events
211
- if (job->job.progress_total) {
370
@@ -XXX,XX +XXX,XX @@
212
- progress = (float)job->job.progress_current /
371
#
213
- job->job.progress_total * 100.f;
372
# The <format-string> should be a sprintf()-compatible format string.
214
+ if (job->job.progress.total) {
373
215
+ progress = (float)job->job.progress.current /
374
-# aio-posix.c
216
+ job->job.progress.total * 100.f;
375
-run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
217
}
376
-run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
218
qemu_progress_print(progress, 0);
377
-poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
219
} while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
378
-poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
379
-
380
-# thread-pool.c
381
-thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
382
-thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
383
-thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
384
-
385
# ioport.c
386
cpu_in(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
387
cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
388
diff --git a/util/trace-events b/util/trace-events
389
index XXXXXXX..XXXXXXX 100644
390
--- a/util/trace-events
391
+++ b/util/trace-events
392
@@ -XXX,XX +XXX,XX @@
393
# See docs/tracing.txt for syntax documentation.
394
395
+# util/aio-posix.c
396
+run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
397
+run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
398
+poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
399
+poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
400
+
401
+# util/thread-pool.c
402
+thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
403
+thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
404
+thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
405
+
406
# util/buffer.c
407
buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"
408
buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s"
220
--
409
--
221
2.24.1
410
2.9.3
222
411
223
412
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
We have a lot of "chunk_end - start" invocations, let's switch to
3
aio_co_wake provides the infrastructure to start a coroutine on a "home"
4
bytes/cur_bytes scheme instead.
4
AioContext. It will be used by CoMutex and CoQueue, so that coroutines
5
5
don't jump from one context to another when they go to sleep on a
6
While at it, improve the check on block_copy_do_copy parameters to not
6
mutex or waitqueue. However, it can also be used as a more efficient
7
overflow when calculating nbytes and use int64_t for bytes in
7
alternative to one-shot bottom halves, and saves the effort of tracking
8
block_copy for consistency.
8
which AioContext a coroutine is running on.
9
9
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
aio_co_schedule is the part of aio_co_wake that starts a coroutine
11
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
11
on a remote AioContext, but it is also useful to implement e.g.
12
Reviewed-by: Max Reitz <mreitz@redhat.com>
12
bdrv_set_aio_context callbacks.
13
Message-Id: <20200311103004.7649-7-vsementsov@virtuozzo.com>
13
14
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
The implementation of aio_co_schedule is based on a lock-free
15
multiple-producer, single-consumer queue. The multiple producers use
16
cmpxchg to add to a LIFO stack. The consumer (a per-AioContext bottom
17
half) grabs all items added so far, inverts the list to make it FIFO,
18
and goes through it one item at a time until it's empty. The data
19
structure was inspired by OSv, which uses it in the very code we'll
20
"port" to QEMU for the thread-safe CoMutex.
21
22
Most of the new code is really tests.
23
24
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
25
Reviewed-by: Fam Zheng <famz@redhat.com>
26
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
27
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
28
---
16
block/block-copy.c | 78 ++++++++++++++++++++------------------
29
tests/Makefile.include | 8 +-
17
include/block/block-copy.h | 6 +--
30
include/block/aio.h | 32 +++++++
18
2 files changed, 44 insertions(+), 40 deletions(-)
31
include/qemu/coroutine_int.h | 11 ++-
19
32
tests/iothread.h | 25 +++++
20
diff --git a/block/block-copy.c b/block/block-copy.c
33
tests/iothread.c | 91 ++++++++++++++++++
34
tests/test-aio-multithread.c | 213 +++++++++++++++++++++++++++++++++++++++++++
35
util/async.c | 65 +++++++++++++
36
util/qemu-coroutine.c | 8 ++
37
util/trace-events | 4 +
38
9 files changed, 453 insertions(+), 4 deletions(-)
39
create mode 100644 tests/iothread.h
40
create mode 100644 tests/iothread.c
41
create mode 100644 tests/test-aio-multithread.c
42
43
diff --git a/tests/Makefile.include b/tests/Makefile.include
21
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
22
--- a/block/block-copy.c
45
--- a/tests/Makefile.include
23
+++ b/block/block-copy.c
46
+++ b/tests/Makefile.include
47
@@ -XXX,XX +XXX,XX @@ check-unit-y += tests/test-aio$(EXESUF)
48
gcov-files-test-aio-y = util/async.c util/qemu-timer.o
49
gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
50
gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
51
+check-unit-y += tests/test-aio-multithread$(EXESUF)
52
+gcov-files-test-aio-multithread-y = $(gcov-files-test-aio-y)
53
+gcov-files-test-aio-multithread-y += util/qemu-coroutine.c tests/iothread.c
54
check-unit-y += tests/test-throttle$(EXESUF)
55
-gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
56
-gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
57
check-unit-y += tests/test-thread-pool$(EXESUF)
58
gcov-files-test-thread-pool-y = thread-pool.c
59
gcov-files-test-hbitmap-y = util/hbitmap.c
60
@@ -XXX,XX +XXX,XX @@ test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \
61
    $(test-qom-obj-y)
62
test-crypto-obj-y = $(crypto-obj-y) $(test-qom-obj-y)
63
test-io-obj-y = $(io-obj-y) $(test-crypto-obj-y)
64
-test-block-obj-y = $(block-obj-y) $(test-io-obj-y)
65
+test-block-obj-y = $(block-obj-y) $(test-io-obj-y) tests/iothread.o
66
67
tests/check-qint$(EXESUF): tests/check-qint.o $(test-util-obj-y)
68
tests/check-qstring$(EXESUF): tests/check-qstring.o $(test-util-obj-y)
69
@@ -XXX,XX +XXX,XX @@ tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y)
70
tests/test-char$(EXESUF): tests/test-char.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) $(chardev-obj-y)
71
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
72
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
73
+tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
74
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
75
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
76
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
77
diff --git a/include/block/aio.h b/include/block/aio.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/block/aio.h
80
+++ b/include/block/aio.h
81
@@ -XXX,XX +XXX,XX @@ typedef void QEMUBHFunc(void *opaque);
82
typedef bool AioPollFn(void *opaque);
83
typedef void IOHandler(void *opaque);
84
85
+struct Coroutine;
86
struct ThreadPool;
87
struct LinuxAioState;
88
89
@@ -XXX,XX +XXX,XX @@ struct AioContext {
90
bool notified;
91
EventNotifier notifier;
92
93
+ QSLIST_HEAD(, Coroutine) scheduled_coroutines;
94
+ QEMUBH *co_schedule_bh;
95
+
96
/* Thread pool for performing work and receiving completion callbacks.
97
* Has its own locking.
98
*/
99
@@ -XXX,XX +XXX,XX @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
100
}
101
102
/**
103
+ * aio_co_schedule:
104
+ * @ctx: the aio context
105
+ * @co: the coroutine
106
+ *
107
+ * Start a coroutine on a remote AioContext.
108
+ *
109
+ * The coroutine must not be entered by anyone else while aio_co_schedule()
110
+ * is active. In addition the coroutine must have yielded unless ctx
111
+ * is the context in which the coroutine is running (i.e. the value of
112
+ * qemu_get_current_aio_context() from the coroutine itself).
113
+ */
114
+void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
115
+
116
+/**
117
+ * aio_co_wake:
118
+ * @co: the coroutine
119
+ *
120
+ * Restart a coroutine on the AioContext where it was running last, thus
121
+ * preventing coroutines from jumping from one context to another when they
122
+ * go to sleep.
123
+ *
124
+ * aio_co_wake may be executed either in coroutine or non-coroutine
125
+ * context. The coroutine must not be entered by anyone else while
126
+ * aio_co_wake() is active.
127
+ */
128
+void aio_co_wake(struct Coroutine *co);
129
+
130
+/**
131
* Return the AioContext whose event loop runs in the current thread.
132
*
133
* If called from an IOThread this will be the IOThread's AioContext. If
134
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/include/qemu/coroutine_int.h
137
+++ b/include/qemu/coroutine_int.h
138
@@ -XXX,XX +XXX,XX @@ struct Coroutine {
139
CoroutineEntry *entry;
140
void *entry_arg;
141
Coroutine *caller;
142
+
143
+ /* Only used when the coroutine has terminated. */
144
QSLIST_ENTRY(Coroutine) pool_next;
145
+
146
size_t locks_held;
147
148
- /* Coroutines that should be woken up when we yield or terminate */
149
+ /* Coroutines that should be woken up when we yield or terminate.
150
+ * Only used when the coroutine is running.
151
+ */
152
QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
153
+
154
+ /* Only used when the coroutine has yielded. */
155
+ AioContext *ctx;
156
QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
157
+ QSLIST_ENTRY(Coroutine) co_scheduled_next;
158
};
159
160
Coroutine *qemu_coroutine_new(void);
161
diff --git a/tests/iothread.h b/tests/iothread.h
162
new file mode 100644
163
index XXXXXXX..XXXXXXX
164
--- /dev/null
165
+++ b/tests/iothread.h
24
@@ -XXX,XX +XXX,XX @@
166
@@ -XXX,XX +XXX,XX @@
25
167
+/*
26
static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
168
+ * Event loop thread implementation for unit tests
27
int64_t start,
169
+ *
28
- int64_t end)
170
+ * Copyright Red Hat Inc., 2013, 2016
29
+ int64_t bytes)
171
+ *
30
{
172
+ * Authors:
31
BlockCopyInFlightReq *req;
173
+ * Stefan Hajnoczi <stefanha@redhat.com>
32
174
+ * Paolo Bonzini <pbonzini@redhat.com>
33
QLIST_FOREACH(req, &s->inflight_reqs, list) {
175
+ *
34
- if (end > req->start_byte && start < req->end_byte) {
176
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
35
+ if (start + bytes > req->start && start < req->start + req->bytes) {
177
+ * See the COPYING file in the top-level directory.
36
return req;
178
+ */
37
}
179
+#ifndef TEST_IOTHREAD_H
180
+#define TEST_IOTHREAD_H
181
+
182
+#include "block/aio.h"
183
+#include "qemu/thread.h"
184
+
185
+typedef struct IOThread IOThread;
186
+
187
+IOThread *iothread_new(void);
188
+void iothread_join(IOThread *iothread);
189
+AioContext *iothread_get_aio_context(IOThread *iothread);
190
+
191
+#endif
192
diff --git a/tests/iothread.c b/tests/iothread.c
193
new file mode 100644
194
index XXXXXXX..XXXXXXX
195
--- /dev/null
196
+++ b/tests/iothread.c
197
@@ -XXX,XX +XXX,XX @@
198
+/*
199
+ * Event loop thread implementation for unit tests
200
+ *
201
+ * Copyright Red Hat Inc., 2013, 2016
202
+ *
203
+ * Authors:
204
+ * Stefan Hajnoczi <stefanha@redhat.com>
205
+ * Paolo Bonzini <pbonzini@redhat.com>
206
+ *
207
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
208
+ * See the COPYING file in the top-level directory.
209
+ *
210
+ */
211
+
212
+#include "qemu/osdep.h"
213
+#include "qapi/error.h"
214
+#include "block/aio.h"
215
+#include "qemu/main-loop.h"
216
+#include "qemu/rcu.h"
217
+#include "iothread.h"
218
+
219
+struct IOThread {
220
+ AioContext *ctx;
221
+
222
+ QemuThread thread;
223
+ QemuMutex init_done_lock;
224
+ QemuCond init_done_cond; /* is thread initialization done? */
225
+ bool stopping;
226
+};
227
+
228
+static __thread IOThread *my_iothread;
229
+
230
+AioContext *qemu_get_current_aio_context(void)
231
+{
232
+ return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
233
+}
234
+
235
+static void *iothread_run(void *opaque)
236
+{
237
+ IOThread *iothread = opaque;
238
+
239
+ rcu_register_thread();
240
+
241
+ my_iothread = iothread;
242
+ qemu_mutex_lock(&iothread->init_done_lock);
243
+ iothread->ctx = aio_context_new(&error_abort);
244
+ qemu_cond_signal(&iothread->init_done_cond);
245
+ qemu_mutex_unlock(&iothread->init_done_lock);
246
+
247
+ while (!atomic_read(&iothread->stopping)) {
248
+ aio_poll(iothread->ctx, true);
249
+ }
250
+
251
+ rcu_unregister_thread();
252
+ return NULL;
253
+}
254
+
255
+void iothread_join(IOThread *iothread)
256
+{
257
+ iothread->stopping = true;
258
+ aio_notify(iothread->ctx);
259
+ qemu_thread_join(&iothread->thread);
260
+ qemu_cond_destroy(&iothread->init_done_cond);
261
+ qemu_mutex_destroy(&iothread->init_done_lock);
262
+ aio_context_unref(iothread->ctx);
263
+ g_free(iothread);
264
+}
265
+
266
+IOThread *iothread_new(void)
267
+{
268
+ IOThread *iothread = g_new0(IOThread, 1);
269
+
270
+ qemu_mutex_init(&iothread->init_done_lock);
271
+ qemu_cond_init(&iothread->init_done_cond);
272
+ qemu_thread_create(&iothread->thread, NULL, iothread_run,
273
+ iothread, QEMU_THREAD_JOINABLE);
274
+
275
+ /* Wait for initialization to complete */
276
+ qemu_mutex_lock(&iothread->init_done_lock);
277
+ while (iothread->ctx == NULL) {
278
+ qemu_cond_wait(&iothread->init_done_cond,
279
+ &iothread->init_done_lock);
280
+ }
281
+ qemu_mutex_unlock(&iothread->init_done_lock);
282
+ return iothread;
283
+}
284
+
285
+AioContext *iothread_get_aio_context(IOThread *iothread)
286
+{
287
+ return iothread->ctx;
288
+}
289
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
290
new file mode 100644
291
index XXXXXXX..XXXXXXX
292
--- /dev/null
293
+++ b/tests/test-aio-multithread.c
294
@@ -XXX,XX +XXX,XX @@
295
+/*
296
+ * AioContext multithreading tests
297
+ *
298
+ * Copyright Red Hat, Inc. 2016
299
+ *
300
+ * Authors:
301
+ * Paolo Bonzini <pbonzini@redhat.com>
302
+ *
303
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
304
+ * See the COPYING.LIB file in the top-level directory.
305
+ */
306
+
307
+#include "qemu/osdep.h"
308
+#include <glib.h>
309
+#include "block/aio.h"
310
+#include "qapi/error.h"
311
+#include "qemu/coroutine.h"
312
+#include "qemu/thread.h"
313
+#include "qemu/error-report.h"
314
+#include "iothread.h"
315
+
316
+/* AioContext management */
317
+
318
+#define NUM_CONTEXTS 5
319
+
320
+static IOThread *threads[NUM_CONTEXTS];
321
+static AioContext *ctx[NUM_CONTEXTS];
322
+static __thread int id = -1;
323
+
324
+static QemuEvent done_event;
325
+
326
+/* Run a function synchronously on a remote iothread. */
327
+
328
+typedef struct CtxRunData {
329
+ QEMUBHFunc *cb;
330
+ void *arg;
331
+} CtxRunData;
332
+
333
+static void ctx_run_bh_cb(void *opaque)
334
+{
335
+ CtxRunData *data = opaque;
336
+
337
+ data->cb(data->arg);
338
+ qemu_event_set(&done_event);
339
+}
340
+
341
+static void ctx_run(int i, QEMUBHFunc *cb, void *opaque)
342
+{
343
+ CtxRunData data = {
344
+ .cb = cb,
345
+ .arg = opaque
346
+ };
347
+
348
+ qemu_event_reset(&done_event);
349
+ aio_bh_schedule_oneshot(ctx[i], ctx_run_bh_cb, &data);
350
+ qemu_event_wait(&done_event);
351
+}
352
+
353
+/* Starting the iothreads. */
354
+
355
+static void set_id_cb(void *opaque)
356
+{
357
+ int *i = opaque;
358
+
359
+ id = *i;
360
+}
361
+
362
+static void create_aio_contexts(void)
363
+{
364
+ int i;
365
+
366
+ for (i = 0; i < NUM_CONTEXTS; i++) {
367
+ threads[i] = iothread_new();
368
+ ctx[i] = iothread_get_aio_context(threads[i]);
369
+ }
370
+
371
+ qemu_event_init(&done_event, false);
372
+ for (i = 0; i < NUM_CONTEXTS; i++) {
373
+ ctx_run(i, set_id_cb, &i);
374
+ }
375
+}
376
+
377
+/* Stopping the iothreads. */
378
+
379
+static void join_aio_contexts(void)
380
+{
381
+ int i;
382
+
383
+ for (i = 0; i < NUM_CONTEXTS; i++) {
384
+ aio_context_ref(ctx[i]);
385
+ }
386
+ for (i = 0; i < NUM_CONTEXTS; i++) {
387
+ iothread_join(threads[i]);
388
+ }
389
+ for (i = 0; i < NUM_CONTEXTS; i++) {
390
+ aio_context_unref(ctx[i]);
391
+ }
392
+ qemu_event_destroy(&done_event);
393
+}
394
+
395
+/* Basic test for the stuff above. */
396
+
397
+static void test_lifecycle(void)
398
+{
399
+ create_aio_contexts();
400
+ join_aio_contexts();
401
+}
402
+
403
+/* aio_co_schedule test. */
404
+
405
+static Coroutine *to_schedule[NUM_CONTEXTS];
406
+
407
+static bool now_stopping;
408
+
409
+static int count_retry;
410
+static int count_here;
411
+static int count_other;
412
+
413
+static bool schedule_next(int n)
414
+{
415
+ Coroutine *co;
416
+
417
+ co = atomic_xchg(&to_schedule[n], NULL);
418
+ if (!co) {
419
+ atomic_inc(&count_retry);
420
+ return false;
421
+ }
422
+
423
+ if (n == id) {
424
+ atomic_inc(&count_here);
425
+ } else {
426
+ atomic_inc(&count_other);
427
+ }
428
+
429
+ aio_co_schedule(ctx[n], co);
430
+ return true;
431
+}
432
+
433
+static void finish_cb(void *opaque)
434
+{
435
+ schedule_next(id);
436
+}
437
+
438
+static coroutine_fn void test_multi_co_schedule_entry(void *opaque)
439
+{
440
+ g_assert(to_schedule[id] == NULL);
441
+ atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
442
+
443
+ while (!atomic_mb_read(&now_stopping)) {
444
+ int n;
445
+
446
+ n = g_test_rand_int_range(0, NUM_CONTEXTS);
447
+ schedule_next(n);
448
+ qemu_coroutine_yield();
449
+
450
+ g_assert(to_schedule[id] == NULL);
451
+ atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
452
+ }
453
+}
454
+
455
+
456
+static void test_multi_co_schedule(int seconds)
457
+{
458
+ int i;
459
+
460
+ count_here = count_other = count_retry = 0;
461
+ now_stopping = false;
462
+
463
+ create_aio_contexts();
464
+ for (i = 0; i < NUM_CONTEXTS; i++) {
465
+ Coroutine *co1 = qemu_coroutine_create(test_multi_co_schedule_entry, NULL);
466
+ aio_co_schedule(ctx[i], co1);
467
+ }
468
+
469
+ g_usleep(seconds * 1000000);
470
+
471
+ atomic_mb_set(&now_stopping, true);
472
+ for (i = 0; i < NUM_CONTEXTS; i++) {
473
+ ctx_run(i, finish_cb, NULL);
474
+ to_schedule[i] = NULL;
475
+ }
476
+
477
+ join_aio_contexts();
478
+ g_test_message("scheduled %d, queued %d, retry %d, total %d\n",
479
+ count_other, count_here, count_retry,
480
+ count_here + count_other + count_retry);
481
+}
482
+
483
+static void test_multi_co_schedule_1(void)
484
+{
485
+ test_multi_co_schedule(1);
486
+}
487
+
488
+static void test_multi_co_schedule_10(void)
489
+{
490
+ test_multi_co_schedule(10);
491
+}
492
+
493
+/* End of tests. */
494
+
495
+int main(int argc, char **argv)
496
+{
497
+ init_clocks();
498
+
499
+ g_test_init(&argc, &argv, NULL);
500
+ g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
501
+ if (g_test_quick()) {
502
+ g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
503
+ } else {
504
+ g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
505
+ }
506
+ return g_test_run();
507
+}
508
diff --git a/util/async.c b/util/async.c
509
index XXXXXXX..XXXXXXX 100644
510
--- a/util/async.c
511
+++ b/util/async.c
512
@@ -XXX,XX +XXX,XX @@
513
#include "qemu/main-loop.h"
514
#include "qemu/atomic.h"
515
#include "block/raw-aio.h"
516
+#include "qemu/coroutine_int.h"
517
+#include "trace.h"
518
519
/***********************************************************/
520
/* bottom halves (can be seen as timers which expire ASAP) */
521
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
38
}
522
}
39
@@ -XXX,XX +XXX,XX @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
523
#endif
40
524
41
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
525
+ assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
42
int64_t start,
526
+ qemu_bh_delete(ctx->co_schedule_bh);
43
- int64_t end)
527
+
44
+ int64_t bytes)
528
qemu_lockcnt_lock(&ctx->list_lock);
45
{
529
assert(!qemu_lockcnt_count(&ctx->list_lock));
46
BlockCopyInFlightReq *req;
530
while (ctx->first_bh) {
47
531
@@ -XXX,XX +XXX,XX @@ static bool event_notifier_poll(void *opaque)
48
- while ((req = find_conflicting_inflight_req(s, start, end))) {
532
return atomic_read(&ctx->notified);
49
+ while ((req = find_conflicting_inflight_req(s, start, bytes))) {
50
qemu_co_queue_wait(&req->wait_queue, NULL);
51
}
52
}
533
}
53
534
54
static void block_copy_inflight_req_begin(BlockCopyState *s,
535
+static void co_schedule_bh_cb(void *opaque)
55
BlockCopyInFlightReq *req,
536
+{
56
- int64_t start, int64_t end)
537
+ AioContext *ctx = opaque;
57
+ int64_t start, int64_t bytes)
538
+ QSLIST_HEAD(, Coroutine) straight, reversed;
58
{
539
+
59
- req->start_byte = start;
540
+ QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
60
- req->end_byte = end;
541
+ QSLIST_INIT(&straight);
61
+ req->start = start;
542
+
62
+ req->bytes = bytes;
543
+ while (!QSLIST_EMPTY(&reversed)) {
63
qemu_co_queue_init(&req->wait_queue);
544
+ Coroutine *co = QSLIST_FIRST(&reversed);
64
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
545
+ QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
65
}
546
+ QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
66
@@ -XXX,XX +XXX,XX @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
547
+ }
67
/*
548
+
68
* block_copy_do_copy
549
+ while (!QSLIST_EMPTY(&straight)) {
69
*
550
+ Coroutine *co = QSLIST_FIRST(&straight);
70
- * Do copy of cluser-aligned chunk. @end is allowed to exceed s->len only to
551
+ QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
71
- * cover last cluster when s->len is not aligned to clusters.
552
+ trace_aio_co_schedule_bh_cb(ctx, co);
72
+ * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
553
+ qemu_coroutine_enter(co);
73
+ * s->len only to cover last cluster when s->len is not aligned to clusters.
554
+ }
74
*
555
+}
75
* No sync here: nor bitmap neighter intersecting requests handling, only copy.
556
+
76
*
557
AioContext *aio_context_new(Error **errp)
77
* Returns 0 on success.
78
*/
79
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
80
- int64_t start, int64_t end,
81
+ int64_t start, int64_t bytes,
82
bool zeroes, bool *error_is_read)
83
{
558
{
84
int ret;
559
int ret;
85
- int nbytes = MIN(end, s->len) - start;
560
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
86
+ int64_t nbytes = MIN(start + bytes, s->len) - start;
561
}
87
void *bounce_buffer = NULL;
562
g_source_set_can_recurse(&ctx->source, true);
88
563
qemu_lockcnt_init(&ctx->list_lock);
89
+ assert(start >= 0 && bytes > 0 && INT64_MAX - start >= bytes);
564
+
90
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
565
+ ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
91
- assert(QEMU_IS_ALIGNED(end, s->cluster_size));
566
+ QSLIST_INIT(&ctx->scheduled_coroutines);
92
- assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
567
+
93
+ assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
568
aio_set_event_notifier(ctx, &ctx->notifier,
94
+ assert(start < s->len);
569
false,
95
+ assert(start + bytes <= s->len ||
570
(EventNotifierHandler *)
96
+ start + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
571
@@ -XXX,XX +XXX,XX @@ fail:
97
+ assert(nbytes < INT_MAX);
572
return NULL;
98
99
if (zeroes) {
100
ret = bdrv_co_pwrite_zeroes(s->target, start, nbytes, s->write_flags &
101
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
102
}
573
}
103
574
104
int coroutine_fn block_copy(BlockCopyState *s,
575
+void aio_co_schedule(AioContext *ctx, Coroutine *co)
105
- int64_t start, uint64_t bytes,
576
+{
106
+ int64_t start, int64_t bytes,
577
+ trace_aio_co_schedule(ctx, co);
107
bool *error_is_read)
578
+ QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
579
+ co, co_scheduled_next);
580
+ qemu_bh_schedule(ctx->co_schedule_bh);
581
+}
582
+
583
+void aio_co_wake(struct Coroutine *co)
584
+{
585
+ AioContext *ctx;
586
+
587
+ /* Read coroutine before co->ctx. Matches smp_wmb in
588
+ * qemu_coroutine_enter.
589
+ */
590
+ smp_read_barrier_depends();
591
+ ctx = atomic_read(&co->ctx);
592
+
593
+ if (ctx != qemu_get_current_aio_context()) {
594
+ aio_co_schedule(ctx, co);
595
+ return;
596
+ }
597
+
598
+ if (qemu_in_coroutine()) {
599
+ Coroutine *self = qemu_coroutine_self();
600
+ assert(self != co);
601
+ QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
602
+ } else {
603
+ aio_context_acquire(ctx);
604
+ qemu_coroutine_enter(co);
605
+ aio_context_release(ctx);
606
+ }
607
+}
608
+
609
void aio_context_ref(AioContext *ctx)
108
{
610
{
109
int ret = 0;
611
g_source_ref(&ctx->source);
110
- int64_t end = bytes + start; /* bytes */
612
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
111
BlockCopyInFlightReq req;
613
index XXXXXXX..XXXXXXX 100644
112
614
--- a/util/qemu-coroutine.c
113
/*
615
+++ b/util/qemu-coroutine.c
114
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
616
@@ -XXX,XX +XXX,XX @@
115
bdrv_get_aio_context(s->target->bs));
617
#include "qemu/atomic.h"
116
618
#include "qemu/coroutine.h"
117
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
619
#include "qemu/coroutine_int.h"
118
- assert(QEMU_IS_ALIGNED(end, s->cluster_size));
620
+#include "block/aio.h"
119
+ assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
621
120
622
enum {
121
block_copy_wait_inflight_reqs(s, start, bytes);
623
POOL_BATCH_SIZE = 64,
122
- block_copy_inflight_req_begin(s, &req, start, end);
624
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_enter(Coroutine *co)
123
+ block_copy_inflight_req_begin(s, &req, start, bytes);
124
125
- while (start < end) {
126
- int64_t next_zero, chunk_end, status_bytes;
127
+ while (bytes) {
128
+ int64_t next_zero, cur_bytes, status_bytes;
129
130
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
131
trace_block_copy_skip(s, start);
132
start += s->cluster_size;
133
+ bytes -= s->cluster_size;
134
continue; /* already copied */
135
}
136
137
- chunk_end = MIN(end, start + s->copy_size);
138
+ cur_bytes = MIN(bytes, s->copy_size);
139
140
next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
141
- chunk_end - start);
142
+ cur_bytes);
143
if (next_zero >= 0) {
144
assert(next_zero > start); /* start is dirty */
145
- assert(next_zero < chunk_end); /* no need to do MIN() */
146
- chunk_end = next_zero;
147
+ assert(next_zero < start + cur_bytes); /* no need to do MIN() */
148
+ cur_bytes = next_zero - start;
149
}
150
151
- ret = block_copy_block_status(s, start, chunk_end - start,
152
- &status_bytes);
153
+ ret = block_copy_block_status(s, start, cur_bytes, &status_bytes);
154
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
155
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, status_bytes);
156
progress_set_remaining(s->progress,
157
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
158
s->in_flight_bytes);
159
trace_block_copy_skip_range(s, start, status_bytes);
160
start += status_bytes;
161
+ bytes -= status_bytes;
162
continue;
163
}
164
165
- chunk_end = MIN(chunk_end, start + status_bytes);
166
+ cur_bytes = MIN(cur_bytes, status_bytes);
167
168
trace_block_copy_process(s, start);
169
170
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
171
- s->in_flight_bytes += chunk_end - start;
172
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
173
+ s->in_flight_bytes += cur_bytes;
174
175
- co_get_from_shres(s->mem, chunk_end - start);
176
- ret = block_copy_do_copy(s, start, chunk_end, ret & BDRV_BLOCK_ZERO,
177
+ co_get_from_shres(s->mem, cur_bytes);
178
+ ret = block_copy_do_copy(s, start, cur_bytes, ret & BDRV_BLOCK_ZERO,
179
error_is_read);
180
- co_put_to_shres(s->mem, chunk_end - start);
181
- s->in_flight_bytes -= chunk_end - start;
182
+ co_put_to_shres(s->mem, cur_bytes);
183
+ s->in_flight_bytes -= cur_bytes;
184
if (ret < 0) {
185
- bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
186
+ bdrv_set_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
187
break;
188
}
189
190
- progress_work_done(s->progress, chunk_end - start);
191
- s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
192
- start = chunk_end;
193
- ret = 0;
194
+ progress_work_done(s->progress, cur_bytes);
195
+ s->progress_bytes_callback(cur_bytes, s->progress_opaque);
196
+ start += cur_bytes;
197
+ bytes -= cur_bytes;
198
}
625
}
199
626
200
block_copy_inflight_req_end(&req);
627
co->caller = self;
201
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
628
+ co->ctx = qemu_get_current_aio_context();
629
+
630
+ /* Store co->ctx before anything that stores co. Matches
631
+ * barrier in aio_co_wake.
632
+ */
633
+ smp_wmb();
634
+
635
ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
636
637
qemu_co_queue_run_restart(co);
638
diff --git a/util/trace-events b/util/trace-events
202
index XXXXXXX..XXXXXXX 100644
639
index XXXXXXX..XXXXXXX 100644
203
--- a/include/block/block-copy.h
640
--- a/util/trace-events
204
+++ b/include/block/block-copy.h
641
+++ b/util/trace-events
205
@@ -XXX,XX +XXX,XX @@
642
@@ -XXX,XX +XXX,XX @@ run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
206
#include "qemu/co-shared-resource.h"
643
poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
207
644
poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
208
typedef struct BlockCopyInFlightReq {
645
209
- int64_t start_byte;
646
+# util/async.c
210
- int64_t end_byte;
647
+aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
211
+ int64_t start;
648
+aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
212
+ int64_t bytes;
649
+
213
QLIST_ENTRY(BlockCopyInFlightReq) list;
650
# util/thread-pool.c
214
CoQueue wait_queue; /* coroutines blocked on this request */
651
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
215
} BlockCopyInFlightReq;
652
thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
216
@@ -XXX,XX +XXX,XX @@ void block_copy_state_free(BlockCopyState *s);
217
int64_t block_copy_reset_unallocated(BlockCopyState *s,
218
int64_t offset, int64_t *count);
219
220
-int coroutine_fn block_copy(BlockCopyState *s, int64_t start, uint64_t bytes,
221
+int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
222
bool *error_is_read);
223
224
#endif /* BLOCK_COPY_H */
225
--
653
--
226
2.24.1
654
2.9.3
227
655
228
656
diff view generated by jsdifflib
New patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
2
3
qcow2_create2 calls this. Do not run a nested event loop, as that
4
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
5
list of the currently running one.
6
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/block-backend.c | 12 ++++++++----
14
1 file changed, 8 insertions(+), 4 deletions(-)
15
16
diff --git a/block/block-backend.c b/block/block-backend.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/block-backend.c
19
+++ b/block/block-backend.c
20
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
21
{
22
QEMUIOVector qiov;
23
struct iovec iov;
24
- Coroutine *co;
25
BlkRwCo rwco;
26
27
iov = (struct iovec) {
28
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
29
.ret = NOT_DONE,
30
};
31
32
- co = qemu_coroutine_create(co_entry, &rwco);
33
- qemu_coroutine_enter(co);
34
- BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
35
+ if (qemu_in_coroutine()) {
36
+ /* Fast-path if already in coroutine context */
37
+ co_entry(&rwco);
38
+ } else {
39
+ Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
40
+ qemu_coroutine_enter(co);
41
+ BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
42
+ }
43
44
return rwco.ret;
45
}
46
--
47
2.9.3
48
49
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Use bdrv_block_status_above to choose effective chunk size and to handle
3
Once the thread pool starts using aio_co_wake, it will also need
4
zeroes effectively.
4
qemu_get_current_aio_context(). Make test-thread-pool create
5
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
6
and tests/iothread.c can provide the rest.
5
7
6
This substitutes checking for just being allocated or not, and drops
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
old code path for it. Assistance by backup job is dropped too, as
9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8
caching block-status information is more difficult than just caching
10
Reviewed-by: Fam Zheng <famz@redhat.com>
9
is-allocated information in our dirty bitmap, and backup job is not
11
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
10
a good place for this caching anyway.
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
tests/test-thread-pool.c | 12 +++---------
15
1 file changed, 3 insertions(+), 9 deletions(-)
11
16
12
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
17
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
13
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
14
Reviewed-by: Max Reitz <mreitz@redhat.com>
15
Message-Id: <20200311103004.7649-5-vsementsov@virtuozzo.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
17
---
18
block/block-copy.c | 73 +++++++++++++++++++++++++++++++++++++---------
19
block/trace-events | 1 +
20
2 files changed, 61 insertions(+), 13 deletions(-)
21
22
diff --git a/block/block-copy.c b/block/block-copy.c
23
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
24
--- a/block/block-copy.c
19
--- a/tests/test-thread-pool.c
25
+++ b/block/block-copy.c
20
+++ b/tests/test-thread-pool.c
26
@@ -XXX,XX +XXX,XX @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
21
@@ -XXX,XX +XXX,XX @@
27
*/
22
#include "qapi/error.h"
28
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
23
#include "qemu/timer.h"
29
int64_t start, int64_t end,
24
#include "qemu/error-report.h"
30
- bool *error_is_read)
25
+#include "qemu/main-loop.h"
31
+ bool zeroes, bool *error_is_read)
26
27
static AioContext *ctx;
28
static ThreadPool *pool;
29
@@ -XXX,XX +XXX,XX @@ static void test_cancel_async(void)
30
int main(int argc, char **argv)
32
{
31
{
33
int ret;
32
int ret;
34
int nbytes = MIN(end, s->len) - start;
33
- Error *local_error = NULL;
35
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
34
36
assert(QEMU_IS_ALIGNED(end, s->cluster_size));
35
- init_clocks();
37
assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
36
-
38
37
- ctx = aio_context_new(&local_error);
39
+ if (zeroes) {
38
- if (!ctx) {
40
+ ret = bdrv_co_pwrite_zeroes(s->target, start, nbytes, s->write_flags &
39
- error_reportf_err(local_error, "Failed to create AIO Context: ");
41
+ ~BDRV_REQ_WRITE_COMPRESSED);
40
- exit(1);
42
+ if (ret < 0) {
41
- }
43
+ trace_block_copy_write_zeroes_fail(s, start, ret);
42
+ qemu_init_main_loop(&error_abort);
44
+ if (error_is_read) {
43
+ ctx = qemu_get_current_aio_context();
45
+ *error_is_read = false;
44
pool = aio_get_thread_pool(ctx);
46
+ }
45
47
+ }
46
g_test_init(&argc, &argv, NULL);
48
+ return ret;
47
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
49
+ }
48
50
+
49
ret = g_test_run();
51
if (s->use_copy_range) {
50
52
ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
51
- aio_context_unref(ctx);
53
0, s->write_flags);
54
@@ -XXX,XX +XXX,XX @@ out:
55
return ret;
52
return ret;
56
}
53
}
57
58
+static int block_copy_block_status(BlockCopyState *s, int64_t offset,
59
+ int64_t bytes, int64_t *pnum)
60
+{
61
+ int64_t num;
62
+ BlockDriverState *base;
63
+ int ret;
64
+
65
+ if (s->skip_unallocated && s->source->bs->backing) {
66
+ base = s->source->bs->backing->bs;
67
+ } else {
68
+ base = NULL;
69
+ }
70
+
71
+ ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
72
+ NULL, NULL);
73
+ if (ret < 0 || num < s->cluster_size) {
74
+ /*
75
+ * On error or if failed to obtain large enough chunk just fallback to
76
+ * copy one cluster.
77
+ */
78
+ num = s->cluster_size;
79
+ ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
80
+ } else if (offset + num == s->len) {
81
+ num = QEMU_ALIGN_UP(num, s->cluster_size);
82
+ } else {
83
+ num = QEMU_ALIGN_DOWN(num, s->cluster_size);
84
+ }
85
+
86
+ *pnum = num;
87
+ return ret;
88
+}
89
+
90
/*
91
* Check if the cluster starting at offset is allocated or not.
92
* return via pnum the number of contiguous clusters sharing this allocation.
93
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
94
{
95
int ret = 0;
96
int64_t end = bytes + start; /* bytes */
97
- int64_t status_bytes;
98
BlockCopyInFlightReq req;
99
100
/*
101
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
102
block_copy_inflight_req_begin(s, &req, start, end);
103
104
while (start < end) {
105
- int64_t next_zero, chunk_end;
106
+ int64_t next_zero, chunk_end, status_bytes;
107
108
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
109
trace_block_copy_skip(s, start);
110
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
111
chunk_end = next_zero;
112
}
113
114
- if (s->skip_unallocated) {
115
- ret = block_copy_reset_unallocated(s, start, &status_bytes);
116
- if (ret == 0) {
117
- trace_block_copy_skip_range(s, start, status_bytes);
118
- start += status_bytes;
119
- continue;
120
- }
121
- /* Clamp to known allocated region */
122
- chunk_end = MIN(chunk_end, start + status_bytes);
123
+ ret = block_copy_block_status(s, start, chunk_end - start,
124
+ &status_bytes);
125
+ if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
126
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, start, status_bytes);
127
+ progress_set_remaining(s->progress,
128
+ bdrv_get_dirty_count(s->copy_bitmap) +
129
+ s->in_flight_bytes);
130
+ trace_block_copy_skip_range(s, start, status_bytes);
131
+ start += status_bytes;
132
+ continue;
133
}
134
135
+ chunk_end = MIN(chunk_end, start + status_bytes);
136
+
137
trace_block_copy_process(s, start);
138
139
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
140
s->in_flight_bytes += chunk_end - start;
141
142
co_get_from_shres(s->mem, chunk_end - start);
143
- ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
144
+ ret = block_copy_do_copy(s, start, chunk_end, ret & BDRV_BLOCK_ZERO,
145
+ error_is_read);
146
co_put_to_shres(s->mem, chunk_end - start);
147
s->in_flight_bytes -= chunk_end - start;
148
if (ret < 0) {
149
diff --git a/block/trace-events b/block/trace-events
150
index XXXXXXX..XXXXXXX 100644
151
--- a/block/trace-events
152
+++ b/block/trace-events
153
@@ -XXX,XX +XXX,XX @@ block_copy_process(void *bcs, int64_t start) "bcs %p start %"PRId64
154
block_copy_copy_range_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
155
block_copy_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
156
block_copy_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
157
+block_copy_write_zeroes_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
158
159
# ../blockdev.c
160
qmp_block_job_cancel(void *job) "job %p"
161
--
54
--
162
2.24.1
55
2.9.3
163
56
164
57
diff view generated by jsdifflib
New patch
1
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
3
This is in preparation for making qio_channel_yield work on
4
AioContexts other than the main one.
5
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
include/io/channel.h | 25 +++++++++++++++++++++++++
14
io/channel-command.c | 13 +++++++++++++
15
io/channel-file.c | 11 +++++++++++
16
io/channel-socket.c | 16 +++++++++++-----
17
io/channel-tls.c | 12 ++++++++++++
18
io/channel-watch.c | 6 ++++++
19
io/channel.c | 11 +++++++++++
20
7 files changed, 89 insertions(+), 5 deletions(-)
21
22
diff --git a/include/io/channel.h b/include/io/channel.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/io/channel.h
25
+++ b/include/io/channel.h
26
@@ -XXX,XX +XXX,XX @@
27
28
#include "qemu-common.h"
29
#include "qom/object.h"
30
+#include "block/aio.h"
31
32
#define TYPE_QIO_CHANNEL "qio-channel"
33
#define QIO_CHANNEL(obj) \
34
@@ -XXX,XX +XXX,XX @@ struct QIOChannelClass {
35
off_t offset,
36
int whence,
37
Error **errp);
38
+ void (*io_set_aio_fd_handler)(QIOChannel *ioc,
39
+ AioContext *ctx,
40
+ IOHandler *io_read,
41
+ IOHandler *io_write,
42
+ void *opaque);
43
};
44
45
/* General I/O handling functions */
46
@@ -XXX,XX +XXX,XX @@ void qio_channel_yield(QIOChannel *ioc,
47
void qio_channel_wait(QIOChannel *ioc,
48
GIOCondition condition);
49
50
+/**
51
+ * qio_channel_set_aio_fd_handler:
52
+ * @ioc: the channel object
53
+ * @ctx: the AioContext to set the handlers on
54
+ * @io_read: the read handler
55
+ * @io_write: the write handler
56
+ * @opaque: the opaque value passed to the handler
57
+ *
58
+ * This is used internally by qio_channel_yield(). It can
59
+ * be used by channel implementations to forward the handlers
60
+ * to another channel (e.g. from #QIOChannelTLS to the
61
+ * underlying socket).
62
+ */
63
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
64
+ AioContext *ctx,
65
+ IOHandler *io_read,
66
+ IOHandler *io_write,
67
+ void *opaque);
68
+
69
#endif /* QIO_CHANNEL_H */
70
diff --git a/io/channel-command.c b/io/channel-command.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/io/channel-command.c
73
+++ b/io/channel-command.c
74
@@ -XXX,XX +XXX,XX @@ static int qio_channel_command_close(QIOChannel *ioc,
75
}
76
77
78
+static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
79
+ AioContext *ctx,
80
+ IOHandler *io_read,
81
+ IOHandler *io_write,
82
+ void *opaque)
83
+{
84
+ QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
85
+ aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque);
86
+ aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, opaque);
87
+}
88
+
89
+
90
static GSource *qio_channel_command_create_watch(QIOChannel *ioc,
91
GIOCondition condition)
92
{
93
@@ -XXX,XX +XXX,XX @@ static void qio_channel_command_class_init(ObjectClass *klass,
94
ioc_klass->io_set_blocking = qio_channel_command_set_blocking;
95
ioc_klass->io_close = qio_channel_command_close;
96
ioc_klass->io_create_watch = qio_channel_command_create_watch;
97
+ ioc_klass->io_set_aio_fd_handler = qio_channel_command_set_aio_fd_handler;
98
}
99
100
static const TypeInfo qio_channel_command_info = {
101
diff --git a/io/channel-file.c b/io/channel-file.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/io/channel-file.c
104
+++ b/io/channel-file.c
105
@@ -XXX,XX +XXX,XX @@ static int qio_channel_file_close(QIOChannel *ioc,
106
}
107
108
109
+static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
110
+ AioContext *ctx,
111
+ IOHandler *io_read,
112
+ IOHandler *io_write,
113
+ void *opaque)
114
+{
115
+ QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
116
+ aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque);
117
+}
118
+
119
static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
120
GIOCondition condition)
121
{
122
@@ -XXX,XX +XXX,XX @@ static void qio_channel_file_class_init(ObjectClass *klass,
123
ioc_klass->io_seek = qio_channel_file_seek;
124
ioc_klass->io_close = qio_channel_file_close;
125
ioc_klass->io_create_watch = qio_channel_file_create_watch;
126
+ ioc_klass->io_set_aio_fd_handler = qio_channel_file_set_aio_fd_handler;
127
}
128
129
static const TypeInfo qio_channel_file_info = {
130
diff --git a/io/channel-socket.c b/io/channel-socket.c
131
index XXXXXXX..XXXXXXX 100644
132
--- a/io/channel-socket.c
133
+++ b/io/channel-socket.c
134
@@ -XXX,XX +XXX,XX @@ qio_channel_socket_set_blocking(QIOChannel *ioc,
135
qemu_set_block(sioc->fd);
136
} else {
137
qemu_set_nonblock(sioc->fd);
138
-#ifdef WIN32
139
- WSAEventSelect(sioc->fd, ioc->event,
140
- FD_READ | FD_ACCEPT | FD_CLOSE |
141
- FD_CONNECT | FD_WRITE | FD_OOB);
142
-#endif
143
}
144
return 0;
145
}
146
@@ -XXX,XX +XXX,XX @@ qio_channel_socket_shutdown(QIOChannel *ioc,
147
return 0;
148
}
149
150
+static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
151
+ AioContext *ctx,
152
+ IOHandler *io_read,
153
+ IOHandler *io_write,
154
+ void *opaque)
155
+{
156
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
157
+ aio_set_fd_handler(ctx, sioc->fd, false, io_read, io_write, NULL, opaque);
158
+}
159
+
160
static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
161
GIOCondition condition)
162
{
163
@@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_class_init(ObjectClass *klass,
164
ioc_klass->io_set_cork = qio_channel_socket_set_cork;
165
ioc_klass->io_set_delay = qio_channel_socket_set_delay;
166
ioc_klass->io_create_watch = qio_channel_socket_create_watch;
167
+ ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
168
}
169
170
static const TypeInfo qio_channel_socket_info = {
171
diff --git a/io/channel-tls.c b/io/channel-tls.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/io/channel-tls.c
174
+++ b/io/channel-tls.c
175
@@ -XXX,XX +XXX,XX @@ static int qio_channel_tls_close(QIOChannel *ioc,
176
return qio_channel_close(tioc->master, errp);
177
}
178
179
+static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc,
180
+ AioContext *ctx,
181
+ IOHandler *io_read,
182
+ IOHandler *io_write,
183
+ void *opaque)
184
+{
185
+ QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
186
+
187
+ qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
188
+}
189
+
190
static GSource *qio_channel_tls_create_watch(QIOChannel *ioc,
191
GIOCondition condition)
192
{
193
@@ -XXX,XX +XXX,XX @@ static void qio_channel_tls_class_init(ObjectClass *klass,
194
ioc_klass->io_close = qio_channel_tls_close;
195
ioc_klass->io_shutdown = qio_channel_tls_shutdown;
196
ioc_klass->io_create_watch = qio_channel_tls_create_watch;
197
+ ioc_klass->io_set_aio_fd_handler = qio_channel_tls_set_aio_fd_handler;
198
}
199
200
static const TypeInfo qio_channel_tls_info = {
201
diff --git a/io/channel-watch.c b/io/channel-watch.c
202
index XXXXXXX..XXXXXXX 100644
203
--- a/io/channel-watch.c
204
+++ b/io/channel-watch.c
205
@@ -XXX,XX +XXX,XX @@ GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
206
GSource *source;
207
QIOChannelSocketSource *ssource;
208
209
+#ifdef WIN32
210
+ WSAEventSelect(socket, ioc->event,
211
+ FD_READ | FD_ACCEPT | FD_CLOSE |
212
+ FD_CONNECT | FD_WRITE | FD_OOB);
213
+#endif
214
+
215
source = g_source_new(&qio_channel_socket_source_funcs,
216
sizeof(QIOChannelSocketSource));
217
ssource = (QIOChannelSocketSource *)source;
218
diff --git a/io/channel.c b/io/channel.c
219
index XXXXXXX..XXXXXXX 100644
220
--- a/io/channel.c
221
+++ b/io/channel.c
222
@@ -XXX,XX +XXX,XX @@ GSource *qio_channel_create_watch(QIOChannel *ioc,
223
}
224
225
226
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
227
+ AioContext *ctx,
228
+ IOHandler *io_read,
229
+ IOHandler *io_write,
230
+ void *opaque)
231
+{
232
+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
233
+
234
+ klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque);
235
+}
236
+
237
guint qio_channel_add_watch(QIOChannel *ioc,
238
GIOCondition condition,
239
QIOChannelFunc func,
240
--
241
2.9.3
242
243
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Tests 261 and 272 fail on RHEL 7 with coreutils 8.22, since od
3
Support separate coroutines for reading and writing, and place the
4
--endian was not added until coreutils 8.23. Fix this by manually
4
read/write handlers on the AioContext that the QIOChannel is registered
5
constructing the final value one byte at a time.
5
with.
6
6
7
Fixes: fc8ba423
7
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
8
Reported-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Message-Id: <20200226125424.481840-1-eblake@redhat.com>
11
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
12
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
13
---
14
tests/qemu-iotests/common.rc | 22 +++++++++++++++++-----
14
include/io/channel.h | 47 ++++++++++++++++++++++++++--
15
1 file changed, 17 insertions(+), 5 deletions(-)
15
io/channel.c | 86 +++++++++++++++++++++++++++++++++++++++-------------
16
16
2 files changed, 109 insertions(+), 24 deletions(-)
17
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
17
18
diff --git a/include/io/channel.h b/include/io/channel.h
18
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/qemu-iotests/common.rc
20
--- a/include/io/channel.h
20
+++ b/tests/qemu-iotests/common.rc
21
+++ b/include/io/channel.h
21
@@ -XXX,XX +XXX,XX @@ poke_file()
22
@@ -XXX,XX +XXX,XX @@
22
# peek_file_le 'test.img' 512 2 => 65534
23
23
peek_file_le()
24
#include "qemu-common.h"
25
#include "qom/object.h"
26
+#include "qemu/coroutine.h"
27
#include "block/aio.h"
28
29
#define TYPE_QIO_CHANNEL "qio-channel"
30
@@ -XXX,XX +XXX,XX @@ struct QIOChannel {
31
Object parent;
32
unsigned int features; /* bitmask of QIOChannelFeatures */
33
char *name;
34
+ AioContext *ctx;
35
+ Coroutine *read_coroutine;
36
+ Coroutine *write_coroutine;
37
#ifdef _WIN32
38
HANDLE event; /* For use with GSource on Win32 */
39
#endif
40
@@ -XXX,XX +XXX,XX @@ guint qio_channel_add_watch(QIOChannel *ioc,
41
42
43
/**
44
+ * qio_channel_attach_aio_context:
45
+ * @ioc: the channel object
46
+ * @ctx: the #AioContext to set the handlers on
47
+ *
48
+ * Request that qio_channel_yield() sets I/O handlers on
49
+ * the given #AioContext. If @ctx is %NULL, qio_channel_yield()
50
+ * uses QEMU's main thread event loop.
51
+ *
52
+ * You can move a #QIOChannel from one #AioContext to another even if
53
+ * I/O handlers are set for a coroutine. However, #QIOChannel provides
54
+ * no synchronization between the calls to qio_channel_yield() and
55
+ * qio_channel_attach_aio_context().
56
+ *
57
+ * Therefore you should first call qio_channel_detach_aio_context()
58
+ * to ensure that the coroutine is not entered concurrently. Then,
59
+ * while the coroutine has yielded, call qio_channel_attach_aio_context(),
60
+ * and then aio_co_schedule() to place the coroutine on the new
61
+ * #AioContext. The calls to qio_channel_detach_aio_context()
62
+ * and qio_channel_attach_aio_context() should be protected with
63
+ * aio_context_acquire() and aio_context_release().
64
+ */
65
+void qio_channel_attach_aio_context(QIOChannel *ioc,
66
+ AioContext *ctx);
67
+
68
+/**
69
+ * qio_channel_detach_aio_context:
70
+ * @ioc: the channel object
71
+ *
72
+ * Disable any I/O handlers set by qio_channel_yield(). With the
73
+ * help of aio_co_schedule(), this allows moving a coroutine that was
74
+ * paused by qio_channel_yield() to another context.
75
+ */
76
+void qio_channel_detach_aio_context(QIOChannel *ioc);
77
+
78
+/**
79
* qio_channel_yield:
80
* @ioc: the channel object
81
* @condition: the I/O condition to wait for
82
*
83
- * Yields execution from the current coroutine until
84
- * the condition indicated by @condition becomes
85
- * available.
86
+ * Yields execution from the current coroutine until the condition
87
+ * indicated by @condition becomes available. @condition must
88
+ * be either %G_IO_IN or %G_IO_OUT; it cannot contain both. In
89
+ * addition, no two coroutines can be waiting on the same condition
90
+ * and channel at the same time.
91
*
92
* This must only be called from coroutine context
93
*/
94
diff --git a/io/channel.c b/io/channel.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/io/channel.c
97
+++ b/io/channel.c
98
@@ -XXX,XX +XXX,XX @@
99
#include "qemu/osdep.h"
100
#include "io/channel.h"
101
#include "qapi/error.h"
102
-#include "qemu/coroutine.h"
103
+#include "qemu/main-loop.h"
104
105
bool qio_channel_has_feature(QIOChannel *ioc,
106
QIOChannelFeature feature)
107
@@ -XXX,XX +XXX,XX @@ off_t qio_channel_io_seek(QIOChannel *ioc,
108
}
109
110
111
-typedef struct QIOChannelYieldData QIOChannelYieldData;
112
-struct QIOChannelYieldData {
113
- QIOChannel *ioc;
114
- Coroutine *co;
115
-};
116
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc);
117
118
+static void qio_channel_restart_read(void *opaque)
119
+{
120
+ QIOChannel *ioc = opaque;
121
+ Coroutine *co = ioc->read_coroutine;
122
+
123
+ ioc->read_coroutine = NULL;
124
+ qio_channel_set_aio_fd_handlers(ioc);
125
+ aio_co_wake(co);
126
+}
127
128
-static gboolean qio_channel_yield_enter(QIOChannel *ioc,
129
- GIOCondition condition,
130
- gpointer opaque)
131
+static void qio_channel_restart_write(void *opaque)
24
{
132
{
25
- # Wrap in echo $() to strip spaces
133
- QIOChannelYieldData *data = opaque;
26
- echo $(od -j"$2" -N"$3" --endian=little -An -vtu"$3" "$1")
134
- qemu_coroutine_enter(data->co);
27
+ local val=0 shift=0 byte
135
- return FALSE;
28
+
136
+ QIOChannel *ioc = opaque;
29
+ # coreutils' od --endian is not portable, so manually assemble bytes.
137
+ Coroutine *co = ioc->write_coroutine;
30
+ for byte in $(od -j"$2" -N"$3" -An -v -tu1 "$1"); do
138
+
31
+ val=$(( val | (byte << shift) ))
139
+ ioc->write_coroutine = NULL;
32
+ shift=$((shift + 8))
140
+ qio_channel_set_aio_fd_handlers(ioc);
33
+ done
141
+ aio_co_wake(co);
34
+ printf %llu $val
35
}
142
}
36
143
37
# peek_file_be 'test.img' 512 2 => 65279
144
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
38
peek_file_be()
145
+{
146
+ IOHandler *rd_handler = NULL, *wr_handler = NULL;
147
+ AioContext *ctx;
148
+
149
+ if (ioc->read_coroutine) {
150
+ rd_handler = qio_channel_restart_read;
151
+ }
152
+ if (ioc->write_coroutine) {
153
+ wr_handler = qio_channel_restart_write;
154
+ }
155
+
156
+ ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
157
+ qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
158
+}
159
+
160
+void qio_channel_attach_aio_context(QIOChannel *ioc,
161
+ AioContext *ctx)
162
+{
163
+ AioContext *old_ctx;
164
+ if (ioc->ctx == ctx) {
165
+ return;
166
+ }
167
+
168
+ old_ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
169
+ qio_channel_set_aio_fd_handler(ioc, old_ctx, NULL, NULL, NULL);
170
+ ioc->ctx = ctx;
171
+ qio_channel_set_aio_fd_handlers(ioc);
172
+}
173
+
174
+void qio_channel_detach_aio_context(QIOChannel *ioc)
175
+{
176
+ ioc->read_coroutine = NULL;
177
+ ioc->write_coroutine = NULL;
178
+ qio_channel_set_aio_fd_handlers(ioc);
179
+ ioc->ctx = NULL;
180
+}
181
182
void coroutine_fn qio_channel_yield(QIOChannel *ioc,
183
GIOCondition condition)
39
{
184
{
40
- # Wrap in echo $() to strip spaces
185
- QIOChannelYieldData data;
41
- echo $(od -j"$2" -N"$3" --endian=big -An -vtu"$3" "$1")
186
-
42
+ local val=0 byte
187
assert(qemu_in_coroutine());
43
+
188
- data.ioc = ioc;
44
+ # coreutils' od --endian is not portable, so manually assemble bytes.
189
- data.co = qemu_coroutine_self();
45
+ for byte in $(od -j"$2" -N"$3" -An -v -tu1 "$1"); do
190
- qio_channel_add_watch(ioc,
46
+ val=$(( (val << 8) | byte ))
191
- condition,
47
+ done
192
- qio_channel_yield_enter,
48
+ printf %llu $val
193
- &data,
194
- NULL);
195
+ if (condition == G_IO_IN) {
196
+ assert(!ioc->read_coroutine);
197
+ ioc->read_coroutine = qemu_coroutine_self();
198
+ } else if (condition == G_IO_OUT) {
199
+ assert(!ioc->write_coroutine);
200
+ ioc->write_coroutine = qemu_coroutine_self();
201
+ } else {
202
+ abort();
203
+ }
204
+ qio_channel_set_aio_fd_handlers(ioc);
205
qemu_coroutine_yield();
49
}
206
}
50
207
51
-# peek_file_raw 'test.img' 512 2 => '\xff\xfe'
52
+# peek_file_raw 'test.img' 512 2 => '\xff\xfe'. Do not use if the raw data
53
+# is likely to contain \0 or trailing \n.
54
peek_file_raw()
55
{
56
dd if="$1" bs=1 skip="$2" count="$3" status=none
57
--
208
--
58
2.24.1
209
2.9.3
59
210
60
211
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
In most qemu-img sub-commands the --object option only makes sense when
3
In the client, read the reply headers from a coroutine, switching the
4
there is a filename. qemu-img measure is an exception because objects
4
read side between the "read header" coroutine and the I/O coroutine that
5
may be referenced from the image creation options instead of an existing
5
reads the body of the reply.
6
image file. Allow --object without a filename.
7
6
7
In the server, if the server can read more requests it will create a new
8
"read request" coroutine as soon as a request has been read. Otherwise,
9
the new coroutine is created in nbd_request_put.
10
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
14
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
15
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Message-Id: <20200221112522.1497712-4-stefanha@redhat.com>
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
---
17
---
13
qemu-img.c | 6 ++----
18
block/nbd-client.h | 2 +-
14
tests/qemu-iotests/178 | 2 +-
19
block/nbd-client.c | 117 ++++++++++++++++++++++++-----------------------------
15
tests/qemu-iotests/178.out.qcow2 | 8 ++++----
20
nbd/client.c | 2 +-
16
tests/qemu-iotests/178.out.raw | 8 ++++----
21
nbd/common.c | 9 +----
17
4 files changed, 11 insertions(+), 13 deletions(-)
22
nbd/server.c | 94 +++++++++++++-----------------------------
23
5 files changed, 83 insertions(+), 141 deletions(-)
18
24
19
diff --git a/qemu-img.c b/qemu-img.c
25
diff --git a/block/nbd-client.h b/block/nbd-client.h
20
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
21
--- a/qemu-img.c
27
--- a/block/nbd-client.h
22
+++ b/qemu-img.c
28
+++ b/block/nbd-client.h
23
@@ -XXX,XX +XXX,XX @@ static int img_measure(int argc, char **argv)
29
@@ -XXX,XX +XXX,XX @@ typedef struct NBDClientSession {
24
filename = argv[optind];
30
25
}
31
CoMutex send_mutex;
26
32
CoQueue free_sema;
27
- if (!filename &&
33
- Coroutine *send_coroutine;
28
- (object_opts || image_opts || fmt || snapshot_name || sn_opts)) {
34
+ Coroutine *read_reply_co;
29
- error_report("--object, --image-opts, -f, and -l "
35
int in_flight;
30
- "require a filename argument.");
36
31
+ if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
37
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
32
+ error_report("--image-opts, -f, and -l require a filename argument.");
38
diff --git a/block/nbd-client.c b/block/nbd-client.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/block/nbd-client.c
41
+++ b/block/nbd-client.c
42
@@ -XXX,XX +XXX,XX @@
43
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
44
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
45
46
-static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
47
+static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
48
{
49
+ NBDClientSession *s = nbd_get_client_session(bs);
50
int i;
51
52
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
53
@@ -XXX,XX +XXX,XX @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
54
qemu_coroutine_enter(s->recv_coroutine[i]);
55
}
56
}
57
+ BDRV_POLL_WHILE(bs, s->read_reply_co);
58
}
59
60
static void nbd_teardown_connection(BlockDriverState *bs)
61
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
62
qio_channel_shutdown(client->ioc,
63
QIO_CHANNEL_SHUTDOWN_BOTH,
64
NULL);
65
- nbd_recv_coroutines_enter_all(client);
66
+ nbd_recv_coroutines_enter_all(bs);
67
68
nbd_client_detach_aio_context(bs);
69
object_unref(OBJECT(client->sioc));
70
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
71
client->ioc = NULL;
72
}
73
74
-static void nbd_reply_ready(void *opaque)
75
+static coroutine_fn void nbd_read_reply_entry(void *opaque)
76
{
77
- BlockDriverState *bs = opaque;
78
- NBDClientSession *s = nbd_get_client_session(bs);
79
+ NBDClientSession *s = opaque;
80
uint64_t i;
81
int ret;
82
83
- if (!s->ioc) { /* Already closed */
84
- return;
85
- }
86
-
87
- if (s->reply.handle == 0) {
88
- /* No reply already in flight. Fetch a header. It is possible
89
- * that another thread has done the same thing in parallel, so
90
- * the socket is not readable anymore.
91
- */
92
+ for (;;) {
93
+ assert(s->reply.handle == 0);
94
ret = nbd_receive_reply(s->ioc, &s->reply);
95
- if (ret == -EAGAIN) {
96
- return;
97
- }
98
if (ret < 0) {
99
- s->reply.handle = 0;
100
- goto fail;
101
+ break;
102
}
103
- }
104
105
- /* There's no need for a mutex on the receive side, because the
106
- * handler acts as a synchronization point and ensures that only
107
- * one coroutine is called until the reply finishes. */
108
- i = HANDLE_TO_INDEX(s, s->reply.handle);
109
- if (i >= MAX_NBD_REQUESTS) {
110
- goto fail;
111
- }
112
+ /* There's no need for a mutex on the receive side, because the
113
+ * handler acts as a synchronization point and ensures that only
114
+ * one coroutine is called until the reply finishes.
115
+ */
116
+ i = HANDLE_TO_INDEX(s, s->reply.handle);
117
+ if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
118
+ break;
119
+ }
120
121
- if (s->recv_coroutine[i]) {
122
- qemu_coroutine_enter(s->recv_coroutine[i]);
123
- return;
124
+ /* We're woken up by the recv_coroutine itself. Note that there
125
+ * is no race between yielding and reentering read_reply_co. This
126
+ * is because:
127
+ *
128
+ * - if recv_coroutine[i] runs on the same AioContext, it is only
129
+ * entered after we yield
130
+ *
131
+ * - if recv_coroutine[i] runs on a different AioContext, reentering
132
+ * read_reply_co happens through a bottom half, which can only
133
+ * run after we yield.
134
+ */
135
+ aio_co_wake(s->recv_coroutine[i]);
136
+ qemu_coroutine_yield();
137
}
138
-
139
-fail:
140
- nbd_teardown_connection(bs);
141
-}
142
-
143
-static void nbd_restart_write(void *opaque)
144
-{
145
- BlockDriverState *bs = opaque;
146
-
147
- qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
148
+ s->read_reply_co = NULL;
149
}
150
151
static int nbd_co_send_request(BlockDriverState *bs,
152
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
153
QEMUIOVector *qiov)
154
{
155
NBDClientSession *s = nbd_get_client_session(bs);
156
- AioContext *aio_context;
157
int rc, ret, i;
158
159
qemu_co_mutex_lock(&s->send_mutex);
160
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
161
return -EPIPE;
162
}
163
164
- s->send_coroutine = qemu_coroutine_self();
165
- aio_context = bdrv_get_aio_context(bs);
166
-
167
- aio_set_fd_handler(aio_context, s->sioc->fd, false,
168
- nbd_reply_ready, nbd_restart_write, NULL, bs);
169
if (qiov) {
170
qio_channel_set_cork(s->ioc, true);
171
rc = nbd_send_request(s->ioc, request);
172
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
173
} else {
174
rc = nbd_send_request(s->ioc, request);
175
}
176
- aio_set_fd_handler(aio_context, s->sioc->fd, false,
177
- nbd_reply_ready, NULL, NULL, bs);
178
- s->send_coroutine = NULL;
179
qemu_co_mutex_unlock(&s->send_mutex);
180
return rc;
181
}
182
@@ -XXX,XX +XXX,XX @@ static void nbd_co_receive_reply(NBDClientSession *s,
183
{
184
int ret;
185
186
- /* Wait until we're woken up by the read handler. TODO: perhaps
187
- * peek at the next reply and avoid yielding if it's ours? */
188
+ /* Wait until we're woken up by nbd_read_reply_entry. */
189
qemu_coroutine_yield();
190
*reply = s->reply;
191
if (reply->handle != request->handle ||
192
@@ -XXX,XX +XXX,XX @@ static void nbd_coroutine_start(NBDClientSession *s,
193
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
194
}
195
196
-static void nbd_coroutine_end(NBDClientSession *s,
197
+static void nbd_coroutine_end(BlockDriverState *bs,
198
NBDRequest *request)
199
{
200
+ NBDClientSession *s = nbd_get_client_session(bs);
201
int i = HANDLE_TO_INDEX(s, request->handle);
202
+
203
s->recv_coroutine[i] = NULL;
204
- if (s->in_flight-- == MAX_NBD_REQUESTS) {
205
- qemu_co_queue_next(&s->free_sema);
206
+ s->in_flight--;
207
+ qemu_co_queue_next(&s->free_sema);
208
+
209
+ /* Kick the read_reply_co to get the next reply. */
210
+ if (s->read_reply_co) {
211
+ aio_co_wake(s->read_reply_co);
212
}
213
}
214
215
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
216
} else {
217
nbd_co_receive_reply(client, &request, &reply, qiov);
218
}
219
- nbd_coroutine_end(client, &request);
220
+ nbd_coroutine_end(bs, &request);
221
return -reply.error;
222
}
223
224
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
225
} else {
226
nbd_co_receive_reply(client, &request, &reply, NULL);
227
}
228
- nbd_coroutine_end(client, &request);
229
+ nbd_coroutine_end(bs, &request);
230
return -reply.error;
231
}
232
233
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
234
} else {
235
nbd_co_receive_reply(client, &request, &reply, NULL);
236
}
237
- nbd_coroutine_end(client, &request);
238
+ nbd_coroutine_end(bs, &request);
239
return -reply.error;
240
}
241
242
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_flush(BlockDriverState *bs)
243
} else {
244
nbd_co_receive_reply(client, &request, &reply, NULL);
245
}
246
- nbd_coroutine_end(client, &request);
247
+ nbd_coroutine_end(bs, &request);
248
return -reply.error;
249
}
250
251
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
252
} else {
253
nbd_co_receive_reply(client, &request, &reply, NULL);
254
}
255
- nbd_coroutine_end(client, &request);
256
+ nbd_coroutine_end(bs, &request);
257
return -reply.error;
258
259
}
260
261
void nbd_client_detach_aio_context(BlockDriverState *bs)
262
{
263
- aio_set_fd_handler(bdrv_get_aio_context(bs),
264
- nbd_get_client_session(bs)->sioc->fd,
265
- false, NULL, NULL, NULL, NULL);
266
+ NBDClientSession *client = nbd_get_client_session(bs);
267
+ qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
268
}
269
270
void nbd_client_attach_aio_context(BlockDriverState *bs,
271
AioContext *new_context)
272
{
273
- aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
274
- false, nbd_reply_ready, NULL, NULL, bs);
275
+ NBDClientSession *client = nbd_get_client_session(bs);
276
+ qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
277
+ aio_co_schedule(new_context, client->read_reply_co);
278
}
279
280
void nbd_client_close(BlockDriverState *bs)
281
@@ -XXX,XX +XXX,XX @@ int nbd_client_init(BlockDriverState *bs,
282
/* Now that we're connected, set the socket to be non-blocking and
283
* kick the reply mechanism. */
284
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
285
-
286
+ client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
287
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
288
289
logout("Established connection with NBD server\n");
290
diff --git a/nbd/client.c b/nbd/client.c
291
index XXXXXXX..XXXXXXX 100644
292
--- a/nbd/client.c
293
+++ b/nbd/client.c
294
@@ -XXX,XX +XXX,XX @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
295
ssize_t ret;
296
297
ret = read_sync(ioc, buf, sizeof(buf));
298
- if (ret < 0) {
299
+ if (ret <= 0) {
300
return ret;
301
}
302
303
diff --git a/nbd/common.c b/nbd/common.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/nbd/common.c
306
+++ b/nbd/common.c
307
@@ -XXX,XX +XXX,XX @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
308
}
309
if (len == QIO_CHANNEL_ERR_BLOCK) {
310
if (qemu_in_coroutine()) {
311
- /* XXX figure out if we can create a variant on
312
- * qio_channel_yield() that works with AIO contexts
313
- * and consider using that in this branch */
314
- qemu_coroutine_yield();
315
- } else if (done) {
316
- /* XXX this is needed by nbd_reply_ready. */
317
- qio_channel_wait(ioc,
318
- do_read ? G_IO_IN : G_IO_OUT);
319
+ qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT);
320
} else {
321
return -EAGAIN;
322
}
323
diff --git a/nbd/server.c b/nbd/server.c
324
index XXXXXXX..XXXXXXX 100644
325
--- a/nbd/server.c
326
+++ b/nbd/server.c
327
@@ -XXX,XX +XXX,XX @@ struct NBDClient {
328
CoMutex send_lock;
329
Coroutine *send_coroutine;
330
331
- bool can_read;
332
-
333
QTAILQ_ENTRY(NBDClient) next;
334
int nb_requests;
335
bool closing;
336
@@ -XXX,XX +XXX,XX @@ struct NBDClient {
337
338
/* That's all folks */
339
340
-static void nbd_set_handlers(NBDClient *client);
341
-static void nbd_unset_handlers(NBDClient *client);
342
-static void nbd_update_can_read(NBDClient *client);
343
+static void nbd_client_receive_next_request(NBDClient *client);
344
345
static gboolean nbd_negotiate_continue(QIOChannel *ioc,
346
GIOCondition condition,
347
@@ -XXX,XX +XXX,XX @@ void nbd_client_put(NBDClient *client)
348
*/
349
assert(client->closing);
350
351
- nbd_unset_handlers(client);
352
+ qio_channel_detach_aio_context(client->ioc);
353
object_unref(OBJECT(client->sioc));
354
object_unref(OBJECT(client->ioc));
355
if (client->tlscreds) {
356
@@ -XXX,XX +XXX,XX @@ static NBDRequestData *nbd_request_get(NBDClient *client)
357
358
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
359
client->nb_requests++;
360
- nbd_update_can_read(client);
361
362
req = g_new0(NBDRequestData, 1);
363
nbd_client_get(client);
364
@@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req)
365
g_free(req);
366
367
client->nb_requests--;
368
- nbd_update_can_read(client);
369
+ nbd_client_receive_next_request(client);
370
+
371
nbd_client_put(client);
372
}
373
374
@@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
375
exp->ctx = ctx;
376
377
QTAILQ_FOREACH(client, &exp->clients, next) {
378
- nbd_set_handlers(client);
379
+ qio_channel_attach_aio_context(client->ioc, ctx);
380
+ if (client->recv_coroutine) {
381
+ aio_co_schedule(ctx, client->recv_coroutine);
382
+ }
383
+ if (client->send_coroutine) {
384
+ aio_co_schedule(ctx, client->send_coroutine);
385
+ }
386
}
387
}
388
389
@@ -XXX,XX +XXX,XX @@ static void blk_aio_detach(void *opaque)
390
TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
391
392
QTAILQ_FOREACH(client, &exp->clients, next) {
393
- nbd_unset_handlers(client);
394
+ qio_channel_detach_aio_context(client->ioc);
395
}
396
397
exp->ctx = NULL;
398
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
399
g_assert(qemu_in_coroutine());
400
qemu_co_mutex_lock(&client->send_lock);
401
client->send_coroutine = qemu_coroutine_self();
402
- nbd_set_handlers(client);
403
404
if (!len) {
405
rc = nbd_send_reply(client->ioc, reply);
406
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
407
}
408
409
client->send_coroutine = NULL;
410
- nbd_set_handlers(client);
411
qemu_co_mutex_unlock(&client->send_lock);
412
return rc;
413
}
414
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
415
ssize_t rc;
416
417
g_assert(qemu_in_coroutine());
418
- client->recv_coroutine = qemu_coroutine_self();
419
- nbd_update_can_read(client);
420
-
421
+ assert(client->recv_coroutine == qemu_coroutine_self());
422
rc = nbd_receive_request(client->ioc, request);
423
if (rc < 0) {
424
if (rc != -EAGAIN) {
425
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
426
427
out:
428
client->recv_coroutine = NULL;
429
- nbd_update_can_read(client);
430
+ nbd_client_receive_next_request(client);
431
432
return rc;
433
}
434
435
-static void nbd_trip(void *opaque)
436
+/* Owns a reference to the NBDClient passed as opaque. */
437
+static coroutine_fn void nbd_trip(void *opaque)
438
{
439
NBDClient *client = opaque;
440
NBDExport *exp = client->exp;
441
NBDRequestData *req;
442
- NBDRequest request;
443
+ NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
444
NBDReply reply;
445
ssize_t ret;
446
int flags;
447
448
TRACE("Reading request.");
449
if (client->closing) {
450
+ nbd_client_put(client);
451
return;
452
}
453
454
@@ -XXX,XX +XXX,XX @@ static void nbd_trip(void *opaque)
455
456
done:
457
nbd_request_put(req);
458
+ nbd_client_put(client);
459
return;
460
461
out:
462
nbd_request_put(req);
463
client_close(client);
464
+ nbd_client_put(client);
465
}
466
467
-static void nbd_read(void *opaque)
468
+static void nbd_client_receive_next_request(NBDClient *client)
469
{
470
- NBDClient *client = opaque;
471
-
472
- if (client->recv_coroutine) {
473
- qemu_coroutine_enter(client->recv_coroutine);
474
- } else {
475
- qemu_coroutine_enter(qemu_coroutine_create(nbd_trip, client));
476
- }
477
-}
478
-
479
-static void nbd_restart_write(void *opaque)
480
-{
481
- NBDClient *client = opaque;
482
-
483
- qemu_coroutine_enter(client->send_coroutine);
484
-}
485
-
486
-static void nbd_set_handlers(NBDClient *client)
487
-{
488
- if (client->exp && client->exp->ctx) {
489
- aio_set_fd_handler(client->exp->ctx, client->sioc->fd, true,
490
- client->can_read ? nbd_read : NULL,
491
- client->send_coroutine ? nbd_restart_write : NULL,
492
- NULL, client);
493
- }
494
-}
495
-
496
-static void nbd_unset_handlers(NBDClient *client)
497
-{
498
- if (client->exp && client->exp->ctx) {
499
- aio_set_fd_handler(client->exp->ctx, client->sioc->fd, true, NULL,
500
- NULL, NULL, NULL);
501
- }
502
-}
503
-
504
-static void nbd_update_can_read(NBDClient *client)
505
-{
506
- bool can_read = client->recv_coroutine ||
507
- client->nb_requests < MAX_NBD_REQUESTS;
508
-
509
- if (can_read != client->can_read) {
510
- client->can_read = can_read;
511
- nbd_set_handlers(client);
512
-
513
- /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
514
- * in nbd_set_handlers() will have taken care of that */
515
+ if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
516
+ nbd_client_get(client);
517
+ client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
518
+ aio_co_schedule(client->exp->ctx, client->recv_coroutine);
519
}
520
}
521
522
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void nbd_co_client_start(void *opaque)
33
goto out;
523
goto out;
34
}
524
}
35
if (filename && img_size != UINT64_MAX) {
525
qemu_co_mutex_init(&client->send_lock);
36
diff --git a/tests/qemu-iotests/178 b/tests/qemu-iotests/178
526
- nbd_set_handlers(client);
37
index XXXXXXX..XXXXXXX 100755
527
38
--- a/tests/qemu-iotests/178
528
if (exp) {
39
+++ b/tests/qemu-iotests/178
529
QTAILQ_INSERT_TAIL(&exp->clients, client, next);
40
@@ -XXX,XX +XXX,XX @@ _make_test_img 1G
530
}
41
$QEMU_IMG measure # missing arguments
531
+
42
$QEMU_IMG measure --size 2G "$TEST_IMG" # only one allowed
532
+ nbd_client_receive_next_request(client);
43
$QEMU_IMG measure "$TEST_IMG" a # only one filename allowed
533
+
44
-$QEMU_IMG measure --object secret,id=sec0,data=MTIzNDU2,format=base64 # missing filename
534
out:
45
+$QEMU_IMG measure --object secret,id=sec0,data=MTIzNDU2,format=base64 # size or filename needed
535
g_free(data);
46
$QEMU_IMG measure --image-opts # missing filename
536
}
47
$QEMU_IMG measure -f qcow2 # missing filename
537
@@ -XXX,XX +XXX,XX @@ void nbd_client_new(NBDExport *exp,
48
$QEMU_IMG measure -l snap1 # missing filename
538
object_ref(OBJECT(client->sioc));
49
diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2
539
client->ioc = QIO_CHANNEL(sioc);
50
index XXXXXXX..XXXXXXX 100644
540
object_ref(OBJECT(client->ioc));
51
--- a/tests/qemu-iotests/178.out.qcow2
541
- client->can_read = true;
52
+++ b/tests/qemu-iotests/178.out.qcow2
542
client->close = close_fn;
53
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
543
54
qemu-img: Either --size N or one filename must be specified.
544
data->client = client;
55
qemu-img: --size N cannot be used together with a filename.
56
qemu-img: At most one filename argument is allowed.
57
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
58
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
59
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
60
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
61
+qemu-img: Either --size N or one filename must be specified.
62
+qemu-img: --image-opts, -f, and -l require a filename argument.
63
+qemu-img: --image-opts, -f, and -l require a filename argument.
64
+qemu-img: --image-opts, -f, and -l require a filename argument.
65
qemu-img: Invalid option list: ,
66
qemu-img: Invalid parameter 'snapshot.foo'
67
qemu-img: Failed in parsing snapshot param 'snapshot.foo'
68
diff --git a/tests/qemu-iotests/178.out.raw b/tests/qemu-iotests/178.out.raw
69
index XXXXXXX..XXXXXXX 100644
70
--- a/tests/qemu-iotests/178.out.raw
71
+++ b/tests/qemu-iotests/178.out.raw
72
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
73
qemu-img: Either --size N or one filename must be specified.
74
qemu-img: --size N cannot be used together with a filename.
75
qemu-img: At most one filename argument is allowed.
76
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
77
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
78
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
79
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
80
+qemu-img: Either --size N or one filename must be specified.
81
+qemu-img: --image-opts, -f, and -l require a filename argument.
82
+qemu-img: --image-opts, -f, and -l require a filename argument.
83
+qemu-img: --image-opts, -f, and -l require a filename argument.
84
qemu-img: Invalid option list: ,
85
qemu-img: Invalid parameter 'snapshot.foo'
86
qemu-img: Failed in parsing snapshot param 'snapshot.foo'
87
--
545
--
88
2.24.1
546
2.9.3
89
547
90
548
diff view generated by jsdifflib
1
From: Pan Nengyuan <pannengyuan@huawei.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
collect_image_check() is called twice in img_check(), so the filename/format will be allocated again without freeing the original memory.
3
As a small step towards the introduction of multiqueue, we want
4
It is not a big deal since the process will exit anyway, but freeing it makes the code cleaner and removes the warning spotted by ASan.
4
coroutines to remain on the same AioContext that started them,
5
unless they are moved explicitly with e.g. aio_co_schedule. This patch
6
avoids that coroutines switch AioContext when they use a CoMutex.
7
For now it does not make much of a difference, because the CoMutex
8
is not thread-safe and the AioContext itself is used to protect the
9
CoMutex from concurrent access. However, this is going to change.
5
10
6
Reported-by: Euler Robot <euler.robot@huawei.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Pan Nengyuan <pannengyuan@huawei.com>
12
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8
Message-Id: <20200227012950.12256-3-pannengyuan@huawei.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
15
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
17
---
11
qemu-img.c | 2 ++
18
util/qemu-coroutine-lock.c | 5 ++---
12
1 file changed, 2 insertions(+)
19
util/trace-events | 1 -
20
2 files changed, 2 insertions(+), 4 deletions(-)
13
21
14
diff --git a/qemu-img.c b/qemu-img.c
22
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-img.c
24
--- a/util/qemu-coroutine-lock.c
17
+++ b/qemu-img.c
25
+++ b/util/qemu-coroutine-lock.c
18
@@ -XXX,XX +XXX,XX @@ static int img_check(int argc, char **argv)
26
@@ -XXX,XX +XXX,XX @@
19
check->corruptions_fixed);
27
#include "qemu/coroutine.h"
28
#include "qemu/coroutine_int.h"
29
#include "qemu/queue.h"
30
+#include "block/aio.h"
31
#include "trace.h"
32
33
void qemu_co_queue_init(CoQueue *queue)
34
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_run_restart(Coroutine *co)
35
36
static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
37
{
38
- Coroutine *self = qemu_coroutine_self();
39
Coroutine *next;
40
41
if (QSIMPLEQ_EMPTY(&queue->entries)) {
42
@@ -XXX,XX +XXX,XX @@ static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
43
44
while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
45
QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
46
- QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
47
- trace_qemu_co_queue_next(next);
48
+ aio_co_wake(next);
49
if (single) {
50
break;
20
}
51
}
21
52
diff --git a/util/trace-events b/util/trace-events
22
+ qapi_free_ImageCheck(check);
53
index XXXXXXX..XXXXXXX 100644
23
+ check = g_new0(ImageCheck, 1);
54
--- a/util/trace-events
24
ret = collect_image_check(bs, check, filename, fmt, 0);
55
+++ b/util/trace-events
25
56
@@ -XXX,XX +XXX,XX @@ qemu_coroutine_terminate(void *co) "self %p"
26
check->leaks_fixed = leaks_fixed;
57
58
# util/qemu-coroutine-lock.c
59
qemu_co_queue_run_restart(void *co) "co %p"
60
-qemu_co_queue_next(void *nxt) "next %p"
61
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
62
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
63
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
27
--
64
--
28
2.24.1
65
2.9.3
29
66
30
67
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Hide structure definitions and add explicit API instead, to keep an
3
Keep the coroutine on the same AioContext. Without this change,
4
eye on the scope of the shared fields.
4
there would be a race between yielding the coroutine and reentering it.
5
While the race cannot happen now, because the code only runs from a single
6
AioContext, this will change with multiqueue support in the block layer.
5
7
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
While doing the change, replace custom bottom half with aio_co_schedule.
7
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
9
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Message-Id: <20200311103004.7649-10-vsementsov@virtuozzo.com>
11
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
14
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
16
---
12
block/backup-top.c | 6 ++--
17
block/blkdebug.c | 9 +--------
13
block/backup.c | 25 ++++++++--------
18
1 file changed, 1 insertion(+), 8 deletions(-)
14
block/block-copy.c | 59 ++++++++++++++++++++++++++++++++++++++
15
include/block/block-copy.h | 52 +++------------------------------
16
4 files changed, 80 insertions(+), 62 deletions(-)
17
19
18
diff --git a/block/backup-top.c b/block/backup-top.c
20
diff --git a/block/blkdebug.c b/block/blkdebug.c
19
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
20
--- a/block/backup-top.c
22
--- a/block/blkdebug.c
21
+++ b/block/backup-top.c
23
+++ b/block/blkdebug.c
22
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVBackupTopState {
24
@@ -XXX,XX +XXX,XX @@ out:
23
BlockCopyState *bcs;
24
BdrvChild *target;
25
bool active;
26
+ int64_t cluster_size;
27
} BDRVBackupTopState;
28
29
static coroutine_fn int backup_top_co_preadv(
30
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
31
return 0;
32
}
33
34
- off = QEMU_ALIGN_DOWN(offset, s->bcs->cluster_size);
35
- end = QEMU_ALIGN_UP(offset + bytes, s->bcs->cluster_size);
36
+ off = QEMU_ALIGN_DOWN(offset, s->cluster_size);
37
+ end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size);
38
39
return block_copy(s->bcs, off, end - off, NULL);
40
}
41
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
42
goto fail;
43
}
44
45
+ state->cluster_size = cluster_size;
46
state->bcs = block_copy_state_new(top->backing, state->target,
47
cluster_size, write_flags, &local_err);
48
if (local_err) {
49
diff --git a/block/backup.c b/block/backup.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/block/backup.c
52
+++ b/block/backup.c
53
@@ -XXX,XX +XXX,XX @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
54
55
if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
56
/* If we failed and synced, merge in the bits we didn't copy: */
57
- bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap,
58
+ bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs),
59
NULL, true);
60
}
61
}
62
@@ -XXX,XX +XXX,XX @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
63
return;
64
}
65
66
- bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0, backup_job->len);
67
+ bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0,
68
+ backup_job->len);
69
}
70
71
static BlockErrorAction backup_error_action(BackupBlockJob *job,
72
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_loop(BackupBlockJob *job)
73
BdrvDirtyBitmapIter *bdbi;
74
int ret = 0;
75
76
- bdbi = bdrv_dirty_iter_new(job->bcs->copy_bitmap);
77
+ bdbi = bdrv_dirty_iter_new(block_copy_dirty_bitmap(job->bcs));
78
while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
79
do {
80
if (yield_and_check(job)) {
81
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_loop(BackupBlockJob *job)
82
return ret;
25
return ret;
83
}
26
}
84
27
85
-static void backup_init_copy_bitmap(BackupBlockJob *job)
28
-static void error_callback_bh(void *opaque)
86
+static void backup_init_bcs_bitmap(BackupBlockJob *job)
29
-{
30
- Coroutine *co = opaque;
31
- qemu_coroutine_enter(co);
32
-}
33
-
34
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
87
{
35
{
88
bool ret;
36
BDRVBlkdebugState *s = bs->opaque;
89
uint64_t estimate;
37
@@ -XXX,XX +XXX,XX @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
90
+ BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
91
92
if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
93
- ret = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap,
94
- job->sync_bitmap,
95
+ ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
96
NULL, true);
97
assert(ret);
98
} else {
99
@@ -XXX,XX +XXX,XX @@ static void backup_init_copy_bitmap(BackupBlockJob *job)
100
* We can't hog the coroutine to initialize this thoroughly.
101
* Set a flag and resume work when we are able to yield safely.
102
*/
103
- job->bcs->skip_unallocated = true;
104
+ block_copy_set_skip_unallocated(job->bcs, true);
105
}
106
- bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len);
107
+ bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
108
}
38
}
109
39
110
- estimate = bdrv_get_dirty_count(job->bcs->copy_bitmap);
40
if (!immediately) {
111
+ estimate = bdrv_get_dirty_count(bcs_bitmap);
41
- aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
112
job_progress_set_remaining(&job->common.job, estimate);
42
- qemu_coroutine_self());
113
}
43
+ aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
114
44
qemu_coroutine_yield();
115
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_run(Job *job, Error **errp)
116
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
117
int ret = 0;
118
119
- backup_init_copy_bitmap(s);
120
+ backup_init_bcs_bitmap(s);
121
122
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
123
int64_t offset = 0;
124
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_run(Job *job, Error **errp)
125
126
offset += count;
127
}
128
- s->bcs->skip_unallocated = false;
129
+ block_copy_set_skip_unallocated(s->bcs, false);
130
}
45
}
131
46
132
if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
133
/*
134
- * All bits are set in copy_bitmap to allow any cluster to be copied.
135
+ * All bits are set in bcs bitmap to allow any cluster to be copied.
136
* This does not actually require them to be copied.
137
*/
138
while (!job_is_cancelled(job)) {
139
diff --git a/block/block-copy.c b/block/block-copy.c
140
index XXXXXXX..XXXXXXX 100644
141
--- a/block/block-copy.c
142
+++ b/block/block-copy.c
143
@@ -XXX,XX +XXX,XX @@
144
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
145
#define BLOCK_COPY_MAX_MEM (128 * MiB)
146
147
+typedef struct BlockCopyInFlightReq {
148
+ int64_t offset;
149
+ int64_t bytes;
150
+ QLIST_ENTRY(BlockCopyInFlightReq) list;
151
+ CoQueue wait_queue; /* coroutines blocked on this request */
152
+} BlockCopyInFlightReq;
153
+
154
+typedef struct BlockCopyState {
155
+ /*
156
+ * BdrvChild objects are not owned or managed by block-copy. They are
157
+ * provided by block-copy user and user is responsible for appropriate
158
+ * permissions on these children.
159
+ */
160
+ BdrvChild *source;
161
+ BdrvChild *target;
162
+ BdrvDirtyBitmap *copy_bitmap;
163
+ int64_t in_flight_bytes;
164
+ int64_t cluster_size;
165
+ bool use_copy_range;
166
+ int64_t copy_size;
167
+ uint64_t len;
168
+ QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;
169
+
170
+ BdrvRequestFlags write_flags;
171
+
172
+ /*
173
+ * skip_unallocated:
174
+ *
175
+ * Used by sync=top jobs, which first scan the source node for unallocated
176
+ * areas and clear them in the copy_bitmap. During this process, the bitmap
177
+ * is thus not fully initialized: It may still have bits set for areas that
178
+ * are unallocated and should actually not be copied.
179
+ *
180
+ * This is indicated by skip_unallocated.
181
+ *
182
+ * In this case, block_copy() will query the source’s allocation status,
183
+ * skip unallocated regions, clear them in the copy_bitmap, and invoke
184
+ * block_copy_reset_unallocated() every time it does.
185
+ */
186
+ bool skip_unallocated;
187
+
188
+ ProgressMeter *progress;
189
+ /* progress_bytes_callback: called when some copying progress is done. */
190
+ ProgressBytesCallbackFunc progress_bytes_callback;
191
+ void *progress_opaque;
192
+
193
+ SharedResource *mem;
194
+} BlockCopyState;
195
+
196
static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
197
int64_t offset,
198
int64_t bytes)
199
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
200
201
return ret;
202
}
203
+
204
+BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
205
+{
206
+ return s->copy_bitmap;
207
+}
208
+
209
+void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
210
+{
211
+ s->skip_unallocated = skip;
212
+}
213
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
214
index XXXXXXX..XXXXXXX 100644
215
--- a/include/block/block-copy.h
216
+++ b/include/block/block-copy.h
217
@@ -XXX,XX +XXX,XX @@
218
#include "block/block.h"
219
#include "qemu/co-shared-resource.h"
220
221
-typedef struct BlockCopyInFlightReq {
222
- int64_t offset;
223
- int64_t bytes;
224
- QLIST_ENTRY(BlockCopyInFlightReq) list;
225
- CoQueue wait_queue; /* coroutines blocked on this request */
226
-} BlockCopyInFlightReq;
227
-
228
typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
229
-typedef struct BlockCopyState {
230
- /*
231
- * BdrvChild objects are not owned or managed by block-copy. They are
232
- * provided by block-copy user and user is responsible for appropriate
233
- * permissions on these children.
234
- */
235
- BdrvChild *source;
236
- BdrvChild *target;
237
- BdrvDirtyBitmap *copy_bitmap;
238
- int64_t in_flight_bytes;
239
- int64_t cluster_size;
240
- bool use_copy_range;
241
- int64_t copy_size;
242
- uint64_t len;
243
- QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;
244
-
245
- BdrvRequestFlags write_flags;
246
-
247
- /*
248
- * skip_unallocated:
249
- *
250
- * Used by sync=top jobs, which first scan the source node for unallocated
251
- * areas and clear them in the copy_bitmap. During this process, the bitmap
252
- * is thus not fully initialized: It may still have bits set for areas that
253
- * are unallocated and should actually not be copied.
254
- *
255
- * This is indicated by skip_unallocated.
256
- *
257
- * In this case, block_copy() will query the source’s allocation status,
258
- * skip unallocated regions, clear them in the copy_bitmap, and invoke
259
- * block_copy_reset_unallocated() every time it does.
260
- */
261
- bool skip_unallocated;
262
-
263
- ProgressMeter *progress;
264
- /* progress_bytes_callback: called when some copying progress is done. */
265
- ProgressBytesCallbackFunc progress_bytes_callback;
266
- void *progress_opaque;
267
-
268
- SharedResource *mem;
269
-} BlockCopyState;
270
+typedef struct BlockCopyState BlockCopyState;
271
272
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
273
int64_t cluster_size,
274
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
275
int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
276
bool *error_is_read);
277
278
+BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
279
+void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
280
+
281
#endif /* BLOCK_COPY_H */
282
--
47
--
283
2.24.1
48
2.9.3
284
49
285
50
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Add qemu-img measure support in the "luks" block driver.
3
qed_aio_start_io and qed_aio_next_io will not have to acquire/release
4
the AioContext, while qed_aio_next_io_cb will. Split the functionality
5
and gain a little type-safety in the process.
4
6
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
11
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Message-Id: <20200221112522.1497712-3-stefanha@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
---
13
---
10
block/crypto.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
14
block/qed.c | 39 +++++++++++++++++++++++++--------------
11
1 file changed, 62 insertions(+)
15
1 file changed, 25 insertions(+), 14 deletions(-)
12
16
13
diff --git a/block/crypto.c b/block/crypto.c
17
diff --git a/block/qed.c b/block/qed.c
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/block/crypto.c
19
--- a/block/qed.c
16
+++ b/block/crypto.c
20
+++ b/block/qed.c
17
@@ -XXX,XX +XXX,XX @@ static int64_t block_crypto_getlength(BlockDriverState *bs)
21
@@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
22
return l2_table;
18
}
23
}
19
24
20
25
-static void qed_aio_next_io(void *opaque, int ret);
21
+static BlockMeasureInfo *block_crypto_measure(QemuOpts *opts,
26
+static void qed_aio_next_io(QEDAIOCB *acb, int ret);
22
+ BlockDriverState *in_bs,
27
+
23
+ Error **errp)
28
+static void qed_aio_start_io(QEDAIOCB *acb)
24
+{
29
+{
25
+ g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
30
+ qed_aio_next_io(acb, 0);
26
+ Error *local_err = NULL;
27
+ BlockMeasureInfo *info;
28
+ uint64_t size;
29
+ size_t luks_payload_size;
30
+ QDict *cryptoopts;
31
+
32
+ /*
33
+ * Preallocation mode doesn't affect size requirements but we must consume
34
+ * the option.
35
+ */
36
+ g_free(qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC));
37
+
38
+ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
39
+
40
+ if (in_bs) {
41
+ int64_t ssize = bdrv_getlength(in_bs);
42
+
43
+ if (ssize < 0) {
44
+ error_setg_errno(&local_err, -ssize,
45
+ "Unable to get image virtual_size");
46
+ goto err;
47
+ }
48
+
49
+ size = ssize;
50
+ }
51
+
52
+ cryptoopts = qemu_opts_to_qdict_filtered(opts, NULL,
53
+ &block_crypto_create_opts_luks, true);
54
+ qdict_put_str(cryptoopts, "format", "luks");
55
+ create_opts = block_crypto_create_opts_init(cryptoopts, &local_err);
56
+ qobject_unref(cryptoopts);
57
+ if (!create_opts) {
58
+ goto err;
59
+ }
60
+
61
+ if (!qcrypto_block_calculate_payload_offset(create_opts, NULL,
62
+ &luks_payload_size,
63
+ &local_err)) {
64
+ goto err;
65
+ }
66
+
67
+ /*
68
+ * Unallocated blocks are still encrypted so allocation status makes no
69
+ * difference to the file size.
70
+ */
71
+ info = g_new(BlockMeasureInfo, 1);
72
+ info->fully_allocated = luks_payload_size + size;
73
+ info->required = luks_payload_size + size;
74
+ return info;
75
+
76
+err:
77
+ error_propagate(errp, local_err);
78
+ return NULL;
79
+}
31
+}
80
+
32
+
33
+static void qed_aio_next_io_cb(void *opaque, int ret)
34
+{
35
+ QEDAIOCB *acb = opaque;
81
+
36
+
82
static int block_crypto_probe_luks(const uint8_t *buf,
37
+ qed_aio_next_io(acb, ret);
83
int buf_size,
38
+}
84
const char *filename) {
39
85
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_crypto_luks = {
40
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
86
.bdrv_co_preadv = block_crypto_co_preadv,
41
{
87
.bdrv_co_pwritev = block_crypto_co_pwritev,
42
@@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
88
.bdrv_getlength = block_crypto_getlength,
43
89
+ .bdrv_measure = block_crypto_measure,
44
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
90
.bdrv_get_info = block_crypto_get_info_luks,
45
if (acb) {
91
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
46
- qed_aio_next_io(acb, 0);
47
+ qed_aio_start_io(acb);
48
}
49
}
50
51
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
52
QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
53
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
54
if (acb) {
55
- qed_aio_next_io(acb, 0);
56
+ qed_aio_start_io(acb);
57
} else if (s->header.features & QED_F_NEED_CHECK) {
58
qed_start_need_check_timer(s);
59
}
60
@@ -XXX,XX +XXX,XX @@ static void qed_commit_l2_update(void *opaque, int ret)
61
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
62
assert(acb->request.l2_table != NULL);
63
64
- qed_aio_next_io(opaque, ret);
65
+ qed_aio_next_io(acb, ret);
66
}
67
68
/**
69
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
70
if (need_alloc) {
71
/* Write out the whole new L2 table */
72
qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
73
- qed_aio_write_l1_update, acb);
74
+ qed_aio_write_l1_update, acb);
75
} else {
76
/* Write out only the updated part of the L2 table */
77
qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
78
- qed_aio_next_io, acb);
79
+ qed_aio_next_io_cb, acb);
80
}
81
return;
82
83
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
84
}
85
86
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
87
- next_fn = qed_aio_next_io;
88
+ next_fn = qed_aio_next_io_cb;
89
} else {
90
if (s->bs->backing) {
91
next_fn = qed_aio_write_flush_before_l2_update;
92
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
93
if (acb->flags & QED_AIOCB_ZERO) {
94
/* Skip ahead if the clusters are already zero */
95
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
96
- qed_aio_next_io(acb, 0);
97
+ qed_aio_start_io(acb);
98
return;
99
}
100
101
@@ -XXX,XX +XXX,XX @@ static void qed_aio_read_data(void *opaque, int ret,
102
/* Handle zero cluster and backing file reads */
103
if (ret == QED_CLUSTER_ZERO) {
104
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
105
- qed_aio_next_io(acb, 0);
106
+ qed_aio_start_io(acb);
107
return;
108
} else if (ret != QED_CLUSTER_FOUND) {
109
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
110
- &acb->backing_qiov, qed_aio_next_io, acb);
111
+ &acb->backing_qiov, qed_aio_next_io_cb, acb);
112
return;
113
}
114
115
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
116
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
117
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
118
- qed_aio_next_io, acb);
119
+ qed_aio_next_io_cb, acb);
120
return;
121
122
err:
123
@@ -XXX,XX +XXX,XX @@ err:
124
/**
125
* Begin next I/O or complete the request
126
*/
127
-static void qed_aio_next_io(void *opaque, int ret)
128
+static void qed_aio_next_io(QEDAIOCB *acb, int ret)
129
{
130
- QEDAIOCB *acb = opaque;
131
BDRVQEDState *s = acb_to_s(acb);
132
QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
133
qed_aio_write_data : qed_aio_read_data;
134
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
135
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
136
137
/* Start request */
138
- qed_aio_next_io(acb, 0);
139
+ qed_aio_start_io(acb);
140
return &acb->common;
141
}
92
142
93
--
143
--
94
2.24.1
144
2.9.3
95
145
96
146
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
On success path we return what inflate() returns instead of 0. And it
3
The AioContext data structures are now protected by list_lock and/or
4
most probably works for Z_STREAM_END as it is positive, but is
4
they are walked with FOREACH_RCU primitives. There is no need anymore
5
definitely broken for Z_BUF_ERROR.
5
to acquire the AioContext for the entire duration of aio_dispatch.
6
Instead, just acquire it before and after invoking the callbacks.
7
The next step is then to push it further down.
6
8
7
While being here, switch to errno return code, to be closer to
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
qcow2_compress API (and usual expectations).
10
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Fam Zheng <famz@redhat.com>
12
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
13
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
util/aio-posix.c | 25 +++++++++++--------------
17
util/aio-win32.c | 15 +++++++--------
18
util/async.c | 2 ++
19
3 files changed, 20 insertions(+), 22 deletions(-)
9
20
10
Revert condition in if to be more positive. Drop dead initialization of
21
diff --git a/util/aio-posix.c b/util/aio-posix.c
11
ret.
12
13
Cc: qemu-stable@nongnu.org # v4.0
14
Fixes: 341926ab83e2b
15
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
Message-Id: <20200302150930.16218-1-vsementsov@virtuozzo.com>
17
Reviewed-by: Alberto Garcia <berto@igalia.com>
18
Reviewed-by: Ján Tomko <jtomko@redhat.com>
19
Signed-off-by: Max Reitz <mreitz@redhat.com>
20
---
21
block/qcow2-threads.c | 12 +++++++-----
22
1 file changed, 7 insertions(+), 5 deletions(-)
23
24
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
25
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
26
--- a/block/qcow2-threads.c
23
--- a/util/aio-posix.c
27
+++ b/block/qcow2-threads.c
24
+++ b/util/aio-posix.c
28
@@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_compress(void *dest, size_t dest_size,
25
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
29
* @src - source buffer, @src_size bytes
26
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
30
*
27
aio_node_check(ctx, node->is_external) &&
31
* Returns: 0 on success
28
node->io_read) {
32
- * -1 on fail
29
+ aio_context_acquire(ctx);
33
+ * -EIO on fail
30
node->io_read(node->opaque);
34
*/
31
+ aio_context_release(ctx);
35
static ssize_t qcow2_decompress(void *dest, size_t dest_size,
32
36
const void *src, size_t src_size)
33
/* aio_notify() does not count as progress */
37
{
34
if (node->opaque != &ctx->notifier) {
38
- int ret = 0;
35
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
39
+ int ret;
36
(revents & (G_IO_OUT | G_IO_ERR)) &&
40
z_stream strm;
37
aio_node_check(ctx, node->is_external) &&
41
38
node->io_write) {
42
memset(&strm, 0, sizeof(strm));
39
+ aio_context_acquire(ctx);
43
@@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_decompress(void *dest, size_t dest_size,
40
node->io_write(node->opaque);
44
41
+ aio_context_release(ctx);
45
ret = inflateInit2(&strm, -12);
42
progress = true;
46
if (ret != Z_OK) {
43
}
47
- return -1;
44
48
+ return -EIO;
45
@@ -XXX,XX +XXX,XX @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
49
}
46
}
50
47
51
ret = inflate(&strm, Z_FINISH);
48
/* Run our timers */
52
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
49
+ aio_context_acquire(ctx);
53
+ if ((ret == Z_STREAM_END || ret == Z_BUF_ERROR) && strm.avail_out == 0) {
50
progress |= timerlistgroup_run_timers(&ctx->tlg);
54
/*
51
+ aio_context_release(ctx);
55
* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
52
56
* @src buffer may be processed partly (because in qcow2 we know size of
53
return progress;
57
* compressed data with precision of one sector)
54
}
58
*/
55
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
59
- ret = -1;
56
int64_t timeout;
60
+ ret = 0;
57
int64_t start = 0;
61
+ } else {
58
62
+ ret = -EIO;
59
- aio_context_acquire(ctx);
60
- progress = false;
61
-
62
/* aio_notify can avoid the expensive event_notifier_set if
63
* everything (file descriptors, bottom halves, timers) will
64
* be re-evaluated before the next blocking poll(). This is
65
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
66
start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
63
}
67
}
64
68
65
inflateEnd(&strm);
69
- if (try_poll_mode(ctx, blocking)) {
70
- progress = true;
71
- } else {
72
+ aio_context_acquire(ctx);
73
+ progress = try_poll_mode(ctx, blocking);
74
+ aio_context_release(ctx);
75
+
76
+ if (!progress) {
77
assert(npfd == 0);
78
79
/* fill pollfds */
80
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
81
timeout = blocking ? aio_compute_timeout(ctx) : 0;
82
83
/* wait until next event */
84
- if (timeout) {
85
- aio_context_release(ctx);
86
- }
87
if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
88
AioHandler epoll_handler;
89
90
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
91
} else {
92
ret = qemu_poll_ns(pollfds, npfd, timeout);
93
}
94
- if (timeout) {
95
- aio_context_acquire(ctx);
96
- }
97
}
98
99
if (blocking) {
100
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
101
progress = true;
102
}
103
104
- aio_context_release(ctx);
105
-
106
return progress;
107
}
108
109
diff --git a/util/aio-win32.c b/util/aio-win32.c
110
index XXXXXXX..XXXXXXX 100644
111
--- a/util/aio-win32.c
112
+++ b/util/aio-win32.c
113
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
114
(revents || event_notifier_get_handle(node->e) == event) &&
115
node->io_notify) {
116
node->pfd.revents = 0;
117
+ aio_context_acquire(ctx);
118
node->io_notify(node->e);
119
+ aio_context_release(ctx);
120
121
/* aio_notify() does not count as progress */
122
if (node->e != &ctx->notifier) {
123
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
124
(node->io_read || node->io_write)) {
125
node->pfd.revents = 0;
126
if ((revents & G_IO_IN) && node->io_read) {
127
+ aio_context_acquire(ctx);
128
node->io_read(node->opaque);
129
+ aio_context_release(ctx);
130
progress = true;
131
}
132
if ((revents & G_IO_OUT) && node->io_write) {
133
+ aio_context_acquire(ctx);
134
node->io_write(node->opaque);
135
+ aio_context_release(ctx);
136
progress = true;
137
}
138
139
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
140
int count;
141
int timeout;
142
143
- aio_context_acquire(ctx);
144
progress = false;
145
146
/* aio_notify can avoid the expensive event_notifier_set if
147
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
148
149
timeout = blocking && !have_select_revents
150
? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
151
- if (timeout) {
152
- aio_context_release(ctx);
153
- }
154
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
155
if (blocking) {
156
assert(first);
157
atomic_sub(&ctx->notify_me, 2);
158
}
159
- if (timeout) {
160
- aio_context_acquire(ctx);
161
- }
162
163
if (first) {
164
aio_notify_accept(ctx);
165
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
166
progress |= aio_dispatch_handlers(ctx, event);
167
} while (count > 0);
168
169
+ aio_context_acquire(ctx);
170
progress |= timerlistgroup_run_timers(&ctx->tlg);
171
-
172
aio_context_release(ctx);
173
return progress;
174
}
175
diff --git a/util/async.c b/util/async.c
176
index XXXXXXX..XXXXXXX 100644
177
--- a/util/async.c
178
+++ b/util/async.c
179
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
180
ret = 1;
181
}
182
bh->idle = 0;
183
+ aio_context_acquire(ctx);
184
aio_bh_call(bh);
185
+ aio_context_release(ctx);
186
}
187
if (bh->deleted) {
188
deleted = true;
66
--
189
--
67
2.24.1
190
2.9.3
68
191
69
192
diff view generated by jsdifflib
New patch
1
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/qed.h | 3 +++
11
block/curl.c | 2 ++
12
block/io.c | 5 +++++
13
block/iscsi.c | 8 ++++++--
14
block/null.c | 4 ++++
15
block/qed.c | 12 ++++++++++++
16
block/throttle-groups.c | 2 ++
17
util/aio-posix.c | 2 --
18
util/aio-win32.c | 2 --
19
util/qemu-coroutine-sleep.c | 2 +-
20
10 files changed, 35 insertions(+), 7 deletions(-)
21
22
diff --git a/block/qed.h b/block/qed.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/block/qed.h
25
+++ b/block/qed.h
26
@@ -XXX,XX +XXX,XX @@ enum {
27
*/
28
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
29
30
+void qed_acquire(BDRVQEDState *s);
31
+void qed_release(BDRVQEDState *s);
32
+
33
/**
34
* Generic callback for chaining async callbacks
35
*/
36
diff --git a/block/curl.c b/block/curl.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/curl.c
39
+++ b/block/curl.c
40
@@ -XXX,XX +XXX,XX @@ static void curl_multi_timeout_do(void *arg)
41
return;
42
}
43
44
+ aio_context_acquire(s->aio_context);
45
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
46
47
curl_multi_check_completion(s);
48
+ aio_context_release(s->aio_context);
49
#else
50
abort();
51
#endif
52
diff --git a/block/io.c b/block/io.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/block/io.c
55
+++ b/block/io.c
56
@@ -XXX,XX +XXX,XX @@ void bdrv_aio_cancel(BlockAIOCB *acb)
57
if (acb->aiocb_info->get_aio_context) {
58
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
59
} else if (acb->bs) {
60
+ /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
61
+ * assert that we're not using an I/O thread. Thread-safe
62
+ * code should use bdrv_aio_cancel_async exclusively.
63
+ */
64
+ assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
65
aio_poll(bdrv_get_aio_context(acb->bs), true);
66
} else {
67
abort();
68
diff --git a/block/iscsi.c b/block/iscsi.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/block/iscsi.c
71
+++ b/block/iscsi.c
72
@@ -XXX,XX +XXX,XX @@ static void iscsi_retry_timer_expired(void *opaque)
73
struct IscsiTask *iTask = opaque;
74
iTask->complete = 1;
75
if (iTask->co) {
76
- qemu_coroutine_enter(iTask->co);
77
+ aio_co_wake(iTask->co);
78
}
79
}
80
81
@@ -XXX,XX +XXX,XX @@ static void iscsi_nop_timed_event(void *opaque)
82
{
83
IscsiLun *iscsilun = opaque;
84
85
+ aio_context_acquire(iscsilun->aio_context);
86
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
87
error_report("iSCSI: NOP timeout. Reconnecting...");
88
iscsilun->request_timed_out = true;
89
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
90
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
91
- return;
92
+ goto out;
93
}
94
95
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
96
iscsi_set_events(iscsilun);
97
+
98
+out:
99
+ aio_context_release(iscsilun->aio_context);
100
}
101
102
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
103
diff --git a/block/null.c b/block/null.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/block/null.c
106
+++ b/block/null.c
107
@@ -XXX,XX +XXX,XX @@ static void null_bh_cb(void *opaque)
108
static void null_timer_cb(void *opaque)
109
{
110
NullAIOCB *acb = opaque;
111
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
112
+
113
+ aio_context_acquire(ctx);
114
acb->common.cb(acb->common.opaque, 0);
115
+ aio_context_release(ctx);
116
timer_deinit(&acb->timer);
117
qemu_aio_unref(acb);
118
}
119
diff --git a/block/qed.c b/block/qed.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/block/qed.c
122
+++ b/block/qed.c
123
@@ -XXX,XX +XXX,XX @@ static void qed_need_check_timer_cb(void *opaque)
124
125
trace_qed_need_check_timer_cb(s);
126
127
+ qed_acquire(s);
128
qed_plug_allocating_write_reqs(s);
129
130
/* Ensure writes are on disk before clearing flag */
131
bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
132
+ qed_release(s);
133
+}
134
+
135
+void qed_acquire(BDRVQEDState *s)
136
+{
137
+ aio_context_acquire(bdrv_get_aio_context(s->bs));
138
+}
139
+
140
+void qed_release(BDRVQEDState *s)
141
+{
142
+ aio_context_release(bdrv_get_aio_context(s->bs));
143
}
144
145
static void qed_start_need_check_timer(BDRVQEDState *s)
146
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/block/throttle-groups.c
149
+++ b/block/throttle-groups.c
150
@@ -XXX,XX +XXX,XX @@ static void timer_cb(BlockBackend *blk, bool is_write)
151
qemu_mutex_unlock(&tg->lock);
152
153
/* Run the request that was waiting for this timer */
154
+ aio_context_acquire(blk_get_aio_context(blk));
155
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
156
+ aio_context_release(blk_get_aio_context(blk));
157
158
/* If the request queue was empty then we have to take care of
159
* scheduling the next one */
160
diff --git a/util/aio-posix.c b/util/aio-posix.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/util/aio-posix.c
163
+++ b/util/aio-posix.c
164
@@ -XXX,XX +XXX,XX @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
165
}
166
167
/* Run our timers */
168
- aio_context_acquire(ctx);
169
progress |= timerlistgroup_run_timers(&ctx->tlg);
170
- aio_context_release(ctx);
171
172
return progress;
173
}
174
diff --git a/util/aio-win32.c b/util/aio-win32.c
175
index XXXXXXX..XXXXXXX 100644
176
--- a/util/aio-win32.c
177
+++ b/util/aio-win32.c
178
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
179
progress |= aio_dispatch_handlers(ctx, event);
180
} while (count > 0);
181
182
- aio_context_acquire(ctx);
183
progress |= timerlistgroup_run_timers(&ctx->tlg);
184
- aio_context_release(ctx);
185
return progress;
186
}
187
188
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
189
index XXXXXXX..XXXXXXX 100644
190
--- a/util/qemu-coroutine-sleep.c
191
+++ b/util/qemu-coroutine-sleep.c
192
@@ -XXX,XX +XXX,XX @@ static void co_sleep_cb(void *opaque)
193
{
194
CoSleepCB *sleep_cb = opaque;
195
196
- qemu_coroutine_enter(sleep_cb->co);
197
+ aio_co_wake(sleep_cb->co);
198
}
199
200
void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
201
--
202
2.9.3
203
204
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
The qcow2 .bdrv_measure() code calculates the crypto payload offset.
3
This covers both file descriptor callbacks and polling callbacks,
4
This logic really belongs in crypto/block.c where it can be reused by
4
since they execute related code.
5
other image formats.
6
5
7
The "luks" block driver will need this same logic in order to implement
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
.bdrv_measure(), so extract the qcrypto_block_calculate_payload_offset()
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
function now.
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
10
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/curl.c | 16 +++++++++++++---
14
block/iscsi.c | 4 ++++
15
block/linux-aio.c | 4 ++++
16
block/nfs.c | 6 ++++++
17
block/sheepdog.c | 29 +++++++++++++++--------------
18
block/ssh.c | 29 +++++++++--------------------
19
block/win32-aio.c | 10 ++++++----
20
hw/block/virtio-blk.c | 5 ++++-
21
hw/scsi/virtio-scsi.c | 7 +++++++
22
util/aio-posix.c | 7 -------
23
util/aio-win32.c | 6 ------
24
11 files changed, 68 insertions(+), 55 deletions(-)
10
25
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
26
diff --git a/block/curl.c b/block/curl.c
12
Reviewed-by: Max Reitz <mreitz@redhat.com>
27
index XXXXXXX..XXXXXXX 100644
13
Message-Id: <20200221112522.1497712-2-stefanha@redhat.com>
28
--- a/block/curl.c
14
Signed-off-by: Max Reitz <mreitz@redhat.com>
29
+++ b/block/curl.c
15
---
30
@@ -XXX,XX +XXX,XX @@ static void curl_multi_check_completion(BDRVCURLState *s)
16
block/qcow2.c | 74 +++++++++++-------------------------------
31
}
17
crypto/block.c | 36 ++++++++++++++++++++
32
}
18
include/crypto/block.h | 22 +++++++++++++
33
19
3 files changed, 77 insertions(+), 55 deletions(-)
34
-static void curl_multi_do(void *arg)
20
35
+static void curl_multi_do_locked(CURLState *s)
21
diff --git a/block/qcow2.c b/block/qcow2.c
36
{
22
index XXXXXXX..XXXXXXX 100644
37
- CURLState *s = (CURLState *)arg;
23
--- a/block/qcow2.c
38
CURLSocket *socket, *next_socket;
24
+++ b/block/qcow2.c
39
int running;
25
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
40
int r;
41
@@ -XXX,XX +XXX,XX @@ static void curl_multi_do(void *arg)
42
}
43
}
44
45
+static void curl_multi_do(void *arg)
46
+{
47
+ CURLState *s = (CURLState *)arg;
48
+
49
+ aio_context_acquire(s->s->aio_context);
50
+ curl_multi_do_locked(s);
51
+ aio_context_release(s->s->aio_context);
52
+}
53
+
54
static void curl_multi_read(void *arg)
55
{
56
CURLState *s = (CURLState *)arg;
57
58
- curl_multi_do(arg);
59
+ aio_context_acquire(s->s->aio_context);
60
+ curl_multi_do_locked(s);
61
curl_multi_check_completion(s->s);
62
+ aio_context_release(s->s->aio_context);
63
}
64
65
static void curl_multi_timeout_do(void *arg)
66
diff --git a/block/iscsi.c b/block/iscsi.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/block/iscsi.c
69
+++ b/block/iscsi.c
70
@@ -XXX,XX +XXX,XX @@ iscsi_process_read(void *arg)
71
IscsiLun *iscsilun = arg;
72
struct iscsi_context *iscsi = iscsilun->iscsi;
73
74
+ aio_context_acquire(iscsilun->aio_context);
75
iscsi_service(iscsi, POLLIN);
76
iscsi_set_events(iscsilun);
77
+ aio_context_release(iscsilun->aio_context);
78
}
79
80
static void
81
@@ -XXX,XX +XXX,XX @@ iscsi_process_write(void *arg)
82
IscsiLun *iscsilun = arg;
83
struct iscsi_context *iscsi = iscsilun->iscsi;
84
85
+ aio_context_acquire(iscsilun->aio_context);
86
iscsi_service(iscsi, POLLOUT);
87
iscsi_set_events(iscsilun);
88
+ aio_context_release(iscsilun->aio_context);
89
}
90
91
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
92
diff --git a/block/linux-aio.c b/block/linux-aio.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/block/linux-aio.c
95
+++ b/block/linux-aio.c
96
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_completion_cb(EventNotifier *e)
97
LinuxAioState *s = container_of(e, LinuxAioState, e);
98
99
if (event_notifier_test_and_clear(&s->e)) {
100
+ aio_context_acquire(s->aio_context);
101
qemu_laio_process_completions_and_submit(s);
102
+ aio_context_release(s->aio_context);
103
}
104
}
105
106
@@ -XXX,XX +XXX,XX @@ static bool qemu_laio_poll_cb(void *opaque)
107
return false;
108
}
109
110
+ aio_context_acquire(s->aio_context);
111
qemu_laio_process_completions_and_submit(s);
112
+ aio_context_release(s->aio_context);
113
return true;
114
}
115
116
diff --git a/block/nfs.c b/block/nfs.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/block/nfs.c
119
+++ b/block/nfs.c
120
@@ -XXX,XX +XXX,XX @@ static void nfs_set_events(NFSClient *client)
121
static void nfs_process_read(void *arg)
122
{
123
NFSClient *client = arg;
124
+
125
+ aio_context_acquire(client->aio_context);
126
nfs_service(client->context, POLLIN);
127
nfs_set_events(client);
128
+ aio_context_release(client->aio_context);
129
}
130
131
static void nfs_process_write(void *arg)
132
{
133
NFSClient *client = arg;
134
+
135
+ aio_context_acquire(client->aio_context);
136
nfs_service(client->context, POLLOUT);
137
nfs_set_events(client);
138
+ aio_context_release(client->aio_context);
139
}
140
141
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
142
diff --git a/block/sheepdog.c b/block/sheepdog.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/block/sheepdog.c
145
+++ b/block/sheepdog.c
146
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
26
return ret;
147
return ret;
27
}
148
}
28
149
29
-static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block,
150
-static void restart_co_req(void *opaque)
30
- size_t headerlen, void *opaque, Error **errp)
31
-{
151
-{
32
- size_t *headerlenp = opaque;
152
- Coroutine *co = opaque;
33
-
153
-
34
- /* Stash away the payload size */
154
- qemu_coroutine_enter(co);
35
- *headerlenp = headerlen;
36
- return 0;
37
-}
155
-}
38
-
156
-
39
-static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
157
typedef struct SheepdogReqCo {
40
- size_t offset, const uint8_t *buf, size_t buflen,
158
int sockfd;
41
- void *opaque, Error **errp)
159
BlockDriverState *bs;
42
-{
160
@@ -XXX,XX +XXX,XX @@ typedef struct SheepdogReqCo {
43
- /* Discard the bytes, we're not actually writing to an image */
161
unsigned int *rlen;
44
- return buflen;
162
int ret;
163
bool finished;
164
+ Coroutine *co;
165
} SheepdogReqCo;
166
167
+static void restart_co_req(void *opaque)
168
+{
169
+ SheepdogReqCo *srco = opaque;
170
+
171
+ aio_co_wake(srco->co);
172
+}
173
+
174
static coroutine_fn void do_co_req(void *opaque)
175
{
176
int ret;
177
- Coroutine *co;
178
SheepdogReqCo *srco = opaque;
179
int sockfd = srco->sockfd;
180
SheepdogReq *hdr = srco->hdr;
181
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void do_co_req(void *opaque)
182
unsigned int *wlen = srco->wlen;
183
unsigned int *rlen = srco->rlen;
184
185
- co = qemu_coroutine_self();
186
+ srco->co = qemu_coroutine_self();
187
aio_set_fd_handler(srco->aio_context, sockfd, false,
188
- NULL, restart_co_req, NULL, co);
189
+ NULL, restart_co_req, NULL, srco);
190
191
ret = send_co_req(sockfd, hdr, data, wlen);
192
if (ret < 0) {
193
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void do_co_req(void *opaque)
194
}
195
196
aio_set_fd_handler(srco->aio_context, sockfd, false,
197
- restart_co_req, NULL, NULL, co);
198
+ restart_co_req, NULL, NULL, srco);
199
200
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
201
if (ret != sizeof(*hdr)) {
202
@@ -XXX,XX +XXX,XX @@ out:
203
aio_set_fd_handler(srco->aio_context, sockfd, false,
204
NULL, NULL, NULL, NULL);
205
206
+ srco->co = NULL;
207
srco->ret = ret;
208
srco->finished = true;
209
if (srco->bs) {
210
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
211
* We've finished all requests which belong to the AIOCB, so
212
* we can switch back to sd_co_readv/writev now.
213
*/
214
- qemu_coroutine_enter(acb->coroutine);
215
+ aio_co_wake(acb->coroutine);
216
}
217
218
return;
219
@@ -XXX,XX +XXX,XX @@ static void co_read_response(void *opaque)
220
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
221
}
222
223
- qemu_coroutine_enter(s->co_recv);
224
+ aio_co_wake(s->co_recv);
225
}
226
227
static void co_write_request(void *opaque)
228
{
229
BDRVSheepdogState *s = opaque;
230
231
- qemu_coroutine_enter(s->co_send);
232
+ aio_co_wake(s->co_send);
233
}
234
235
/*
236
diff --git a/block/ssh.c b/block/ssh.c
237
index XXXXXXX..XXXXXXX 100644
238
--- a/block/ssh.c
239
+++ b/block/ssh.c
240
@@ -XXX,XX +XXX,XX @@ static void restart_coroutine(void *opaque)
241
242
DPRINTF("co=%p", co);
243
244
- qemu_coroutine_enter(co);
245
+ aio_co_wake(co);
246
}
247
248
-static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
249
+/* A non-blocking call returned EAGAIN, so yield, ensuring the
250
+ * handlers are set up so that we'll be rescheduled when there is an
251
+ * interesting event on the socket.
252
+ */
253
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
254
{
255
int r;
256
IOHandler *rd_handler = NULL, *wr_handler = NULL;
257
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
258
259
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
260
false, rd_handler, wr_handler, NULL, co);
45
-}
261
-}
46
-
262
-
47
-/* Determine the number of bytes for the LUKS payload */
263
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
48
-static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
264
- BlockDriverState *bs)
49
- Error **errp)
50
-{
265
-{
51
- QDict *opts_qdict;
266
- DPRINTF("s->sock=%d", s->sock);
52
- QDict *cryptoopts_qdict;
267
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
53
- QCryptoBlockCreateOptions *cryptoopts;
268
- false, NULL, NULL, NULL, NULL);
54
- QCryptoBlock *crypto;
55
-
56
- /* Extract "encrypt." options into a qdict */
57
- opts_qdict = qemu_opts_to_qdict(opts, NULL);
58
- qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
59
- qobject_unref(opts_qdict);
60
-
61
- /* Build QCryptoBlockCreateOptions object from qdict */
62
- qdict_put_str(cryptoopts_qdict, "format", "luks");
63
- cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
64
- qobject_unref(cryptoopts_qdict);
65
- if (!cryptoopts) {
66
- return false;
67
- }
68
-
69
- /* Fake LUKS creation in order to determine the payload size */
70
- crypto = qcrypto_block_create(cryptoopts, "encrypt.",
71
- qcow2_measure_crypto_hdr_init_func,
72
- qcow2_measure_crypto_hdr_write_func,
73
- len, errp);
74
- qapi_free_QCryptoBlockCreateOptions(cryptoopts);
75
- if (!crypto) {
76
- return false;
77
- }
78
-
79
- qcrypto_block_free(crypto);
80
- return true;
81
-}
269
-}
82
-
270
-
83
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
271
-/* A non-blocking call returned EAGAIN, so yield, ensuring the
84
Error **errp)
272
- * handlers are set up so that we'll be rescheduled when there is an
85
{
273
- * interesting event on the socket.
86
@@ -XXX,XX +XXX,XX @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
274
- */
87
g_free(optstr);
275
-static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
88
276
-{
89
if (has_luks) {
277
- set_fd_handler(s, bs);
90
+ g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
278
qemu_coroutine_yield();
91
+ QDict *opts_qdict;
279
- clear_fd_handler(s, bs);
92
+ QDict *cryptoopts;
280
+ DPRINTF("s->sock=%d - back", s->sock);
93
size_t headerlen;
281
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
94
282
+ NULL, NULL, NULL, NULL);
95
- if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) {
283
}
96
+ opts_qdict = qemu_opts_to_qdict(opts, NULL);
284
97
+ qdict_extract_subqdict(opts_qdict, &cryptoopts, "encrypt.");
285
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
98
+ qobject_unref(opts_qdict);
286
diff --git a/block/win32-aio.c b/block/win32-aio.c
99
+
287
index XXXXXXX..XXXXXXX 100644
100
+ qdict_put_str(cryptoopts, "format", "luks");
288
--- a/block/win32-aio.c
101
+
289
+++ b/block/win32-aio.c
102
+ create_opts = block_crypto_create_opts_init(cryptoopts, errp);
290
@@ -XXX,XX +XXX,XX @@ struct QEMUWin32AIOState {
103
+ qobject_unref(cryptoopts);
291
HANDLE hIOCP;
104
+ if (!create_opts) {
292
EventNotifier e;
105
+ goto err;
293
int count;
106
+ }
294
- bool is_aio_context_attached;
107
+
295
+ AioContext *aio_ctx;
108
+ if (!qcrypto_block_calculate_payload_offset(create_opts,
296
};
109
+ "encrypt.",
297
110
+ &headerlen,
298
typedef struct QEMUWin32AIOCB {
111
+ &local_err)) {
299
@@ -XXX,XX +XXX,XX @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
112
goto err;
300
}
301
302
303
+ aio_context_acquire(s->aio_ctx);
304
waiocb->common.cb(waiocb->common.opaque, ret);
305
+ aio_context_release(s->aio_ctx);
306
qemu_aio_unref(waiocb);
307
}
308
309
@@ -XXX,XX +XXX,XX @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
310
AioContext *old_context)
311
{
312
aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
313
- aio->is_aio_context_attached = false;
314
+ aio->aio_ctx = NULL;
315
}
316
317
void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
318
AioContext *new_context)
319
{
320
- aio->is_aio_context_attached = true;
321
+ aio->aio_ctx = new_context;
322
aio_set_event_notifier(new_context, &aio->e, false,
323
win32_aio_completion_cb, NULL);
324
}
325
@@ -XXX,XX +XXX,XX @@ out_free_state:
326
327
void win32_aio_cleanup(QEMUWin32AIOState *aio)
328
{
329
- assert(!aio->is_aio_context_attached);
330
+ assert(!aio->aio_ctx);
331
CloseHandle(aio->hIOCP);
332
event_notifier_cleanup(&aio->e);
333
g_free(aio);
334
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
335
index XXXXXXX..XXXXXXX 100644
336
--- a/hw/block/virtio-blk.c
337
+++ b/hw/block/virtio-blk.c
338
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
339
{
340
VirtIOBlockIoctlReq *ioctl_req = opaque;
341
VirtIOBlockReq *req = ioctl_req->req;
342
- VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
343
+ VirtIOBlock *s = req->dev;
344
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
345
struct virtio_scsi_inhdr *scsi;
346
struct sg_io_hdr *hdr;
347
348
@@ -XXX,XX +XXX,XX @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
349
MultiReqBuffer mrb = {};
350
bool progress = false;
351
352
+ aio_context_acquire(blk_get_aio_context(s->blk));
353
blk_io_plug(s->blk);
354
355
do {
356
@@ -XXX,XX +XXX,XX @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
357
}
358
359
blk_io_unplug(s->blk);
360
+ aio_context_release(blk_get_aio_context(s->blk));
361
return progress;
362
}
363
364
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
365
index XXXXXXX..XXXXXXX 100644
366
--- a/hw/scsi/virtio-scsi.c
367
+++ b/hw/scsi/virtio-scsi.c
368
@@ -XXX,XX +XXX,XX @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
369
VirtIOSCSIReq *req;
370
bool progress = false;
371
372
+ virtio_scsi_acquire(s);
373
while ((req = virtio_scsi_pop_req(s, vq))) {
374
progress = true;
375
virtio_scsi_handle_ctrl_req(s, req);
376
}
377
+ virtio_scsi_release(s);
378
return progress;
379
}
380
381
@@ -XXX,XX +XXX,XX @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
382
383
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
384
385
+ virtio_scsi_acquire(s);
386
do {
387
virtio_queue_set_notification(vq, 0);
388
389
@@ -XXX,XX +XXX,XX @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
390
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
391
virtio_scsi_handle_cmd_req_submit(s, req);
392
}
393
+ virtio_scsi_release(s);
394
return progress;
395
}
396
397
@@ -XXX,XX +XXX,XX @@ out:
398
399
bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
400
{
401
+ virtio_scsi_acquire(s);
402
if (s->events_dropped) {
403
virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
404
+ virtio_scsi_release(s);
405
return true;
406
}
407
+ virtio_scsi_release(s);
408
return false;
409
}
410
411
diff --git a/util/aio-posix.c b/util/aio-posix.c
412
index XXXXXXX..XXXXXXX 100644
413
--- a/util/aio-posix.c
414
+++ b/util/aio-posix.c
415
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
416
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
417
aio_node_check(ctx, node->is_external) &&
418
node->io_read) {
419
- aio_context_acquire(ctx);
420
node->io_read(node->opaque);
421
- aio_context_release(ctx);
422
423
/* aio_notify() does not count as progress */
424
if (node->opaque != &ctx->notifier) {
425
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
426
(revents & (G_IO_OUT | G_IO_ERR)) &&
427
aio_node_check(ctx, node->is_external) &&
428
node->io_write) {
429
- aio_context_acquire(ctx);
430
node->io_write(node->opaque);
431
- aio_context_release(ctx);
432
progress = true;
113
}
433
}
114
434
115
diff --git a/crypto/block.c b/crypto/block.c
435
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
116
index XXXXXXX..XXXXXXX 100644
436
start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
117
--- a/crypto/block.c
437
}
118
+++ b/crypto/block.c
438
119
@@ -XXX,XX +XXX,XX @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
439
- aio_context_acquire(ctx);
120
}
440
progress = try_poll_mode(ctx, blocking);
121
441
- aio_context_release(ctx);
122
442
-
123
+static ssize_t qcrypto_block_headerlen_hdr_init_func(QCryptoBlock *block,
443
if (!progress) {
124
+ size_t headerlen, void *opaque, Error **errp)
444
assert(npfd == 0);
125
+{
445
126
+ size_t *headerlenp = opaque;
446
diff --git a/util/aio-win32.c b/util/aio-win32.c
127
+
447
index XXXXXXX..XXXXXXX 100644
128
+ /* Stash away the payload size */
448
--- a/util/aio-win32.c
129
+ *headerlenp = headerlen;
449
+++ b/util/aio-win32.c
130
+ return 0;
450
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
131
+}
451
(revents || event_notifier_get_handle(node->e) == event) &&
132
+
452
node->io_notify) {
133
+
453
node->pfd.revents = 0;
134
+static ssize_t qcrypto_block_headerlen_hdr_write_func(QCryptoBlock *block,
454
- aio_context_acquire(ctx);
135
+ size_t offset, const uint8_t *buf, size_t buflen,
455
node->io_notify(node->e);
136
+ void *opaque, Error **errp)
456
- aio_context_release(ctx);
137
+{
457
138
+ /* Discard the bytes, we're not actually writing to an image */
458
/* aio_notify() does not count as progress */
139
+ return buflen;
459
if (node->e != &ctx->notifier) {
140
+}
460
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
141
+
461
(node->io_read || node->io_write)) {
142
+
462
node->pfd.revents = 0;
143
+bool
463
if ((revents & G_IO_IN) && node->io_read) {
144
+qcrypto_block_calculate_payload_offset(QCryptoBlockCreateOptions *create_opts,
464
- aio_context_acquire(ctx);
145
+ const char *optprefix,
465
node->io_read(node->opaque);
146
+ size_t *len,
466
- aio_context_release(ctx);
147
+ Error **errp)
467
progress = true;
148
+{
468
}
149
+ /* Fake LUKS creation in order to determine the payload size */
469
if ((revents & G_IO_OUT) && node->io_write) {
150
+ g_autoptr(QCryptoBlock) crypto =
470
- aio_context_acquire(ctx);
151
+ qcrypto_block_create(create_opts, optprefix,
471
node->io_write(node->opaque);
152
+ qcrypto_block_headerlen_hdr_init_func,
472
- aio_context_release(ctx);
153
+ qcrypto_block_headerlen_hdr_write_func,
473
progress = true;
154
+ len, errp);
474
}
155
+ return crypto != NULL;
475
156
+}
157
+
158
+
159
QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
160
Error **errp)
161
{
162
diff --git a/include/crypto/block.h b/include/crypto/block.h
163
index XXXXXXX..XXXXXXX 100644
164
--- a/include/crypto/block.h
165
+++ b/include/crypto/block.h
166
@@ -XXX,XX +XXX,XX @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
167
Error **errp);
168
169
170
+/**
171
+ * qcrypto_block_calculate_payload_offset:
172
+ * @create_opts: the encryption options
173
+ * @optprefix: name prefix for options
174
+ * @len: output for number of header bytes before payload
175
+ * @errp: pointer to a NULL-initialized error object
176
+ *
177
+ * Calculate the number of header bytes before the payload in an encrypted
178
+ * storage volume. The header is an area before the payload that is reserved
179
+ * for encryption metadata.
180
+ *
181
+ * Returns: true on success, false on error
182
+ */
183
+bool
184
+qcrypto_block_calculate_payload_offset(QCryptoBlockCreateOptions *create_opts,
185
+ const char *optprefix,
186
+ size_t *len,
187
+ Error **errp);
188
+
189
+
190
/**
191
* qcrypto_block_get_info:
192
* @block: the block encryption object
193
@@ -XXX,XX +XXX,XX @@ uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block);
194
void qcrypto_block_free(QCryptoBlock *block);
195
196
G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlock, qcrypto_block_free)
197
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlockCreateOptions,
198
+ qapi_free_QCryptoBlockCreateOptions)
199
200
#endif /* QCRYPTO_BLOCK_H */
201
--
476
--
202
2.24.1
477
2.9.3
203
478
204
479
diff view generated by jsdifflib
1
From: David Edmondson <david.edmondson@oracle.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
RFC 7230 section 3.2 indicates that HTTP header field names are case
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
insensitive.
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/archipelago.c | 3 +++
11
block/blkreplay.c | 2 +-
12
block/block-backend.c | 6 ++++++
13
block/curl.c | 26 ++++++++++++++++++--------
14
block/gluster.c | 9 +--------
15
block/io.c | 6 +++++-
16
block/iscsi.c | 6 +++++-
17
block/linux-aio.c | 15 +++++++++------
18
block/nfs.c | 3 ++-
19
block/null.c | 4 ++++
20
block/qed.c | 3 +++
21
block/rbd.c | 4 ++++
22
dma-helpers.c | 2 ++
23
hw/block/virtio-blk.c | 2 ++
24
hw/scsi/scsi-bus.c | 2 ++
25
util/async.c | 4 ++--
26
util/thread-pool.c | 2 ++
27
17 files changed, 71 insertions(+), 28 deletions(-)
5
28
6
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
29
diff --git a/block/archipelago.c b/block/archipelago.c
7
Message-Id: <20200224101310.101169-3-david.edmondson@oracle.com>
30
index XXXXXXX..XXXXXXX 100644
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
31
--- a/block/archipelago.c
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
32
+++ b/block/archipelago.c
10
---
33
@@ -XXX,XX +XXX,XX @@ static void qemu_archipelago_complete_aio(void *opaque)
11
block/curl.c | 5 +++--
34
{
12
1 file changed, 3 insertions(+), 2 deletions(-)
35
AIORequestData *reqdata = (AIORequestData *) opaque;
13
36
ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
37
+ AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
38
39
+ aio_context_acquire(ctx);
40
aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
41
+ aio_context_release(ctx);
42
aio_cb->status = 0;
43
44
qemu_aio_unref(aio_cb);
45
diff --git a/block/blkreplay.c b/block/blkreplay.c
46
index XXXXXXX..XXXXXXX 100755
47
--- a/block/blkreplay.c
48
+++ b/block/blkreplay.c
49
@@ -XXX,XX +XXX,XX @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
50
static void blkreplay_bh_cb(void *opaque)
51
{
52
Request *req = opaque;
53
- qemu_coroutine_enter(req->co);
54
+ aio_co_wake(req->co);
55
qemu_bh_delete(req->bh);
56
g_free(req);
57
}
58
diff --git a/block/block-backend.c b/block/block-backend.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/block/block-backend.c
61
+++ b/block/block-backend.c
62
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
63
static void error_callback_bh(void *opaque)
64
{
65
struct BlockBackendAIOCB *acb = opaque;
66
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
67
68
bdrv_dec_in_flight(acb->common.bs);
69
+ aio_context_acquire(ctx);
70
acb->common.cb(acb->common.opaque, acb->ret);
71
+ aio_context_release(ctx);
72
qemu_aio_unref(acb);
73
}
74
75
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
76
static void blk_aio_complete_bh(void *opaque)
77
{
78
BlkAioEmAIOCB *acb = opaque;
79
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
80
81
assert(acb->has_returned);
82
+ aio_context_acquire(ctx);
83
blk_aio_complete(acb);
84
+ aio_context_release(ctx);
85
}
86
87
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
14
diff --git a/block/curl.c b/block/curl.c
88
diff --git a/block/curl.c b/block/curl.c
15
index XXXXXXX..XXXXXXX 100644
89
index XXXXXXX..XXXXXXX 100644
16
--- a/block/curl.c
90
--- a/block/curl.c
17
+++ b/block/curl.c
91
+++ b/block/curl.c
18
@@ -XXX,XX +XXX,XX @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
92
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
19
size_t realsize = size * nmemb;
93
{
20
const char *header = (char *)ptr;
94
CURLState *state;
21
const char *end = header + realsize;
95
int running;
22
- const char *accept_ranges = "Accept-Ranges:";
96
+ int ret = -EINPROGRESS;
23
+ const char *accept_ranges = "accept-ranges:";
97
24
const char *bytes = "bytes";
98
CURLAIOCB *acb = p;
25
99
- BDRVCURLState *s = acb->common.bs->opaque;
26
if (realsize >= strlen(accept_ranges)
100
+ BlockDriverState *bs = acb->common.bs;
27
- && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) {
101
+ BDRVCURLState *s = bs->opaque;
28
+ && g_ascii_strncasecmp(header, accept_ranges,
102
+ AioContext *ctx = bdrv_get_aio_context(bs);
29
+ strlen(accept_ranges)) == 0) {
103
30
104
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
31
char *p = strchr(header, ':') + 1;
105
size_t end;
32
106
107
+ aio_context_acquire(ctx);
108
+
109
// In case we have the requested data already (e.g. read-ahead),
110
// we can just call the callback and be done.
111
switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
112
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
113
qemu_aio_unref(acb);
114
// fall through
115
case FIND_RET_WAIT:
116
- return;
117
+ goto out;
118
default:
119
break;
120
}
121
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
122
// No cache found, so let's start a new request
123
state = curl_init_state(acb->common.bs, s);
124
if (!state) {
125
- acb->common.cb(acb->common.opaque, -EIO);
126
- qemu_aio_unref(acb);
127
- return;
128
+ ret = -EIO;
129
+ goto out;
130
}
131
132
acb->start = 0;
133
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
134
state->orig_buf = g_try_malloc(state->buf_len);
135
if (state->buf_len && state->orig_buf == NULL) {
136
curl_clean_state(state);
137
- acb->common.cb(acb->common.opaque, -ENOMEM);
138
- qemu_aio_unref(acb);
139
- return;
140
+ ret = -ENOMEM;
141
+ goto out;
142
}
143
state->acb[0] = acb;
144
145
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
146
147
/* Tell curl it needs to kick things off */
148
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
149
+
150
+out:
151
+ if (ret != -EINPROGRESS) {
152
+ acb->common.cb(acb->common.opaque, ret);
153
+ qemu_aio_unref(acb);
154
+ }
155
+ aio_context_release(ctx);
156
}
157
158
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
159
diff --git a/block/gluster.c b/block/gluster.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/block/gluster.c
162
+++ b/block/gluster.c
163
@@ -XXX,XX +XXX,XX @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
164
return qemu_gluster_glfs_init(gconf, errp);
165
}
166
167
-static void qemu_gluster_complete_aio(void *opaque)
168
-{
169
- GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
170
-
171
- qemu_coroutine_enter(acb->coroutine);
172
-}
173
-
174
/*
175
* AIO callback routine called from GlusterFS thread.
176
*/
177
@@ -XXX,XX +XXX,XX @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
178
acb->ret = -EIO; /* Partial read/write - fail it */
179
}
180
181
- aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
182
+ aio_co_schedule(acb->aio_context, acb->coroutine);
183
}
184
185
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
186
diff --git a/block/io.c b/block/io.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/block/io.c
189
+++ b/block/io.c
190
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
191
bdrv_dec_in_flight(bs);
192
bdrv_drained_begin(bs);
193
data->done = true;
194
- qemu_coroutine_enter(co);
195
+ aio_co_wake(co);
196
}
197
198
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
199
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
200
static void bdrv_co_em_bh(void *opaque)
201
{
202
BlockAIOCBCoroutine *acb = opaque;
203
+ BlockDriverState *bs = acb->common.bs;
204
+ AioContext *ctx = bdrv_get_aio_context(bs);
205
206
assert(!acb->need_bh);
207
+ aio_context_acquire(ctx);
208
bdrv_co_complete(acb);
209
+ aio_context_release(ctx);
210
}
211
212
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
213
diff --git a/block/iscsi.c b/block/iscsi.c
214
index XXXXXXX..XXXXXXX 100644
215
--- a/block/iscsi.c
216
+++ b/block/iscsi.c
217
@@ -XXX,XX +XXX,XX @@ static void
218
iscsi_bh_cb(void *p)
219
{
220
IscsiAIOCB *acb = p;
221
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
222
223
qemu_bh_delete(acb->bh);
224
225
g_free(acb->buf);
226
acb->buf = NULL;
227
228
+ aio_context_acquire(ctx);
229
acb->common.cb(acb->common.opaque, acb->status);
230
+ aio_context_release(ctx);
231
232
if (acb->task != NULL) {
233
scsi_free_scsi_task(acb->task);
234
@@ -XXX,XX +XXX,XX @@ iscsi_schedule_bh(IscsiAIOCB *acb)
235
static void iscsi_co_generic_bh_cb(void *opaque)
236
{
237
struct IscsiTask *iTask = opaque;
238
+
239
iTask->complete = 1;
240
- qemu_coroutine_enter(iTask->co);
241
+ aio_co_wake(iTask->co);
242
}
243
244
static void iscsi_retry_timer_expired(void *opaque)
245
diff --git a/block/linux-aio.c b/block/linux-aio.c
246
index XXXXXXX..XXXXXXX 100644
247
--- a/block/linux-aio.c
248
+++ b/block/linux-aio.c
249
@@ -XXX,XX +XXX,XX @@ struct LinuxAioState {
250
io_context_t ctx;
251
EventNotifier e;
252
253
- /* io queue for submit at batch */
254
+ /* io queue for submit at batch. Protected by AioContext lock. */
255
LaioQueue io_q;
256
257
- /* I/O completion processing */
258
+ /* I/O completion processing. Only runs in I/O thread. */
259
QEMUBH *completion_bh;
260
int event_idx;
261
int event_max;
262
@@ -XXX,XX +XXX,XX @@ static inline ssize_t io_event_ret(struct io_event *ev)
263
*/
264
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
265
{
266
+ LinuxAioState *s = laiocb->ctx;
267
int ret;
268
269
ret = laiocb->ret;
270
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
271
}
272
273
laiocb->ret = ret;
274
+ aio_context_acquire(s->aio_context);
275
if (laiocb->co) {
276
/* If the coroutine is already entered it must be in ioq_submit() and
277
* will notice laio->ret has been filled in when it eventually runs
278
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
279
laiocb->common.cb(laiocb->common.opaque, ret);
280
qemu_aio_unref(laiocb);
281
}
282
+ aio_context_release(s->aio_context);
283
}
284
285
/**
286
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions(LinuxAioState *s)
287
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
288
{
289
qemu_laio_process_completions(s);
290
+
291
+ aio_context_acquire(s->aio_context);
292
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
293
ioq_submit(s);
294
}
295
+ aio_context_release(s->aio_context);
296
}
297
298
static void qemu_laio_completion_bh(void *opaque)
299
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_completion_cb(EventNotifier *e)
300
LinuxAioState *s = container_of(e, LinuxAioState, e);
301
302
if (event_notifier_test_and_clear(&s->e)) {
303
- aio_context_acquire(s->aio_context);
304
qemu_laio_process_completions_and_submit(s);
305
- aio_context_release(s->aio_context);
306
}
307
}
308
309
@@ -XXX,XX +XXX,XX @@ static bool qemu_laio_poll_cb(void *opaque)
310
return false;
311
}
312
313
- aio_context_acquire(s->aio_context);
314
qemu_laio_process_completions_and_submit(s);
315
- aio_context_release(s->aio_context);
316
return true;
317
}
318
319
@@ -XXX,XX +XXX,XX @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
320
{
321
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
322
qemu_bh_delete(s->completion_bh);
323
+ s->aio_context = NULL;
324
}
325
326
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
327
diff --git a/block/nfs.c b/block/nfs.c
328
index XXXXXXX..XXXXXXX 100644
329
--- a/block/nfs.c
330
+++ b/block/nfs.c
331
@@ -XXX,XX +XXX,XX @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
332
static void nfs_co_generic_bh_cb(void *opaque)
333
{
334
NFSRPC *task = opaque;
335
+
336
task->complete = 1;
337
- qemu_coroutine_enter(task->co);
338
+ aio_co_wake(task->co);
339
}
340
341
static void
342
diff --git a/block/null.c b/block/null.c
343
index XXXXXXX..XXXXXXX 100644
344
--- a/block/null.c
345
+++ b/block/null.c
346
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo null_aiocb_info = {
347
static void null_bh_cb(void *opaque)
348
{
349
NullAIOCB *acb = opaque;
350
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
351
+
352
+ aio_context_acquire(ctx);
353
acb->common.cb(acb->common.opaque, 0);
354
+ aio_context_release(ctx);
355
qemu_aio_unref(acb);
356
}
357
358
diff --git a/block/qed.c b/block/qed.c
359
index XXXXXXX..XXXXXXX 100644
360
--- a/block/qed.c
361
+++ b/block/qed.c
362
@@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
363
static void qed_aio_complete_bh(void *opaque)
364
{
365
QEDAIOCB *acb = opaque;
366
+ BDRVQEDState *s = acb_to_s(acb);
367
BlockCompletionFunc *cb = acb->common.cb;
368
void *user_opaque = acb->common.opaque;
369
int ret = acb->bh_ret;
370
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete_bh(void *opaque)
371
qemu_aio_unref(acb);
372
373
/* Invoke callback */
374
+ qed_acquire(s);
375
cb(user_opaque, ret);
376
+ qed_release(s);
377
}
378
379
static void qed_aio_complete(QEDAIOCB *acb, int ret)
380
diff --git a/block/rbd.c b/block/rbd.c
381
index XXXXXXX..XXXXXXX 100644
382
--- a/block/rbd.c
383
+++ b/block/rbd.c
384
@@ -XXX,XX +XXX,XX @@ shutdown:
385
static void qemu_rbd_complete_aio(RADOSCB *rcb)
386
{
387
RBDAIOCB *acb = rcb->acb;
388
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
389
int64_t r;
390
391
r = rcb->ret;
392
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
393
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
394
}
395
qemu_vfree(acb->bounce);
396
+
397
+ aio_context_acquire(ctx);
398
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
399
+ aio_context_release(ctx);
400
401
qemu_aio_unref(acb);
402
}
403
diff --git a/dma-helpers.c b/dma-helpers.c
404
index XXXXXXX..XXXXXXX 100644
405
--- a/dma-helpers.c
406
+++ b/dma-helpers.c
407
@@ -XXX,XX +XXX,XX @@ static void dma_blk_cb(void *opaque, int ret)
408
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
409
}
410
411
+ aio_context_acquire(dbs->ctx);
412
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
413
dma_blk_cb, dbs, dbs->io_func_opaque);
414
+ aio_context_release(dbs->ctx);
415
assert(dbs->acb);
416
}
417
418
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
419
index XXXXXXX..XXXXXXX 100644
420
--- a/hw/block/virtio-blk.c
421
+++ b/hw/block/virtio-blk.c
422
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_bh(void *opaque)
423
424
s->rq = NULL;
425
426
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
427
while (req) {
428
VirtIOBlockReq *next = req->next;
429
if (virtio_blk_handle_request(req, &mrb)) {
430
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_bh(void *opaque)
431
if (mrb.num_reqs) {
432
virtio_blk_submit_multireq(s->blk, &mrb);
433
}
434
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
435
}
436
437
static void virtio_blk_dma_restart_cb(void *opaque, int running,
438
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
439
index XXXXXXX..XXXXXXX 100644
440
--- a/hw/scsi/scsi-bus.c
441
+++ b/hw/scsi/scsi-bus.c
442
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_restart_bh(void *opaque)
443
qemu_bh_delete(s->bh);
444
s->bh = NULL;
445
446
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
447
QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
448
scsi_req_ref(req);
449
if (req->retry) {
450
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_restart_bh(void *opaque)
451
}
452
scsi_req_unref(req);
453
}
454
+ aio_context_release(blk_get_aio_context(s->conf.blk));
455
}
456
457
void scsi_req_retry(SCSIRequest *req)
458
diff --git a/util/async.c b/util/async.c
459
index XXXXXXX..XXXXXXX 100644
460
--- a/util/async.c
461
+++ b/util/async.c
462
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
463
ret = 1;
464
}
465
bh->idle = 0;
466
- aio_context_acquire(ctx);
467
aio_bh_call(bh);
468
- aio_context_release(ctx);
469
}
470
if (bh->deleted) {
471
deleted = true;
472
@@ -XXX,XX +XXX,XX @@ static void co_schedule_bh_cb(void *opaque)
473
Coroutine *co = QSLIST_FIRST(&straight);
474
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
475
trace_aio_co_schedule_bh_cb(ctx, co);
476
+ aio_context_acquire(ctx);
477
qemu_coroutine_enter(co);
478
+ aio_context_release(ctx);
479
}
480
}
481
482
diff --git a/util/thread-pool.c b/util/thread-pool.c
483
index XXXXXXX..XXXXXXX 100644
484
--- a/util/thread-pool.c
485
+++ b/util/thread-pool.c
486
@@ -XXX,XX +XXX,XX @@ static void thread_pool_completion_bh(void *opaque)
487
ThreadPool *pool = opaque;
488
ThreadPoolElement *elem, *next;
489
490
+ aio_context_acquire(pool->ctx);
491
restart:
492
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
493
if (elem->state != THREAD_DONE) {
494
@@ -XXX,XX +XXX,XX @@ restart:
495
qemu_aio_unref(elem);
496
}
497
}
498
+ aio_context_release(pool->ctx);
499
}
500
501
static void thread_pool_cancel(BlockAIOCB *acb)
33
--
502
--
34
2.24.1
503
2.9.3
35
504
36
505
diff view generated by jsdifflib
1
From: David Edmondson <david.edmondson@oracle.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
RFC 7230 section 3.2 indicates that whitespace is permitted between
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
the field name and field value and after the field value.
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/archipelago.c | 3 ---
11
block/block-backend.c | 7 -------
12
block/curl.c | 2 +-
13
block/io.c | 6 +-----
14
block/iscsi.c | 3 ---
15
block/linux-aio.c | 5 +----
16
block/mirror.c | 12 +++++++++---
17
block/null.c | 8 --------
18
block/qed-cluster.c | 2 ++
19
block/qed-table.c | 12 ++++++++++--
20
block/qed.c | 4 ++--
21
block/rbd.c | 4 ----
22
block/win32-aio.c | 3 ---
23
hw/block/virtio-blk.c | 12 +++++++++++-
24
hw/scsi/scsi-disk.c | 15 +++++++++++++++
25
hw/scsi/scsi-generic.c | 20 +++++++++++++++++---
26
util/thread-pool.c | 4 +++-
27
17 files changed, 72 insertions(+), 50 deletions(-)
5
28
6
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
29
diff --git a/block/archipelago.c b/block/archipelago.c
7
Message-Id: <20200224101310.101169-2-david.edmondson@oracle.com>
30
index XXXXXXX..XXXXXXX 100644
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
31
--- a/block/archipelago.c
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
32
+++ b/block/archipelago.c
10
---
33
@@ -XXX,XX +XXX,XX @@ static void qemu_archipelago_complete_aio(void *opaque)
11
block/curl.c | 31 +++++++++++++++++++++++++++----
34
{
12
1 file changed, 27 insertions(+), 4 deletions(-)
35
AIORequestData *reqdata = (AIORequestData *) opaque;
13
36
ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
37
- AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
38
39
- aio_context_acquire(ctx);
40
aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
41
- aio_context_release(ctx);
42
aio_cb->status = 0;
43
44
qemu_aio_unref(aio_cb);
45
diff --git a/block/block-backend.c b/block/block-backend.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/block/block-backend.c
48
+++ b/block/block-backend.c
49
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
50
static void error_callback_bh(void *opaque)
51
{
52
struct BlockBackendAIOCB *acb = opaque;
53
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
54
55
bdrv_dec_in_flight(acb->common.bs);
56
- aio_context_acquire(ctx);
57
acb->common.cb(acb->common.opaque, acb->ret);
58
- aio_context_release(ctx);
59
qemu_aio_unref(acb);
60
}
61
62
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
63
static void blk_aio_complete_bh(void *opaque)
64
{
65
BlkAioEmAIOCB *acb = opaque;
66
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
67
-
68
assert(acb->has_returned);
69
- aio_context_acquire(ctx);
70
blk_aio_complete(acb);
71
- aio_context_release(ctx);
72
}
73
74
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
14
diff --git a/block/curl.c b/block/curl.c
75
diff --git a/block/curl.c b/block/curl.c
15
index XXXXXXX..XXXXXXX 100644
76
index XXXXXXX..XXXXXXX 100644
16
--- a/block/curl.c
77
--- a/block/curl.c
17
+++ b/block/curl.c
78
+++ b/block/curl.c
18
@@ -XXX,XX +XXX,XX @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
79
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
19
{
80
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
20
BDRVCURLState *s = opaque;
81
21
size_t realsize = size * nmemb;
82
out:
22
- const char *accept_line = "Accept-Ranges: bytes";
83
+ aio_context_release(ctx);
23
+ const char *header = (char *)ptr;
84
if (ret != -EINPROGRESS) {
24
+ const char *end = header + realsize;
85
acb->common.cb(acb->common.opaque, ret);
25
+ const char *accept_ranges = "Accept-Ranges:";
86
qemu_aio_unref(acb);
26
+ const char *bytes = "bytes";
87
}
27
88
- aio_context_release(ctx);
28
- if (realsize >= strlen(accept_line)
89
}
29
- && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) {
90
30
- s->accept_range = true;
91
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
31
+ if (realsize >= strlen(accept_ranges)
92
diff --git a/block/io.c b/block/io.c
32
+ && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) {
93
index XXXXXXX..XXXXXXX 100644
33
+
94
--- a/block/io.c
34
+ char *p = strchr(header, ':') + 1;
95
+++ b/block/io.c
35
+
96
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
36
+ /* Skip whitespace between the header name and value. */
97
CoroutineIOCompletion *co = opaque;
37
+ while (p < end && *p && g_ascii_isspace(*p)) {
98
38
+ p++;
99
co->ret = ret;
39
+ }
100
- qemu_coroutine_enter(co->coroutine);
40
+
101
+ aio_co_wake(co->coroutine);
41
+ if (end - p >= strlen(bytes)
102
}
42
+ && strncmp(p, bytes, strlen(bytes)) == 0) {
103
43
+
104
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
44
+ /* Check that there is nothing but whitespace after the value. */
105
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
45
+ p += strlen(bytes);
106
static void bdrv_co_em_bh(void *opaque)
46
+ while (p < end && *p && g_ascii_isspace(*p)) {
107
{
47
+ p++;
108
BlockAIOCBCoroutine *acb = opaque;
48
+ }
109
- BlockDriverState *bs = acb->common.bs;
49
+
110
- AioContext *ctx = bdrv_get_aio_context(bs);
50
+ if (p == end || !*p) {
111
51
+ s->accept_range = true;
112
assert(!acb->need_bh);
52
+ }
113
- aio_context_acquire(ctx);
53
+ }
114
bdrv_co_complete(acb);
54
}
115
- aio_context_release(ctx);
55
116
}
56
return realsize;
117
118
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
119
diff --git a/block/iscsi.c b/block/iscsi.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/block/iscsi.c
122
+++ b/block/iscsi.c
123
@@ -XXX,XX +XXX,XX @@ static void
124
iscsi_bh_cb(void *p)
125
{
126
IscsiAIOCB *acb = p;
127
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
128
129
qemu_bh_delete(acb->bh);
130
131
g_free(acb->buf);
132
acb->buf = NULL;
133
134
- aio_context_acquire(ctx);
135
acb->common.cb(acb->common.opaque, acb->status);
136
- aio_context_release(ctx);
137
138
if (acb->task != NULL) {
139
scsi_free_scsi_task(acb->task);
140
diff --git a/block/linux-aio.c b/block/linux-aio.c
141
index XXXXXXX..XXXXXXX 100644
142
--- a/block/linux-aio.c
143
+++ b/block/linux-aio.c
144
@@ -XXX,XX +XXX,XX @@ static inline ssize_t io_event_ret(struct io_event *ev)
145
*/
146
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
147
{
148
- LinuxAioState *s = laiocb->ctx;
149
int ret;
150
151
ret = laiocb->ret;
152
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
153
}
154
155
laiocb->ret = ret;
156
- aio_context_acquire(s->aio_context);
157
if (laiocb->co) {
158
/* If the coroutine is already entered it must be in ioq_submit() and
159
* will notice laio->ret has been filled in when it eventually runs
160
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
161
* that!
162
*/
163
if (!qemu_coroutine_entered(laiocb->co)) {
164
- qemu_coroutine_enter(laiocb->co);
165
+ aio_co_wake(laiocb->co);
166
}
167
} else {
168
laiocb->common.cb(laiocb->common.opaque, ret);
169
qemu_aio_unref(laiocb);
170
}
171
- aio_context_release(s->aio_context);
172
}
173
174
/**
175
diff --git a/block/mirror.c b/block/mirror.c
176
index XXXXXXX..XXXXXXX 100644
177
--- a/block/mirror.c
178
+++ b/block/mirror.c
179
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
180
{
181
MirrorOp *op = opaque;
182
MirrorBlockJob *s = op->s;
183
+
184
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
185
if (ret < 0) {
186
BlockErrorAction action;
187
188
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
189
}
190
}
191
mirror_iteration_done(op, ret);
192
+ aio_context_release(blk_get_aio_context(s->common.blk));
193
}
194
195
static void mirror_read_complete(void *opaque, int ret)
196
{
197
MirrorOp *op = opaque;
198
MirrorBlockJob *s = op->s;
199
+
200
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
201
if (ret < 0) {
202
BlockErrorAction action;
203
204
@@ -XXX,XX +XXX,XX @@ static void mirror_read_complete(void *opaque, int ret)
205
}
206
207
mirror_iteration_done(op, ret);
208
- return;
209
+ } else {
210
+ blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
211
+ 0, mirror_write_complete, op);
212
}
213
- blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
214
- 0, mirror_write_complete, op);
215
+ aio_context_release(blk_get_aio_context(s->common.blk));
216
}
217
218
static inline void mirror_clip_sectors(MirrorBlockJob *s,
219
diff --git a/block/null.c b/block/null.c
220
index XXXXXXX..XXXXXXX 100644
221
--- a/block/null.c
222
+++ b/block/null.c
223
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo null_aiocb_info = {
224
static void null_bh_cb(void *opaque)
225
{
226
NullAIOCB *acb = opaque;
227
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
228
-
229
- aio_context_acquire(ctx);
230
acb->common.cb(acb->common.opaque, 0);
231
- aio_context_release(ctx);
232
qemu_aio_unref(acb);
233
}
234
235
static void null_timer_cb(void *opaque)
236
{
237
NullAIOCB *acb = opaque;
238
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
239
-
240
- aio_context_acquire(ctx);
241
acb->common.cb(acb->common.opaque, 0);
242
- aio_context_release(ctx);
243
timer_deinit(&acb->timer);
244
qemu_aio_unref(acb);
245
}
246
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
247
index XXXXXXX..XXXXXXX 100644
248
--- a/block/qed-cluster.c
249
+++ b/block/qed-cluster.c
250
@@ -XXX,XX +XXX,XX @@ static void qed_find_cluster_cb(void *opaque, int ret)
251
unsigned int index;
252
unsigned int n;
253
254
+ qed_acquire(s);
255
if (ret) {
256
goto out;
257
}
258
@@ -XXX,XX +XXX,XX @@ static void qed_find_cluster_cb(void *opaque, int ret)
259
260
out:
261
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
262
+ qed_release(s);
263
g_free(find_cluster_cb);
264
}
265
266
diff --git a/block/qed-table.c b/block/qed-table.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/block/qed-table.c
269
+++ b/block/qed-table.c
270
@@ -XXX,XX +XXX,XX @@ static void qed_read_table_cb(void *opaque, int ret)
271
{
272
QEDReadTableCB *read_table_cb = opaque;
273
QEDTable *table = read_table_cb->table;
274
+ BDRVQEDState *s = read_table_cb->s;
275
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
276
int i;
277
278
@@ -XXX,XX +XXX,XX @@ static void qed_read_table_cb(void *opaque, int ret)
279
}
280
281
/* Byteswap offsets */
282
+ qed_acquire(s);
283
for (i = 0; i < noffsets; i++) {
284
table->offsets[i] = le64_to_cpu(table->offsets[i]);
285
}
286
+ qed_release(s);
287
288
out:
289
/* Completion */
290
- trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
291
+ trace_qed_read_table_cb(s, read_table_cb->table, ret);
292
gencb_complete(&read_table_cb->gencb, ret);
293
}
294
295
@@ -XXX,XX +XXX,XX @@ typedef struct {
296
static void qed_write_table_cb(void *opaque, int ret)
297
{
298
QEDWriteTableCB *write_table_cb = opaque;
299
+ BDRVQEDState *s = write_table_cb->s;
300
301
- trace_qed_write_table_cb(write_table_cb->s,
302
+ trace_qed_write_table_cb(s,
303
write_table_cb->orig_table,
304
write_table_cb->flush,
305
ret);
306
@@ -XXX,XX +XXX,XX @@ static void qed_write_table_cb(void *opaque, int ret)
307
if (write_table_cb->flush) {
308
/* We still need to flush first */
309
write_table_cb->flush = false;
310
+ qed_acquire(s);
311
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
312
write_table_cb);
313
+ qed_release(s);
314
return;
315
}
316
317
@@ -XXX,XX +XXX,XX @@ static void qed_read_l2_table_cb(void *opaque, int ret)
318
CachedL2Table *l2_table = request->l2_table;
319
uint64_t l2_offset = read_l2_table_cb->l2_offset;
320
321
+ qed_acquire(s);
322
if (ret) {
323
/* can't trust loaded L2 table anymore */
324
qed_unref_l2_cache_entry(l2_table);
325
@@ -XXX,XX +XXX,XX @@ static void qed_read_l2_table_cb(void *opaque, int ret)
326
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
327
assert(request->l2_table != NULL);
328
}
329
+ qed_release(s);
330
331
gencb_complete(&read_l2_table_cb->gencb, ret);
332
}
333
diff --git a/block/qed.c b/block/qed.c
334
index XXXXXXX..XXXXXXX 100644
335
--- a/block/qed.c
336
+++ b/block/qed.c
337
@@ -XXX,XX +XXX,XX @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
338
}
339
340
if (cb->co) {
341
- qemu_coroutine_enter(cb->co);
342
+ aio_co_wake(cb->co);
343
}
344
}
345
346
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
347
cb->done = true;
348
cb->ret = ret;
349
if (cb->co) {
350
- qemu_coroutine_enter(cb->co);
351
+ aio_co_wake(cb->co);
352
}
353
}
354
355
diff --git a/block/rbd.c b/block/rbd.c
356
index XXXXXXX..XXXXXXX 100644
357
--- a/block/rbd.c
358
+++ b/block/rbd.c
359
@@ -XXX,XX +XXX,XX @@ shutdown:
360
static void qemu_rbd_complete_aio(RADOSCB *rcb)
361
{
362
RBDAIOCB *acb = rcb->acb;
363
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
364
int64_t r;
365
366
r = rcb->ret;
367
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
368
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
369
}
370
qemu_vfree(acb->bounce);
371
-
372
- aio_context_acquire(ctx);
373
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
374
- aio_context_release(ctx);
375
376
qemu_aio_unref(acb);
377
}
378
diff --git a/block/win32-aio.c b/block/win32-aio.c
379
index XXXXXXX..XXXXXXX 100644
380
--- a/block/win32-aio.c
381
+++ b/block/win32-aio.c
382
@@ -XXX,XX +XXX,XX @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
383
qemu_vfree(waiocb->buf);
384
}
385
386
-
387
- aio_context_acquire(s->aio_ctx);
388
waiocb->common.cb(waiocb->common.opaque, ret);
389
- aio_context_release(s->aio_ctx);
390
qemu_aio_unref(waiocb);
391
}
392
393
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
394
index XXXXXXX..XXXXXXX 100644
395
--- a/hw/block/virtio-blk.c
396
+++ b/hw/block/virtio-blk.c
397
@@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
398
static void virtio_blk_rw_complete(void *opaque, int ret)
399
{
400
VirtIOBlockReq *next = opaque;
401
+ VirtIOBlock *s = next->dev;
402
403
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
404
while (next) {
405
VirtIOBlockReq *req = next;
406
next = req->mr_next;
407
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_rw_complete(void *opaque, int ret)
408
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
409
virtio_blk_free_request(req);
410
}
411
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
412
}
413
414
static void virtio_blk_flush_complete(void *opaque, int ret)
415
{
416
VirtIOBlockReq *req = opaque;
417
+ VirtIOBlock *s = req->dev;
418
419
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
420
if (ret) {
421
if (virtio_blk_handle_rw_error(req, -ret, 0)) {
422
- return;
423
+ goto out;
424
}
425
}
426
427
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
428
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
429
virtio_blk_free_request(req);
430
+
431
+out:
432
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
433
}
434
435
#ifdef __linux__
436
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
437
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
438
439
out:
440
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
441
virtio_blk_req_complete(req, status);
442
virtio_blk_free_request(req);
443
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
444
g_free(ioctl_req);
445
}
446
447
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
448
index XXXXXXX..XXXXXXX 100644
449
--- a/hw/scsi/scsi-disk.c
450
+++ b/hw/scsi/scsi-disk.c
451
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
452
453
assert(r->req.aiocb != NULL);
454
r->req.aiocb = NULL;
455
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
456
if (scsi_disk_req_check_error(r, ret, true)) {
457
goto done;
458
}
459
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
460
scsi_req_complete(&r->req, GOOD);
461
462
done:
463
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
464
scsi_req_unref(&r->req);
465
}
466
467
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_complete(void *opaque, int ret)
468
assert(r->req.aiocb != NULL);
469
r->req.aiocb = NULL;
470
471
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
472
if (ret < 0) {
473
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
474
} else {
475
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
476
}
477
scsi_dma_complete_noio(r, ret);
478
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
479
}
480
481
static void scsi_read_complete(void * opaque, int ret)
482
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
483
484
assert(r->req.aiocb != NULL);
485
r->req.aiocb = NULL;
486
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
487
if (scsi_disk_req_check_error(r, ret, true)) {
488
goto done;
489
}
490
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
491
492
done:
493
scsi_req_unref(&r->req);
494
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
495
}
496
497
/* Actually issue a read to the block device. */
498
@@ -XXX,XX +XXX,XX @@ static void scsi_do_read_cb(void *opaque, int ret)
499
assert (r->req.aiocb != NULL);
500
r->req.aiocb = NULL;
501
502
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
503
if (ret < 0) {
504
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
505
} else {
506
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
507
}
508
scsi_do_read(opaque, ret);
509
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
510
}
511
512
/* Read more data from scsi device into buffer. */
513
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
514
assert (r->req.aiocb != NULL);
515
r->req.aiocb = NULL;
516
517
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
518
if (ret < 0) {
519
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
520
} else {
521
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
522
}
523
scsi_write_complete_noio(r, ret);
524
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
525
}
526
527
static void scsi_write_data(SCSIRequest *req)
528
@@ -XXX,XX +XXX,XX @@ static void scsi_unmap_complete(void *opaque, int ret)
529
{
530
UnmapCBData *data = opaque;
531
SCSIDiskReq *r = data->r;
532
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
533
534
assert(r->req.aiocb != NULL);
535
r->req.aiocb = NULL;
536
537
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
538
scsi_unmap_complete_noio(data, ret);
539
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
540
}
541
542
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
543
@@ -XXX,XX +XXX,XX @@ static void scsi_write_same_complete(void *opaque, int ret)
544
545
assert(r->req.aiocb != NULL);
546
r->req.aiocb = NULL;
547
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
548
if (scsi_disk_req_check_error(r, ret, true)) {
549
goto done;
550
}
551
@@ -XXX,XX +XXX,XX @@ done:
552
scsi_req_unref(&r->req);
553
qemu_vfree(data->iov.iov_base);
554
g_free(data);
555
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
556
}
557
558
static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
559
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
560
index XXXXXXX..XXXXXXX 100644
561
--- a/hw/scsi/scsi-generic.c
562
+++ b/hw/scsi/scsi-generic.c
563
@@ -XXX,XX +XXX,XX @@ done:
564
static void scsi_command_complete(void *opaque, int ret)
565
{
566
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
567
+ SCSIDevice *s = r->req.dev;
568
569
assert(r->req.aiocb != NULL);
570
r->req.aiocb = NULL;
571
+
572
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
573
scsi_command_complete_noio(r, ret);
574
+ aio_context_release(blk_get_aio_context(s->conf.blk));
575
}
576
577
static int execute_command(BlockBackend *blk,
578
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
579
assert(r->req.aiocb != NULL);
580
r->req.aiocb = NULL;
581
582
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
583
+
584
if (ret || r->req.io_canceled) {
585
scsi_command_complete_noio(r, ret);
586
- return;
587
+ goto done;
588
}
589
590
len = r->io_header.dxfer_len - r->io_header.resid;
591
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
592
r->len = -1;
593
if (len == 0) {
594
scsi_command_complete_noio(r, 0);
595
- return;
596
+ goto done;
597
}
598
599
/* Snoop READ CAPACITY output to set the blocksize. */
600
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
601
}
602
scsi_req_data(&r->req, len);
603
scsi_req_unref(&r->req);
604
+
605
+done:
606
+ aio_context_release(blk_get_aio_context(s->conf.blk));
607
}
608
609
/* Read more data from scsi device into buffer. */
610
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
611
assert(r->req.aiocb != NULL);
612
r->req.aiocb = NULL;
613
614
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
615
+
616
if (ret || r->req.io_canceled) {
617
scsi_command_complete_noio(r, ret);
618
- return;
619
+ goto done;
620
}
621
622
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
623
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
624
}
625
626
scsi_command_complete_noio(r, ret);
627
+
628
+done:
629
+ aio_context_release(blk_get_aio_context(s->conf.blk));
630
}
631
632
/* Write data to a scsi device. Returns nonzero on failure.
633
diff --git a/util/thread-pool.c b/util/thread-pool.c
634
index XXXXXXX..XXXXXXX 100644
635
--- a/util/thread-pool.c
636
+++ b/util/thread-pool.c
637
@@ -XXX,XX +XXX,XX @@ restart:
638
*/
639
qemu_bh_schedule(pool->completion_bh);
640
641
+ aio_context_release(pool->ctx);
642
elem->common.cb(elem->common.opaque, elem->ret);
643
+ aio_context_acquire(pool->ctx);
644
qemu_aio_unref(elem);
645
goto restart;
646
} else {
647
@@ -XXX,XX +XXX,XX @@ static void thread_pool_co_cb(void *opaque, int ret)
648
ThreadPoolCo *co = opaque;
649
650
co->ret = ret;
651
- qemu_coroutine_enter(co->co);
652
+ aio_co_wake(co->co);
653
}
654
655
int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
57
--
656
--
58
2.24.1
657
2.9.3
59
658
60
659
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
The offset/bytes pair is the more usual naming in the block layer, so let's use it.
3
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
4
Extract the dispatching loop for file descriptor handlers into a new
5
function aio_dispatch_handlers, and then inline aio_dispatch into
6
aio_poll.
4
7
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
aio_dispatch can now become void.
6
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
9
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-Id: <20200311103004.7649-8-vsementsov@virtuozzo.com>
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
14
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
16
---
11
block/block-copy.c | 82 +++++++++++++++++++-------------------
17
include/block/aio.h | 6 +-----
12
include/block/block-copy.h | 4 +-
18
util/aio-posix.c | 44 ++++++++++++++------------------------------
13
2 files changed, 43 insertions(+), 43 deletions(-)
19
util/aio-win32.c | 13 ++++---------
20
util/async.c | 2 +-
21
4 files changed, 20 insertions(+), 45 deletions(-)
14
22
15
diff --git a/block/block-copy.c b/block/block-copy.c
23
diff --git a/include/block/aio.h b/include/block/aio.h
16
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
17
--- a/block/block-copy.c
25
--- a/include/block/aio.h
18
+++ b/block/block-copy.c
26
+++ b/include/block/aio.h
19
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx);
20
#define BLOCK_COPY_MAX_MEM (128 * MiB)
28
/* Dispatch any pending callbacks from the GSource attached to the AioContext.
21
29
*
22
static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
30
* This is used internally in the implementation of the GSource.
23
- int64_t start,
31
- *
24
+ int64_t offset,
32
- * @dispatch_fds: true to process fds, false to skip them
25
int64_t bytes)
33
- * (can be used as an optimization by callers that know there
26
{
34
- * are no fds ready)
27
BlockCopyInFlightReq *req;
35
*/
28
36
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds);
29
QLIST_FOREACH(req, &s->inflight_reqs, list) {
37
+void aio_dispatch(AioContext *ctx);
30
- if (start + bytes > req->start && start < req->start + req->bytes) {
38
31
+ if (offset + bytes > req->offset && offset < req->offset + req->bytes) {
39
/* Progress in completing AIO work to occur. This can issue new pending
32
return req;
40
* aio as a result of executing I/O completion or bh callbacks.
41
diff --git a/util/aio-posix.c b/util/aio-posix.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/util/aio-posix.c
44
+++ b/util/aio-posix.c
45
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
46
AioHandler *node, *tmp;
47
bool progress = false;
48
49
- /*
50
- * We have to walk very carefully in case aio_set_fd_handler is
51
- * called while we're walking.
52
- */
53
- qemu_lockcnt_inc(&ctx->list_lock);
54
-
55
QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
56
int revents;
57
58
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
33
}
59
}
34
}
60
}
35
@@ -XXX,XX +XXX,XX @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
61
62
- qemu_lockcnt_dec(&ctx->list_lock);
63
return progress;
36
}
64
}
37
65
38
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
66
-/*
39
- int64_t start,
67
- * Note that dispatch_fds == false has the side-effect of post-poning the
40
+ int64_t offset,
68
- * freeing of deleted handlers.
41
int64_t bytes)
69
- */
70
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
71
+void aio_dispatch(AioContext *ctx)
42
{
72
{
43
BlockCopyInFlightReq *req;
73
- bool progress;
44
74
+ aio_bh_poll(ctx);
45
- while ((req = find_conflicting_inflight_req(s, start, bytes))) {
75
46
+ while ((req = find_conflicting_inflight_req(s, offset, bytes))) {
76
- /*
47
qemu_co_queue_wait(&req->wait_queue, NULL);
77
- * If there are callbacks left that have been queued, we need to call them.
78
- * Do not call select in this case, because it is possible that the caller
79
- * does not need a complete flush (as is the case for aio_poll loops).
80
- */
81
- progress = aio_bh_poll(ctx);
82
+ qemu_lockcnt_inc(&ctx->list_lock);
83
+ aio_dispatch_handlers(ctx);
84
+ qemu_lockcnt_dec(&ctx->list_lock);
85
86
- if (dispatch_fds) {
87
- progress |= aio_dispatch_handlers(ctx);
88
- }
89
-
90
- /* Run our timers */
91
- progress |= timerlistgroup_run_timers(&ctx->tlg);
92
-
93
- return progress;
94
+ timerlistgroup_run_timers(&ctx->tlg);
95
}
96
97
/* These thread-local variables are used only in a small part of aio_poll
98
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
99
npfd = 0;
100
qemu_lockcnt_dec(&ctx->list_lock);
101
102
- /* Run dispatch even if there were no readable fds to run timers */
103
- if (aio_dispatch(ctx, ret > 0)) {
104
- progress = true;
105
+ progress |= aio_bh_poll(ctx);
106
+
107
+ if (ret > 0) {
108
+ qemu_lockcnt_inc(&ctx->list_lock);
109
+ progress |= aio_dispatch_handlers(ctx);
110
+ qemu_lockcnt_dec(&ctx->list_lock);
48
}
111
}
112
113
+ progress |= timerlistgroup_run_timers(&ctx->tlg);
114
+
115
return progress;
49
}
116
}
50
117
51
static void block_copy_inflight_req_begin(BlockCopyState *s,
118
diff --git a/util/aio-win32.c b/util/aio-win32.c
52
BlockCopyInFlightReq *req,
119
index XXXXXXX..XXXXXXX 100644
53
- int64_t start, int64_t bytes)
120
--- a/util/aio-win32.c
54
+ int64_t offset, int64_t bytes)
121
+++ b/util/aio-win32.c
122
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
123
return progress;
124
}
125
126
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
127
+void aio_dispatch(AioContext *ctx)
55
{
128
{
56
- req->start = start;
129
- bool progress;
57
+ req->offset = offset;
130
-
58
req->bytes = bytes;
131
- progress = aio_bh_poll(ctx);
59
qemu_co_queue_init(&req->wait_queue);
132
- if (dispatch_fds) {
60
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
133
- progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
61
@@ -XXX,XX +XXX,XX @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
134
- }
62
* Returns 0 on success.
135
- progress |= timerlistgroup_run_timers(&ctx->tlg);
63
*/
136
- return progress;
64
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
137
+ aio_bh_poll(ctx);
65
- int64_t start, int64_t bytes,
138
+ aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
66
+ int64_t offset, int64_t bytes,
139
+ timerlistgroup_run_timers(&ctx->tlg);
67
bool zeroes, bool *error_is_read)
68
{
69
int ret;
70
- int64_t nbytes = MIN(start + bytes, s->len) - start;
71
+ int64_t nbytes = MIN(offset + bytes, s->len) - offset;
72
void *bounce_buffer = NULL;
73
74
- assert(start >= 0 && bytes > 0 && INT64_MAX - start >= bytes);
75
- assert(QEMU_IS_ALIGNED(start, s->cluster_size));
76
+ assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
77
+ assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
78
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
79
- assert(start < s->len);
80
- assert(start + bytes <= s->len ||
81
- start + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
82
+ assert(offset < s->len);
83
+ assert(offset + bytes <= s->len ||
84
+ offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
85
assert(nbytes < INT_MAX);
86
87
if (zeroes) {
88
- ret = bdrv_co_pwrite_zeroes(s->target, start, nbytes, s->write_flags &
89
+ ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
90
~BDRV_REQ_WRITE_COMPRESSED);
91
if (ret < 0) {
92
- trace_block_copy_write_zeroes_fail(s, start, ret);
93
+ trace_block_copy_write_zeroes_fail(s, offset, ret);
94
if (error_is_read) {
95
*error_is_read = false;
96
}
97
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
98
}
99
100
if (s->use_copy_range) {
101
- ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
102
+ ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
103
0, s->write_flags);
104
if (ret < 0) {
105
- trace_block_copy_copy_range_fail(s, start, ret);
106
+ trace_block_copy_copy_range_fail(s, offset, ret);
107
s->use_copy_range = false;
108
s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
109
/* Fallback to read+write with allocated buffer */
110
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
111
112
bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
113
114
- ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
115
+ ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
116
if (ret < 0) {
117
- trace_block_copy_read_fail(s, start, ret);
118
+ trace_block_copy_read_fail(s, offset, ret);
119
if (error_is_read) {
120
*error_is_read = true;
121
}
122
goto out;
123
}
124
125
- ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
126
+ ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
127
s->write_flags);
128
if (ret < 0) {
129
- trace_block_copy_write_fail(s, start, ret);
130
+ trace_block_copy_write_fail(s, offset, ret);
131
if (error_is_read) {
132
*error_is_read = false;
133
}
134
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
135
}
140
}
136
141
137
int coroutine_fn block_copy(BlockCopyState *s,
142
bool aio_poll(AioContext *ctx, bool blocking)
138
- int64_t start, int64_t bytes,
143
diff --git a/util/async.c b/util/async.c
139
+ int64_t offset, int64_t bytes,
140
bool *error_is_read)
141
{
142
int ret = 0;
143
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
144
assert(bdrv_get_aio_context(s->source->bs) ==
145
bdrv_get_aio_context(s->target->bs));
146
147
- assert(QEMU_IS_ALIGNED(start, s->cluster_size));
148
+ assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
149
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
150
151
- block_copy_wait_inflight_reqs(s, start, bytes);
152
- block_copy_inflight_req_begin(s, &req, start, bytes);
153
+ block_copy_wait_inflight_reqs(s, offset, bytes);
154
+ block_copy_inflight_req_begin(s, &req, offset, bytes);
155
156
while (bytes) {
157
int64_t next_zero, cur_bytes, status_bytes;
158
159
- if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
160
- trace_block_copy_skip(s, start);
161
- start += s->cluster_size;
162
+ if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
163
+ trace_block_copy_skip(s, offset);
164
+ offset += s->cluster_size;
165
bytes -= s->cluster_size;
166
continue; /* already copied */
167
}
168
169
cur_bytes = MIN(bytes, s->copy_size);
170
171
- next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
172
+ next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
173
cur_bytes);
174
if (next_zero >= 0) {
175
- assert(next_zero > start); /* start is dirty */
176
- assert(next_zero < start + cur_bytes); /* no need to do MIN() */
177
- cur_bytes = next_zero - start;
178
+ assert(next_zero > offset); /* offset is dirty */
179
+ assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
180
+ cur_bytes = next_zero - offset;
181
}
182
183
- ret = block_copy_block_status(s, start, cur_bytes, &status_bytes);
184
+ ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
185
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
186
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, status_bytes);
187
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
188
progress_set_remaining(s->progress,
189
bdrv_get_dirty_count(s->copy_bitmap) +
190
s->in_flight_bytes);
191
- trace_block_copy_skip_range(s, start, status_bytes);
192
- start += status_bytes;
193
+ trace_block_copy_skip_range(s, offset, status_bytes);
194
+ offset += status_bytes;
195
bytes -= status_bytes;
196
continue;
197
}
198
199
cur_bytes = MIN(cur_bytes, status_bytes);
200
201
- trace_block_copy_process(s, start);
202
+ trace_block_copy_process(s, offset);
203
204
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
205
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
206
s->in_flight_bytes += cur_bytes;
207
208
co_get_from_shres(s->mem, cur_bytes);
209
- ret = block_copy_do_copy(s, start, cur_bytes, ret & BDRV_BLOCK_ZERO,
210
+ ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
211
error_is_read);
212
co_put_to_shres(s->mem, cur_bytes);
213
s->in_flight_bytes -= cur_bytes;
214
if (ret < 0) {
215
- bdrv_set_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
216
+ bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
217
break;
218
}
219
220
progress_work_done(s->progress, cur_bytes);
221
s->progress_bytes_callback(cur_bytes, s->progress_opaque);
222
- start += cur_bytes;
223
+ offset += cur_bytes;
224
bytes -= cur_bytes;
225
}
226
227
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
228
index XXXXXXX..XXXXXXX 100644
144
index XXXXXXX..XXXXXXX 100644
229
--- a/include/block/block-copy.h
145
--- a/util/async.c
230
+++ b/include/block/block-copy.h
146
+++ b/util/async.c
231
@@ -XXX,XX +XXX,XX @@
147
@@ -XXX,XX +XXX,XX @@ aio_ctx_dispatch(GSource *source,
232
#include "qemu/co-shared-resource.h"
148
AioContext *ctx = (AioContext *) source;
233
149
234
typedef struct BlockCopyInFlightReq {
150
assert(callback == NULL);
235
- int64_t start;
151
- aio_dispatch(ctx, true);
236
+ int64_t offset;
152
+ aio_dispatch(ctx);
237
int64_t bytes;
153
return true;
238
QLIST_ENTRY(BlockCopyInFlightReq) list;
154
}
239
CoQueue wait_queue; /* coroutines blocked on this request */
155
240
@@ -XXX,XX +XXX,XX @@ void block_copy_state_free(BlockCopyState *s);
241
int64_t block_copy_reset_unallocated(BlockCopyState *s,
242
int64_t offset, int64_t *count);
243
244
-int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
245
+int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
246
bool *error_is_read);
247
248
#endif /* BLOCK_COPY_H */
249
--
156
--
250
2.24.1
157
2.9.3
251
158
252
159
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Currently, the block_copy operation locks the whole requested region. But
3
Pull the increment/decrement pair out of aio_bh_poll and into the
4
there is no reason to lock clusters that are already copied; doing so will
4
callers.
5
disturb other parallel block_copy requests for no reason.
6
5
7
Let's instead do the following:
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
10
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
util/aio-posix.c | 8 +++-----
14
util/aio-win32.c | 8 ++++----
15
util/async.c | 12 ++++++------
16
3 files changed, 13 insertions(+), 15 deletions(-)
8
17
9
Lock only the sub-region that we are going to operate on. Then, after
18
diff --git a/util/aio-posix.c b/util/aio-posix.c
10
copying all dirty sub-regions, we should wait for intersecting
11
block-copy requests; if they failed, we should retry these new dirty
12
clusters.
13
14
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
15
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
16
Message-Id: <20200311103004.7649-9-vsementsov@virtuozzo.com>
17
Signed-off-by: Max Reitz <mreitz@redhat.com>
18
---
19
block/block-copy.c | 129 ++++++++++++++++++++++++++++++++++++---------
20
1 file changed, 105 insertions(+), 24 deletions(-)
21
22
diff --git a/block/block-copy.c b/block/block-copy.c
23
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
24
--- a/block/block-copy.c
20
--- a/util/aio-posix.c
25
+++ b/block/block-copy.c
21
+++ b/util/aio-posix.c
26
@@ -XXX,XX +XXX,XX @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
22
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
27
return NULL;
23
24
void aio_dispatch(AioContext *ctx)
25
{
26
+ qemu_lockcnt_inc(&ctx->list_lock);
27
aio_bh_poll(ctx);
28
-
29
- qemu_lockcnt_inc(&ctx->list_lock);
30
aio_dispatch_handlers(ctx);
31
qemu_lockcnt_dec(&ctx->list_lock);
32
33
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
34
}
35
36
npfd = 0;
37
- qemu_lockcnt_dec(&ctx->list_lock);
38
39
progress |= aio_bh_poll(ctx);
40
41
if (ret > 0) {
42
- qemu_lockcnt_inc(&ctx->list_lock);
43
progress |= aio_dispatch_handlers(ctx);
44
- qemu_lockcnt_dec(&ctx->list_lock);
45
}
46
47
+ qemu_lockcnt_dec(&ctx->list_lock);
48
+
49
progress |= timerlistgroup_run_timers(&ctx->tlg);
50
51
return progress;
52
diff --git a/util/aio-win32.c b/util/aio-win32.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/util/aio-win32.c
55
+++ b/util/aio-win32.c
56
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
57
bool progress = false;
58
AioHandler *tmp;
59
60
- qemu_lockcnt_inc(&ctx->list_lock);
61
-
62
/*
63
* We have to walk very carefully in case aio_set_fd_handler is
64
* called while we're walking.
65
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
66
}
67
}
68
69
- qemu_lockcnt_dec(&ctx->list_lock);
70
return progress;
28
}
71
}
29
72
30
-static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
73
void aio_dispatch(AioContext *ctx)
31
- int64_t offset,
74
{
32
- int64_t bytes)
75
+ qemu_lockcnt_inc(&ctx->list_lock);
33
+/*
76
aio_bh_poll(ctx);
34
+ * If there are no intersecting requests return false. Otherwise, wait for the
77
aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
35
+ * first found intersecting request to finish and return true.
78
+ qemu_lockcnt_dec(&ctx->list_lock);
79
timerlistgroup_run_timers(&ctx->tlg);
80
}
81
82
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
83
}
84
}
85
86
- qemu_lockcnt_dec(&ctx->list_lock);
87
first = true;
88
89
/* ctx->notifier is always registered. */
90
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
91
progress |= aio_dispatch_handlers(ctx, event);
92
} while (count > 0);
93
94
+ qemu_lockcnt_dec(&ctx->list_lock);
95
+
96
progress |= timerlistgroup_run_timers(&ctx->tlg);
97
return progress;
98
}
99
diff --git a/util/async.c b/util/async.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/util/async.c
102
+++ b/util/async.c
103
@@ -XXX,XX +XXX,XX @@ void aio_bh_call(QEMUBH *bh)
104
bh->cb(bh->opaque);
105
}
106
107
-/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
108
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
109
+ * The count in ctx->list_lock is incremented before the call, and is
110
+ * not affected by the call.
36
+ */
111
+ */
37
+static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
112
int aio_bh_poll(AioContext *ctx)
38
+ int64_t bytes)
39
{
113
{
40
- BlockCopyInFlightReq *req;
114
QEMUBH *bh, **bhp, *next;
41
+ BlockCopyInFlightReq *req = find_conflicting_inflight_req(s, offset, bytes);
115
int ret;
42
116
bool deleted = false;
43
- while ((req = find_conflicting_inflight_req(s, offset, bytes))) {
117
44
- qemu_co_queue_wait(&req->wait_queue, NULL);
118
- qemu_lockcnt_inc(&ctx->list_lock);
45
+ if (!req) {
119
-
46
+ return false;
120
ret = 0;
121
for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
122
next = atomic_rcu_read(&bh->next);
123
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
124
125
/* remove deleted bhs */
126
if (!deleted) {
127
- qemu_lockcnt_dec(&ctx->list_lock);
128
return ret;
47
}
129
}
48
+
130
49
+ qemu_co_queue_wait(&req->wait_queue, NULL);
131
- if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
50
+
132
+ if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
51
+ return true;
133
bhp = &ctx->first_bh;
52
}
134
while (*bhp) {
53
135
bh = *bhp;
54
+/* Called only on full-dirty region */
136
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
55
static void block_copy_inflight_req_begin(BlockCopyState *s,
137
bhp = &bh->next;
56
BlockCopyInFlightReq *req,
138
}
57
int64_t offset, int64_t bytes)
58
{
59
+ assert(!find_conflicting_inflight_req(s, offset, bytes));
60
+
61
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
62
+ s->in_flight_bytes += bytes;
63
+
64
req->offset = offset;
65
req->bytes = bytes;
66
qemu_co_queue_init(&req->wait_queue);
67
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
68
}
69
70
-static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
71
+/*
72
+ * block_copy_inflight_req_shrink
73
+ *
74
+ * Drop the tail of the request to be handled later. Set dirty bits back and
75
+ * wake up all requests waiting for us (maybe some of them are not intersecting
76
+ * with shrunk request)
77
+ */
78
+static void coroutine_fn block_copy_inflight_req_shrink(BlockCopyState *s,
79
+ BlockCopyInFlightReq *req, int64_t new_bytes)
80
{
81
+ if (new_bytes == req->bytes) {
82
+ return;
83
+ }
84
+
85
+ assert(new_bytes > 0 && new_bytes < req->bytes);
86
+
87
+ s->in_flight_bytes -= req->bytes - new_bytes;
88
+ bdrv_set_dirty_bitmap(s->copy_bitmap,
89
+ req->offset + new_bytes, req->bytes - new_bytes);
90
+
91
+ req->bytes = new_bytes;
92
+ qemu_co_queue_restart_all(&req->wait_queue);
93
+}
94
+
95
+static void coroutine_fn block_copy_inflight_req_end(BlockCopyState *s,
96
+ BlockCopyInFlightReq *req,
97
+ int ret)
98
+{
99
+ s->in_flight_bytes -= req->bytes;
100
+ if (ret < 0) {
101
+ bdrv_set_dirty_bitmap(s->copy_bitmap, req->offset, req->bytes);
102
+ }
103
QLIST_REMOVE(req, list);
104
qemu_co_queue_restart_all(&req->wait_queue);
105
}
106
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
107
return ret;
108
}
109
110
-int coroutine_fn block_copy(BlockCopyState *s,
111
- int64_t offset, int64_t bytes,
112
- bool *error_is_read)
113
+/*
114
+ * block_copy_dirty_clusters
115
+ *
116
+ * Copy dirty clusters in @offset/@bytes range.
117
+ * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
118
+ * clusters found and -errno on failure.
119
+ */
120
+static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
121
+ int64_t offset, int64_t bytes,
122
+ bool *error_is_read)
123
{
124
int ret = 0;
125
- BlockCopyInFlightReq req;
126
+ bool found_dirty = false;
127
128
/*
129
* block_copy() user is responsible for keeping source and target in same
130
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
131
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
132
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
133
134
- block_copy_wait_inflight_reqs(s, offset, bytes);
135
- block_copy_inflight_req_begin(s, &req, offset, bytes);
136
-
137
while (bytes) {
138
+ BlockCopyInFlightReq req;
139
int64_t next_zero, cur_bytes, status_bytes;
140
141
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
142
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
143
continue; /* already copied */
144
}
139
}
145
140
- qemu_lockcnt_unlock(&ctx->list_lock);
146
+ found_dirty = true;
141
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
147
+
148
cur_bytes = MIN(bytes, s->copy_size);
149
150
next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
151
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
152
assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
153
cur_bytes = next_zero - offset;
154
}
155
+ block_copy_inflight_req_begin(s, &req, offset, cur_bytes);
156
157
ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
158
+ assert(ret >= 0); /* never fail */
159
+ cur_bytes = MIN(cur_bytes, status_bytes);
160
+ block_copy_inflight_req_shrink(s, &req, cur_bytes);
161
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
162
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
163
+ block_copy_inflight_req_end(s, &req, 0);
164
progress_set_remaining(s->progress,
165
bdrv_get_dirty_count(s->copy_bitmap) +
166
s->in_flight_bytes);
167
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
168
continue;
169
}
170
171
- cur_bytes = MIN(cur_bytes, status_bytes);
172
-
173
trace_block_copy_process(s, offset);
174
175
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
176
- s->in_flight_bytes += cur_bytes;
177
-
178
co_get_from_shres(s->mem, cur_bytes);
179
ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
180
error_is_read);
181
co_put_to_shres(s->mem, cur_bytes);
182
- s->in_flight_bytes -= cur_bytes;
183
+ block_copy_inflight_req_end(s, &req, ret);
184
if (ret < 0) {
185
- bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
186
- break;
187
+ return ret;
188
}
189
190
progress_work_done(s->progress, cur_bytes);
191
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
192
bytes -= cur_bytes;
193
}
142
}
194
195
- block_copy_inflight_req_end(&req);
196
+ return found_dirty;
197
+}
198
+
199
+/*
200
+ * block_copy
201
+ *
202
+ * Copy requested region, according to the dirty bitmap.
203
+ * Collaborate with parallel block_copy requests: if they succeed it will help
204
+ * us. If they fail, we will retry not-copied regions. So, if we return error,
205
+ * it means that some I/O operation failed in context of _this_ block_copy call,
206
+ * not some parallel operation.
207
+ */
208
+int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
209
+ bool *error_is_read)
210
+{
211
+ int ret;
212
+
213
+ do {
214
+ ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
215
+
216
+ if (ret == 0) {
217
+ ret = block_copy_wait_one(s, offset, bytes);
218
+ }
219
+
220
+ /*
221
+ * We retry in two cases:
222
+ * 1. Some progress done
223
+ * Something was copied, which means that there were yield points
224
+ * and some new dirty bits may have appeared (due to failed parallel
225
+ * block-copy requests).
226
+ * 2. We have waited for some intersecting block-copy request
227
+ * It may have failed and produced new dirty bits.
228
+ */
229
+ } while (ret > 0);
230
231
return ret;
143
return ret;
232
}
144
}
233
--
145
--
234
2.24.1
146
2.9.3
235
147
236
148
diff view generated by jsdifflib
1
From: Pan Nengyuan <pannengyuan@huawei.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
'crypto_opts' is never freed in qcow2_close(); this patch fixes the leak reported below:
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
include/block/block_int.h | 64 +++++++++++++++++++++++++-----------------
11
include/sysemu/block-backend.h | 14 ++++++---
12
2 files changed, 49 insertions(+), 29 deletions(-)
4
13
5
Direct leak of 24 byte(s) in 1 object(s) allocated from:
14
diff --git a/include/block/block_int.h b/include/block/block_int.h
6
#0 0x7f0edd81f970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970)
7
#1 0x7f0edc6d149d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d)
8
#2 0x55d7eaede63d in qobject_input_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qobject-input-visitor.c:295
9
#3 0x55d7eaed78b8 in visit_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qapi-visit-core.c:49
10
#4 0x55d7eaf5140b in visit_type_QCryptoBlockOpenOptions qapi/qapi-visit-crypto.c:290
11
#5 0x55d7eae43af3 in block_crypto_open_opts_init /mnt/sdb/qemu-new/qemu_test/qemu/block/crypto.c:163
12
#6 0x55d7eacd2924 in qcow2_update_options_prepare /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1148
13
#7 0x55d7eacd33f7 in qcow2_update_options /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1232
14
#8 0x55d7eacd9680 in qcow2_do_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1512
15
#9 0x55d7eacdc55e in qcow2_open_entry /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1792
16
#10 0x55d7eacdc8fe in qcow2_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1819
17
#11 0x55d7eac3742d in bdrv_open_driver /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1317
18
#12 0x55d7eac3e990 in bdrv_open_common /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1575
19
#13 0x55d7eac4442c in bdrv_open_inherit /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3126
20
#14 0x55d7eac45c3f in bdrv_open /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3219
21
#15 0x55d7ead8e8a4 in blk_new_open /mnt/sdb/qemu-new/qemu_test/qemu/block/block-backend.c:397
22
#16 0x55d7eacde74c in qcow2_co_create /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3534
23
#17 0x55d7eacdfa6d in qcow2_co_create_opts /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3668
24
#18 0x55d7eac1c678 in bdrv_create_co_entry /mnt/sdb/qemu-new/qemu_test/qemu/block.c:485
25
#19 0x55d7eb0024d2 in coroutine_trampoline /mnt/sdb/qemu-new/qemu_test/qemu/util/coroutine-ucontext.c:115
26
27
Reported-by: Euler Robot <euler.robot@huawei.com>
28
Signed-off-by: Pan Nengyuan <pannengyuan@huawei.com>
29
Reviewed-by: Max Reitz <mreitz@redhat.com>
30
Message-Id: <20200227012950.12256-2-pannengyuan@huawei.com>
31
Signed-off-by: Max Reitz <mreitz@redhat.com>
32
---
33
block/qcow2.c | 1 +
34
1 file changed, 1 insertion(+)
35
36
diff --git a/block/qcow2.c b/block/qcow2.c
37
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
38
--- a/block/qcow2.c
16
--- a/include/block/block_int.h
39
+++ b/block/qcow2.c
17
+++ b/include/block/block_int.h
40
@@ -XXX,XX +XXX,XX @@ static void qcow2_close(BlockDriverState *bs)
18
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
41
19
* copied as well.
42
qcrypto_block_free(s->crypto);
20
*/
43
s->crypto = NULL;
21
struct BlockDriverState {
44
+ qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
22
- int64_t total_sectors; /* if we are reading a disk image, give its
45
23
- size in sectors */
46
g_free(s->unknown_header_fields);
24
+ /* Protected by big QEMU lock or read-only after opening. No special
47
cleanup_unknown_header_ext(bs);
25
+ * locking needed during I/O...
26
+ */
27
int open_flags; /* flags used to open the file, re-used for re-open */
28
bool read_only; /* if true, the media is read only */
29
bool encrypted; /* if true, the media is encrypted */
30
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
31
bool sg; /* if true, the device is a /dev/sg* */
32
bool probed; /* if true, format was probed rather than specified */
33
34
- int copy_on_read; /* if nonzero, copy read backing sectors into image.
35
- note this is a reference count */
36
-
37
- CoQueue flush_queue; /* Serializing flush queue */
38
- bool active_flush_req; /* Flush request in flight? */
39
- unsigned int write_gen; /* Current data generation */
40
- unsigned int flushed_gen; /* Flushed write generation */
41
-
42
BlockDriver *drv; /* NULL means no media */
43
void *opaque;
44
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
46
BdrvChild *backing;
47
BdrvChild *file;
48
49
- /* Callback before write request is processed */
50
- NotifierWithReturnList before_write_notifiers;
51
-
52
- /* number of in-flight requests; overall and serialising */
53
- unsigned int in_flight;
54
- unsigned int serialising_in_flight;
55
-
56
- bool wakeup;
57
-
58
- /* Offset after the highest byte written to */
59
- uint64_t wr_highest_offset;
60
-
61
/* I/O Limits */
62
BlockLimits bl;
63
64
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
65
QTAILQ_ENTRY(BlockDriverState) bs_list;
66
/* element of the list of monitor-owned BDS */
67
QTAILQ_ENTRY(BlockDriverState) monitor_list;
68
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
69
int refcnt;
70
71
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
72
-
73
/* operation blockers */
74
QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
75
76
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
77
/* The error object in use for blocking operations on backing_hd */
78
Error *backing_blocker;
79
80
+ /* Protected by AioContext lock */
81
+
82
+ /* If true, copy read backing sectors into image. Can be >1 if more
83
+ * than one client has requested copy-on-read.
84
+ */
85
+ int copy_on_read;
86
+
87
+ /* If we are reading a disk image, give its size in sectors.
88
+ * Generally read-only; it is written to by load_vmstate and save_vmstate,
89
+ * but the block layer is quiescent during those.
90
+ */
91
+ int64_t total_sectors;
92
+
93
+ /* Callback before write request is processed */
94
+ NotifierWithReturnList before_write_notifiers;
95
+
96
+ /* number of in-flight requests; overall and serialising */
97
+ unsigned int in_flight;
98
+ unsigned int serialising_in_flight;
99
+
100
+ bool wakeup;
101
+
102
+ /* Offset after the highest byte written to */
103
+ uint64_t wr_highest_offset;
104
+
105
/* threshold limit for writes, in bytes. "High water mark". */
106
uint64_t write_threshold_offset;
107
NotifierWithReturn write_threshold_notifier;
108
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
109
/* counter for nested bdrv_io_plug */
110
unsigned io_plugged;
111
112
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
113
+ CoQueue flush_queue; /* Serializing flush queue */
114
+ bool active_flush_req; /* Flush request in flight? */
115
+ unsigned int write_gen; /* Current data generation */
116
+ unsigned int flushed_gen; /* Flushed write generation */
117
+
118
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
119
+
120
+ /* do we need to tell the guest if we have a volatile write cache? */
121
+ int enable_write_cache;
122
+
123
int quiesce_counter;
124
};
125
126
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
127
index XXXXXXX..XXXXXXX 100644
128
--- a/include/sysemu/block-backend.h
129
+++ b/include/sysemu/block-backend.h
130
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
131
* fields that must be public. This is in particular for QLIST_ENTRY() and
132
* friends so that BlockBackends can be kept in lists outside block-backend.c */
133
typedef struct BlockBackendPublic {
134
- /* I/O throttling.
135
- * throttle_state tells us if this BlockBackend has I/O limits configured.
136
- * io_limits_disabled tells us if they are currently being enforced */
137
+ /* I/O throttling has its own locking, but also some fields are
138
+ * protected by the AioContext lock.
139
+ */
140
+
141
+ /* Protected by AioContext lock. */
142
CoQueue throttled_reqs[2];
143
+
144
+ /* Nonzero if the I/O limits are currently being ignored; generally
145
+ * it is zero. */
146
unsigned int io_limits_disabled;
147
148
/* The following fields are protected by the ThrottleGroup lock.
149
- * See the ThrottleGroup documentation for details. */
150
+ * See the ThrottleGroup documentation for details.
151
+ * throttle_state tells us if I/O limits are configured. */
152
ThrottleState *throttle_state;
153
ThrottleTimers throttle_timers;
154
unsigned pending_reqs[2];
48
--
155
--
49
2.24.1
156
2.9.3
50
157
51
158
diff view generated by jsdifflib
New patch
1
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
3
This uses the lock-free mutex described in the paper '"Blocking without
4
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
5
Papatriantafilou. The same technique is used in OSv, and in fact
6
the code is essentially a conversion to C of OSv's code.
7
8
[Added missing coroutine_fn in tests/test-aio-multithread.c.
9
--Stefan]
10
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
include/qemu/coroutine.h | 17 ++++-
17
tests/test-aio-multithread.c | 86 ++++++++++++++++++++++++
18
util/qemu-coroutine-lock.c | 155 ++++++++++++++++++++++++++++++++++++++++---
19
util/trace-events | 1 +
20
4 files changed, 246 insertions(+), 13 deletions(-)
21
22
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/coroutine.h
25
+++ b/include/qemu/coroutine.h
26
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue);
27
/**
28
* Provides a mutex that can be used to synchronise coroutines
29
*/
30
+struct CoWaitRecord;
31
typedef struct CoMutex {
32
- bool locked;
33
+ /* Count of pending lockers; 0 for a free mutex, 1 for an
34
+ * uncontended mutex.
35
+ */
36
+ unsigned locked;
37
+
38
+ /* A queue of waiters. Elements are added atomically in front of
39
+ * from_push. to_pop is only populated, and popped from, by whoever
40
+ * is in charge of the next wakeup. This can be an unlocker or,
41
+ * through the handoff protocol, a locker that is about to go to sleep.
42
+ */
43
+ QSLIST_HEAD(, CoWaitRecord) from_push, to_pop;
44
+
45
+ unsigned handoff, sequence;
46
+
47
Coroutine *holder;
48
- CoQueue queue;
49
} CoMutex;
50
51
/**
52
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tests/test-aio-multithread.c
55
+++ b/tests/test-aio-multithread.c
56
@@ -XXX,XX +XXX,XX @@ static void test_multi_co_schedule_10(void)
57
test_multi_co_schedule(10);
58
}
59
60
+/* CoMutex thread-safety. */
61
+
62
+static uint32_t atomic_counter;
63
+static uint32_t running;
64
+static uint32_t counter;
65
+static CoMutex comutex;
66
+
67
+static void coroutine_fn test_multi_co_mutex_entry(void *opaque)
68
+{
69
+ while (!atomic_mb_read(&now_stopping)) {
70
+ qemu_co_mutex_lock(&comutex);
71
+ counter++;
72
+ qemu_co_mutex_unlock(&comutex);
73
+
74
+ /* Increase atomic_counter *after* releasing the mutex. Otherwise
75
+ * there is a chance (it happens about 1 in 3 runs) that the iothread
76
+ * exits before the coroutine is woken up, causing a spurious
77
+ * assertion failure.
78
+ */
79
+ atomic_inc(&atomic_counter);
80
+ }
81
+ atomic_dec(&running);
82
+}
83
+
84
+static void test_multi_co_mutex(int threads, int seconds)
85
+{
86
+ int i;
87
+
88
+ qemu_co_mutex_init(&comutex);
89
+ counter = 0;
90
+ atomic_counter = 0;
91
+ now_stopping = false;
92
+
93
+ create_aio_contexts();
94
+ assert(threads <= NUM_CONTEXTS);
95
+ running = threads;
96
+ for (i = 0; i < threads; i++) {
97
+ Coroutine *co1 = qemu_coroutine_create(test_multi_co_mutex_entry, NULL);
98
+ aio_co_schedule(ctx[i], co1);
99
+ }
100
+
101
+ g_usleep(seconds * 1000000);
102
+
103
+ atomic_mb_set(&now_stopping, true);
104
+ while (running > 0) {
105
+ g_usleep(100000);
106
+ }
107
+
108
+ join_aio_contexts();
109
+ g_test_message("%d iterations/second\n", counter / seconds);
110
+ g_assert_cmpint(counter, ==, atomic_counter);
111
+}
112
+
113
+/* Testing with NUM_CONTEXTS threads focuses on the queue. The mutex however
114
+ * is too contended (and the threads spend too much time in aio_poll)
115
+ * to actually stress the handoff protocol.
116
+ */
117
+static void test_multi_co_mutex_1(void)
118
+{
119
+ test_multi_co_mutex(NUM_CONTEXTS, 1);
120
+}
121
+
122
+static void test_multi_co_mutex_10(void)
123
+{
124
+ test_multi_co_mutex(NUM_CONTEXTS, 10);
125
+}
126
+
127
+/* Testing with fewer threads stresses the handoff protocol too. Still, the
128
+ * case where the locker _can_ pick up a handoff is very rare, happening
129
+ * about 10 times in 1 million, so increase the runtime a bit compared to
130
+ * other "quick" testcases that only run for 1 second.
131
+ */
132
+static void test_multi_co_mutex_2_3(void)
133
+{
134
+ test_multi_co_mutex(2, 3);
135
+}
136
+
137
+static void test_multi_co_mutex_2_30(void)
138
+{
139
+ test_multi_co_mutex(2, 30);
140
+}
141
+
142
/* End of tests. */
143
144
int main(int argc, char **argv)
145
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
146
g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
147
if (g_test_quick()) {
148
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
149
+ g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
150
+ g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
151
} else {
152
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
153
+ g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
154
+ g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
155
}
156
return g_test_run();
157
}
158
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
159
index XXXXXXX..XXXXXXX 100644
160
--- a/util/qemu-coroutine-lock.c
161
+++ b/util/qemu-coroutine-lock.c
162
@@ -XXX,XX +XXX,XX @@
163
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
164
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
165
* THE SOFTWARE.
166
+ *
167
+ * The lock-free mutex implementation is based on OSv
168
+ * (core/lfmutex.cc, include/lockfree/mutex.hh).
169
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
170
*/
171
172
#include "qemu/osdep.h"
173
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue)
174
return QSIMPLEQ_FIRST(&queue->entries) == NULL;
175
}
176
177
+/* The wait records are handled with a multiple-producer, single-consumer
178
+ * lock-free queue. There cannot be two concurrent pop_waiter() calls
179
+ * because pop_waiter() can only be called while mutex->handoff is zero.
180
+ * This can happen in three cases:
181
+ * - in qemu_co_mutex_unlock, before the hand-off protocol has started.
182
+ * In this case, qemu_co_mutex_lock will see mutex->handoff == 0 and
183
+ * not take part in the handoff.
184
+ * - in qemu_co_mutex_lock, if it steals the hand-off responsibility from
185
+ * qemu_co_mutex_unlock. In this case, qemu_co_mutex_unlock will fail
186
+ * the cmpxchg (it will see either 0 or the next sequence value) and
187
+ * exit. The next hand-off cannot begin until qemu_co_mutex_lock has
188
+ * woken up someone.
189
+ * - in qemu_co_mutex_unlock, if it takes the hand-off token itself.
190
+ * In this case another iteration starts with mutex->handoff == 0;
191
+ * a concurrent qemu_co_mutex_lock will fail the cmpxchg, and
192
+ * qemu_co_mutex_unlock will go back to case (1).
193
+ *
194
+ * The following functions manage this queue.
195
+ */
196
+typedef struct CoWaitRecord {
197
+ Coroutine *co;
198
+ QSLIST_ENTRY(CoWaitRecord) next;
199
+} CoWaitRecord;
200
+
201
+static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
202
+{
203
+ w->co = qemu_coroutine_self();
204
+ QSLIST_INSERT_HEAD_ATOMIC(&mutex->from_push, w, next);
205
+}
206
+
207
+static void move_waiters(CoMutex *mutex)
208
+{
209
+ QSLIST_HEAD(, CoWaitRecord) reversed;
210
+ QSLIST_MOVE_ATOMIC(&reversed, &mutex->from_push);
211
+ while (!QSLIST_EMPTY(&reversed)) {
212
+ CoWaitRecord *w = QSLIST_FIRST(&reversed);
213
+ QSLIST_REMOVE_HEAD(&reversed, next);
214
+ QSLIST_INSERT_HEAD(&mutex->to_pop, w, next);
215
+ }
216
+}
217
+
218
+static CoWaitRecord *pop_waiter(CoMutex *mutex)
219
+{
220
+ CoWaitRecord *w;
221
+
222
+ if (QSLIST_EMPTY(&mutex->to_pop)) {
223
+ move_waiters(mutex);
224
+ if (QSLIST_EMPTY(&mutex->to_pop)) {
225
+ return NULL;
226
+ }
227
+ }
228
+ w = QSLIST_FIRST(&mutex->to_pop);
229
+ QSLIST_REMOVE_HEAD(&mutex->to_pop, next);
230
+ return w;
231
+}
232
+
233
+static bool has_waiters(CoMutex *mutex)
234
+{
235
+ return QSLIST_EMPTY(&mutex->to_pop) || QSLIST_EMPTY(&mutex->from_push);
236
+}
237
+
238
void qemu_co_mutex_init(CoMutex *mutex)
239
{
240
memset(mutex, 0, sizeof(*mutex));
241
- qemu_co_queue_init(&mutex->queue);
242
}
243
244
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
245
+static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
246
{
247
Coroutine *self = qemu_coroutine_self();
248
+ CoWaitRecord w;
249
+ unsigned old_handoff;
250
251
trace_qemu_co_mutex_lock_entry(mutex, self);
252
+ w.co = self;
253
+ push_waiter(mutex, &w);
254
255
- while (mutex->locked) {
256
- qemu_co_queue_wait(&mutex->queue);
257
+ /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
258
+ * a concurrent unlock() the responsibility of waking somebody up.
259
+ */
260
+ old_handoff = atomic_mb_read(&mutex->handoff);
261
+ if (old_handoff &&
262
+ has_waiters(mutex) &&
263
+ atomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
264
+ /* There can be no concurrent pops, because there can be only
265
+ * one active handoff at a time.
266
+ */
267
+ CoWaitRecord *to_wake = pop_waiter(mutex);
268
+ Coroutine *co = to_wake->co;
269
+ if (co == self) {
270
+ /* We got the lock ourselves! */
271
+ assert(to_wake == &w);
272
+ return;
273
+ }
274
+
275
+ aio_co_wake(co);
276
}
277
278
- mutex->locked = true;
279
- mutex->holder = self;
280
- self->locks_held++;
281
-
282
+ qemu_coroutine_yield();
283
trace_qemu_co_mutex_lock_return(mutex, self);
284
}
285
286
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
287
+{
288
+ Coroutine *self = qemu_coroutine_self();
289
+
290
+ if (atomic_fetch_inc(&mutex->locked) == 0) {
291
+ /* Uncontended. */
292
+ trace_qemu_co_mutex_lock_uncontended(mutex, self);
293
+ } else {
294
+ qemu_co_mutex_lock_slowpath(mutex);
295
+ }
296
+ mutex->holder = self;
297
+ self->locks_held++;
298
+}
299
+
300
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
301
{
302
Coroutine *self = qemu_coroutine_self();
303
304
trace_qemu_co_mutex_unlock_entry(mutex, self);
305
306
- assert(mutex->locked == true);
307
+ assert(mutex->locked);
308
assert(mutex->holder == self);
309
assert(qemu_in_coroutine());
310
311
- mutex->locked = false;
312
mutex->holder = NULL;
313
self->locks_held--;
314
- qemu_co_queue_next(&mutex->queue);
315
+ if (atomic_fetch_dec(&mutex->locked) == 1) {
316
+ /* No waiting qemu_co_mutex_lock(). Pfew, that was easy! */
317
+ return;
318
+ }
319
+
320
+ for (;;) {
321
+ CoWaitRecord *to_wake = pop_waiter(mutex);
322
+ unsigned our_handoff;
323
+
324
+ if (to_wake) {
325
+ Coroutine *co = to_wake->co;
326
+ aio_co_wake(co);
327
+ break;
328
+ }
329
+
330
+ /* Some concurrent lock() is in progress (we know this because
331
+ * mutex->locked was >1) but it hasn't yet put itself on the wait
332
+ * queue. Pick a sequence number for the handoff protocol (not 0).
333
+ */
334
+ if (++mutex->sequence == 0) {
335
+ mutex->sequence = 1;
336
+ }
337
+
338
+ our_handoff = mutex->sequence;
339
+ atomic_mb_set(&mutex->handoff, our_handoff);
340
+ if (!has_waiters(mutex)) {
341
+ /* The concurrent lock has not added itself yet, so it
342
+ * will be able to pick our handoff.
343
+ */
344
+ break;
345
+ }
346
+
347
+ /* Try to do the handoff protocol ourselves; if somebody else has
348
+ * already taken it, however, we're done and they're responsible.
349
+ */
350
+ if (atomic_cmpxchg(&mutex->handoff, our_handoff, 0) != our_handoff) {
351
+ break;
352
+ }
353
+ }
354
355
trace_qemu_co_mutex_unlock_return(mutex, self);
356
}
357
diff --git a/util/trace-events b/util/trace-events
358
index XXXXXXX..XXXXXXX 100644
359
--- a/util/trace-events
360
+++ b/util/trace-events
361
@@ -XXX,XX +XXX,XX @@ qemu_coroutine_terminate(void *co) "self %p"
362
363
# util/qemu-coroutine-lock.c
364
qemu_co_queue_run_restart(void *co) "co %p"
365
+qemu_co_mutex_lock_uncontended(void *mutex, void *self) "mutex %p self %p"
366
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
367
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
368
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
369
--
370
2.9.3
371
372
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Assume we have two regions, A and B, and region B is in-flight now,
3
Running a very small critical section on pthread_mutex_t and CoMutex
4
region A is not yet touched, but it is unallocated and should be
4
shows that pthread_mutex_t is much faster because it doesn't actually
5
skipped.
5
go to sleep. What happens is that the critical section is shorter
6
than the latency of entering the kernel and thus FUTEX_WAIT always
7
fails. With CoMutex there is no such latency but you still want to
8
avoid wait and wakeup. So introduce it artificially.
6
9
7
Correspondingly, as progress we have
10
This only works with one waiter; because CoMutex is fair, it will
11
always have more waits and wakeups than a pthread_mutex_t.
8
12
9
total = A + B
13
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
current = 0
14
Reviewed-by: Fam Zheng <famz@redhat.com>
15
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
18
include/qemu/coroutine.h | 5 +++++
19
util/qemu-coroutine-lock.c | 51 ++++++++++++++++++++++++++++++++++++++++------
20
util/qemu-coroutine.c | 2 +-
21
3 files changed, 51 insertions(+), 7 deletions(-)
11
22
12
If we reset unallocated region A and call progress_reset_callback,
23
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
13
it will calculate 0 bytes dirty in the bitmap and call
14
job_progress_set_remaining, which will set
15
16
total = current + 0 = 0 + 0 = 0
17
18
So, B bytes are actually removed from total accounting. When job
19
finishes we'll have
20
21
total = 0
22
current = B
23
24
, which doesn't sound good.
25
26
This is because we didn't consider in-flight bytes; actually, when
27
calculating remaining, we should have set (in_flight + dirty_bytes)
28
as remaining, not only dirty_bytes.
29
30
To fix it, let's refactor progress calculation, moving it to block-copy
31
itself instead of fixing callback. And, of course, track in_flight
32
bytes count.
33
34
We still have to keep one callback, to maintain backup job bytes_read
35
calculation, but it will go away soon, when we turn the whole backup
36
process into one block_copy call.
37
38
Cc: qemu-stable@nongnu.org
39
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
40
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
41
Message-Id: <20200311103004.7649-3-vsementsov@virtuozzo.com>
42
Signed-off-by: Max Reitz <mreitz@redhat.com>
43
---
44
block/backup.c | 13 ++-----------
45
block/block-copy.c | 16 ++++++++++++----
46
include/block/block-copy.h | 15 +++++----------
47
3 files changed, 19 insertions(+), 25 deletions(-)
48
49
diff --git a/block/backup.c b/block/backup.c
50
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
51
--- a/block/backup.c
25
--- a/include/qemu/coroutine.h
52
+++ b/block/backup.c
26
+++ b/include/qemu/coroutine.h
53
@@ -XXX,XX +XXX,XX @@ static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
27
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex {
54
BackupBlockJob *s = opaque;
28
*/
55
29
unsigned locked;
56
s->bytes_read += bytes;
30
57
- job_progress_update(&s->common.job, bytes);
31
+ /* Context that is holding the lock. Useful to avoid spinning
58
-}
32
+ * when two coroutines on the same AioContext try to get the lock. :)
59
-
33
+ */
60
-static void backup_progress_reset_callback(void *opaque)
34
+ AioContext *ctx;
61
-{
35
+
62
- BackupBlockJob *s = opaque;
36
/* A queue of waiters. Elements are added atomically in front of
63
- uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap);
37
* from_push. to_pop is only populated, and popped from, by whoever
64
-
38
* is in charge of the next wakeup. This can be an unlocker or,
65
- job_progress_set_remaining(&s->common.job, estimate);
39
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/util/qemu-coroutine-lock.c
42
+++ b/util/qemu-coroutine-lock.c
43
@@ -XXX,XX +XXX,XX @@
44
#include "qemu-common.h"
45
#include "qemu/coroutine.h"
46
#include "qemu/coroutine_int.h"
47
+#include "qemu/processor.h"
48
#include "qemu/queue.h"
49
#include "block/aio.h"
50
#include "trace.h"
51
@@ -XXX,XX +XXX,XX @@ void qemu_co_mutex_init(CoMutex *mutex)
52
memset(mutex, 0, sizeof(*mutex));
66
}
53
}
67
54
68
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
55
-static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
69
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
56
+static void coroutine_fn qemu_co_mutex_wake(CoMutex *mutex, Coroutine *co)
70
job->cluster_size = cluster_size;
71
job->len = len;
72
73
- block_copy_set_callbacks(bcs, backup_progress_bytes_callback,
74
- backup_progress_reset_callback, job);
75
+ block_copy_set_progress_callback(bcs, backup_progress_bytes_callback, job);
76
+ block_copy_set_progress_meter(bcs, &job->common.job.progress);
77
78
/* Required permissions are already taken by backup-top target */
79
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
80
diff --git a/block/block-copy.c b/block/block-copy.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/block/block-copy.c
83
+++ b/block/block-copy.c
84
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
85
return s;
86
}
87
88
-void block_copy_set_callbacks(
89
+void block_copy_set_progress_callback(
90
BlockCopyState *s,
91
ProgressBytesCallbackFunc progress_bytes_callback,
92
- ProgressResetCallbackFunc progress_reset_callback,
93
void *progress_opaque)
94
{
95
s->progress_bytes_callback = progress_bytes_callback;
96
- s->progress_reset_callback = progress_reset_callback;
97
s->progress_opaque = progress_opaque;
98
}
99
100
+void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
101
+{
57
+{
102
+ s->progress = pm;
58
+ /* Read co before co->ctx; pairs with smp_wmb() in
59
+ * qemu_coroutine_enter().
60
+ */
61
+ smp_read_barrier_depends();
62
+ mutex->ctx = co->ctx;
63
+ aio_co_wake(co);
103
+}
64
+}
104
+
65
+
105
/*
66
+static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
106
* block_copy_do_copy
67
+ CoMutex *mutex)
107
*
68
{
108
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
69
Coroutine *self = qemu_coroutine_self();
109
70
CoWaitRecord w;
110
if (!ret) {
71
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
111
bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
72
if (co == self) {
112
- s->progress_reset_callback(s->progress_opaque);
73
/* We got the lock ourselves! */
113
+ progress_set_remaining(s->progress,
74
assert(to_wake == &w);
114
+ bdrv_get_dirty_count(s->copy_bitmap) +
75
+ mutex->ctx = ctx;
115
+ s->in_flight_bytes);
76
return;
77
}
78
79
- aio_co_wake(co);
80
+ qemu_co_mutex_wake(mutex, co);
116
}
81
}
117
82
118
*count = bytes;
83
qemu_coroutine_yield();
119
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
84
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
120
trace_block_copy_process(s, start);
85
121
86
void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
122
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
87
{
123
+ s->in_flight_bytes += chunk_end - start;
88
+ AioContext *ctx = qemu_get_current_aio_context();
124
89
Coroutine *self = qemu_coroutine_self();
125
co_get_from_shres(s->mem, chunk_end - start);
90
+ int waiters, i;
126
ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
91
127
co_put_to_shres(s->mem, chunk_end - start);
92
- if (atomic_fetch_inc(&mutex->locked) == 0) {
128
+ s->in_flight_bytes -= chunk_end - start;
93
+ /* Running a very small critical section on pthread_mutex_t and CoMutex
129
if (ret < 0) {
94
+ * shows that pthread_mutex_t is much faster because it doesn't actually
130
bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
95
+ * go to sleep. What happens is that the critical section is shorter
96
+ * than the latency of entering the kernel and thus FUTEX_WAIT always
97
+ * fails. With CoMutex there is no such latency but you still want to
98
+ * avoid wait and wakeup. So introduce it artificially.
99
+ */
100
+ i = 0;
101
+retry_fast_path:
102
+ waiters = atomic_cmpxchg(&mutex->locked, 0, 1);
103
+ if (waiters != 0) {
104
+ while (waiters == 1 && ++i < 1000) {
105
+ if (atomic_read(&mutex->ctx) == ctx) {
106
+ break;
107
+ }
108
+ if (atomic_read(&mutex->locked) == 0) {
109
+ goto retry_fast_path;
110
+ }
111
+ cpu_relax();
112
+ }
113
+ waiters = atomic_fetch_inc(&mutex->locked);
114
+ }
115
+
116
+ if (waiters == 0) {
117
/* Uncontended. */
118
trace_qemu_co_mutex_lock_uncontended(mutex, self);
119
+ mutex->ctx = ctx;
120
} else {
121
- qemu_co_mutex_lock_slowpath(mutex);
122
+ qemu_co_mutex_lock_slowpath(ctx, mutex);
123
}
124
mutex->holder = self;
125
self->locks_held++;
126
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
127
assert(mutex->holder == self);
128
assert(qemu_in_coroutine());
129
130
+ mutex->ctx = NULL;
131
mutex->holder = NULL;
132
self->locks_held--;
133
if (atomic_fetch_dec(&mutex->locked) == 1) {
134
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
135
unsigned our_handoff;
136
137
if (to_wake) {
138
- Coroutine *co = to_wake->co;
139
- aio_co_wake(co);
140
+ qemu_co_mutex_wake(mutex, to_wake->co);
131
break;
141
break;
132
}
142
}
133
143
134
+ progress_work_done(s->progress, chunk_end - start);
144
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
135
s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
136
start = chunk_end;
137
ret = 0;
138
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
139
index XXXXXXX..XXXXXXX 100644
145
index XXXXXXX..XXXXXXX 100644
140
--- a/include/block/block-copy.h
146
--- a/util/qemu-coroutine.c
141
+++ b/include/block/block-copy.h
147
+++ b/util/qemu-coroutine.c
142
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyInFlightReq {
148
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_enter(Coroutine *co)
143
} BlockCopyInFlightReq;
149
co->ctx = qemu_get_current_aio_context();
144
150
145
typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
151
/* Store co->ctx before anything that stores co. Matches
146
-typedef void (*ProgressResetCallbackFunc)(void *opaque);
152
- * barrier in aio_co_wake.
147
typedef struct BlockCopyState {
153
+ * barrier in aio_co_wake and qemu_co_mutex_wake.
148
/*
149
* BdrvChild objects are not owned or managed by block-copy. They are
150
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyState {
151
BdrvChild *source;
152
BdrvChild *target;
153
BdrvDirtyBitmap *copy_bitmap;
154
+ int64_t in_flight_bytes;
155
int64_t cluster_size;
156
bool use_copy_range;
157
int64_t copy_size;
158
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyState {
159
*/
154
*/
160
bool skip_unallocated;
155
smp_wmb();
161
156
162
+ ProgressMeter *progress;
163
/* progress_bytes_callback: called when some copying progress is done. */
164
ProgressBytesCallbackFunc progress_bytes_callback;
165
-
166
- /*
167
- * progress_reset_callback: called when some bytes reset from copy_bitmap
168
- * (see @skip_unallocated above). The callee is assumed to recalculate how
169
- * many bytes remain based on the dirty bit count of copy_bitmap.
170
- */
171
- ProgressResetCallbackFunc progress_reset_callback;
172
void *progress_opaque;
173
174
SharedResource *mem;
175
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
176
BdrvRequestFlags write_flags,
177
Error **errp);
178
179
-void block_copy_set_callbacks(
180
+void block_copy_set_progress_callback(
181
BlockCopyState *s,
182
ProgressBytesCallbackFunc progress_bytes_callback,
183
- ProgressResetCallbackFunc progress_reset_callback,
184
void *progress_opaque);
185
186
+void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
187
+
188
void block_copy_state_free(BlockCopyState *s);
189
190
int64_t block_copy_reset_unallocated(BlockCopyState *s,
191
--
157
--
192
2.24.1
158
2.9.3
193
159
194
160
diff view generated by jsdifflib
New patch
1
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
3
Add two implementations of the same benchmark as the previous patch,
4
but using pthreads. One uses a normal QemuMutex, the other is Linux
5
only and implements a fair mutex based on MCS locks and futexes.
6
This shows that the slower performance of the 5-thread case is due to
7
the fairness of CoMutex, rather than to coroutines. If fairness does
8
not matter, as is the case with two threads, CoMutex can actually be
9
faster than pthreads.
10
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
tests/test-aio-multithread.c | 164 +++++++++++++++++++++++++++++++++++++++++++
17
1 file changed, 164 insertions(+)
18
19
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/tests/test-aio-multithread.c
22
+++ b/tests/test-aio-multithread.c
23
@@ -XXX,XX +XXX,XX @@ static void test_multi_co_mutex_2_30(void)
24
test_multi_co_mutex(2, 30);
25
}
26
27
+/* Same test with fair mutexes, for performance comparison. */
28
+
29
+#ifdef CONFIG_LINUX
30
+#include "qemu/futex.h"
31
+
32
+/* The nodes for the mutex reside in this structure (on which we try to avoid
33
+ * false sharing). The head of the mutex is in the "mutex_head" variable.
34
+ */
35
+static struct {
36
+ int next, locked;
37
+ int padding[14];
38
+} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
39
+
40
+static int mutex_head = -1;
41
+
42
+static void mcs_mutex_lock(void)
43
+{
44
+ int prev;
45
+
46
+ nodes[id].next = -1;
47
+ nodes[id].locked = 1;
48
+ prev = atomic_xchg(&mutex_head, id);
49
+ if (prev != -1) {
50
+ atomic_set(&nodes[prev].next, id);
51
+ qemu_futex_wait(&nodes[id].locked, 1);
52
+ }
53
+}
54
+
55
+static void mcs_mutex_unlock(void)
56
+{
57
+ int next;
58
+ if (nodes[id].next == -1) {
59
+ if (atomic_read(&mutex_head) == id &&
60
+ atomic_cmpxchg(&mutex_head, id, -1) == id) {
61
+ /* Last item in the list, exit. */
62
+ return;
63
+ }
64
+ while (atomic_read(&nodes[id].next) == -1) {
65
+ /* mcs_mutex_lock did the xchg, but has not updated
66
+ * nodes[prev].next yet.
67
+ */
68
+ }
69
+ }
70
+
71
+ /* Wake up the next in line. */
72
+ next = nodes[id].next;
73
+ nodes[next].locked = 0;
74
+ qemu_futex_wake(&nodes[next].locked, 1);
75
+}
76
+
77
+static void test_multi_fair_mutex_entry(void *opaque)
78
+{
79
+ while (!atomic_mb_read(&now_stopping)) {
80
+ mcs_mutex_lock();
81
+ counter++;
82
+ mcs_mutex_unlock();
83
+ atomic_inc(&atomic_counter);
84
+ }
85
+ atomic_dec(&running);
86
+}
87
+
88
+static void test_multi_fair_mutex(int threads, int seconds)
89
+{
90
+ int i;
91
+
92
+ assert(mutex_head == -1);
93
+ counter = 0;
94
+ atomic_counter = 0;
95
+ now_stopping = false;
96
+
97
+ create_aio_contexts();
98
+ assert(threads <= NUM_CONTEXTS);
99
+ running = threads;
100
+ for (i = 0; i < threads; i++) {
101
+ Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry, NULL);
102
+ aio_co_schedule(ctx[i], co1);
103
+ }
104
+
105
+ g_usleep(seconds * 1000000);
106
+
107
+ atomic_mb_set(&now_stopping, true);
108
+ while (running > 0) {
109
+ g_usleep(100000);
110
+ }
111
+
112
+ join_aio_contexts();
113
+ g_test_message("%d iterations/second\n", counter / seconds);
114
+ g_assert_cmpint(counter, ==, atomic_counter);
115
+}
116
+
117
+static void test_multi_fair_mutex_1(void)
118
+{
119
+ test_multi_fair_mutex(NUM_CONTEXTS, 1);
120
+}
121
+
122
+static void test_multi_fair_mutex_10(void)
123
+{
124
+ test_multi_fair_mutex(NUM_CONTEXTS, 10);
125
+}
126
+#endif
127
+
128
+/* Same test with pthread mutexes, for performance comparison and
129
+ * portability. */
130
+
131
+static QemuMutex mutex;
132
+
133
+static void test_multi_mutex_entry(void *opaque)
134
+{
135
+ while (!atomic_mb_read(&now_stopping)) {
136
+ qemu_mutex_lock(&mutex);
137
+ counter++;
138
+ qemu_mutex_unlock(&mutex);
139
+ atomic_inc(&atomic_counter);
140
+ }
141
+ atomic_dec(&running);
142
+}
143
+
144
+static void test_multi_mutex(int threads, int seconds)
145
+{
146
+ int i;
147
+
148
+ qemu_mutex_init(&mutex);
149
+ counter = 0;
150
+ atomic_counter = 0;
151
+ now_stopping = false;
152
+
153
+ create_aio_contexts();
154
+ assert(threads <= NUM_CONTEXTS);
155
+ running = threads;
156
+ for (i = 0; i < threads; i++) {
157
+ Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry, NULL);
158
+ aio_co_schedule(ctx[i], co1);
159
+ }
160
+
161
+ g_usleep(seconds * 1000000);
162
+
163
+ atomic_mb_set(&now_stopping, true);
164
+ while (running > 0) {
165
+ g_usleep(100000);
166
+ }
167
+
168
+ join_aio_contexts();
169
+ g_test_message("%d iterations/second\n", counter / seconds);
170
+ g_assert_cmpint(counter, ==, atomic_counter);
171
+}
172
+
173
+static void test_multi_mutex_1(void)
174
+{
175
+ test_multi_mutex(NUM_CONTEXTS, 1);
176
+}
177
+
178
+static void test_multi_mutex_10(void)
179
+{
180
+ test_multi_mutex(NUM_CONTEXTS, 10);
181
+}
182
+
183
/* End of tests. */
184
185
int main(int argc, char **argv)
186
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
187
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
188
g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
189
g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
190
+#ifdef CONFIG_LINUX
191
+ g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
192
+#endif
193
+ g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
194
} else {
195
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
196
g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
197
g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
198
+#ifdef CONFIG_LINUX
199
+ g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
200
+#endif
201
+ g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
202
}
203
return g_test_run();
204
}
205
--
206
2.9.3
207
208
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
This test exercises the block/crypto.c "luks" block driver
3
This will avoid forward references in the next patch. It is also
4
.bdrv_measure() code.
4
more logical because CoQueue is no longer the basic primitive.
5
5
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Reviewed-by: Fam Zheng <famz@redhat.com>
8
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-Id: <20200221112522.1497712-5-stefanha@redhat.com>
9
[mreitz: Renamed test from 282 to 288]
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
10
---
12
tests/qemu-iotests/288 | 93 ++++++++++++++++++++++++++++++++++++++
11
include/qemu/coroutine.h | 89 ++++++++++++++++++++++++------------------------
13
tests/qemu-iotests/288.out | 30 ++++++++++++
12
1 file changed, 44 insertions(+), 45 deletions(-)
14
tests/qemu-iotests/group | 1 +
15
3 files changed, 124 insertions(+)
16
create mode 100755 tests/qemu-iotests/288
17
create mode 100644 tests/qemu-iotests/288.out
18
13
19
diff --git a/tests/qemu-iotests/288 b/tests/qemu-iotests/288
14
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
20
new file mode 100755
15
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX
16
--- a/include/qemu/coroutine.h
22
--- /dev/null
17
+++ b/include/qemu/coroutine.h
23
+++ b/tests/qemu-iotests/288
18
@@ -XXX,XX +XXX,XX @@ bool qemu_in_coroutine(void);
24
@@ -XXX,XX +XXX,XX @@
19
*/
25
+#!/usr/bin/env bash
20
bool qemu_coroutine_entered(Coroutine *co);
26
+#
21
27
+# qemu-img measure tests for LUKS images
22
-
28
+#
23
-/**
29
+# Copyright (C) 2020 Red Hat, Inc.
24
- * CoQueues are a mechanism to queue coroutines in order to continue executing
30
+#
25
- * them later. They provide the fundamental primitives on which coroutine locks
31
+# This program is free software; you can redistribute it and/or modify
26
- * are built.
32
+# it under the terms of the GNU General Public License as published by
27
- */
33
+# the Free Software Foundation; either version 2 of the License, or
28
-typedef struct CoQueue {
34
+# (at your option) any later version.
29
- QSIMPLEQ_HEAD(, Coroutine) entries;
35
+#
30
-} CoQueue;
36
+# This program is distributed in the hope that it will be useful,
31
-
37
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
32
-/**
38
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33
- * Initialise a CoQueue. This must be called before any other operation is used
39
+# GNU General Public License for more details.
34
- * on the CoQueue.
40
+#
35
- */
41
+# You should have received a copy of the GNU General Public License
36
-void qemu_co_queue_init(CoQueue *queue);
42
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
37
-
43
+#
38
-/**
39
- * Adds the current coroutine to the CoQueue and transfers control to the
40
- * caller of the coroutine.
41
- */
42
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
43
-
44
-/**
45
- * Restarts the next coroutine in the CoQueue and removes it from the queue.
46
- *
47
- * Returns true if a coroutine was restarted, false if the queue is empty.
48
- */
49
-bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
50
-
51
-/**
52
- * Restarts all coroutines in the CoQueue and leaves the queue empty.
53
- */
54
-void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
55
-
56
-/**
57
- * Enter the next coroutine in the queue
58
- */
59
-bool qemu_co_enter_next(CoQueue *queue);
60
-
61
-/**
62
- * Checks if the CoQueue is empty.
63
- */
64
-bool qemu_co_queue_empty(CoQueue *queue);
65
-
66
-
67
/**
68
* Provides a mutex that can be used to synchronise coroutines
69
*/
70
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
71
*/
72
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
73
44
+
74
+
45
+# creator
75
+/**
46
+owner=stefanha@redhat.com
76
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
77
+ * them later.
78
+ */
79
+typedef struct CoQueue {
80
+ QSIMPLEQ_HEAD(, Coroutine) entries;
81
+} CoQueue;
47
+
82
+
48
+seq=`basename $0`
83
+/**
49
+echo "QA output created by $seq"
84
+ * Initialise a CoQueue. This must be called before any other operation is used
85
+ * on the CoQueue.
86
+ */
87
+void qemu_co_queue_init(CoQueue *queue);
50
+
88
+
51
+status=1 # failure is the default!
89
+/**
90
+ * Adds the current coroutine to the CoQueue and transfers control to the
91
+ * caller of the coroutine.
92
+ */
93
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
52
+
94
+
53
+_cleanup()
95
+/**
54
+{
96
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
55
+ _cleanup_test_img
97
+ *
56
+ rm -f "$TEST_IMG.converted"
98
+ * Returns true if a coroutine was restarted, false if the queue is empty.
57
+}
99
+ */
58
+trap "_cleanup; exit \$status" 0 1 2 3 15
100
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
59
+
101
+
60
+# get standard environment, filters and checks
102
+/**
61
+. ./common.rc
103
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
62
+. ./common.filter
104
+ */
63
+. ./common.pattern
105
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
64
+
106
+
65
+_supported_fmt luks
107
+/**
66
+_supported_proto file
108
+ * Enter the next coroutine in the queue
67
+_supported_os Linux
109
+ */
110
+bool qemu_co_enter_next(CoQueue *queue);
68
+
111
+
69
+SECRET=secret,id=sec0,data=passphrase
112
+/**
113
+ * Checks if the CoQueue is empty.
114
+ */
115
+bool qemu_co_queue_empty(CoQueue *queue);
70
+
116
+
71
+echo "== measure 1G image file =="
72
+echo
73
+
117
+
74
+$QEMU_IMG measure --object "$SECRET" \
118
typedef struct CoRwlock {
75
+     -O "$IMGFMT" \
119
bool writer;
76
+         -o key-secret=sec0,iter-time=10 \
120
int reader;
77
+         --size 1G
78
+
79
+echo
80
+echo "== create 1G image file (size should be no greater than measured) =="
81
+echo
82
+
83
+_make_test_img 1G
84
+stat -c "image file size in bytes: %s" "$TEST_IMG_FILE"
85
+
86
+echo
87
+echo "== modified 1G image file (size should be no greater than measured) =="
88
+echo
89
+
90
+$QEMU_IO --object "$SECRET" --image-opts "$TEST_IMG" -c "write -P 0x51 0x10000 0x400" | _filter_qemu_io | _filter_testdir
91
+stat -c "image file size in bytes: %s" "$TEST_IMG_FILE"
92
+
93
+echo
94
+echo "== measure preallocation=falloc 1G image file =="
95
+echo
96
+
97
+$QEMU_IMG measure --object "$SECRET" \
98
+     -O "$IMGFMT" \
99
+         -o key-secret=sec0,iter-time=10,preallocation=falloc \
100
+         --size 1G
101
+
102
+echo
103
+echo "== measure with input image file =="
104
+echo
105
+
106
+IMGFMT=raw IMGKEYSECRET= IMGOPTS= _make_test_img 1G | _filter_imgfmt
107
+QEMU_IO_OPTIONS= IMGOPTSSYNTAX= $QEMU_IO -f raw -c "write -P 0x51 0x10000 0x400" "$TEST_IMG_FILE" | _filter_qemu_io | _filter_testdir
108
+$QEMU_IMG measure --object "$SECRET" \
109
+     -O "$IMGFMT" \
110
+         -o key-secret=sec0,iter-time=10 \
111
+         -f raw \
112
+         "$TEST_IMG_FILE"
113
+
114
+# success, all done
115
+echo "*** done"
116
+rm -f $seq.full
117
+status=0
118
diff --git a/tests/qemu-iotests/288.out b/tests/qemu-iotests/288.out
119
new file mode 100644
120
index XXXXXXX..XXXXXXX
121
--- /dev/null
122
+++ b/tests/qemu-iotests/288.out
123
@@ -XXX,XX +XXX,XX @@
124
+QA output created by 288
125
+== measure 1G image file ==
126
+
127
+required size: 1075810304
128
+fully allocated size: 1075810304
129
+
130
+== create 1G image file (size should be no greater than measured) ==
131
+
132
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
133
+image file size in bytes: 1075810304
134
+
135
+== modified 1G image file (size should be no greater than measured) ==
136
+
137
+wrote 1024/1024 bytes at offset 65536
138
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
139
+image file size in bytes: 1075810304
140
+
141
+== measure preallocation=falloc 1G image file ==
142
+
143
+required size: 1075810304
144
+fully allocated size: 1075810304
145
+
146
+== measure with input image file ==
147
+
148
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
149
+wrote 1024/1024 bytes at offset 65536
150
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
151
+required size: 1075810304
152
+fully allocated size: 1075810304
153
+*** done
154
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tests/qemu-iotests/group
157
+++ b/tests/qemu-iotests/group
158
@@ -XXX,XX +XXX,XX @@
159
283 auto quick
160
284 rw
161
286 rw quick
162
+288 quick
163
--
121
--
164
2.24.1
122
2.9.3
165
123
166
124
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Split find_conflicting_inflight_req to be used separately.
3
All that CoQueue needs in order to become thread-safe is help
4
4
from an external mutex. Add this to the API.
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
6
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Reviewed-by: Fam Zheng <famz@redhat.com>
8
Message-Id: <20200311103004.7649-6-vsementsov@virtuozzo.com>
8
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
10
---
11
block/block-copy.c | 31 +++++++++++++++++++------------
11
include/qemu/coroutine.h | 8 +++++---
12
1 file changed, 19 insertions(+), 12 deletions(-)
12
block/backup.c | 2 +-
13
13
block/io.c | 4 ++--
14
diff --git a/block/block-copy.c b/block/block-copy.c
14
block/nbd-client.c | 2 +-
15
index XXXXXXX..XXXXXXX 100644
15
block/qcow2-cluster.c | 4 +---
16
--- a/block/block-copy.c
16
block/sheepdog.c | 2 +-
17
+++ b/block/block-copy.c
17
block/throttle-groups.c | 2 +-
18
@@ -XXX,XX +XXX,XX @@
18
hw/9pfs/9p.c | 2 +-
19
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
19
util/qemu-coroutine-lock.c | 24 +++++++++++++++++++++---
20
#define BLOCK_COPY_MAX_MEM (128 * MiB)
20
9 files changed, 34 insertions(+), 16 deletions(-)
21
21
22
+static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
22
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
23
+ int64_t start,
23
index XXXXXXX..XXXXXXX 100644
24
+ int64_t end)
24
--- a/include/qemu/coroutine.h
25
+{
25
+++ b/include/qemu/coroutine.h
26
+ BlockCopyInFlightReq *req;
26
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
27
28
/**
29
* CoQueues are a mechanism to queue coroutines in order to continue executing
30
- * them later.
31
+ * them later. They are similar to condition variables, but they need help
32
+ * from an external mutex in order to maintain thread-safety.
33
*/
34
typedef struct CoQueue {
35
QSIMPLEQ_HEAD(, Coroutine) entries;
36
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue);
37
38
/**
39
* Adds the current coroutine to the CoQueue and transfers control to the
40
- * caller of the coroutine.
41
+ * caller of the coroutine. The mutex is unlocked during the wait and
42
+ * locked again afterwards.
43
*/
44
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
45
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex);
46
47
/**
48
* Restarts the next coroutine in the CoQueue and removes it from the queue.
49
diff --git a/block/backup.c b/block/backup.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/block/backup.c
52
+++ b/block/backup.c
53
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
54
retry = false;
55
QLIST_FOREACH(req, &job->inflight_reqs, list) {
56
if (end > req->start && start < req->end) {
57
- qemu_co_queue_wait(&req->wait_queue);
58
+ qemu_co_queue_wait(&req->wait_queue, NULL);
59
retry = true;
60
break;
61
}
62
diff --git a/block/io.c b/block/io.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/block/io.c
65
+++ b/block/io.c
66
@@ -XXX,XX +XXX,XX @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
67
* (instead of producing a deadlock in the former case). */
68
if (!req->waiting_for) {
69
self->waiting_for = req;
70
- qemu_co_queue_wait(&req->wait_queue);
71
+ qemu_co_queue_wait(&req->wait_queue, NULL);
72
self->waiting_for = NULL;
73
retry = true;
74
waited = true;
75
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
76
77
/* Wait until any previous flushes are completed */
78
while (bs->active_flush_req) {
79
- qemu_co_queue_wait(&bs->flush_queue);
80
+ qemu_co_queue_wait(&bs->flush_queue, NULL);
81
}
82
83
bs->active_flush_req = true;
84
diff --git a/block/nbd-client.c b/block/nbd-client.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/block/nbd-client.c
87
+++ b/block/nbd-client.c
88
@@ -XXX,XX +XXX,XX @@ static void nbd_coroutine_start(NBDClientSession *s,
89
/* Poor man semaphore. The free_sema is locked when no other request
90
* can be accepted, and unlocked after receiving one reply. */
91
if (s->in_flight == MAX_NBD_REQUESTS) {
92
- qemu_co_queue_wait(&s->free_sema);
93
+ qemu_co_queue_wait(&s->free_sema, NULL);
94
assert(s->in_flight < MAX_NBD_REQUESTS);
95
}
96
s->in_flight++;
97
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/qcow2-cluster.c
100
+++ b/block/qcow2-cluster.c
101
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
102
if (bytes == 0) {
103
/* Wait for the dependency to complete. We need to recheck
104
* the free/allocated clusters when we continue. */
105
- qemu_co_mutex_unlock(&s->lock);
106
- qemu_co_queue_wait(&old_alloc->dependent_requests);
107
- qemu_co_mutex_lock(&s->lock);
108
+ qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
109
return -EAGAIN;
110
}
111
}
112
diff --git a/block/sheepdog.c b/block/sheepdog.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/block/sheepdog.c
115
+++ b/block/sheepdog.c
116
@@ -XXX,XX +XXX,XX @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
117
retry:
118
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
119
if (AIOCBOverlapping(acb, cb)) {
120
- qemu_co_queue_wait(&s->overlapping_queue);
121
+ qemu_co_queue_wait(&s->overlapping_queue, NULL);
122
goto retry;
123
}
124
}
125
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/block/throttle-groups.c
128
+++ b/block/throttle-groups.c
129
@@ -XXX,XX +XXX,XX @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
130
if (must_wait || blkp->pending_reqs[is_write]) {
131
blkp->pending_reqs[is_write]++;
132
qemu_mutex_unlock(&tg->lock);
133
- qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
134
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
135
qemu_mutex_lock(&tg->lock);
136
blkp->pending_reqs[is_write]--;
137
}
138
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/hw/9pfs/9p.c
141
+++ b/hw/9pfs/9p.c
142
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn v9fs_flush(void *opaque)
143
/*
144
* Wait for pdu to complete.
145
*/
146
- qemu_co_queue_wait(&cancel_pdu->complete);
147
+ qemu_co_queue_wait(&cancel_pdu->complete, NULL);
148
cancel_pdu->cancelled = 0;
149
pdu_free(cancel_pdu);
150
}
151
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/util/qemu-coroutine-lock.c
154
+++ b/util/qemu-coroutine-lock.c
155
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue)
156
QSIMPLEQ_INIT(&queue->entries);
157
}
158
159
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
160
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex)
161
{
162
Coroutine *self = qemu_coroutine_self();
163
QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
27
+
164
+
28
+ QLIST_FOREACH(req, &s->inflight_reqs, list) {
165
+ if (mutex) {
29
+ if (end > req->start_byte && start < req->end_byte) {
166
+ qemu_co_mutex_unlock(mutex);
30
+ return req;
31
+ }
32
+ }
167
+ }
33
+
168
+
34
+ return NULL;
169
+ /* There is no race condition here. Other threads will call
35
+}
170
+ * aio_co_schedule on our AioContext, which can reenter this
171
+ * coroutine but only after this yield and after the main loop
172
+ * has gone through the next iteration.
173
+ */
174
qemu_coroutine_yield();
175
assert(qemu_in_coroutine());
36
+
176
+
37
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
177
+ /* TODO: OSv implements wait morphing here, where the wakeup
38
int64_t start,
178
+ * primitive automatically places the woken coroutine on the
39
int64_t end)
179
+ * mutex's queue. This avoids the thundering herd effect.
40
{
180
+ */
41
BlockCopyInFlightReq *req;
181
+ if (mutex) {
42
- bool waited;
182
+ qemu_co_mutex_lock(mutex);
43
-
44
- do {
45
- waited = false;
46
- QLIST_FOREACH(req, &s->inflight_reqs, list) {
47
- if (end > req->start_byte && start < req->end_byte) {
48
- qemu_co_queue_wait(&req->wait_queue, NULL);
49
- waited = true;
50
- break;
51
- }
52
- }
53
- } while (waited);
54
+
55
+ while ((req = find_conflicting_inflight_req(s, start, end))) {
56
+ qemu_co_queue_wait(&req->wait_queue, NULL);
57
+ }
183
+ }
58
}
184
}
59
185
60
static void block_copy_inflight_req_begin(BlockCopyState *s,
186
/**
187
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock)
188
Coroutine *self = qemu_coroutine_self();
189
190
while (lock->writer) {
191
- qemu_co_queue_wait(&lock->queue);
192
+ qemu_co_queue_wait(&lock->queue, NULL);
193
}
194
lock->reader++;
195
self->locks_held++;
196
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_wrlock(CoRwlock *lock)
197
Coroutine *self = qemu_coroutine_self();
198
199
while (lock->writer || lock->reader) {
200
- qemu_co_queue_wait(&lock->queue);
201
+ qemu_co_queue_wait(&lock->queue, NULL);
202
}
203
lock->writer = true;
204
self->locks_held++;
61
--
205
--
62
2.24.1
206
2.9.3
63
207
64
208
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
In block_copy_do_copy we fall back to read+write if copy_range fails.
3
This adds a CoMutex around the existing CoQueue. Because the write-side
4
In this case copy_size is larger than defined for buffered IO, and
4
can just take CoMutex, the old "writer" field is not necessary anymore.
5
there is a corresponding comment. Still, backup copies data cluster by
5
Instead of removing it altogether, count the number of pending writers
6
cluster, and most requests are limited to one cluster anyway, so the
6
during a read-side critical section and forbid further readers from
7
only source of this one badly-limited request is copy-before-write
7
entering.
8
operation.
9
8
10
A further patch will move backup to use block_copy directly; then, for
9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
11
cases where copy_range is not supported, the first request will be
10
Reviewed-by: Fam Zheng <famz@redhat.com>
12
oversized in each backup. It's not good, let's change it now.
11
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
include/qemu/coroutine.h | 3 ++-
15
util/qemu-coroutine-lock.c | 35 ++++++++++++++++++++++++-----------
16
2 files changed, 26 insertions(+), 12 deletions(-)
13
17
14
The fix is simple: just limit the first copy_range request like a buffer-based
18
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
15
request. If it succeeds, set a larger copy_range limit.
16
17
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
18
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
19
Reviewed-by: Max Reitz <mreitz@redhat.com>
20
Message-Id: <20200311103004.7649-4-vsementsov@virtuozzo.com>
21
Signed-off-by: Max Reitz <mreitz@redhat.com>
22
---
23
block/block-copy.c | 41 +++++++++++++++++++++++++++++++----------
24
1 file changed, 31 insertions(+), 10 deletions(-)
25
26
diff --git a/block/block-copy.c b/block/block-copy.c
27
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
28
--- a/block/block-copy.c
20
--- a/include/qemu/coroutine.h
29
+++ b/block/block-copy.c
21
+++ b/include/qemu/coroutine.h
30
@@ -XXX,XX +XXX,XX @@ void block_copy_state_free(BlockCopyState *s)
22
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue);
31
g_free(s);
23
24
25
typedef struct CoRwlock {
26
- bool writer;
27
+ int pending_writer;
28
int reader;
29
+ CoMutex mutex;
30
CoQueue queue;
31
} CoRwlock;
32
33
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/util/qemu-coroutine-lock.c
36
+++ b/util/qemu-coroutine-lock.c
37
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_init(CoRwlock *lock)
38
{
39
memset(lock, 0, sizeof(*lock));
40
qemu_co_queue_init(&lock->queue);
41
+ qemu_co_mutex_init(&lock->mutex);
32
}
42
}
33
43
34
+static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
44
void qemu_co_rwlock_rdlock(CoRwlock *lock)
35
+{
45
{
36
+ return MIN_NON_ZERO(INT_MAX,
46
Coroutine *self = qemu_coroutine_self();
37
+ MIN_NON_ZERO(source->bs->bl.max_transfer,
47
38
+ target->bs->bl.max_transfer));
48
- while (lock->writer) {
39
+}
49
- qemu_co_queue_wait(&lock->queue, NULL);
50
+ qemu_co_mutex_lock(&lock->mutex);
51
+ /* For fairness, wait if a writer is in line. */
52
+ while (lock->pending_writer) {
53
+ qemu_co_queue_wait(&lock->queue, &lock->mutex);
54
}
55
lock->reader++;
56
+ qemu_co_mutex_unlock(&lock->mutex);
40
+
57
+
41
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
58
+ /* The rest of the read-side critical section is run without the mutex. */
42
int64_t cluster_size,
59
self->locks_held++;
43
BdrvRequestFlags write_flags, Error **errp)
60
}
44
{
61
45
BlockCopyState *s;
62
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
46
BdrvDirtyBitmap *copy_bitmap;
63
Coroutine *self = qemu_coroutine_self();
47
- uint32_t max_transfer =
64
48
- MIN_NON_ZERO(INT_MAX,
65
assert(qemu_in_coroutine());
49
- MIN_NON_ZERO(source->bs->bl.max_transfer,
66
- if (lock->writer) {
50
- target->bs->bl.max_transfer));
67
- lock->writer = false;
51
68
+ if (!lock->reader) {
52
copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
69
+ /* The critical section started in qemu_co_rwlock_wrlock. */
53
errp);
70
qemu_co_queue_restart_all(&lock->queue);
54
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
55
.mem = shres_create(BLOCK_COPY_MAX_MEM),
56
};
57
58
- if (max_transfer < cluster_size) {
59
+ if (block_copy_max_transfer(source, target) < cluster_size) {
60
/*
61
* copy_range does not respect max_transfer. We don't want to bother
62
* with requests smaller than block-copy cluster size, so fallback to
63
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
64
s->copy_size = cluster_size;
65
} else {
71
} else {
66
/*
72
+ self->locks_held--;
67
- * copy_range does not respect max_transfer (it's a TODO), so we factor
73
+
68
- * that in here.
74
+ qemu_co_mutex_lock(&lock->mutex);
69
+ * We enable copy-range, but keep small copy_size, until first
75
lock->reader--;
70
+ * successful copy_range (look at block_copy_do_copy).
76
assert(lock->reader >= 0);
71
*/
77
/* Wakeup only one waiting writer */
72
s->use_copy_range = true;
78
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
73
- s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
79
qemu_co_queue_next(&lock->queue);
74
- QEMU_ALIGN_DOWN(max_transfer, cluster_size));
75
+ s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
76
}
77
78
QLIST_INIT(&s->inflight_reqs);
79
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
80
s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
81
/* Fallback to read+write with allocated buffer */
82
} else {
83
+ if (s->use_copy_range) {
84
+ /*
85
+ * Successful copy-range. Now increase copy_size. copy_range
86
+ * does not respect max_transfer (it's a TODO), so we factor
87
+ * that in here.
88
+ *
89
+ * Note: we double-check s->use_copy_range for the case when
90
+ * parallel block-copy request unsets it during previous
91
+ * bdrv_co_copy_range call.
92
+ */
93
+ s->copy_size =
94
+ MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
95
+ QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
96
+ s->target),
97
+ s->cluster_size));
98
+ }
99
goto out;
100
}
80
}
101
}
81
}
102
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
82
- self->locks_held--;
103
/*
83
+ qemu_co_mutex_unlock(&lock->mutex);
104
* In case of failed copy_range request above, we may proceed with buffered
84
}
105
* request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
85
106
- * be properly limited, so don't care too much.
86
void qemu_co_rwlock_wrlock(CoRwlock *lock)
107
+ * be properly limited, so don't care too much. Moreover the most likely
87
{
108
+ * case (copy_range is unsupported for the configuration, so the very first
88
- Coroutine *self = qemu_coroutine_self();
109
+ * copy_range request fails) is handled by setting large copy_size only
89
-
110
+ * after first successful copy_range.
90
- while (lock->writer || lock->reader) {
111
*/
91
- qemu_co_queue_wait(&lock->queue, NULL);
112
92
+ qemu_co_mutex_lock(&lock->mutex);
113
bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
93
+ lock->pending_writer++;
94
+ while (lock->reader) {
95
+ qemu_co_queue_wait(&lock->queue, &lock->mutex);
96
}
97
- lock->writer = true;
98
- self->locks_held++;
99
+ lock->pending_writer--;
100
+
101
+ /* The rest of the write-side critical section is run with
102
+ * the mutex taken, so that lock->reader remains zero.
103
+ * There is no need to update self->locks_held.
104
+ */
105
}
114
--
106
--
115
2.24.1
107
2.9.3
116
108
117
109
diff view generated by jsdifflib