1
The following changes since commit 5dae13cd71f0755a1395b5a4cde635b8a6ee3f58:
1
The following changes since commit 928173659d6e5dc368284f73f90ea1d129e1f57d:
2
2
3
Merge remote-tracking branch 'remotes/rth/tags/pull-or-20170214' into staging (2017-02-14 09:55:48 +0000)
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200130' into staging (2020-01-30 16:19:04 +0000)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/stefanha/qemu.git tags/block-pull-request
7
https://github.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to decc18f33adecb1316437a47fff0cf0a7665906a:
9
for you to fetch changes up to 8dff69b9415b4287e900358744b732195e1ab2e2:
10
10
11
coroutine-lock: make CoRwlock thread-safe and fair (2017-02-16 17:17:34 +0000)
11
tests/qemu-iotests: use AIOMODE with various tests (2020-01-30 21:01:40 +0000)
12
13
----------------------------------------------------------------
14
Pull request
12
15
13
----------------------------------------------------------------
16
----------------------------------------------------------------
14
17
15
----------------------------------------------------------------
18
Aarushi Mehta (15):
19
configure: permit use of io_uring
20
qapi/block-core: add option for io_uring
21
block/block: add BDRV flag for io_uring
22
block/io_uring: implements interfaces for io_uring
23
stubs: add stubs for io_uring interface
24
util/async: add aio interfaces for io_uring
25
blockdev: adds bdrv_parse_aio to use io_uring
26
block/file-posix.c: extend to use io_uring
27
block: add trace events for io_uring
28
block/io_uring: adds userspace completion polling
29
qemu-io: adds option to use aio engine
30
qemu-img: adds option to use aio engine for benchmarking
31
qemu-nbd: adds option for aio engines
32
tests/qemu-iotests: enable testing with aio options
33
tests/qemu-iotests: use AIOMODE with various tests
16
34
17
Paolo Bonzini (24):
35
Paolo Bonzini (3):
18
block: move AioContext, QEMUTimer, main-loop to libqemuutil
36
block: eliminate BDRV_REQ_NO_SERIALISING
19
aio: introduce aio_co_schedule and aio_co_wake
37
block/io: wait for serialising requests when a request becomes
20
block-backend: allow blk_prw from coroutine context
38
serialising
21
test-thread-pool: use generic AioContext infrastructure
39
block/io: take bs->reqs_lock in bdrv_mark_request_serialising
22
io: add methods to set I/O handlers on AioContext
23
io: make qio_channel_yield aware of AioContexts
24
nbd: convert to use qio_channel_yield
25
coroutine-lock: reschedule coroutine on the AioContext it was running
26
on
27
blkdebug: reschedule coroutine on the AioContext it is running on
28
qed: introduce qed_aio_start_io and qed_aio_next_io_cb
29
aio: push aio_context_acquire/release down to dispatching
30
block: explicitly acquire aiocontext in timers that need it
31
block: explicitly acquire aiocontext in callbacks that need it
32
block: explicitly acquire aiocontext in bottom halves that need it
33
block: explicitly acquire aiocontext in aio callbacks that need it
34
aio-posix: partially inline aio_dispatch into aio_poll
35
async: remove unnecessary inc/dec pairs
36
block: document fields protected by AioContext lock
37
coroutine-lock: make CoMutex thread-safe
38
coroutine-lock: add limited spinning to CoMutex
39
test-aio-multithread: add performance comparison with thread-based
40
mutexes
41
coroutine-lock: place CoMutex before CoQueue in header
42
coroutine-lock: add mutex argument to CoQueue APIs
43
coroutine-lock: make CoRwlock thread-safe and fair
44
40
45
Makefile.objs | 4 -
41
MAINTAINERS | 9 +
46
stubs/Makefile.objs | 1 +
42
block.c | 22 ++
47
tests/Makefile.include | 19 +-
43
block/Makefile.objs | 3 +
48
util/Makefile.objs | 6 +-
44
block/file-posix.c | 99 ++++++--
49
block/nbd-client.h | 2 +-
45
block/io.c | 162 +++++++------
50
block/qed.h | 3 +
46
block/io_uring.c | 433 ++++++++++++++++++++++++++++++++++
51
include/block/aio.h | 38 ++-
47
block/trace-events | 12 +
52
include/block/block_int.h | 64 +++--
48
blockdev.c | 12 +-
53
include/io/channel.h | 72 +++++-
49
configure | 27 +++
54
include/qemu/coroutine.h | 84 ++++---
50
docs/interop/qemu-nbd.rst | 4 +-
55
include/qemu/coroutine_int.h | 11 +-
51
include/block/aio.h | 16 +-
56
include/sysemu/block-backend.h | 14 +-
52
include/block/block.h | 14 +-
57
tests/iothread.h | 25 ++
53
include/block/block_int.h | 3 +-
58
block/backup.c | 2 +-
54
include/block/raw-aio.h | 12 +
59
block/blkdebug.c | 9 +-
55
qapi/block-core.json | 4 +-
60
block/blkreplay.c | 2 +-
56
qemu-img-cmds.hx | 4 +-
61
block/block-backend.c | 13 +-
57
qemu-img.c | 11 +-
62
block/curl.c | 44 +++-
58
qemu-img.texi | 5 +-
63
block/gluster.c | 9 +-
59
qemu-io.c | 25 +-
64
block/io.c | 42 +---
60
qemu-nbd.c | 12 +-
65
block/iscsi.c | 15 +-
61
stubs/Makefile.objs | 1 +
66
block/linux-aio.c | 10 +-
62
stubs/io_uring.c | 32 +++
67
block/mirror.c | 12 +-
63
tests/qemu-iotests/028 | 2 +-
68
block/nbd-client.c | 119 +++++----
64
tests/qemu-iotests/058 | 2 +-
69
block/nfs.c | 9 +-
65
tests/qemu-iotests/089 | 4 +-
70
block/qcow2-cluster.c | 4 +-
66
tests/qemu-iotests/091 | 4 +-
71
block/qed-cluster.c | 2 +
67
tests/qemu-iotests/109 | 2 +-
72
block/qed-table.c | 12 +-
68
tests/qemu-iotests/147 | 5 +-
73
block/qed.c | 58 +++--
69
tests/qemu-iotests/181 | 8 +-
74
block/sheepdog.c | 31 +--
70
tests/qemu-iotests/183 | 4 +-
75
block/ssh.c | 29 +--
71
tests/qemu-iotests/185 | 10 +-
76
block/throttle-groups.c | 4 +-
72
tests/qemu-iotests/200 | 2 +-
77
block/win32-aio.c | 9 +-
73
tests/qemu-iotests/201 | 8 +-
78
dma-helpers.c | 2 +
74
tests/qemu-iotests/check | 15 +-
79
hw/9pfs/9p.c | 2 +-
75
tests/qemu-iotests/common.rc | 14 ++
80
hw/block/virtio-blk.c | 19 +-
76
tests/qemu-iotests/iotests.py | 12 +-
81
hw/scsi/scsi-bus.c | 2 +
77
util/async.c | 36 +++
82
hw/scsi/scsi-disk.c | 15 ++
78
37 files changed, 878 insertions(+), 172 deletions(-)
83
hw/scsi/scsi-generic.c | 20 +-
79
create mode 100644 block/io_uring.c
84
hw/scsi/virtio-scsi.c | 6 +
80
create mode 100644 stubs/io_uring.c
85
io/channel-command.c | 13 +
86
io/channel-file.c | 11 +
87
io/channel-socket.c | 16 +-
88
io/channel-tls.c | 12 +
89
io/channel-watch.c | 6 +
90
io/channel.c | 97 ++++++--
91
nbd/client.c | 2 +-
92
nbd/common.c | 9 +-
93
nbd/server.c | 94 +++-----
94
stubs/linux-aio.c | 32 +++
95
stubs/set-fd-handler.c | 11 -
96
tests/iothread.c | 91 +++++++
97
tests/test-aio-multithread.c | 463 ++++++++++++++++++++++++++++++++++++
98
tests/test-thread-pool.c | 12 +-
99
aio-posix.c => util/aio-posix.c | 62 ++---
100
aio-win32.c => util/aio-win32.c | 30 +--
101
util/aiocb.c | 55 +++++
102
async.c => util/async.c | 84 ++++++-
103
iohandler.c => util/iohandler.c | 0
104
main-loop.c => util/main-loop.c | 0
105
util/qemu-coroutine-lock.c | 254 ++++++++++++++++++--
106
util/qemu-coroutine-sleep.c | 2 +-
107
util/qemu-coroutine.c | 8 +
108
qemu-timer.c => util/qemu-timer.c | 0
109
thread-pool.c => util/thread-pool.c | 8 +-
110
trace-events | 11 -
111
util/trace-events | 17 +-
112
67 files changed, 1711 insertions(+), 533 deletions(-)
113
create mode 100644 tests/iothread.h
114
create mode 100644 stubs/linux-aio.c
115
create mode 100644 tests/iothread.c
116
create mode 100644 tests/test-aio-multithread.c
117
rename aio-posix.c => util/aio-posix.c (94%)
118
rename aio-win32.c => util/aio-win32.c (95%)
119
create mode 100644 util/aiocb.c
120
rename async.c => util/async.c (82%)
121
rename iohandler.c => util/iohandler.c (100%)
122
rename main-loop.c => util/main-loop.c (100%)
123
rename qemu-timer.c => util/qemu-timer.c (100%)
124
rename thread-pool.c => util/thread-pool.c (97%)
125
81
126
--
82
--
127
2.9.3
83
2.24.1
128
84
129
85
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
All that CoQueue needs in order to become thread-safe is help
3
It is unused since commit 00e30f0 ("block/backup: use backup-top instead
4
from an external mutex. Add this to the API.
4
of write notifiers", 2019-10-01); drop it to simplify the code.
5
6
While at it, drop redundant assertions on flags.
5
7
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Message-id: 1578495356-46219-2-git-send-email-pbonzini@redhat.com
8
Message-id: 20170213181244.16297-6-pbonzini@redhat.com
10
Message-Id: <1578495356-46219-2-git-send-email-pbonzini@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
12
---
11
include/qemu/coroutine.h | 8 +++++---
13
block/io.c | 18 ++++--------------
12
block/backup.c | 2 +-
14
include/block/block.h | 12 ------------
13
block/io.c | 4 ++--
15
2 files changed, 4 insertions(+), 26 deletions(-)
14
block/nbd-client.c | 2 +-
15
block/qcow2-cluster.c | 4 +---
16
block/sheepdog.c | 2 +-
17
block/throttle-groups.c | 2 +-
18
hw/9pfs/9p.c | 2 +-
19
util/qemu-coroutine-lock.c | 24 +++++++++++++++++++++---
20
9 files changed, 34 insertions(+), 16 deletions(-)
21
16
22
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/coroutine.h
25
+++ b/include/qemu/coroutine.h
26
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
27
28
/**
29
* CoQueues are a mechanism to queue coroutines in order to continue executing
30
- * them later.
31
+ * them later. They are similar to condition variables, but they need help
32
+ * from an external mutex in order to maintain thread-safety.
33
*/
34
typedef struct CoQueue {
35
QSIMPLEQ_HEAD(, Coroutine) entries;
36
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue);
37
38
/**
39
* Adds the current coroutine to the CoQueue and transfers control to the
40
- * caller of the coroutine.
41
+ * caller of the coroutine. The mutex is unlocked during the wait and
42
+ * locked again afterwards.
43
*/
44
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
45
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex);
46
47
/**
48
* Restarts the next coroutine in the CoQueue and removes it from the queue.
49
diff --git a/block/backup.c b/block/backup.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/block/backup.c
52
+++ b/block/backup.c
53
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
54
retry = false;
55
QLIST_FOREACH(req, &job->inflight_reqs, list) {
56
if (end > req->start && start < req->end) {
57
- qemu_co_queue_wait(&req->wait_queue);
58
+ qemu_co_queue_wait(&req->wait_queue, NULL);
59
retry = true;
60
break;
61
}
62
diff --git a/block/io.c b/block/io.c
17
diff --git a/block/io.c b/block/io.c
63
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
64
--- a/block/io.c
19
--- a/block/io.c
65
+++ b/block/io.c
20
+++ b/block/io.c
66
@@ -XXX,XX +XXX,XX @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
21
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
67
* (instead of producing a deadlock in the former case). */
22
* potential fallback support, if we ever implement any read flags
68
if (!req->waiting_for) {
23
* to pass through to drivers. For now, there aren't any
69
self->waiting_for = req;
24
* passthrough flags. */
70
- qemu_co_queue_wait(&req->wait_queue);
25
- assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ |
71
+ qemu_co_queue_wait(&req->wait_queue, NULL);
26
- BDRV_REQ_PREFETCH)));
72
self->waiting_for = NULL;
27
+ assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
73
retry = true;
28
74
waited = true;
29
/* Handle Copy on Read and associated serialisation */
75
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
30
if (flags & BDRV_REQ_COPY_ON_READ) {
76
31
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
77
/* Wait until any previous flushes are completed */
32
bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
78
while (bs->active_flush_req) {
79
- qemu_co_queue_wait(&bs->flush_queue);
80
+ qemu_co_queue_wait(&bs->flush_queue, NULL);
81
}
33
}
82
34
83
bs->active_flush_req = true;
35
- /* BDRV_REQ_SERIALISING is only for write operation */
84
diff --git a/block/nbd-client.c b/block/nbd-client.c
36
- assert(!(flags & BDRV_REQ_SERIALISING));
37
-
38
- if (!(flags & BDRV_REQ_NO_SERIALISING)) {
39
- bdrv_wait_serialising_requests(req);
40
- }
41
+ bdrv_wait_serialising_requests(req);
42
43
if (flags & BDRV_REQ_COPY_ON_READ) {
44
int64_t pnum;
45
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
46
bdrv_inc_in_flight(bs);
47
48
/* Don't do copy-on-read if we read data before write operation */
49
- if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
50
+ if (atomic_read(&bs->copy_on_read)) {
51
flags |= BDRV_REQ_COPY_ON_READ;
52
}
53
54
@@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
55
return -EPERM;
56
}
57
58
- /* BDRV_REQ_NO_SERIALISING is only for read operation */
59
- assert(!(flags & BDRV_REQ_NO_SERIALISING));
60
assert(!(bs->open_flags & BDRV_O_INACTIVE));
61
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
62
assert(!(flags & ~BDRV_REQ_MASK));
63
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(
64
65
/* BDRV_REQ_SERIALISING is only for write operation */
66
assert(!(read_flags & BDRV_REQ_SERIALISING));
67
- if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
68
- bdrv_wait_serialising_requests(&req);
69
- }
70
+ bdrv_wait_serialising_requests(&req);
71
72
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
73
src, src_offset,
74
diff --git a/include/block/block.h b/include/block/block.h
85
index XXXXXXX..XXXXXXX 100644
75
index XXXXXXX..XXXXXXX 100644
86
--- a/block/nbd-client.c
76
--- a/include/block/block.h
87
+++ b/block/nbd-client.c
77
+++ b/include/block/block.h
88
@@ -XXX,XX +XXX,XX @@ static void nbd_coroutine_start(NBDClientSession *s,
78
@@ -XXX,XX +XXX,XX @@ typedef enum {
89
/* Poor man semaphore. The free_sema is locked when no other request
79
*/
90
* can be accepted, and unlocked after receiving one reply. */
80
BDRV_REQ_MAY_UNMAP = 0x4,
91
if (s->in_flight == MAX_NBD_REQUESTS) {
81
92
- qemu_co_queue_wait(&s->free_sema);
82
- /*
93
+ qemu_co_queue_wait(&s->free_sema, NULL);
83
- * The BDRV_REQ_NO_SERIALISING flag is only valid for reads and means that
94
assert(s->in_flight < MAX_NBD_REQUESTS);
84
- * we don't want wait_serialising_requests() during the read operation.
95
}
85
- *
96
s->in_flight++;
86
- * This flag is used for backup copy-on-write operations, when we need to
97
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
87
- * read old data before write (write notifier triggered). It is okay since
98
index XXXXXXX..XXXXXXX 100644
88
- * we already waited for other serializing requests in the initiating write
99
--- a/block/qcow2-cluster.c
89
- * (see bdrv_aligned_pwritev), and it is necessary if the initiating write
100
+++ b/block/qcow2-cluster.c
90
- * is already serializing (without the flag, the read would deadlock
101
@@ -XXX,XX +XXX,XX @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
91
- * waiting for the serialising write to complete).
102
if (bytes == 0) {
92
- */
103
/* Wait for the dependency to complete. We need to recheck
93
- BDRV_REQ_NO_SERIALISING = 0x8,
104
* the free/allocated clusters when we continue. */
94
BDRV_REQ_FUA = 0x10,
105
- qemu_co_mutex_unlock(&s->lock);
95
BDRV_REQ_WRITE_COMPRESSED = 0x20,
106
- qemu_co_queue_wait(&old_alloc->dependent_requests);
96
107
- qemu_co_mutex_lock(&s->lock);
108
+ qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
109
return -EAGAIN;
110
}
111
}
112
diff --git a/block/sheepdog.c b/block/sheepdog.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/block/sheepdog.c
115
+++ b/block/sheepdog.c
116
@@ -XXX,XX +XXX,XX @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
117
retry:
118
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
119
if (AIOCBOverlapping(acb, cb)) {
120
- qemu_co_queue_wait(&s->overlapping_queue);
121
+ qemu_co_queue_wait(&s->overlapping_queue, NULL);
122
goto retry;
123
}
124
}
125
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/block/throttle-groups.c
128
+++ b/block/throttle-groups.c
129
@@ -XXX,XX +XXX,XX @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
130
if (must_wait || blkp->pending_reqs[is_write]) {
131
blkp->pending_reqs[is_write]++;
132
qemu_mutex_unlock(&tg->lock);
133
- qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
134
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
135
qemu_mutex_lock(&tg->lock);
136
blkp->pending_reqs[is_write]--;
137
}
138
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/hw/9pfs/9p.c
141
+++ b/hw/9pfs/9p.c
142
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn v9fs_flush(void *opaque)
143
/*
144
* Wait for pdu to complete.
145
*/
146
- qemu_co_queue_wait(&cancel_pdu->complete);
147
+ qemu_co_queue_wait(&cancel_pdu->complete, NULL);
148
cancel_pdu->cancelled = 0;
149
pdu_free(cancel_pdu);
150
}
151
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/util/qemu-coroutine-lock.c
154
+++ b/util/qemu-coroutine-lock.c
155
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue)
156
QSIMPLEQ_INIT(&queue->entries);
157
}
158
159
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
160
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex)
161
{
162
Coroutine *self = qemu_coroutine_self();
163
QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
164
+
165
+ if (mutex) {
166
+ qemu_co_mutex_unlock(mutex);
167
+ }
168
+
169
+ /* There is no race condition here. Other threads will call
170
+ * aio_co_schedule on our AioContext, which can reenter this
171
+ * coroutine but only after this yield and after the main loop
172
+ * has gone through the next iteration.
173
+ */
174
qemu_coroutine_yield();
175
assert(qemu_in_coroutine());
176
+
177
+ /* TODO: OSv implements wait morphing here, where the wakeup
178
+ * primitive automatically places the woken coroutine on the
179
+ * mutex's queue. This avoids the thundering herd effect.
180
+ */
181
+ if (mutex) {
182
+ qemu_co_mutex_lock(mutex);
183
+ }
184
}
185
186
/**
187
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock)
188
Coroutine *self = qemu_coroutine_self();
189
190
while (lock->writer) {
191
- qemu_co_queue_wait(&lock->queue);
192
+ qemu_co_queue_wait(&lock->queue, NULL);
193
}
194
lock->reader++;
195
self->locks_held++;
196
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_wrlock(CoRwlock *lock)
197
Coroutine *self = qemu_coroutine_self();
198
199
while (lock->writer || lock->reader) {
200
- qemu_co_queue_wait(&lock->queue);
201
+ qemu_co_queue_wait(&lock->queue, NULL);
202
}
203
lock->writer = true;
204
self->locks_held++;
205
--
97
--
206
2.9.3
98
2.24.1
207
99
208
100
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
3
Marking without waiting would not result in actual serialising behavior.
4
Thus, make a call to bdrv_mark_request_serialising sufficient for
5
serialisation to happen.
6
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
8
Message-id: 1578495356-46219-3-git-send-email-pbonzini@redhat.com
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
9
Message-Id: <1578495356-46219-3-git-send-email-pbonzini@redhat.com>
7
Message-id: 20170213135235.12274-16-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
11
---
10
block/archipelago.c | 3 ---
12
block/file-posix.c | 1 -
11
block/block-backend.c | 7 -------
13
block/io.c | 40 +++++++++++++++++----------------------
12
block/curl.c | 2 +-
14
include/block/block_int.h | 3 +--
13
block/io.c | 6 +-----
15
3 files changed, 18 insertions(+), 26 deletions(-)
14
block/iscsi.c | 3 ---
15
block/linux-aio.c | 5 +----
16
block/mirror.c | 12 +++++++++---
17
block/null.c | 8 --------
18
block/qed-cluster.c | 2 ++
19
block/qed-table.c | 12 ++++++++++--
20
block/qed.c | 4 ++--
21
block/rbd.c | 4 ----
22
block/win32-aio.c | 3 ---
23
hw/block/virtio-blk.c | 12 +++++++++++-
24
hw/scsi/scsi-disk.c | 15 +++++++++++++++
25
hw/scsi/scsi-generic.c | 20 +++++++++++++++++---
26
util/thread-pool.c | 4 +++-
27
17 files changed, 72 insertions(+), 50 deletions(-)
28
16
29
diff --git a/block/archipelago.c b/block/archipelago.c
17
diff --git a/block/file-posix.c b/block/file-posix.c
30
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
31
--- a/block/archipelago.c
19
--- a/block/file-posix.c
32
+++ b/block/archipelago.c
20
+++ b/block/file-posix.c
33
@@ -XXX,XX +XXX,XX @@ static void qemu_archipelago_complete_aio(void *opaque)
21
@@ -XXX,XX +XXX,XX @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
34
{
22
req->overlap_bytes = req->bytes;
35
AIORequestData *reqdata = (AIORequestData *) opaque;
23
36
ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
24
bdrv_mark_request_serialising(req, bs->bl.request_alignment);
37
- AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
25
- bdrv_wait_serialising_requests(req);
38
39
- aio_context_acquire(ctx);
40
aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
41
- aio_context_release(ctx);
42
aio_cb->status = 0;
43
44
qemu_aio_unref(aio_cb);
45
diff --git a/block/block-backend.c b/block/block-backend.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/block/block-backend.c
48
+++ b/block/block-backend.c
49
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
50
static void error_callback_bh(void *opaque)
51
{
52
struct BlockBackendAIOCB *acb = opaque;
53
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
54
55
bdrv_dec_in_flight(acb->common.bs);
56
- aio_context_acquire(ctx);
57
acb->common.cb(acb->common.opaque, acb->ret);
58
- aio_context_release(ctx);
59
qemu_aio_unref(acb);
60
}
61
62
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
63
static void blk_aio_complete_bh(void *opaque)
64
{
65
BlkAioEmAIOCB *acb = opaque;
66
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
67
-
68
assert(acb->has_returned);
69
- aio_context_acquire(ctx);
70
blk_aio_complete(acb);
71
- aio_context_release(ctx);
72
}
73
74
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
75
diff --git a/block/curl.c b/block/curl.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/block/curl.c
78
+++ b/block/curl.c
79
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
80
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
81
82
out:
83
+ aio_context_release(ctx);
84
if (ret != -EINPROGRESS) {
85
acb->common.cb(acb->common.opaque, ret);
86
qemu_aio_unref(acb);
87
}
26
}
88
- aio_context_release(ctx);
27
#endif
89
}
28
90
91
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
92
diff --git a/block/io.c b/block/io.c
29
diff --git a/block/io.c b/block/io.c
93
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
94
--- a/block/io.c
31
--- a/block/io.c
95
+++ b/block/io.c
32
+++ b/block/io.c
96
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
33
@@ -XXX,XX +XXX,XX @@
97
CoroutineIOCompletion *co = opaque;
34
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
98
35
99
co->ret = ret;
36
static void bdrv_parent_cb_resize(BlockDriverState *bs);
100
- qemu_coroutine_enter(co->coroutine);
37
+static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
101
+ aio_co_wake(co->coroutine);
38
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
39
int64_t offset, int bytes, BdrvRequestFlags flags);
40
41
@@ -XXX,XX +XXX,XX @@ static void tracked_request_begin(BdrvTrackedRequest *req,
42
qemu_co_mutex_unlock(&bs->reqs_lock);
102
}
43
}
103
44
104
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
45
-void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
105
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
46
+bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
106
static void bdrv_co_em_bh(void *opaque)
107
{
47
{
108
BlockAIOCBCoroutine *acb = opaque;
48
int64_t overlap_offset = req->offset & ~(align - 1);
109
- BlockDriverState *bs = acb->common.bs;
49
uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
110
- AioContext *ctx = bdrv_get_aio_context(bs);
50
@@ -XXX,XX +XXX,XX @@ void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
111
51
112
assert(!acb->need_bh);
52
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
113
- aio_context_acquire(ctx);
53
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
114
bdrv_co_complete(acb);
54
-}
115
- aio_context_release(ctx);
55
-
56
-static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
57
-{
58
- /*
59
- * If the request is serialising, overlap_offset and overlap_bytes are set,
60
- * so we can check if the request is aligned. Otherwise, don't care and
61
- * return false.
62
- */
63
-
64
- return req->serialising && (req->offset == req->overlap_offset) &&
65
- (req->bytes == req->overlap_bytes);
66
+ return bdrv_wait_serialising_requests(req);
116
}
67
}
117
68
118
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
69
/**
119
diff --git a/block/iscsi.c b/block/iscsi.c
70
@@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs)
71
bdrv_wakeup(bs);
72
}
73
74
-bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
75
+static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
76
{
77
BlockDriverState *bs = self->bs;
78
BdrvTrackedRequest *req;
79
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
80
* it ensures that the CoR read and write operations are atomic and
81
* guest writes cannot interleave between them. */
82
bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
83
+ } else {
84
+ bdrv_wait_serialising_requests(req);
85
}
86
87
- bdrv_wait_serialising_requests(req);
88
-
89
if (flags & BDRV_REQ_COPY_ON_READ) {
90
int64_t pnum;
91
92
@@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
93
assert(!(flags & ~BDRV_REQ_MASK));
94
95
if (flags & BDRV_REQ_SERIALISING) {
96
- bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
97
+ waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
98
+ /*
99
+ * For a misaligned request we should have already waited earlier,
100
+ * because we come after bdrv_padding_rmw_read which must be called
101
+ * with the request already marked as serialising.
102
+ */
103
+ assert(!waited ||
104
+ (req->offset == req->overlap_offset &&
105
+ req->bytes == req->overlap_bytes));
106
+ } else {
107
+ bdrv_wait_serialising_requests(req);
108
}
109
110
- waited = bdrv_wait_serialising_requests(req);
111
-
112
- assert(!waited || !req->serialising ||
113
- is_request_serialising_and_aligned(req));
114
assert(req->overlap_offset <= offset);
115
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
116
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
117
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
118
padding = bdrv_init_padding(bs, offset, bytes, &pad);
119
if (padding) {
120
bdrv_mark_request_serialising(req, align);
121
- bdrv_wait_serialising_requests(req);
122
123
bdrv_padding_rmw_read(child, req, &pad, true);
124
125
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
126
127
if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
128
bdrv_mark_request_serialising(&req, align);
129
- bdrv_wait_serialising_requests(&req);
130
bdrv_padding_rmw_read(child, &req, &pad, false);
131
}
132
133
diff --git a/include/block/block_int.h b/include/block/block_int.h
120
index XXXXXXX..XXXXXXX 100644
134
index XXXXXXX..XXXXXXX 100644
121
--- a/block/iscsi.c
135
--- a/include/block/block_int.h
122
+++ b/block/iscsi.c
136
+++ b/include/block/block_int.h
123
@@ -XXX,XX +XXX,XX @@ static void
137
@@ -XXX,XX +XXX,XX @@ extern unsigned int bdrv_drain_all_count;
124
iscsi_bh_cb(void *p)
138
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
125
{
139
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
126
IscsiAIOCB *acb = p;
140
127
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
141
-bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
128
142
-void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
129
qemu_bh_delete(acb->bh);
143
+bool coroutine_fn bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
130
144
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
131
g_free(acb->buf);
145
132
acb->buf = NULL;
146
int get_tmp_filename(char *filename, int size);
133
134
- aio_context_acquire(ctx);
135
acb->common.cb(acb->common.opaque, acb->status);
136
- aio_context_release(ctx);
137
138
if (acb->task != NULL) {
139
scsi_free_scsi_task(acb->task);
140
diff --git a/block/linux-aio.c b/block/linux-aio.c
141
index XXXXXXX..XXXXXXX 100644
142
--- a/block/linux-aio.c
143
+++ b/block/linux-aio.c
144
@@ -XXX,XX +XXX,XX @@ static inline ssize_t io_event_ret(struct io_event *ev)
145
*/
146
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
147
{
148
- LinuxAioState *s = laiocb->ctx;
149
int ret;
150
151
ret = laiocb->ret;
152
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
153
}
154
155
laiocb->ret = ret;
156
- aio_context_acquire(s->aio_context);
157
if (laiocb->co) {
158
/* If the coroutine is already entered it must be in ioq_submit() and
159
* will notice laio->ret has been filled in when it eventually runs
160
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
161
* that!
162
*/
163
if (!qemu_coroutine_entered(laiocb->co)) {
164
- qemu_coroutine_enter(laiocb->co);
165
+ aio_co_wake(laiocb->co);
166
}
167
} else {
168
laiocb->common.cb(laiocb->common.opaque, ret);
169
qemu_aio_unref(laiocb);
170
}
171
- aio_context_release(s->aio_context);
172
}
173
174
/**
175
diff --git a/block/mirror.c b/block/mirror.c
176
index XXXXXXX..XXXXXXX 100644
177
--- a/block/mirror.c
178
+++ b/block/mirror.c
179
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
180
{
181
MirrorOp *op = opaque;
182
MirrorBlockJob *s = op->s;
183
+
184
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
185
if (ret < 0) {
186
BlockErrorAction action;
187
188
@@ -XXX,XX +XXX,XX @@ static void mirror_write_complete(void *opaque, int ret)
189
}
190
}
191
mirror_iteration_done(op, ret);
192
+ aio_context_release(blk_get_aio_context(s->common.blk));
193
}
194
195
static void mirror_read_complete(void *opaque, int ret)
196
{
197
MirrorOp *op = opaque;
198
MirrorBlockJob *s = op->s;
199
+
200
+ aio_context_acquire(blk_get_aio_context(s->common.blk));
201
if (ret < 0) {
202
BlockErrorAction action;
203
204
@@ -XXX,XX +XXX,XX @@ static void mirror_read_complete(void *opaque, int ret)
205
}
206
207
mirror_iteration_done(op, ret);
208
- return;
209
+ } else {
210
+ blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
211
+ 0, mirror_write_complete, op);
212
}
213
- blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
214
- 0, mirror_write_complete, op);
215
+ aio_context_release(blk_get_aio_context(s->common.blk));
216
}
217
218
static inline void mirror_clip_sectors(MirrorBlockJob *s,
219
diff --git a/block/null.c b/block/null.c
220
index XXXXXXX..XXXXXXX 100644
221
--- a/block/null.c
222
+++ b/block/null.c
223
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo null_aiocb_info = {
224
static void null_bh_cb(void *opaque)
225
{
226
NullAIOCB *acb = opaque;
227
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
228
-
229
- aio_context_acquire(ctx);
230
acb->common.cb(acb->common.opaque, 0);
231
- aio_context_release(ctx);
232
qemu_aio_unref(acb);
233
}
234
235
static void null_timer_cb(void *opaque)
236
{
237
NullAIOCB *acb = opaque;
238
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
239
-
240
- aio_context_acquire(ctx);
241
acb->common.cb(acb->common.opaque, 0);
242
- aio_context_release(ctx);
243
timer_deinit(&acb->timer);
244
qemu_aio_unref(acb);
245
}
246
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
247
index XXXXXXX..XXXXXXX 100644
248
--- a/block/qed-cluster.c
249
+++ b/block/qed-cluster.c
250
@@ -XXX,XX +XXX,XX @@ static void qed_find_cluster_cb(void *opaque, int ret)
251
unsigned int index;
252
unsigned int n;
253
254
+ qed_acquire(s);
255
if (ret) {
256
goto out;
257
}
258
@@ -XXX,XX +XXX,XX @@ static void qed_find_cluster_cb(void *opaque, int ret)
259
260
out:
261
find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
262
+ qed_release(s);
263
g_free(find_cluster_cb);
264
}
265
266
diff --git a/block/qed-table.c b/block/qed-table.c
267
index XXXXXXX..XXXXXXX 100644
268
--- a/block/qed-table.c
269
+++ b/block/qed-table.c
270
@@ -XXX,XX +XXX,XX @@ static void qed_read_table_cb(void *opaque, int ret)
271
{
272
QEDReadTableCB *read_table_cb = opaque;
273
QEDTable *table = read_table_cb->table;
274
+ BDRVQEDState *s = read_table_cb->s;
275
int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
276
int i;
277
278
@@ -XXX,XX +XXX,XX @@ static void qed_read_table_cb(void *opaque, int ret)
279
}
280
281
/* Byteswap offsets */
282
+ qed_acquire(s);
283
for (i = 0; i < noffsets; i++) {
284
table->offsets[i] = le64_to_cpu(table->offsets[i]);
285
}
286
+ qed_release(s);
287
288
out:
289
/* Completion */
290
- trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
291
+ trace_qed_read_table_cb(s, read_table_cb->table, ret);
292
gencb_complete(&read_table_cb->gencb, ret);
293
}
294
295
@@ -XXX,XX +XXX,XX @@ typedef struct {
296
static void qed_write_table_cb(void *opaque, int ret)
297
{
298
QEDWriteTableCB *write_table_cb = opaque;
299
+ BDRVQEDState *s = write_table_cb->s;
300
301
- trace_qed_write_table_cb(write_table_cb->s,
302
+ trace_qed_write_table_cb(s,
303
write_table_cb->orig_table,
304
write_table_cb->flush,
305
ret);
306
@@ -XXX,XX +XXX,XX @@ static void qed_write_table_cb(void *opaque, int ret)
307
if (write_table_cb->flush) {
308
/* We still need to flush first */
309
write_table_cb->flush = false;
310
+ qed_acquire(s);
311
bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
312
write_table_cb);
313
+ qed_release(s);
314
return;
315
}
316
317
@@ -XXX,XX +XXX,XX @@ static void qed_read_l2_table_cb(void *opaque, int ret)
318
CachedL2Table *l2_table = request->l2_table;
319
uint64_t l2_offset = read_l2_table_cb->l2_offset;
320
321
+ qed_acquire(s);
322
if (ret) {
323
/* can't trust loaded L2 table anymore */
324
qed_unref_l2_cache_entry(l2_table);
325
@@ -XXX,XX +XXX,XX @@ static void qed_read_l2_table_cb(void *opaque, int ret)
326
request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
327
assert(request->l2_table != NULL);
328
}
329
+ qed_release(s);
330
331
gencb_complete(&read_l2_table_cb->gencb, ret);
332
}
333
diff --git a/block/qed.c b/block/qed.c
334
index XXXXXXX..XXXXXXX 100644
335
--- a/block/qed.c
336
+++ b/block/qed.c
337
@@ -XXX,XX +XXX,XX @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
338
}
339
340
if (cb->co) {
341
- qemu_coroutine_enter(cb->co);
342
+ aio_co_wake(cb->co);
343
}
344
}
345
346
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
347
cb->done = true;
348
cb->ret = ret;
349
if (cb->co) {
350
- qemu_coroutine_enter(cb->co);
351
+ aio_co_wake(cb->co);
352
}
353
}
354
355
diff --git a/block/rbd.c b/block/rbd.c
356
index XXXXXXX..XXXXXXX 100644
357
--- a/block/rbd.c
358
+++ b/block/rbd.c
359
@@ -XXX,XX +XXX,XX @@ shutdown:
360
static void qemu_rbd_complete_aio(RADOSCB *rcb)
361
{
362
RBDAIOCB *acb = rcb->acb;
363
- AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
364
int64_t r;
365
366
r = rcb->ret;
367
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
368
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
369
}
370
qemu_vfree(acb->bounce);
371
-
372
- aio_context_acquire(ctx);
373
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
374
- aio_context_release(ctx);
375
376
qemu_aio_unref(acb);
377
}
378
diff --git a/block/win32-aio.c b/block/win32-aio.c
379
index XXXXXXX..XXXXXXX 100644
380
--- a/block/win32-aio.c
381
+++ b/block/win32-aio.c
382
@@ -XXX,XX +XXX,XX @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
383
qemu_vfree(waiocb->buf);
384
}
385
386
-
387
- aio_context_acquire(s->aio_ctx);
388
waiocb->common.cb(waiocb->common.opaque, ret);
389
- aio_context_release(s->aio_ctx);
390
qemu_aio_unref(waiocb);
391
}
392
393
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
394
index XXXXXXX..XXXXXXX 100644
395
--- a/hw/block/virtio-blk.c
396
+++ b/hw/block/virtio-blk.c
397
@@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
398
static void virtio_blk_rw_complete(void *opaque, int ret)
399
{
400
VirtIOBlockReq *next = opaque;
401
+ VirtIOBlock *s = next->dev;
402
403
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
404
while (next) {
405
VirtIOBlockReq *req = next;
406
next = req->mr_next;
407
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_rw_complete(void *opaque, int ret)
408
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
409
virtio_blk_free_request(req);
410
}
411
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
412
}
413
414
static void virtio_blk_flush_complete(void *opaque, int ret)
415
{
416
VirtIOBlockReq *req = opaque;
417
+ VirtIOBlock *s = req->dev;
418
419
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
420
if (ret) {
421
if (virtio_blk_handle_rw_error(req, -ret, 0)) {
422
- return;
423
+ goto out;
424
}
425
}
426
427
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
428
block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
429
virtio_blk_free_request(req);
430
+
431
+out:
432
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
433
}
434
435
#ifdef __linux__
436
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
437
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
438
439
out:
440
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
441
virtio_blk_req_complete(req, status);
442
virtio_blk_free_request(req);
443
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
444
g_free(ioctl_req);
445
}
446
447
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
448
index XXXXXXX..XXXXXXX 100644
449
--- a/hw/scsi/scsi-disk.c
450
+++ b/hw/scsi/scsi-disk.c
451
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
452
453
assert(r->req.aiocb != NULL);
454
r->req.aiocb = NULL;
455
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
456
if (scsi_disk_req_check_error(r, ret, true)) {
457
goto done;
458
}
459
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
460
scsi_req_complete(&r->req, GOOD);
461
462
done:
463
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
464
scsi_req_unref(&r->req);
465
}
466
467
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_complete(void *opaque, int ret)
468
assert(r->req.aiocb != NULL);
469
r->req.aiocb = NULL;
470
471
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
472
if (ret < 0) {
473
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
474
} else {
475
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
476
}
477
scsi_dma_complete_noio(r, ret);
478
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
479
}
480
481
static void scsi_read_complete(void * opaque, int ret)
482
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
483
484
assert(r->req.aiocb != NULL);
485
r->req.aiocb = NULL;
486
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
487
if (scsi_disk_req_check_error(r, ret, true)) {
488
goto done;
489
}
490
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
491
492
done:
493
scsi_req_unref(&r->req);
494
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
495
}
496
497
/* Actually issue a read to the block device. */
498
@@ -XXX,XX +XXX,XX @@ static void scsi_do_read_cb(void *opaque, int ret)
499
assert (r->req.aiocb != NULL);
500
r->req.aiocb = NULL;
501
502
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
503
if (ret < 0) {
504
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
505
} else {
506
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
507
}
508
scsi_do_read(opaque, ret);
509
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
510
}
511
512
/* Read more data from scsi device into buffer. */
513
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
514
assert (r->req.aiocb != NULL);
515
r->req.aiocb = NULL;
516
517
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
518
if (ret < 0) {
519
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
520
} else {
521
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
522
}
523
scsi_write_complete_noio(r, ret);
524
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
525
}
526
527
static void scsi_write_data(SCSIRequest *req)
528
@@ -XXX,XX +XXX,XX @@ static void scsi_unmap_complete(void *opaque, int ret)
529
{
530
UnmapCBData *data = opaque;
531
SCSIDiskReq *r = data->r;
532
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
533
534
assert(r->req.aiocb != NULL);
535
r->req.aiocb = NULL;
536
537
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
538
scsi_unmap_complete_noio(data, ret);
539
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
540
}
541
542
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
543
@@ -XXX,XX +XXX,XX @@ static void scsi_write_same_complete(void *opaque, int ret)
544
545
assert(r->req.aiocb != NULL);
546
r->req.aiocb = NULL;
547
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
548
if (scsi_disk_req_check_error(r, ret, true)) {
549
goto done;
550
}
551
@@ -XXX,XX +XXX,XX @@ done:
552
scsi_req_unref(&r->req);
553
qemu_vfree(data->iov.iov_base);
554
g_free(data);
555
+ aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
556
}
557
558
static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
559
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
560
index XXXXXXX..XXXXXXX 100644
561
--- a/hw/scsi/scsi-generic.c
562
+++ b/hw/scsi/scsi-generic.c
563
@@ -XXX,XX +XXX,XX @@ done:
564
static void scsi_command_complete(void *opaque, int ret)
565
{
566
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
567
+ SCSIDevice *s = r->req.dev;
568
569
assert(r->req.aiocb != NULL);
570
r->req.aiocb = NULL;
571
+
572
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
573
scsi_command_complete_noio(r, ret);
574
+ aio_context_release(blk_get_aio_context(s->conf.blk));
575
}
576
577
static int execute_command(BlockBackend *blk,
578
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
579
assert(r->req.aiocb != NULL);
580
r->req.aiocb = NULL;
581
582
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
583
+
584
if (ret || r->req.io_canceled) {
585
scsi_command_complete_noio(r, ret);
586
- return;
587
+ goto done;
588
}
589
590
len = r->io_header.dxfer_len - r->io_header.resid;
591
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
592
r->len = -1;
593
if (len == 0) {
594
scsi_command_complete_noio(r, 0);
595
- return;
596
+ goto done;
597
}
598
599
/* Snoop READ CAPACITY output to set the blocksize. */
600
@@ -XXX,XX +XXX,XX @@ static void scsi_read_complete(void * opaque, int ret)
601
}
602
scsi_req_data(&r->req, len);
603
scsi_req_unref(&r->req);
604
+
605
+done:
606
+ aio_context_release(blk_get_aio_context(s->conf.blk));
607
}
608
609
/* Read more data from scsi device into buffer. */
610
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
611
assert(r->req.aiocb != NULL);
612
r->req.aiocb = NULL;
613
614
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
615
+
616
if (ret || r->req.io_canceled) {
617
scsi_command_complete_noio(r, ret);
618
- return;
619
+ goto done;
620
}
621
622
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
623
@@ -XXX,XX +XXX,XX @@ static void scsi_write_complete(void * opaque, int ret)
624
}
625
626
scsi_command_complete_noio(r, ret);
627
+
628
+done:
629
+ aio_context_release(blk_get_aio_context(s->conf.blk));
630
}
631
632
/* Write data to a scsi device. Returns nonzero on failure.
633
diff --git a/util/thread-pool.c b/util/thread-pool.c
634
index XXXXXXX..XXXXXXX 100644
635
--- a/util/thread-pool.c
636
+++ b/util/thread-pool.c
637
@@ -XXX,XX +XXX,XX @@ restart:
638
*/
639
qemu_bh_schedule(pool->completion_bh);
640
641
+ aio_context_release(pool->ctx);
642
elem->common.cb(elem->common.opaque, elem->ret);
643
+ aio_context_acquire(pool->ctx);
644
qemu_aio_unref(elem);
645
goto restart;
646
} else {
647
@@ -XXX,XX +XXX,XX @@ static void thread_pool_co_cb(void *opaque, int ret)
648
ThreadPoolCo *co = opaque;
649
650
co->ret = ret;
651
- qemu_coroutine_enter(co->co);
652
+ aio_co_wake(co->co);
653
}
654
655
int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
656
--
147
--
657
2.9.3
148
2.24.1
658
149
659
150
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
3
bdrv_mark_request_serialising is writing the overlap_offset and
4
overlap_bytes fields of BdrvTrackedRequest. Take bs->reqs_lock
5
for the whole duration of it, and not just when waiting for
6
serialising requests, so that tracked_request_overlaps does not
7
look at a half-updated request.
8
9
The new code does not unlock/relock around retries. This is unnecessary
10
because a retry is always preceded by a CoQueue wait, which already
11
releases and reacquires bs->reqs_lock.
12
13
Reported-by: Peter Lieven <pl@kamp.de>
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
15
Message-id: 1578495356-46219-4-git-send-email-pbonzini@redhat.com
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
16
Message-Id: <1578495356-46219-4-git-send-email-pbonzini@redhat.com>
7
Message-id: 20170213135235.12274-15-pbonzini@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
18
---
10
block/archipelago.c | 3 +++
19
block/io.c | 112 ++++++++++++++++++++++++++++++-----------------------
11
block/blkreplay.c | 2 +-
20
1 file changed, 63 insertions(+), 49 deletions(-)
12
block/block-backend.c | 6 ++++++
13
block/curl.c | 26 ++++++++++++++++++--------
14
block/gluster.c | 9 +--------
15
block/io.c | 6 +++++-
16
block/iscsi.c | 6 +++++-
17
block/linux-aio.c | 15 +++++++++------
18
block/nfs.c | 3 ++-
19
block/null.c | 4 ++++
20
block/qed.c | 3 +++
21
block/rbd.c | 4 ++++
22
dma-helpers.c | 2 ++
23
hw/block/virtio-blk.c | 2 ++
24
hw/scsi/scsi-bus.c | 2 ++
25
util/async.c | 4 ++--
26
util/thread-pool.c | 2 ++
27
17 files changed, 71 insertions(+), 28 deletions(-)
28
21
29
diff --git a/block/archipelago.c b/block/archipelago.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/block/archipelago.c
32
+++ b/block/archipelago.c
33
@@ -XXX,XX +XXX,XX @@ static void qemu_archipelago_complete_aio(void *opaque)
34
{
35
AIORequestData *reqdata = (AIORequestData *) opaque;
36
ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
37
+ AioContext *ctx = bdrv_get_aio_context(aio_cb->common.bs);
38
39
+ aio_context_acquire(ctx);
40
aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
41
+ aio_context_release(ctx);
42
aio_cb->status = 0;
43
44
qemu_aio_unref(aio_cb);
45
diff --git a/block/blkreplay.c b/block/blkreplay.c
46
index XXXXXXX..XXXXXXX 100755
47
--- a/block/blkreplay.c
48
+++ b/block/blkreplay.c
49
@@ -XXX,XX +XXX,XX @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
50
static void blkreplay_bh_cb(void *opaque)
51
{
52
Request *req = opaque;
53
- qemu_coroutine_enter(req->co);
54
+ aio_co_wake(req->co);
55
qemu_bh_delete(req->bh);
56
g_free(req);
57
}
58
diff --git a/block/block-backend.c b/block/block-backend.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/block/block-backend.c
61
+++ b/block/block-backend.c
62
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
63
static void error_callback_bh(void *opaque)
64
{
65
struct BlockBackendAIOCB *acb = opaque;
66
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
67
68
bdrv_dec_in_flight(acb->common.bs);
69
+ aio_context_acquire(ctx);
70
acb->common.cb(acb->common.opaque, acb->ret);
71
+ aio_context_release(ctx);
72
qemu_aio_unref(acb);
73
}
74
75
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete(BlkAioEmAIOCB *acb)
76
static void blk_aio_complete_bh(void *opaque)
77
{
78
BlkAioEmAIOCB *acb = opaque;
79
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
80
81
assert(acb->has_returned);
82
+ aio_context_acquire(ctx);
83
blk_aio_complete(acb);
84
+ aio_context_release(ctx);
85
}
86
87
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
88
diff --git a/block/curl.c b/block/curl.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/block/curl.c
91
+++ b/block/curl.c
92
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
93
{
94
CURLState *state;
95
int running;
96
+ int ret = -EINPROGRESS;
97
98
CURLAIOCB *acb = p;
99
- BDRVCURLState *s = acb->common.bs->opaque;
100
+ BlockDriverState *bs = acb->common.bs;
101
+ BDRVCURLState *s = bs->opaque;
102
+ AioContext *ctx = bdrv_get_aio_context(bs);
103
104
size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
105
size_t end;
106
107
+ aio_context_acquire(ctx);
108
+
109
// In case we have the requested data already (e.g. read-ahead),
110
// we can just call the callback and be done.
111
switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) {
112
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
113
qemu_aio_unref(acb);
114
// fall through
115
case FIND_RET_WAIT:
116
- return;
117
+ goto out;
118
default:
119
break;
120
}
121
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
122
// No cache found, so let's start a new request
123
state = curl_init_state(acb->common.bs, s);
124
if (!state) {
125
- acb->common.cb(acb->common.opaque, -EIO);
126
- qemu_aio_unref(acb);
127
- return;
128
+ ret = -EIO;
129
+ goto out;
130
}
131
132
acb->start = 0;
133
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
134
state->orig_buf = g_try_malloc(state->buf_len);
135
if (state->buf_len && state->orig_buf == NULL) {
136
curl_clean_state(state);
137
- acb->common.cb(acb->common.opaque, -ENOMEM);
138
- qemu_aio_unref(acb);
139
- return;
140
+ ret = -ENOMEM;
141
+ goto out;
142
}
143
state->acb[0] = acb;
144
145
@@ -XXX,XX +XXX,XX @@ static void curl_readv_bh_cb(void *p)
146
147
/* Tell curl it needs to kick things off */
148
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
149
+
150
+out:
151
+ if (ret != -EINPROGRESS) {
152
+ acb->common.cb(acb->common.opaque, ret);
153
+ qemu_aio_unref(acb);
154
+ }
155
+ aio_context_release(ctx);
156
}
157
158
static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
159
diff --git a/block/gluster.c b/block/gluster.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/block/gluster.c
162
+++ b/block/gluster.c
163
@@ -XXX,XX +XXX,XX @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
164
return qemu_gluster_glfs_init(gconf, errp);
165
}
166
167
-static void qemu_gluster_complete_aio(void *opaque)
168
-{
169
- GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
170
-
171
- qemu_coroutine_enter(acb->coroutine);
172
-}
173
-
174
/*
175
* AIO callback routine called from GlusterFS thread.
176
*/
177
@@ -XXX,XX +XXX,XX @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
178
acb->ret = -EIO; /* Partial read/write - fail it */
179
}
180
181
- aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb);
182
+ aio_co_schedule(acb->aio_context, acb->coroutine);
183
}
184
185
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
186
diff --git a/block/io.c b/block/io.c
22
diff --git a/block/io.c b/block/io.c
187
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
188
--- a/block/io.c
24
--- a/block/io.c
189
+++ b/block/io.c
25
+++ b/block/io.c
190
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
26
@@ -XXX,XX +XXX,XX @@
191
bdrv_dec_in_flight(bs);
27
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
192
bdrv_drained_begin(bs);
28
193
data->done = true;
29
static void bdrv_parent_cb_resize(BlockDriverState *bs);
194
- qemu_coroutine_enter(co);
30
-static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
195
+ aio_co_wake(co);
31
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
32
int64_t offset, int bytes, BdrvRequestFlags flags);
33
34
@@ -XXX,XX +XXX,XX @@ static void tracked_request_begin(BdrvTrackedRequest *req,
35
qemu_co_mutex_unlock(&bs->reqs_lock);
196
}
36
}
197
37
198
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
38
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
199
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
39
+ int64_t offset, uint64_t bytes)
200
static void bdrv_co_em_bh(void *opaque)
40
+{
41
+ /* aaaa bbbb */
42
+ if (offset >= req->overlap_offset + req->overlap_bytes) {
43
+ return false;
44
+ }
45
+ /* bbbb aaaa */
46
+ if (req->overlap_offset >= offset + bytes) {
47
+ return false;
48
+ }
49
+ return true;
50
+}
51
+
52
+static bool coroutine_fn
53
+bdrv_wait_serialising_requests_locked(BlockDriverState *bs,
54
+ BdrvTrackedRequest *self)
55
+{
56
+ BdrvTrackedRequest *req;
57
+ bool retry;
58
+ bool waited = false;
59
+
60
+ do {
61
+ retry = false;
62
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
63
+ if (req == self || (!req->serialising && !self->serialising)) {
64
+ continue;
65
+ }
66
+ if (tracked_request_overlaps(req, self->overlap_offset,
67
+ self->overlap_bytes))
68
+ {
69
+ /* Hitting this means there was a reentrant request, for
70
+ * example, a block driver issuing nested requests. This must
71
+ * never happen since it means deadlock.
72
+ */
73
+ assert(qemu_coroutine_self() != req->co);
74
+
75
+ /* If the request is already (indirectly) waiting for us, or
76
+ * will wait for us as soon as it wakes up, then just go on
77
+ * (instead of producing a deadlock in the former case). */
78
+ if (!req->waiting_for) {
79
+ self->waiting_for = req;
80
+ qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
81
+ self->waiting_for = NULL;
82
+ retry = true;
83
+ waited = true;
84
+ break;
85
+ }
86
+ }
87
+ }
88
+ } while (retry);
89
+ return waited;
90
+}
91
+
92
bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
201
{
93
{
202
BlockAIOCBCoroutine *acb = opaque;
94
+ BlockDriverState *bs = req->bs;
203
+ BlockDriverState *bs = acb->common.bs;
95
int64_t overlap_offset = req->offset & ~(align - 1);
204
+ AioContext *ctx = bdrv_get_aio_context(bs);
96
uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
205
97
- overlap_offset;
206
assert(!acb->need_bh);
98
+ bool waited;
207
+ aio_context_acquire(ctx);
99
208
bdrv_co_complete(acb);
100
+ qemu_co_mutex_lock(&bs->reqs_lock);
209
+ aio_context_release(ctx);
101
if (!req->serialising) {
102
atomic_inc(&req->bs->serialising_in_flight);
103
req->serialising = true;
104
@@ -XXX,XX +XXX,XX @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
105
106
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
107
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
108
- return bdrv_wait_serialising_requests(req);
109
+ waited = bdrv_wait_serialising_requests_locked(bs, req);
110
+ qemu_co_mutex_unlock(&bs->reqs_lock);
111
+ return waited;
210
}
112
}
211
113
212
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
213
diff --git a/block/iscsi.c b/block/iscsi.c
214
index XXXXXXX..XXXXXXX 100644
215
--- a/block/iscsi.c
216
+++ b/block/iscsi.c
217
@@ -XXX,XX +XXX,XX @@ static void
218
iscsi_bh_cb(void *p)
219
{
220
IscsiAIOCB *acb = p;
221
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
222
223
qemu_bh_delete(acb->bh);
224
225
g_free(acb->buf);
226
acb->buf = NULL;
227
228
+ aio_context_acquire(ctx);
229
acb->common.cb(acb->common.opaque, acb->status);
230
+ aio_context_release(ctx);
231
232
if (acb->task != NULL) {
233
scsi_free_scsi_task(acb->task);
234
@@ -XXX,XX +XXX,XX @@ iscsi_schedule_bh(IscsiAIOCB *acb)
235
static void iscsi_co_generic_bh_cb(void *opaque)
236
{
237
struct IscsiTask *iTask = opaque;
238
+
239
iTask->complete = 1;
240
- qemu_coroutine_enter(iTask->co);
241
+ aio_co_wake(iTask->co);
242
}
243
244
static void iscsi_retry_timer_expired(void *opaque)
245
diff --git a/block/linux-aio.c b/block/linux-aio.c
246
index XXXXXXX..XXXXXXX 100644
247
--- a/block/linux-aio.c
248
+++ b/block/linux-aio.c
249
@@ -XXX,XX +XXX,XX @@ struct LinuxAioState {
250
io_context_t ctx;
251
EventNotifier e;
252
253
- /* io queue for submit at batch */
254
+ /* io queue for submit at batch. Protected by AioContext lock. */
255
LaioQueue io_q;
256
257
- /* I/O completion processing */
258
+ /* I/O completion processing. Only runs in I/O thread. */
259
QEMUBH *completion_bh;
260
int event_idx;
261
int event_max;
262
@@ -XXX,XX +XXX,XX @@ static inline ssize_t io_event_ret(struct io_event *ev)
263
*/
264
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
265
{
266
+ LinuxAioState *s = laiocb->ctx;
267
int ret;
268
269
ret = laiocb->ret;
270
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
271
}
272
273
laiocb->ret = ret;
274
+ aio_context_acquire(s->aio_context);
275
if (laiocb->co) {
276
/* If the coroutine is already entered it must be in ioq_submit() and
277
* will notice laio->ret has been filled in when it eventually runs
278
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
279
laiocb->common.cb(laiocb->common.opaque, ret);
280
qemu_aio_unref(laiocb);
281
}
282
+ aio_context_release(s->aio_context);
283
}
284
285
/**
114
/**
286
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions(LinuxAioState *s)
115
@@ -XXX,XX +XXX,XX @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
287
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
288
{
289
qemu_laio_process_completions(s);
290
+
291
+ aio_context_acquire(s->aio_context);
292
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
293
ioq_submit(s);
294
}
295
+ aio_context_release(s->aio_context);
296
}
297
298
static void qemu_laio_completion_bh(void *opaque)
299
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_completion_cb(EventNotifier *e)
300
LinuxAioState *s = container_of(e, LinuxAioState, e);
301
302
if (event_notifier_test_and_clear(&s->e)) {
303
- aio_context_acquire(s->aio_context);
304
qemu_laio_process_completions_and_submit(s);
305
- aio_context_release(s->aio_context);
306
}
116
}
307
}
117
}
308
118
309
@@ -XXX,XX +XXX,XX @@ static bool qemu_laio_poll_cb(void *opaque)
119
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
120
- int64_t offset, uint64_t bytes)
121
-{
122
- /* aaaa bbbb */
123
- if (offset >= req->overlap_offset + req->overlap_bytes) {
124
- return false;
125
- }
126
- /* bbbb aaaa */
127
- if (req->overlap_offset >= offset + bytes) {
128
- return false;
129
- }
130
- return true;
131
-}
132
-
133
void bdrv_inc_in_flight(BlockDriverState *bs)
134
{
135
atomic_inc(&bs->in_flight);
136
@@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs)
137
static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
138
{
139
BlockDriverState *bs = self->bs;
140
- BdrvTrackedRequest *req;
141
- bool retry;
142
bool waited = false;
143
144
if (!atomic_read(&bs->serialising_in_flight)) {
310
return false;
145
return false;
311
}
146
}
312
147
313
- aio_context_acquire(s->aio_context);
148
- do {
314
qemu_laio_process_completions_and_submit(s);
149
- retry = false;
315
- aio_context_release(s->aio_context);
150
- qemu_co_mutex_lock(&bs->reqs_lock);
316
return true;
151
- QLIST_FOREACH(req, &bs->tracked_requests, list) {
152
- if (req == self || (!req->serialising && !self->serialising)) {
153
- continue;
154
- }
155
- if (tracked_request_overlaps(req, self->overlap_offset,
156
- self->overlap_bytes))
157
- {
158
- /* Hitting this means there was a reentrant request, for
159
- * example, a block driver issuing nested requests. This must
160
- * never happen since it means deadlock.
161
- */
162
- assert(qemu_coroutine_self() != req->co);
163
-
164
- /* If the request is already (indirectly) waiting for us, or
165
- * will wait for us as soon as it wakes up, then just go on
166
- * (instead of producing a deadlock in the former case). */
167
- if (!req->waiting_for) {
168
- self->waiting_for = req;
169
- qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
170
- self->waiting_for = NULL;
171
- retry = true;
172
- waited = true;
173
- break;
174
- }
175
- }
176
- }
177
- qemu_co_mutex_unlock(&bs->reqs_lock);
178
- } while (retry);
179
+ qemu_co_mutex_lock(&bs->reqs_lock);
180
+ waited = bdrv_wait_serialising_requests_locked(bs, self);
181
+ qemu_co_mutex_unlock(&bs->reqs_lock);
182
183
return waited;
317
}
184
}
318
319
@@ -XXX,XX +XXX,XX @@ void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
320
{
321
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
322
qemu_bh_delete(s->completion_bh);
323
+ s->aio_context = NULL;
324
}
325
326
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
327
diff --git a/block/nfs.c b/block/nfs.c
328
index XXXXXXX..XXXXXXX 100644
329
--- a/block/nfs.c
330
+++ b/block/nfs.c
331
@@ -XXX,XX +XXX,XX @@ static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
332
static void nfs_co_generic_bh_cb(void *opaque)
333
{
334
NFSRPC *task = opaque;
335
+
336
task->complete = 1;
337
- qemu_coroutine_enter(task->co);
338
+ aio_co_wake(task->co);
339
}
340
341
static void
342
diff --git a/block/null.c b/block/null.c
343
index XXXXXXX..XXXXXXX 100644
344
--- a/block/null.c
345
+++ b/block/null.c
346
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo null_aiocb_info = {
347
static void null_bh_cb(void *opaque)
348
{
349
NullAIOCB *acb = opaque;
350
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
351
+
352
+ aio_context_acquire(ctx);
353
acb->common.cb(acb->common.opaque, 0);
354
+ aio_context_release(ctx);
355
qemu_aio_unref(acb);
356
}
357
358
diff --git a/block/qed.c b/block/qed.c
359
index XXXXXXX..XXXXXXX 100644
360
--- a/block/qed.c
361
+++ b/block/qed.c
362
@@ -XXX,XX +XXX,XX @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
363
static void qed_aio_complete_bh(void *opaque)
364
{
365
QEDAIOCB *acb = opaque;
366
+ BDRVQEDState *s = acb_to_s(acb);
367
BlockCompletionFunc *cb = acb->common.cb;
368
void *user_opaque = acb->common.opaque;
369
int ret = acb->bh_ret;
370
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete_bh(void *opaque)
371
qemu_aio_unref(acb);
372
373
/* Invoke callback */
374
+ qed_acquire(s);
375
cb(user_opaque, ret);
376
+ qed_release(s);
377
}
378
379
static void qed_aio_complete(QEDAIOCB *acb, int ret)
380
diff --git a/block/rbd.c b/block/rbd.c
381
index XXXXXXX..XXXXXXX 100644
382
--- a/block/rbd.c
383
+++ b/block/rbd.c
384
@@ -XXX,XX +XXX,XX @@ shutdown:
385
static void qemu_rbd_complete_aio(RADOSCB *rcb)
386
{
387
RBDAIOCB *acb = rcb->acb;
388
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
389
int64_t r;
390
391
r = rcb->ret;
392
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
393
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
394
}
395
qemu_vfree(acb->bounce);
396
+
397
+ aio_context_acquire(ctx);
398
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
399
+ aio_context_release(ctx);
400
401
qemu_aio_unref(acb);
402
}
403
diff --git a/dma-helpers.c b/dma-helpers.c
404
index XXXXXXX..XXXXXXX 100644
405
--- a/dma-helpers.c
406
+++ b/dma-helpers.c
407
@@ -XXX,XX +XXX,XX @@ static void dma_blk_cb(void *opaque, int ret)
408
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
409
}
410
411
+ aio_context_acquire(dbs->ctx);
412
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
413
dma_blk_cb, dbs, dbs->io_func_opaque);
414
+ aio_context_release(dbs->ctx);
415
assert(dbs->acb);
416
}
417
418
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
419
index XXXXXXX..XXXXXXX 100644
420
--- a/hw/block/virtio-blk.c
421
+++ b/hw/block/virtio-blk.c
422
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_bh(void *opaque)
423
424
s->rq = NULL;
425
426
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
427
while (req) {
428
VirtIOBlockReq *next = req->next;
429
if (virtio_blk_handle_request(req, &mrb)) {
430
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_dma_restart_bh(void *opaque)
431
if (mrb.num_reqs) {
432
virtio_blk_submit_multireq(s->blk, &mrb);
433
}
434
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
435
}
436
437
static void virtio_blk_dma_restart_cb(void *opaque, int running,
438
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
439
index XXXXXXX..XXXXXXX 100644
440
--- a/hw/scsi/scsi-bus.c
441
+++ b/hw/scsi/scsi-bus.c
442
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_restart_bh(void *opaque)
443
qemu_bh_delete(s->bh);
444
s->bh = NULL;
445
446
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
447
QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
448
scsi_req_ref(req);
449
if (req->retry) {
450
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_restart_bh(void *opaque)
451
}
452
scsi_req_unref(req);
453
}
454
+ aio_context_release(blk_get_aio_context(s->conf.blk));
455
}
456
457
void scsi_req_retry(SCSIRequest *req)
458
diff --git a/util/async.c b/util/async.c
459
index XXXXXXX..XXXXXXX 100644
460
--- a/util/async.c
461
+++ b/util/async.c
462
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
463
ret = 1;
464
}
465
bh->idle = 0;
466
- aio_context_acquire(ctx);
467
aio_bh_call(bh);
468
- aio_context_release(ctx);
469
}
470
if (bh->deleted) {
471
deleted = true;
472
@@ -XXX,XX +XXX,XX @@ static void co_schedule_bh_cb(void *opaque)
473
Coroutine *co = QSLIST_FIRST(&straight);
474
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
475
trace_aio_co_schedule_bh_cb(ctx, co);
476
+ aio_context_acquire(ctx);
477
qemu_coroutine_enter(co);
478
+ aio_context_release(ctx);
479
}
480
}
481
482
diff --git a/util/thread-pool.c b/util/thread-pool.c
483
index XXXXXXX..XXXXXXX 100644
484
--- a/util/thread-pool.c
485
+++ b/util/thread-pool.c
486
@@ -XXX,XX +XXX,XX @@ static void thread_pool_completion_bh(void *opaque)
487
ThreadPool *pool = opaque;
488
ThreadPoolElement *elem, *next;
489
490
+ aio_context_acquire(pool->ctx);
491
restart:
492
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
493
if (elem->state != THREAD_DONE) {
494
@@ -XXX,XX +XXX,XX @@ restart:
495
qemu_aio_unref(elem);
496
}
497
}
498
+ aio_context_release(pool->ctx);
499
}
500
501
static void thread_pool_cancel(BlockAIOCB *acb)
502
--
185
--
503
2.9.3
186
2.24.1
504
187
505
188
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
This adds a CoMutex around the existing CoQueue. Because the write-side
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
can just take CoMutex, the old "writer" field is not necessary anymore.
4
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
5
Instead of removing it altogether, count the number of pending writers
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
during a read-side critical section and forbid further readers from
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
entering.
7
Message-id: 20200120141858.587874-2-stefanha@redhat.com
8
8
Message-Id: <20200120141858.587874-2-stefanha@redhat.com>
9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Message-id: 20170213181244.16297-7-pbonzini@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
10
---
14
include/qemu/coroutine.h | 3 ++-
11
configure | 27 +++++++++++++++++++++++++++
15
util/qemu-coroutine-lock.c | 35 ++++++++++++++++++++++++-----------
12
1 file changed, 27 insertions(+)
16
2 files changed, 26 insertions(+), 12 deletions(-)
17
13
18
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
14
diff --git a/configure b/configure
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100755
20
--- a/include/qemu/coroutine.h
16
--- a/configure
21
+++ b/include/qemu/coroutine.h
17
+++ b/configure
22
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue);
18
@@ -XXX,XX +XXX,XX @@ xen=""
23
19
xen_ctrl_version=""
24
20
xen_pci_passthrough=""
25
typedef struct CoRwlock {
21
linux_aio=""
26
- bool writer;
22
+linux_io_uring=""
27
+ int pending_writer;
23
cap_ng=""
28
int reader;
24
attr=""
29
+ CoMutex mutex;
25
libattr=""
30
CoQueue queue;
26
@@ -XXX,XX +XXX,XX @@ for opt do
31
} CoRwlock;
27
;;
32
28
--enable-linux-aio) linux_aio="yes"
33
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
29
;;
34
index XXXXXXX..XXXXXXX 100644
30
+ --disable-linux-io-uring) linux_io_uring="no"
35
--- a/util/qemu-coroutine-lock.c
31
+ ;;
36
+++ b/util/qemu-coroutine-lock.c
32
+ --enable-linux-io-uring) linux_io_uring="yes"
37
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_init(CoRwlock *lock)
33
+ ;;
38
{
34
--disable-attr) attr="no"
39
memset(lock, 0, sizeof(*lock));
35
;;
40
qemu_co_queue_init(&lock->queue);
36
--enable-attr) attr="yes"
41
+ qemu_co_mutex_init(&lock->mutex);
37
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
42
}
38
vde support for vde network
43
39
netmap support for netmap network
44
void qemu_co_rwlock_rdlock(CoRwlock *lock)
40
linux-aio Linux AIO support
45
{
41
+ linux-io-uring Linux io_uring support
46
Coroutine *self = qemu_coroutine_self();
42
cap-ng libcap-ng support
47
43
attr attr and xattr support
48
- while (lock->writer) {
44
vhost-net vhost-net kernel acceleration support
49
- qemu_co_queue_wait(&lock->queue, NULL);
45
@@ -XXX,XX +XXX,XX @@ EOF
50
+ qemu_co_mutex_lock(&lock->mutex);
46
linux_aio=no
51
+ /* For fairness, wait if a writer is in line. */
47
fi
52
+ while (lock->pending_writer) {
48
fi
53
+ qemu_co_queue_wait(&lock->queue, &lock->mutex);
49
+##########################################
54
}
50
+# linux-io-uring probe
55
lock->reader++;
56
+ qemu_co_mutex_unlock(&lock->mutex);
57
+
51
+
58
+ /* The rest of the read-side critical section is run without the mutex. */
52
+if test "$linux_io_uring" != "no" ; then
59
self->locks_held++;
53
+ if $pkg_config liburing; then
60
}
54
+ linux_io_uring_cflags=$($pkg_config --cflags liburing)
61
55
+ linux_io_uring_libs=$($pkg_config --libs liburing)
62
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
56
+ linux_io_uring=yes
63
Coroutine *self = qemu_coroutine_self();
57
+ else
64
58
+ if test "$linux_io_uring" = "yes" ; then
65
assert(qemu_in_coroutine());
59
+ feature_not_found "linux io_uring" "Install liburing devel"
66
- if (lock->writer) {
60
+ fi
67
- lock->writer = false;
61
+ linux_io_uring=no
68
+ if (!lock->reader) {
62
+ fi
69
+ /* The critical section started in qemu_co_rwlock_wrlock. */
63
+fi
70
qemu_co_queue_restart_all(&lock->queue);
64
71
} else {
65
##########################################
72
+ self->locks_held--;
66
# TPM emulation is only on POSIX
73
+
67
@@ -XXX,XX +XXX,XX @@ echo "PIE $pie"
74
+ qemu_co_mutex_lock(&lock->mutex);
68
echo "vde support $vde"
75
lock->reader--;
69
echo "netmap support $netmap"
76
assert(lock->reader >= 0);
70
echo "Linux AIO support $linux_aio"
77
/* Wakeup only one waiting writer */
71
+echo "Linux io_uring support $linux_io_uring"
78
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
72
echo "ATTR/XATTR support $attr"
79
qemu_co_queue_next(&lock->queue);
73
echo "Install blobs $blobs"
80
}
74
echo "KVM support $kvm"
81
}
75
@@ -XXX,XX +XXX,XX @@ fi
82
- self->locks_held--;
76
if test "$linux_aio" = "yes" ; then
83
+ qemu_co_mutex_unlock(&lock->mutex);
77
echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
84
}
78
fi
85
79
+if test "$linux_io_uring" = "yes" ; then
86
void qemu_co_rwlock_wrlock(CoRwlock *lock)
80
+ echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak
87
{
81
+ echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak
88
- Coroutine *self = qemu_coroutine_self();
82
+ echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak
89
-
83
+fi
90
- while (lock->writer || lock->reader) {
84
if test "$attr" = "yes" ; then
91
- qemu_co_queue_wait(&lock->queue, NULL);
85
echo "CONFIG_ATTR=y" >> $config_host_mak
92
+ qemu_co_mutex_lock(&lock->mutex);
86
fi
93
+ lock->pending_writer++;
94
+ while (lock->reader) {
95
+ qemu_co_queue_wait(&lock->queue, &lock->mutex);
96
}
97
- lock->writer = true;
98
- self->locks_held++;
99
+ lock->pending_writer--;
100
+
101
+ /* The rest of the write-side critical section is run with
102
+ * the mutex taken, so that lock->reader remains zero.
103
+ * There is no need to update self->locks_held.
104
+ */
105
}
106
--
87
--
107
2.9.3
88
2.24.1
108
89
109
90
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
This will avoid forward references in the next patch. It is also
3
Since io_uring is the actual name of the Linux API, we use it as enum
4
more logical because CoQueue is not anymore the basic primitive.
4
value even though the QAPI schema conventions would prefer io-uring.
5
5
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
6
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
7
Reviewed-by: Fam Zheng <famz@redhat.com>
7
Acked-by: Markus Armbruster <armbru@redhat.com>
8
Message-id: 20170213181244.16297-5-pbonzini@redhat.com
8
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20200120141858.587874-3-stefanha@redhat.com
11
Message-Id: <20200120141858.587874-3-stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
13
---
11
include/qemu/coroutine.h | 89 ++++++++++++++++++++++++------------------------
14
qapi/block-core.json | 4 +++-
12
1 file changed, 44 insertions(+), 45 deletions(-)
15
1 file changed, 3 insertions(+), 1 deletion(-)
13
16
14
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
17
diff --git a/qapi/block-core.json b/qapi/block-core.json
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/include/qemu/coroutine.h
19
--- a/qapi/block-core.json
17
+++ b/include/qemu/coroutine.h
20
+++ b/qapi/block-core.json
18
@@ -XXX,XX +XXX,XX @@ bool qemu_in_coroutine(void);
21
@@ -XXX,XX +XXX,XX @@
19
*/
22
#
20
bool qemu_coroutine_entered(Coroutine *co);
23
# @threads: Use qemu's thread pool
21
24
# @native: Use native AIO backend (only Linux and Windows)
22
-
25
+# @io_uring: Use linux io_uring (since 5.0)
23
-/**
26
#
24
- * CoQueues are a mechanism to queue coroutines in order to continue executing
27
# Since: 2.9
25
- * them later. They provide the fundamental primitives on which coroutine locks
28
##
26
- * are built.
29
{ 'enum': 'BlockdevAioOptions',
27
- */
30
- 'data': [ 'threads', 'native' ] }
28
-typedef struct CoQueue {
31
+ 'data': [ 'threads', 'native',
29
- QSIMPLEQ_HEAD(, Coroutine) entries;
32
+ { 'name': 'io_uring', 'if': 'defined(CONFIG_LINUX_IO_URING)' } ] }
30
-} CoQueue;
33
31
-
34
##
32
-/**
35
# @BlockdevCacheOptions:
33
- * Initialise a CoQueue. This must be called before any other operation is used
34
- * on the CoQueue.
35
- */
36
-void qemu_co_queue_init(CoQueue *queue);
37
-
38
-/**
39
- * Adds the current coroutine to the CoQueue and transfers control to the
40
- * caller of the coroutine.
41
- */
42
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
43
-
44
-/**
45
- * Restarts the next coroutine in the CoQueue and removes it from the queue.
46
- *
47
- * Returns true if a coroutine was restarted, false if the queue is empty.
48
- */
49
-bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
50
-
51
-/**
52
- * Restarts all coroutines in the CoQueue and leaves the queue empty.
53
- */
54
-void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
55
-
56
-/**
57
- * Enter the next coroutine in the queue
58
- */
59
-bool qemu_co_enter_next(CoQueue *queue);
60
-
61
-/**
62
- * Checks if the CoQueue is empty.
63
- */
64
-bool qemu_co_queue_empty(CoQueue *queue);
65
-
66
-
67
/**
68
* Provides a mutex that can be used to synchronise coroutines
69
*/
70
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
71
*/
72
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
73
74
+
75
+/**
76
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
77
+ * them later.
78
+ */
79
+typedef struct CoQueue {
80
+ QSIMPLEQ_HEAD(, Coroutine) entries;
81
+} CoQueue;
82
+
83
+/**
84
+ * Initialise a CoQueue. This must be called before any other operation is used
85
+ * on the CoQueue.
86
+ */
87
+void qemu_co_queue_init(CoQueue *queue);
88
+
89
+/**
90
+ * Adds the current coroutine to the CoQueue and transfers control to the
91
+ * caller of the coroutine.
92
+ */
93
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
94
+
95
+/**
96
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
97
+ *
98
+ * Returns true if a coroutine was restarted, false if the queue is empty.
99
+ */
100
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
101
+
102
+/**
103
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
104
+ */
105
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
106
+
107
+/**
108
+ * Enter the next coroutine in the queue
109
+ */
110
+bool qemu_co_enter_next(CoQueue *queue);
111
+
112
+/**
113
+ * Checks if the CoQueue is empty.
114
+ */
115
+bool qemu_co_queue_empty(CoQueue *queue);
116
+
117
+
118
typedef struct CoRwlock {
119
bool writer;
120
int reader;
121
--
36
--
122
2.9.3
37
2.24.1
123
38
124
39
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
This patch prepares for the removal of unnecessary lockcnt inc/dec pairs.
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Extract the dispatching loop for file descriptor handlers into a new
4
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
5
function aio_dispatch_handlers, and then inline aio_dispatch into
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
aio_poll.
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
7
Message-id: 20200120141858.587874-4-stefanha@redhat.com
8
aio_dispatch can now become void.
8
Message-Id: <20200120141858.587874-4-stefanha@redhat.com>
9
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
14
Message-id: 20170213135235.12274-17-pbonzini@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
10
---
17
include/block/aio.h | 6 +-----
11
include/block/block.h | 1 +
18
util/aio-posix.c | 44 ++++++++++++++------------------------------
12
1 file changed, 1 insertion(+)
19
util/aio-win32.c | 13 ++++---------
20
util/async.c | 2 +-
21
4 files changed, 20 insertions(+), 45 deletions(-)
22
13
23
diff --git a/include/block/aio.h b/include/block/aio.h
14
diff --git a/include/block/block.h b/include/block/block.h
24
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
25
--- a/include/block/aio.h
16
--- a/include/block/block.h
26
+++ b/include/block/aio.h
17
+++ b/include/block/block.h
27
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx);
18
@@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry {
28
/* Dispatch any pending callbacks from the GSource attached to the AioContext.
19
ignoring the format layer */
29
*
20
#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
30
* This is used internally in the implementation of the GSource.
21
#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
31
- *
22
+#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
32
- * @dispatch_fds: true to process fds, false to skip them
23
33
- * (can be used as an optimization by callers that know there
24
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
34
- * are no fds ready)
35
*/
36
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds);
37
+void aio_dispatch(AioContext *ctx);
38
39
/* Progress in completing AIO work to occur. This can issue new pending
40
* aio as a result of executing I/O completion or bh callbacks.
41
diff --git a/util/aio-posix.c b/util/aio-posix.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/util/aio-posix.c
44
+++ b/util/aio-posix.c
45
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
46
AioHandler *node, *tmp;
47
bool progress = false;
48
49
- /*
50
- * We have to walk very carefully in case aio_set_fd_handler is
51
- * called while we're walking.
52
- */
53
- qemu_lockcnt_inc(&ctx->list_lock);
54
-
55
QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
56
int revents;
57
58
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
59
}
60
}
61
62
- qemu_lockcnt_dec(&ctx->list_lock);
63
return progress;
64
}
65
66
-/*
67
- * Note that dispatch_fds == false has the side-effect of post-poning the
68
- * freeing of deleted handlers.
69
- */
70
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
71
+void aio_dispatch(AioContext *ctx)
72
{
73
- bool progress;
74
+ aio_bh_poll(ctx);
75
76
- /*
77
- * If there are callbacks left that have been queued, we need to call them.
78
- * Do not call select in this case, because it is possible that the caller
79
- * does not need a complete flush (as is the case for aio_poll loops).
80
- */
81
- progress = aio_bh_poll(ctx);
82
+ qemu_lockcnt_inc(&ctx->list_lock);
83
+ aio_dispatch_handlers(ctx);
84
+ qemu_lockcnt_dec(&ctx->list_lock);
85
86
- if (dispatch_fds) {
87
- progress |= aio_dispatch_handlers(ctx);
88
- }
89
-
90
- /* Run our timers */
91
- progress |= timerlistgroup_run_timers(&ctx->tlg);
92
-
93
- return progress;
94
+ timerlistgroup_run_timers(&ctx->tlg);
95
}
96
97
/* These thread-local variables are used only in a small part of aio_poll
98
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
99
npfd = 0;
100
qemu_lockcnt_dec(&ctx->list_lock);
101
102
- /* Run dispatch even if there were no readable fds to run timers */
103
- if (aio_dispatch(ctx, ret > 0)) {
104
- progress = true;
105
+ progress |= aio_bh_poll(ctx);
106
+
107
+ if (ret > 0) {
108
+ qemu_lockcnt_inc(&ctx->list_lock);
109
+ progress |= aio_dispatch_handlers(ctx);
110
+ qemu_lockcnt_dec(&ctx->list_lock);
111
}
112
113
+ progress |= timerlistgroup_run_timers(&ctx->tlg);
114
+
115
return progress;
116
}
117
118
diff --git a/util/aio-win32.c b/util/aio-win32.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/util/aio-win32.c
121
+++ b/util/aio-win32.c
122
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
123
return progress;
124
}
125
126
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
127
+void aio_dispatch(AioContext *ctx)
128
{
129
- bool progress;
130
-
131
- progress = aio_bh_poll(ctx);
132
- if (dispatch_fds) {
133
- progress |= aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
134
- }
135
- progress |= timerlistgroup_run_timers(&ctx->tlg);
136
- return progress;
137
+ aio_bh_poll(ctx);
138
+ aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
139
+ timerlistgroup_run_timers(&ctx->tlg);
140
}
141
142
bool aio_poll(AioContext *ctx, bool blocking)
143
diff --git a/util/async.c b/util/async.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/util/async.c
146
+++ b/util/async.c
147
@@ -XXX,XX +XXX,XX @@ aio_ctx_dispatch(GSource *source,
148
AioContext *ctx = (AioContext *) source;
149
150
assert(callback == NULL);
151
- aio_dispatch(ctx, true);
152
+ aio_dispatch(ctx);
153
return true;
154
}
155
25
156
--
26
--
157
2.9.3
27
2.24.1
158
28
159
29
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
This uses the lock-free mutex described in the paper '"Blocking without
3
Aborts when sqe fails to be set as sqes cannot be returned to the
4
Locking", or LFTHREADS: A lock-free thread library' by Gidenstam and
4
ring. Adds slow path for short reads for older kernels
5
Papatriantafilou. The same technique is used in OSv, and in fact
6
the code is essentially a conversion to C of OSv's code.
7
5
8
[Added missing coroutine_fn in tests/test-aio-multithread.c.
6
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
9
--Stefan]
7
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
10
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Message-id: 20200120141858.587874-5-stefanha@redhat.com
12
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Message-Id: <20200120141858.587874-5-stefanha@redhat.com>
13
Message-id: 20170213181244.16297-2-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
12
---
16
include/qemu/coroutine.h | 17 ++++-
13
MAINTAINERS | 8 +
17
tests/test-aio-multithread.c | 86 ++++++++++++++++++++++++
14
block/Makefile.objs | 3 +
18
util/qemu-coroutine-lock.c | 155 ++++++++++++++++++++++++++++++++++++++++---
15
block/io_uring.c | 401 ++++++++++++++++++++++++++++++++++++++++
19
util/trace-events | 1 +
16
include/block/aio.h | 16 +-
20
4 files changed, 246 insertions(+), 13 deletions(-)
17
include/block/raw-aio.h | 12 ++
18
5 files changed, 439 insertions(+), 1 deletion(-)
19
create mode 100644 block/io_uring.c
21
20
22
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
21
diff --git a/MAINTAINERS b/MAINTAINERS
23
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/coroutine.h
23
--- a/MAINTAINERS
25
+++ b/include/qemu/coroutine.h
24
+++ b/MAINTAINERS
26
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue);
25
@@ -XXX,XX +XXX,XX @@ F: block/file-posix.c
27
/**
26
F: block/file-win32.c
28
* Provides a mutex that can be used to synchronise coroutines
27
F: block/win32-aio.c
29
*/
28
30
+struct CoWaitRecord;
29
+Linux io_uring
31
typedef struct CoMutex {
30
+M: Aarushi Mehta <mehta.aaru20@gmail.com>
32
- bool locked;
31
+M: Julia Suvorova <jusual@redhat.com>
33
+ /* Count of pending lockers; 0 for a free mutex, 1 for an
32
+M: Stefan Hajnoczi <stefanha@redhat.com>
34
+ * uncontended mutex.
33
+L: qemu-block@nongnu.org
34
+S: Maintained
35
+F: block/io_uring.c
36
+
37
qcow2
38
M: Kevin Wolf <kwolf@redhat.com>
39
M: Max Reitz <mreitz@redhat.com>
40
diff --git a/block/Makefile.objs b/block/Makefile.objs
41
index XXXXXXX..XXXXXXX 100644
42
--- a/block/Makefile.objs
43
+++ b/block/Makefile.objs
44
@@ -XXX,XX +XXX,XX @@ block-obj-y += block-backend.o snapshot.o qapi.o
45
block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
46
block-obj-$(CONFIG_POSIX) += file-posix.o
47
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
48
+block-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o
49
block-obj-y += null.o mirror.o commit.o io.o create.o
50
block-obj-y += throttle-groups.o
51
block-obj-$(CONFIG_LINUX) += nvme.o
52
@@ -XXX,XX +XXX,XX @@ block-obj-$(if $(CONFIG_LZFSE),m,n) += dmg-lzfse.o
53
dmg-lzfse.o-libs := $(LZFSE_LIBS)
54
qcow.o-libs := -lz
55
linux-aio.o-libs := -laio
56
+io_uring.o-cflags := $(LINUX_IO_URING_CFLAGS)
57
+io_uring.o-libs := $(LINUX_IO_URING_LIBS)
58
parallels.o-cflags := $(LIBXML2_CFLAGS)
59
parallels.o-libs := $(LIBXML2_LIBS)
60
diff --git a/block/io_uring.c b/block/io_uring.c
61
new file mode 100644
62
index XXXXXXX..XXXXXXX
63
--- /dev/null
64
+++ b/block/io_uring.c
65
@@ -XXX,XX +XXX,XX @@
66
+/*
67
+ * Linux io_uring support.
68
+ *
69
+ * Copyright (C) 2009 IBM, Corp.
70
+ * Copyright (C) 2009 Red Hat, Inc.
71
+ * Copyright (C) 2019 Aarushi Mehta
72
+ *
73
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
74
+ * See the COPYING file in the top-level directory.
75
+ */
76
+#include "qemu/osdep.h"
77
+#include <liburing.h>
78
+#include "qemu-common.h"
79
+#include "block/aio.h"
80
+#include "qemu/queue.h"
81
+#include "block/block.h"
82
+#include "block/raw-aio.h"
83
+#include "qemu/coroutine.h"
84
+#include "qapi/error.h"
85
+
86
+/* io_uring ring size */
87
+#define MAX_ENTRIES 128
88
+
89
+typedef struct LuringAIOCB {
90
+ Coroutine *co;
91
+ struct io_uring_sqe sqeq;
92
+ ssize_t ret;
93
+ QEMUIOVector *qiov;
94
+ bool is_read;
95
+ QSIMPLEQ_ENTRY(LuringAIOCB) next;
96
+
97
+ /*
98
+ * Buffered reads may require resubmission, see
99
+ * luring_resubmit_short_read().
35
+ */
100
+ */
36
+ unsigned locked;
101
+ int total_read;
37
+
102
+ QEMUIOVector resubmit_qiov;
38
+ /* A queue of waiters. Elements are added atomically in front of
103
+} LuringAIOCB;
39
+ * from_push. to_pop is only populated, and popped from, by whoever
104
+
40
+ * is in charge of the next wakeup. This can be an unlocker or,
105
+typedef struct LuringQueue {
41
+ * through the handoff protocol, a locker that is about to go to sleep.
106
+ int plugged;
107
+ unsigned int in_queue;
108
+ unsigned int in_flight;
109
+ bool blocked;
110
+ QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue;
111
+} LuringQueue;
112
+
113
+typedef struct LuringState {
114
+ AioContext *aio_context;
115
+
116
+ struct io_uring ring;
117
+
118
+ /* io queue for submit at batch. Protected by AioContext lock. */
119
+ LuringQueue io_q;
120
+
121
+ /* I/O completion processing. Only runs in I/O thread. */
122
+ QEMUBH *completion_bh;
123
+} LuringState;
124
+
125
+/**
126
+ * luring_resubmit:
127
+ *
128
+ * Resubmit a request by appending it to submit_queue. The caller must ensure
129
+ * that ioq_submit() is called later so that submit_queue requests are started.
130
+ */
131
+static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
132
+{
133
+ QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
134
+ s->io_q.in_queue++;
135
+}
136
+
137
+/**
138
+ * luring_resubmit_short_read:
139
+ *
140
+ * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async
141
+ * context") a buffered I/O request with the start of the file range in the
142
+ * page cache could result in a short read. Applications need to resubmit the
143
+ * remaining read request.
144
+ *
145
+ * This is a slow path but recent kernels never take it.
146
+ */
147
+static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
148
+ int nread)
149
+{
150
+ QEMUIOVector *resubmit_qiov;
151
+ size_t remaining;
152
+
153
+ /* Update read position */
154
+ luringcb->total_read = nread;
155
+ remaining = luringcb->qiov->size - luringcb->total_read;
156
+
157
+ /* Shorten qiov */
158
+ resubmit_qiov = &luringcb->resubmit_qiov;
159
+ if (resubmit_qiov->iov == NULL) {
160
+ qemu_iovec_init(resubmit_qiov, luringcb->qiov->niov);
161
+ } else {
162
+ qemu_iovec_reset(resubmit_qiov);
163
+ }
164
+ qemu_iovec_concat(resubmit_qiov, luringcb->qiov, luringcb->total_read,
165
+ remaining);
166
+
167
+ /* Update sqe */
168
+ luringcb->sqeq.off = nread;
169
+ luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
170
+ luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
171
+
172
+ luring_resubmit(s, luringcb);
173
+}
174
+
175
+/**
176
+ * luring_process_completions:
177
+ * @s: AIO state
178
+ *
179
+ * Fetches completed I/O requests, consumes cqes and invokes their callbacks
180
+ * The function is somewhat tricky because it supports nested event loops, for
181
+ * example when a request callback invokes aio_poll().
182
+ *
183
+ * Function schedules BH completion so it can be called again in a nested
184
+ * event loop. When there are no events left to complete the BH is being
185
+ * canceled.
186
+ *
187
+ */
188
+static void luring_process_completions(LuringState *s)
189
+{
190
+ struct io_uring_cqe *cqes;
191
+ int total_bytes;
192
+ /*
193
+ * Request completion callbacks can run the nested event loop.
194
+ * Schedule ourselves so the nested event loop will "see" remaining
195
+ * completed requests and process them. Without this, completion
196
+ * callbacks that wait for other requests using a nested event loop
197
+ * would hang forever.
198
+ *
199
+ * This workaround is needed because io_uring uses poll_wait, which
200
+ * is woken up when new events are added to the uring, thus polling on
201
+ * the same uring fd will block unless more events are received.
202
+ *
203
+ * Other leaf block drivers (drivers that access the data themselves)
204
+ * are networking based, so they poll sockets for data and run the
205
+ * correct coroutine.
42
+ */
206
+ */
43
+ QSLIST_HEAD(, CoWaitRecord) from_push, to_pop;
207
+ qemu_bh_schedule(s->completion_bh);
44
+
208
+
45
+ unsigned handoff, sequence;
209
+ while (io_uring_peek_cqe(&s->ring, &cqes) == 0) {
46
+
210
+ LuringAIOCB *luringcb;
47
Coroutine *holder;
211
+ int ret;
48
- CoQueue queue;
212
+
49
} CoMutex;
213
+ if (!cqes) {
50
51
/**
52
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tests/test-aio-multithread.c
55
+++ b/tests/test-aio-multithread.c
56
@@ -XXX,XX +XXX,XX @@ static void test_multi_co_schedule_10(void)
57
test_multi_co_schedule(10);
58
}
59
60
+/* CoMutex thread-safety. */
61
+
62
+static uint32_t atomic_counter;
63
+static uint32_t running;
64
+static uint32_t counter;
65
+static CoMutex comutex;
66
+
67
+static void coroutine_fn test_multi_co_mutex_entry(void *opaque)
68
+{
69
+ while (!atomic_mb_read(&now_stopping)) {
70
+ qemu_co_mutex_lock(&comutex);
71
+ counter++;
72
+ qemu_co_mutex_unlock(&comutex);
73
+
74
+ /* Increase atomic_counter *after* releasing the mutex. Otherwise
75
+ * there is a chance (it happens about 1 in 3 runs) that the iothread
76
+ * exits before the coroutine is woken up, causing a spurious
77
+ * assertion failure.
78
+ */
79
+ atomic_inc(&atomic_counter);
80
+ }
81
+ atomic_dec(&running);
82
+}
83
+
84
+static void test_multi_co_mutex(int threads, int seconds)
85
+{
86
+ int i;
87
+
88
+ qemu_co_mutex_init(&comutex);
89
+ counter = 0;
90
+ atomic_counter = 0;
91
+ now_stopping = false;
92
+
93
+ create_aio_contexts();
94
+ assert(threads <= NUM_CONTEXTS);
95
+ running = threads;
96
+ for (i = 0; i < threads; i++) {
97
+ Coroutine *co1 = qemu_coroutine_create(test_multi_co_mutex_entry, NULL);
98
+ aio_co_schedule(ctx[i], co1);
99
+ }
100
+
101
+ g_usleep(seconds * 1000000);
102
+
103
+ atomic_mb_set(&now_stopping, true);
104
+ while (running > 0) {
105
+ g_usleep(100000);
106
+ }
107
+
108
+ join_aio_contexts();
109
+ g_test_message("%d iterations/second\n", counter / seconds);
110
+ g_assert_cmpint(counter, ==, atomic_counter);
111
+}
112
+
113
+/* Testing with NUM_CONTEXTS threads focuses on the queue. The mutex however
114
+ * is too contended (and the threads spend too much time in aio_poll)
115
+ * to actually stress the handoff protocol.
116
+ */
117
+static void test_multi_co_mutex_1(void)
118
+{
119
+ test_multi_co_mutex(NUM_CONTEXTS, 1);
120
+}
121
+
122
+static void test_multi_co_mutex_10(void)
123
+{
124
+ test_multi_co_mutex(NUM_CONTEXTS, 10);
125
+}
126
+
127
+/* Testing with fewer threads stresses the handoff protocol too. Still, the
128
+ * case where the locker _can_ pick up a handoff is very rare, happening
129
+ * about 10 times in 1 million, so increase the runtime a bit compared to
130
+ * other "quick" testcases that only run for 1 second.
131
+ */
132
+static void test_multi_co_mutex_2_3(void)
133
+{
134
+ test_multi_co_mutex(2, 3);
135
+}
136
+
137
+static void test_multi_co_mutex_2_30(void)
138
+{
139
+ test_multi_co_mutex(2, 30);
140
+}
141
+
142
/* End of tests. */
143
144
int main(int argc, char **argv)
145
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
146
g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
147
if (g_test_quick()) {
148
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
149
+ g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
150
+ g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
151
} else {
152
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
153
+ g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
154
+ g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
155
}
156
return g_test_run();
157
}
158
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
159
index XXXXXXX..XXXXXXX 100644
160
--- a/util/qemu-coroutine-lock.c
161
+++ b/util/qemu-coroutine-lock.c
162
@@ -XXX,XX +XXX,XX @@
163
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
164
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
165
* THE SOFTWARE.
166
+ *
167
+ * The lock-free mutex implementation is based on OSv
168
+ * (core/lfmutex.cc, include/lockfree/mutex.hh).
169
+ * Copyright (C) 2013 Cloudius Systems, Ltd.
170
*/
171
172
#include "qemu/osdep.h"
173
@@ -XXX,XX +XXX,XX @@ bool qemu_co_queue_empty(CoQueue *queue)
174
return QSIMPLEQ_FIRST(&queue->entries) == NULL;
175
}
176
177
+/* The wait records are handled with a multiple-producer, single-consumer
178
+ * lock-free queue. There cannot be two concurrent pop_waiter() calls
179
+ * because pop_waiter() can only be called while mutex->handoff is zero.
180
+ * This can happen in three cases:
181
+ * - in qemu_co_mutex_unlock, before the hand-off protocol has started.
182
+ * In this case, qemu_co_mutex_lock will see mutex->handoff == 0 and
183
+ * not take part in the handoff.
184
+ * - in qemu_co_mutex_lock, if it steals the hand-off responsibility from
185
+ * qemu_co_mutex_unlock. In this case, qemu_co_mutex_unlock will fail
186
+ * the cmpxchg (it will see either 0 or the next sequence value) and
187
+ * exit. The next hand-off cannot begin until qemu_co_mutex_lock has
188
+ * woken up someone.
189
+ * - in qemu_co_mutex_unlock, if it takes the hand-off token itself.
190
+ * In this case another iteration starts with mutex->handoff == 0;
191
+ * a concurrent qemu_co_mutex_lock will fail the cmpxchg, and
192
+ * qemu_co_mutex_unlock will go back to case (1).
193
+ *
194
+ * The following functions manage this queue.
195
+ */
196
+typedef struct CoWaitRecord {
197
+ Coroutine *co;
198
+ QSLIST_ENTRY(CoWaitRecord) next;
199
+} CoWaitRecord;
200
+
201
+static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
202
+{
203
+ w->co = qemu_coroutine_self();
204
+ QSLIST_INSERT_HEAD_ATOMIC(&mutex->from_push, w, next);
205
+}
206
+
207
+static void move_waiters(CoMutex *mutex)
208
+{
209
+ QSLIST_HEAD(, CoWaitRecord) reversed;
210
+ QSLIST_MOVE_ATOMIC(&reversed, &mutex->from_push);
211
+ while (!QSLIST_EMPTY(&reversed)) {
212
+ CoWaitRecord *w = QSLIST_FIRST(&reversed);
213
+ QSLIST_REMOVE_HEAD(&reversed, next);
214
+ QSLIST_INSERT_HEAD(&mutex->to_pop, w, next);
215
+ }
216
+}
217
+
218
+static CoWaitRecord *pop_waiter(CoMutex *mutex)
219
+{
220
+ CoWaitRecord *w;
221
+
222
+ if (QSLIST_EMPTY(&mutex->to_pop)) {
223
+ move_waiters(mutex);
224
+ if (QSLIST_EMPTY(&mutex->to_pop)) {
225
+ return NULL;
226
+ }
227
+ }
228
+ w = QSLIST_FIRST(&mutex->to_pop);
229
+ QSLIST_REMOVE_HEAD(&mutex->to_pop, next);
230
+ return w;
231
+}
232
+
233
+static bool has_waiters(CoMutex *mutex)
234
+{
235
+ return QSLIST_EMPTY(&mutex->to_pop) || QSLIST_EMPTY(&mutex->from_push);
236
+}
237
+
238
void qemu_co_mutex_init(CoMutex *mutex)
239
{
240
memset(mutex, 0, sizeof(*mutex));
241
- qemu_co_queue_init(&mutex->queue);
242
}
243
244
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
245
+static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
246
{
247
Coroutine *self = qemu_coroutine_self();
248
+ CoWaitRecord w;
249
+ unsigned old_handoff;
250
251
trace_qemu_co_mutex_lock_entry(mutex, self);
252
+ w.co = self;
253
+ push_waiter(mutex, &w);
254
255
- while (mutex->locked) {
256
- qemu_co_queue_wait(&mutex->queue);
257
+ /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
258
+ * a concurrent unlock() the responsibility of waking somebody up.
259
+ */
260
+ old_handoff = atomic_mb_read(&mutex->handoff);
261
+ if (old_handoff &&
262
+ has_waiters(mutex) &&
263
+ atomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
264
+ /* There can be no concurrent pops, because there can be only
265
+ * one active handoff at a time.
266
+ */
267
+ CoWaitRecord *to_wake = pop_waiter(mutex);
268
+ Coroutine *co = to_wake->co;
269
+ if (co == self) {
270
+ /* We got the lock ourselves! */
271
+ assert(to_wake == &w);
272
+ return;
273
+ }
274
+
275
+ aio_co_wake(co);
276
}
277
278
- mutex->locked = true;
279
- mutex->holder = self;
280
- self->locks_held++;
281
-
282
+ qemu_coroutine_yield();
283
trace_qemu_co_mutex_lock_return(mutex, self);
284
}
285
286
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
287
+{
288
+ Coroutine *self = qemu_coroutine_self();
289
+
290
+ if (atomic_fetch_inc(&mutex->locked) == 0) {
291
+ /* Uncontended. */
292
+ trace_qemu_co_mutex_lock_uncontended(mutex, self);
293
+ } else {
294
+ qemu_co_mutex_lock_slowpath(mutex);
295
+ }
296
+ mutex->holder = self;
297
+ self->locks_held++;
298
+}
299
+
300
void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
301
{
302
Coroutine *self = qemu_coroutine_self();
303
304
trace_qemu_co_mutex_unlock_entry(mutex, self);
305
306
- assert(mutex->locked == true);
307
+ assert(mutex->locked);
308
assert(mutex->holder == self);
309
assert(qemu_in_coroutine());
310
311
- mutex->locked = false;
312
mutex->holder = NULL;
313
self->locks_held--;
314
- qemu_co_queue_next(&mutex->queue);
315
+ if (atomic_fetch_dec(&mutex->locked) == 1) {
316
+ /* No waiting qemu_co_mutex_lock(). Phew, that was easy! */
317
+ return;
318
+ }
319
+
320
+ for (;;) {
321
+ CoWaitRecord *to_wake = pop_waiter(mutex);
322
+ unsigned our_handoff;
323
+
324
+ if (to_wake) {
325
+ Coroutine *co = to_wake->co;
326
+ aio_co_wake(co);
327
+ break;
214
+ break;
328
+ }
215
+ }
329
+
216
+
330
+ /* Some concurrent lock() is in progress (we know this because
217
+ luringcb = io_uring_cqe_get_data(cqes);
331
+ * mutex->locked was >1) but it hasn't yet put itself on the wait
218
+ ret = cqes->res;
332
+ * queue. Pick a sequence number for the handoff protocol (not 0).
219
+ io_uring_cqe_seen(&s->ring, cqes);
220
+ cqes = NULL;
221
+
222
+ /* Change counters one-by-one because we can be nested. */
223
+ s->io_q.in_flight--;
224
+
225
+ /* total_read is non-zero only for resubmitted read requests */
226
+ total_bytes = ret + luringcb->total_read;
227
+
228
+ if (ret < 0) {
229
+ if (ret == -EINTR) {
230
+ luring_resubmit(s, luringcb);
231
+ continue;
232
+ }
233
+ } else if (!luringcb->qiov) {
234
+ goto end;
235
+ } else if (total_bytes == luringcb->qiov->size) {
236
+ ret = 0;
237
+ /* Only read/write */
238
+ } else {
239
+ /* Short Read/Write */
240
+ if (luringcb->is_read) {
241
+ if (ret > 0) {
242
+ luring_resubmit_short_read(s, luringcb, ret);
243
+ continue;
244
+ } else {
245
+ /* Pad with zeroes */
246
+ qemu_iovec_memset(luringcb->qiov, total_bytes, 0,
247
+ luringcb->qiov->size - total_bytes);
248
+ ret = 0;
249
+ }
250
+ } else {
251
+ ret = -ENOSPC;;
252
+ }
253
+ }
254
+end:
255
+ luringcb->ret = ret;
256
+ qemu_iovec_destroy(&luringcb->resubmit_qiov);
257
+
258
+ /*
259
+ * If the coroutine is already entered it must be in ioq_submit()
260
+ * and will notice luringcb->ret has been filled in when it
261
+ * eventually runs later. Coroutines cannot be entered recursively
262
+ * so avoid doing that!
333
+ */
263
+ */
334
+ if (++mutex->sequence == 0) {
264
+ if (!qemu_coroutine_entered(luringcb->co)) {
335
+ mutex->sequence = 1;
265
+ aio_co_wake(luringcb->co);
336
+ }
266
+ }
337
+
267
+ }
338
+ our_handoff = mutex->sequence;
268
+ qemu_bh_cancel(s->completion_bh);
339
+ atomic_mb_set(&mutex->handoff, our_handoff);
269
+}
340
+ if (!has_waiters(mutex)) {
270
+
341
+ /* The concurrent lock has not added itself yet, so it
271
+static int ioq_submit(LuringState *s)
342
+ * will be able to pick our handoff.
272
+{
343
+ */
273
+ int ret = 0;
274
+ LuringAIOCB *luringcb, *luringcb_next;
275
+
276
+ while (s->io_q.in_queue > 0) {
277
+ /*
278
+ * Try to fetch sqes from the ring for requests waiting in
279
+ * the overflow queue
280
+ */
281
+ QSIMPLEQ_FOREACH_SAFE(luringcb, &s->io_q.submit_queue, next,
282
+ luringcb_next) {
283
+ struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring);
284
+ if (!sqes) {
285
+ break;
286
+ }
287
+ /* Prep sqe for submission */
288
+ *sqes = luringcb->sqeq;
289
+ QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next);
290
+ }
291
+ ret = io_uring_submit(&s->ring);
292
+ /* Prevent infinite loop if submission is refused */
293
+ if (ret <= 0) {
294
+ if (ret == -EAGAIN) {
295
+ continue;
296
+ }
344
+ break;
297
+ break;
345
+ }
298
+ }
346
+
299
+ s->io_q.in_flight += ret;
347
+ /* Try to do the handoff protocol ourselves; if somebody else has
300
+ s->io_q.in_queue -= ret;
348
+ * already taken it, however, we're done and they're responsible.
301
+ }
302
+ s->io_q.blocked = (s->io_q.in_queue > 0);
303
+
304
+ if (s->io_q.in_flight) {
305
+ /*
306
+ * We can try to complete something just right away if there are
307
+ * still requests in-flight.
349
+ */
308
+ */
350
+ if (atomic_cmpxchg(&mutex->handoff, our_handoff, 0) != our_handoff) {
309
+ luring_process_completions(s);
351
+ break;
310
+ }
352
+ }
311
+ return ret;
353
+ }
312
+}
354
313
+
355
trace_qemu_co_mutex_unlock_return(mutex, self);
314
+static void luring_process_completions_and_submit(LuringState *s)
356
}
315
+{
357
diff --git a/util/trace-events b/util/trace-events
316
+ aio_context_acquire(s->aio_context);
317
+ luring_process_completions(s);
318
+
319
+ if (!s->io_q.plugged && s->io_q.in_queue > 0) {
320
+ ioq_submit(s);
321
+ }
322
+ aio_context_release(s->aio_context);
323
+}
324
+
325
+static void qemu_luring_completion_bh(void *opaque)
326
+{
327
+ LuringState *s = opaque;
328
+ luring_process_completions_and_submit(s);
329
+}
330
+
331
+static void qemu_luring_completion_cb(void *opaque)
332
+{
333
+ LuringState *s = opaque;
334
+ luring_process_completions_and_submit(s);
335
+}
336
+
337
+static void ioq_init(LuringQueue *io_q)
338
+{
339
+ QSIMPLEQ_INIT(&io_q->submit_queue);
340
+ io_q->plugged = 0;
341
+ io_q->in_queue = 0;
342
+ io_q->in_flight = 0;
343
+ io_q->blocked = false;
344
+}
345
+
346
+void luring_io_plug(BlockDriverState *bs, LuringState *s)
347
+{
348
+ s->io_q.plugged++;
349
+}
350
+
351
+void luring_io_unplug(BlockDriverState *bs, LuringState *s)
352
+{
353
+ assert(s->io_q.plugged);
354
+ if (--s->io_q.plugged == 0 &&
355
+ !s->io_q.blocked && s->io_q.in_queue > 0) {
356
+ ioq_submit(s);
357
+ }
358
+}
359
+
360
+/**
361
+ * luring_do_submit:
362
+ * @fd: file descriptor for I/O
363
+ * @luringcb: AIO control block
364
+ * @s: AIO state
365
+ * @offset: offset for request
366
+ * @type: type of request
367
+ *
368
+ * Fetches sqes from ring, adds to pending queue and preps them
369
+ *
370
+ */
371
+static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
372
+ uint64_t offset, int type)
373
+{
374
+ struct io_uring_sqe *sqes = &luringcb->sqeq;
375
+
376
+ switch (type) {
377
+ case QEMU_AIO_WRITE:
378
+ io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
379
+ luringcb->qiov->niov, offset);
380
+ break;
381
+ case QEMU_AIO_READ:
382
+ io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
383
+ luringcb->qiov->niov, offset);
384
+ break;
385
+ case QEMU_AIO_FLUSH:
386
+ io_uring_prep_fsync(sqes, fd, IORING_FSYNC_DATASYNC);
387
+ break;
388
+ default:
389
+ fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n",
390
+ __func__, type);
391
+ abort();
392
+ }
393
+ io_uring_sqe_set_data(sqes, luringcb);
394
+
395
+ QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
396
+ s->io_q.in_queue++;
397
+
398
+ if (!s->io_q.blocked &&
399
+ (!s->io_q.plugged ||
400
+ s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
401
+ return ioq_submit(s);
402
+ }
403
+ return 0;
404
+}
405
+
406
+int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
407
+ uint64_t offset, QEMUIOVector *qiov, int type)
408
+{
409
+ int ret;
410
+ LuringAIOCB luringcb = {
411
+ .co = qemu_coroutine_self(),
412
+ .ret = -EINPROGRESS,
413
+ .qiov = qiov,
414
+ .is_read = (type == QEMU_AIO_READ),
415
+ };
416
+
417
+ ret = luring_do_submit(fd, &luringcb, s, offset, type);
418
+ if (ret < 0) {
419
+ return ret;
420
+ }
421
+
422
+ if (luringcb.ret == -EINPROGRESS) {
423
+ qemu_coroutine_yield();
424
+ }
425
+ return luringcb.ret;
426
+}
427
+
428
+void luring_detach_aio_context(LuringState *s, AioContext *old_context)
429
+{
430
+ aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL,
431
+ s);
432
+ qemu_bh_delete(s->completion_bh);
433
+ s->aio_context = NULL;
434
+}
435
+
436
+void luring_attach_aio_context(LuringState *s, AioContext *new_context)
437
+{
438
+ s->aio_context = new_context;
439
+ s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
440
+ aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
441
+ qemu_luring_completion_cb, NULL, NULL, s);
442
+}
443
+
444
+LuringState *luring_init(Error **errp)
445
+{
446
+ int rc;
447
+ LuringState *s = g_new0(LuringState, 1);
448
+ struct io_uring *ring = &s->ring;
449
+
450
+ rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
451
+ if (rc < 0) {
452
+ error_setg_errno(errp, errno, "failed to init linux io_uring ring");
453
+ g_free(s);
454
+ return NULL;
455
+ }
456
+
457
+ ioq_init(&s->io_q);
458
+ return s;
459
+
460
+}
461
+
462
+void luring_cleanup(LuringState *s)
463
+{
464
+ io_uring_queue_exit(&s->ring);
465
+ g_free(s);
466
+}
467
diff --git a/include/block/aio.h b/include/block/aio.h
358
index XXXXXXX..XXXXXXX 100644
468
index XXXXXXX..XXXXXXX 100644
359
--- a/util/trace-events
469
--- a/include/block/aio.h
360
+++ b/util/trace-events
470
+++ b/include/block/aio.h
361
@@ -XXX,XX +XXX,XX @@ qemu_coroutine_terminate(void *co) "self %p"
471
@@ -XXX,XX +XXX,XX @@ typedef void IOHandler(void *opaque);
362
472
struct Coroutine;
363
# util/qemu-coroutine-lock.c
473
struct ThreadPool;
364
qemu_co_queue_run_restart(void *co) "co %p"
474
struct LinuxAioState;
365
+qemu_co_mutex_lock_uncontended(void *mutex, void *self) "mutex %p self %p"
475
+struct LuringState;
366
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
476
367
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
477
struct AioContext {
368
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
478
GSource source;
479
@@ -XXX,XX +XXX,XX @@ struct AioContext {
480
struct ThreadPool *thread_pool;
481
482
#ifdef CONFIG_LINUX_AIO
483
- /* State for native Linux AIO. Uses aio_context_acquire/release for
484
+ /*
485
+ * State for native Linux AIO. Uses aio_context_acquire/release for
486
* locking.
487
*/
488
struct LinuxAioState *linux_aio;
489
#endif
490
+#ifdef CONFIG_LINUX_IO_URING
491
+ /*
492
+ * State for Linux io_uring. Uses aio_context_acquire/release for
493
+ * locking.
494
+ */
495
+ struct LuringState *linux_io_uring;
496
+#endif
497
498
/* TimerLists for calling timers - one per clock type. Has its own
499
* locking.
500
@@ -XXX,XX +XXX,XX @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
501
/* Return the LinuxAioState bound to this AioContext */
502
struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
503
504
+/* Setup the LuringState bound to this AioContext */
505
+struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp);
506
+
507
+/* Return the LuringState bound to this AioContext */
508
+struct LuringState *aio_get_linux_io_uring(AioContext *ctx);
509
/**
510
* aio_timer_new_with_attrs:
511
* @ctx: the aio context
512
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
513
index XXXXXXX..XXXXXXX 100644
514
--- a/include/block/raw-aio.h
515
+++ b/include/block/raw-aio.h
516
@@ -XXX,XX +XXX,XX @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
517
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
518
void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s);
519
#endif
520
+/* io_uring.c - Linux io_uring implementation */
521
+#ifdef CONFIG_LINUX_IO_URING
522
+typedef struct LuringState LuringState;
523
+LuringState *luring_init(Error **errp);
524
+void luring_cleanup(LuringState *s);
525
+int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
526
+ uint64_t offset, QEMUIOVector *qiov, int type);
527
+void luring_detach_aio_context(LuringState *s, AioContext *old_context);
528
+void luring_attach_aio_context(LuringState *s, AioContext *new_context);
529
+void luring_io_plug(BlockDriverState *bs, LuringState *s);
530
+void luring_io_unplug(BlockDriverState *bs, LuringState *s);
531
+#endif
532
533
#ifdef _WIN32
534
typedef struct QEMUWin32AIOState QEMUWin32AIOState;
369
--
535
--
370
2.9.3
536
2.24.1
371
537
372
538
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
AioContext is fairly self-contained; the only dependency is QEMUTimer, but
3
Follow linux-aio.o and stub out the block/io_uring.o APIs that will be
4
that in turn doesn't need anything else. So move them out of block-obj-y
4
missing when a binary is linked with obj-util-y but without
5
to avoid introducing a dependency from io/ to block-obj-y.
5
block-util-y (e.g. vhost-user-gpu).
6
6
7
main-loop and its dependency iohandler also need to be moved, because
7
For example, the stubs are necessary so that a binary using util/async.o
8
later in this series io/ will call iohandler_get_aio_context.
8
from obj-util-y for qemu_bh_new() links successfully. In this case
9
block/io_uring.o from block-util-y isn't needed and we can avoid
10
dragging in the block layer by linking the stubs instead. The stub
11
functions never get called.
9
12
10
[Changed copyright "the QEMU team" to "other QEMU contributors" as
13
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
11
suggested by Daniel Berrange and agreed by Paolo.
14
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
12
--Stefan]
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
16
Message-id: 20200120141858.587874-6-stefanha@redhat.com
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
17
Message-Id: <20200120141858.587874-6-stefanha@redhat.com>
15
Reviewed-by: Fam Zheng <famz@redhat.com>
16
Message-id: 20170213135235.12274-2-pbonzini@redhat.com
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
19
---
19
Makefile.objs | 4 ---
20
MAINTAINERS | 1 +
20
stubs/Makefile.objs | 1 +
21
stubs/Makefile.objs | 1 +
21
tests/Makefile.include | 11 ++++----
22
stubs/io_uring.c | 32 ++++++++++++++++++++++++++++++++
22
util/Makefile.objs | 6 +++-
23
3 files changed, 34 insertions(+)
23
block/io.c | 29 -------------------
24
create mode 100644 stubs/io_uring.c
24
stubs/linux-aio.c | 32 +++++++++++++++++++++
25
stubs/set-fd-handler.c | 11 --------
26
aio-posix.c => util/aio-posix.c | 2 +-
27
aio-win32.c => util/aio-win32.c | 0
28
util/aiocb.c | 55 +++++++++++++++++++++++++++++++++++++
29
async.c => util/async.c | 3 +-
30
iohandler.c => util/iohandler.c | 0
31
main-loop.c => util/main-loop.c | 0
32
qemu-timer.c => util/qemu-timer.c | 0
33
thread-pool.c => util/thread-pool.c | 2 +-
34
trace-events | 11 --------
35
util/trace-events | 11 ++++++++
36
17 files changed, 114 insertions(+), 64 deletions(-)
37
create mode 100644 stubs/linux-aio.c
38
rename aio-posix.c => util/aio-posix.c (99%)
39
rename aio-win32.c => util/aio-win32.c (100%)
40
create mode 100644 util/aiocb.c
41
rename async.c => util/async.c (99%)
42
rename iohandler.c => util/iohandler.c (100%)
43
rename main-loop.c => util/main-loop.c (100%)
44
rename qemu-timer.c => util/qemu-timer.c (100%)
45
rename thread-pool.c => util/thread-pool.c (99%)
46
25
47
diff --git a/Makefile.objs b/Makefile.objs
26
diff --git a/MAINTAINERS b/MAINTAINERS
48
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
49
--- a/Makefile.objs
28
--- a/MAINTAINERS
50
+++ b/Makefile.objs
29
+++ b/MAINTAINERS
51
@@ -XXX,XX +XXX,XX @@ chardev-obj-y = chardev/
30
@@ -XXX,XX +XXX,XX @@ M: Stefan Hajnoczi <stefanha@redhat.com>
52
#######################################################################
31
L: qemu-block@nongnu.org
53
# block-obj-y is code used by both qemu system emulation and qemu-img
32
S: Maintained
54
33
F: block/io_uring.c
55
-block-obj-y = async.o thread-pool.o
34
+F: stubs/io_uring.c
56
block-obj-y += nbd/
35
57
block-obj-y += block.o blockjob.o
36
qcow2
58
-block-obj-y += main-loop.o iohandler.o qemu-timer.o
37
M: Kevin Wolf <kwolf@redhat.com>
59
-block-obj-$(CONFIG_POSIX) += aio-posix.o
60
-block-obj-$(CONFIG_WIN32) += aio-win32.o
61
block-obj-y += block/
62
block-obj-y += qemu-io-cmds.o
63
block-obj-$(CONFIG_REPLICATION) += replication.o
64
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
38
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
65
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
66
--- a/stubs/Makefile.objs
40
--- a/stubs/Makefile.objs
67
+++ b/stubs/Makefile.objs
41
+++ b/stubs/Makefile.objs
68
@@ -XXX,XX +XXX,XX @@ stub-obj-y += get-vm-name.o
42
@@ -XXX,XX +XXX,XX @@ stub-obj-y += iothread.o
69
stub-obj-y += iothread.o
70
stub-obj-y += iothread-lock.o
43
stub-obj-y += iothread-lock.o
71
stub-obj-y += is-daemonized.o
44
stub-obj-y += is-daemonized.o
72
+stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
45
stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
46
+stub-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o
73
stub-obj-y += machine-init-done.o
47
stub-obj-y += machine-init-done.o
74
stub-obj-y += migr-blocker.o
48
stub-obj-y += migr-blocker.o
75
stub-obj-y += monitor.o
49
stub-obj-y += change-state-handler.o
76
diff --git a/tests/Makefile.include b/tests/Makefile.include
50
diff --git a/stubs/io_uring.c b/stubs/io_uring.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/tests/Makefile.include
79
+++ b/tests/Makefile.include
80
@@ -XXX,XX +XXX,XX @@ check-unit-y += tests/test-visitor-serialization$(EXESUF)
81
check-unit-y += tests/test-iov$(EXESUF)
82
gcov-files-test-iov-y = util/iov.c
83
check-unit-y += tests/test-aio$(EXESUF)
84
+gcov-files-test-aio-y = util/async.c util/qemu-timer.o
85
+gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
86
+gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
87
check-unit-y += tests/test-throttle$(EXESUF)
88
gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
89
gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
90
@@ -XXX,XX +XXX,XX @@ tests/check-qjson$(EXESUF): tests/check-qjson.o $(test-util-obj-y)
91
tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(test-qom-obj-y)
92
tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y)
93
94
-tests/test-char$(EXESUF): tests/test-char.o qemu-timer.o \
95
-    $(test-util-obj-y) $(qtest-obj-y) $(test-block-obj-y) $(chardev-obj-y)
96
+tests/test-char$(EXESUF): tests/test-char.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) $(chardev-obj-y)
97
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
98
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
99
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
100
@@ -XXX,XX +XXX,XX @@ tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
101
    migration/vmstate.o migration/qemu-file.o \
102
migration/qemu-file-channel.o migration/qjson.o \
103
    $(test-io-obj-y)
104
-tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \
105
-    $(test-util-obj-y)
106
+tests/test-timed-average$(EXESUF): tests/test-timed-average.o $(test-util-obj-y)
107
tests/test-base64$(EXESUF): tests/test-base64.o \
108
    libqemuutil.a libqemustub.a
109
tests/ptimer-test$(EXESUF): tests/ptimer-test.o tests/ptimer-test-stubs.o hw/core/ptimer.o libqemustub.a
110
@@ -XXX,XX +XXX,XX @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
111
tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
112
tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
113
tests/postcopy-test$(EXESUF): tests/postcopy-test.o
114
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-timer.o \
115
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o $(test-util-obj-y) \
116
    $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) $(libqos-pc-obj-y) \
117
    $(chardev-obj-y)
118
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
119
diff --git a/util/Makefile.objs b/util/Makefile.objs
120
index XXXXXXX..XXXXXXX 100644
121
--- a/util/Makefile.objs
122
+++ b/util/Makefile.objs
123
@@ -XXX,XX +XXX,XX @@
124
util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
125
util-obj-y += bufferiszero.o
126
util-obj-y += lockcnt.o
127
+util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o
128
+util-obj-y += main-loop.o iohandler.o
129
+util-obj-$(CONFIG_POSIX) += aio-posix.o
130
util-obj-$(CONFIG_POSIX) += compatfd.o
131
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
132
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
133
util-obj-$(CONFIG_POSIX) += oslib-posix.o
134
util-obj-$(CONFIG_POSIX) += qemu-openpty.o
135
util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
136
-util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
137
util-obj-$(CONFIG_POSIX) += memfd.o
138
+util-obj-$(CONFIG_WIN32) += aio-win32.o
139
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
140
util-obj-$(CONFIG_WIN32) += oslib-win32.o
141
util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
142
util-obj-y += envlist.o path.o module.o
143
diff --git a/block/io.c b/block/io.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/block/io.c
146
+++ b/block/io.c
147
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
148
return &acb->common;
149
}
150
151
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
152
- BlockCompletionFunc *cb, void *opaque)
153
-{
154
- BlockAIOCB *acb;
155
-
156
- acb = g_malloc(aiocb_info->aiocb_size);
157
- acb->aiocb_info = aiocb_info;
158
- acb->bs = bs;
159
- acb->cb = cb;
160
- acb->opaque = opaque;
161
- acb->refcnt = 1;
162
- return acb;
163
-}
164
-
165
-void qemu_aio_ref(void *p)
166
-{
167
- BlockAIOCB *acb = p;
168
- acb->refcnt++;
169
-}
170
-
171
-void qemu_aio_unref(void *p)
172
-{
173
- BlockAIOCB *acb = p;
174
- assert(acb->refcnt > 0);
175
- if (--acb->refcnt == 0) {
176
- g_free(acb);
177
- }
178
-}
179
-
180
/**************************************************************/
181
/* Coroutine block device emulation */
182
183
diff --git a/stubs/linux-aio.c b/stubs/linux-aio.c
184
new file mode 100644
51
new file mode 100644
185
index XXXXXXX..XXXXXXX
52
index XXXXXXX..XXXXXXX
186
--- /dev/null
53
--- /dev/null
187
+++ b/stubs/linux-aio.c
54
+++ b/stubs/io_uring.c
188
@@ -XXX,XX +XXX,XX @@
55
@@ -XXX,XX +XXX,XX @@
189
+/*
56
+/*
190
+ * Linux native AIO support.
57
+ * Linux io_uring support.
191
+ *
58
+ *
192
+ * Copyright (C) 2009 IBM, Corp.
59
+ * Copyright (C) 2009 IBM, Corp.
193
+ * Copyright (C) 2009 Red Hat, Inc.
60
+ * Copyright (C) 2009 Red Hat, Inc.
194
+ *
61
+ *
195
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
62
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
196
+ * See the COPYING file in the top-level directory.
63
+ * See the COPYING file in the top-level directory.
197
+ */
64
+ */
198
+#include "qemu/osdep.h"
65
+#include "qemu/osdep.h"
199
+#include "block/aio.h"
66
+#include "block/aio.h"
200
+#include "block/raw-aio.h"
67
+#include "block/raw-aio.h"
201
+
68
+
202
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
69
+void luring_detach_aio_context(LuringState *s, AioContext *old_context)
203
+{
70
+{
204
+ abort();
71
+ abort();
205
+}
72
+}
206
+
73
+
207
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
74
+void luring_attach_aio_context(LuringState *s, AioContext *new_context)
208
+{
75
+{
209
+ abort();
76
+ abort();
210
+}
77
+}
211
+
78
+
212
+LinuxAioState *laio_init(void)
79
+LuringState *luring_init(Error **errp)
213
+{
80
+{
214
+ abort();
81
+ abort();
215
+}
82
+}
216
+
83
+
217
+void laio_cleanup(LinuxAioState *s)
84
+void luring_cleanup(LuringState *s)
218
+{
85
+{
219
+ abort();
86
+ abort();
220
+}
87
+}
221
diff --git a/stubs/set-fd-handler.c b/stubs/set-fd-handler.c
222
index XXXXXXX..XXXXXXX 100644
223
--- a/stubs/set-fd-handler.c
224
+++ b/stubs/set-fd-handler.c
225
@@ -XXX,XX +XXX,XX @@ void qemu_set_fd_handler(int fd,
226
{
227
abort();
228
}
229
-
230
-void aio_set_fd_handler(AioContext *ctx,
231
- int fd,
232
- bool is_external,
233
- IOHandler *io_read,
234
- IOHandler *io_write,
235
- AioPollFn *io_poll,
236
- void *opaque)
237
-{
238
- abort();
239
-}
240
diff --git a/aio-posix.c b/util/aio-posix.c
241
similarity index 99%
242
rename from aio-posix.c
243
rename to util/aio-posix.c
244
index XXXXXXX..XXXXXXX 100644
245
--- a/aio-posix.c
246
+++ b/util/aio-posix.c
247
@@ -XXX,XX +XXX,XX @@
248
#include "qemu/rcu_queue.h"
249
#include "qemu/sockets.h"
250
#include "qemu/cutils.h"
251
-#include "trace-root.h"
252
+#include "trace.h"
253
#ifdef CONFIG_EPOLL_CREATE1
254
#include <sys/epoll.h>
255
#endif
256
diff --git a/aio-win32.c b/util/aio-win32.c
257
similarity index 100%
258
rename from aio-win32.c
259
rename to util/aio-win32.c
260
diff --git a/util/aiocb.c b/util/aiocb.c
261
new file mode 100644
262
index XXXXXXX..XXXXXXX
263
--- /dev/null
264
+++ b/util/aiocb.c
265
@@ -XXX,XX +XXX,XX @@
266
+/*
267
+ * BlockAIOCB allocation
268
+ *
269
+ * Copyright (c) 2003-2017 Fabrice Bellard and other QEMU contributors
270
+ *
271
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
272
+ * of this software and associated documentation files (the "Software"), to deal
273
+ * in the Software without restriction, including without limitation the rights
274
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
275
+ * copies of the Software, and to permit persons to whom the Software is
276
+ * furnished to do so, subject to the following conditions:
277
+ *
278
+ * The above copyright notice and this permission notice shall be included in
279
+ * all copies or substantial portions of the Software.
280
+ *
281
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
282
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
283
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
284
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
285
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
286
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
287
+ * THE SOFTWARE.
288
+ */
289
+
290
+#include "qemu/osdep.h"
291
+#include "block/aio.h"
292
+
293
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
294
+ BlockCompletionFunc *cb, void *opaque)
295
+{
296
+ BlockAIOCB *acb;
297
+
298
+ acb = g_malloc(aiocb_info->aiocb_size);
299
+ acb->aiocb_info = aiocb_info;
300
+ acb->bs = bs;
301
+ acb->cb = cb;
302
+ acb->opaque = opaque;
303
+ acb->refcnt = 1;
304
+ return acb;
305
+}
306
+
307
+void qemu_aio_ref(void *p)
308
+{
309
+ BlockAIOCB *acb = p;
310
+ acb->refcnt++;
311
+}
312
+
313
+void qemu_aio_unref(void *p)
314
+{
315
+ BlockAIOCB *acb = p;
316
+ assert(acb->refcnt > 0);
317
+ if (--acb->refcnt == 0) {
318
+ g_free(acb);
319
+ }
320
+}
321
diff --git a/async.c b/util/async.c
322
similarity index 99%
323
rename from async.c
324
rename to util/async.c
325
index XXXXXXX..XXXXXXX 100644
326
--- a/async.c
327
+++ b/util/async.c
328
@@ -XXX,XX +XXX,XX @@
329
/*
330
- * QEMU System Emulator
331
+ * Data plane event loop
332
*
333
* Copyright (c) 2003-2008 Fabrice Bellard
334
+ * Copyright (c) 2009-2017 QEMU contributors
335
*
336
* Permission is hereby granted, free of charge, to any person obtaining a copy
337
* of this software and associated documentation files (the "Software"), to deal
338
diff --git a/iohandler.c b/util/iohandler.c
339
similarity index 100%
340
rename from iohandler.c
341
rename to util/iohandler.c
342
diff --git a/main-loop.c b/util/main-loop.c
343
similarity index 100%
344
rename from main-loop.c
345
rename to util/main-loop.c
346
diff --git a/qemu-timer.c b/util/qemu-timer.c
347
similarity index 100%
348
rename from qemu-timer.c
349
rename to util/qemu-timer.c
350
diff --git a/thread-pool.c b/util/thread-pool.c
351
similarity index 99%
352
rename from thread-pool.c
353
rename to util/thread-pool.c
354
index XXXXXXX..XXXXXXX 100644
355
--- a/thread-pool.c
356
+++ b/util/thread-pool.c
357
@@ -XXX,XX +XXX,XX @@
358
#include "qemu/queue.h"
359
#include "qemu/thread.h"
360
#include "qemu/coroutine.h"
361
-#include "trace-root.h"
362
+#include "trace.h"
363
#include "block/thread-pool.h"
364
#include "qemu/main-loop.h"
365
366
diff --git a/trace-events b/trace-events
367
index XXXXXXX..XXXXXXX 100644
368
--- a/trace-events
369
+++ b/trace-events
370
@@ -XXX,XX +XXX,XX @@
371
#
372
# The <format-string> should be a sprintf()-compatible format string.
373
374
-# aio-posix.c
375
-run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
376
-run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
377
-poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
378
-poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
379
-
380
-# thread-pool.c
381
-thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
382
-thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
383
-thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
384
-
385
# ioport.c
386
cpu_in(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
387
cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
388
diff --git a/util/trace-events b/util/trace-events
389
index XXXXXXX..XXXXXXX 100644
390
--- a/util/trace-events
391
+++ b/util/trace-events
392
@@ -XXX,XX +XXX,XX @@
393
# See docs/tracing.txt for syntax documentation.
394
395
+# util/aio-posix.c
396
+run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
397
+run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
398
+poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
399
+poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
400
+
401
+# util/thread-pool.c
402
+thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
403
+thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
404
+thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
405
+
406
# util/buffer.c
407
buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"
408
buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s"
409
--
88
--
410
2.9.3
89
2.24.1
411
90
412
91
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
aio_co_wake provides the infrastructure to start a coroutine on a "home"
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
AioContext. It will be used by CoMutex and CoQueue, so that coroutines
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
don't jump from one context to another when they go to sleep on a
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
mutex or waitqueue. However, it can also be used as a more efficient
6
Message-id: 20200120141858.587874-7-stefanha@redhat.com
7
alternative to one-shot bottom halves, and saves the effort of tracking
7
Message-Id: <20200120141858.587874-7-stefanha@redhat.com>
8
which AioContext a coroutine is running on.
9
10
aio_co_schedule is the part of aio_co_wake that starts a coroutine
11
on a remote AioContext, but it is also useful to implement e.g.
12
bdrv_set_aio_context callbacks.
13
14
The implementation of aio_co_schedule is based on a lock-free
15
multiple-producer, single-consumer queue. The multiple producers use
16
cmpxchg to add to a LIFO stack. The consumer (a per-AioContext bottom
17
half) grabs all items added so far, inverts the list to make it FIFO,
18
and goes through it one item at a time until it's empty. The data
19
structure was inspired by OSv, which uses it in the very code we'll
20
"port" to QEMU for the thread-safe CoMutex.
21
22
Most of the new code is really tests.
23
24
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
25
Reviewed-by: Fam Zheng <famz@redhat.com>
26
Message-id: 20170213135235.12274-3-pbonzini@redhat.com
27
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
28
---
9
---
29
tests/Makefile.include | 8 +-
10
util/async.c | 36 ++++++++++++++++++++++++++++++++++++
30
include/block/aio.h | 32 +++++++
11
1 file changed, 36 insertions(+)
31
include/qemu/coroutine_int.h | 11 ++-
32
tests/iothread.h | 25 +++++
33
tests/iothread.c | 91 ++++++++++++++++++
34
tests/test-aio-multithread.c | 213 +++++++++++++++++++++++++++++++++++++++++++
35
util/async.c | 65 +++++++++++++
36
util/qemu-coroutine.c | 8 ++
37
util/trace-events | 4 +
38
9 files changed, 453 insertions(+), 4 deletions(-)
39
create mode 100644 tests/iothread.h
40
create mode 100644 tests/iothread.c
41
create mode 100644 tests/test-aio-multithread.c
42
12
43
diff --git a/tests/Makefile.include b/tests/Makefile.include
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tests/Makefile.include
46
+++ b/tests/Makefile.include
47
@@ -XXX,XX +XXX,XX @@ check-unit-y += tests/test-aio$(EXESUF)
48
gcov-files-test-aio-y = util/async.c util/qemu-timer.o
49
gcov-files-test-aio-$(CONFIG_WIN32) += util/aio-win32.c
50
gcov-files-test-aio-$(CONFIG_POSIX) += util/aio-posix.c
51
+check-unit-y += tests/test-aio-multithread$(EXESUF)
52
+gcov-files-test-aio-multithread-y = $(gcov-files-test-aio-y)
53
+gcov-files-test-aio-multithread-y += util/qemu-coroutine.c tests/iothread.c
54
check-unit-y += tests/test-throttle$(EXESUF)
55
-gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
56
-gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
57
check-unit-y += tests/test-thread-pool$(EXESUF)
58
gcov-files-test-thread-pool-y = thread-pool.c
59
gcov-files-test-hbitmap-y = util/hbitmap.c
60
@@ -XXX,XX +XXX,XX @@ test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \
61
    $(test-qom-obj-y)
62
test-crypto-obj-y = $(crypto-obj-y) $(test-qom-obj-y)
63
test-io-obj-y = $(io-obj-y) $(test-crypto-obj-y)
64
-test-block-obj-y = $(block-obj-y) $(test-io-obj-y)
65
+test-block-obj-y = $(block-obj-y) $(test-io-obj-y) tests/iothread.o
66
67
tests/check-qint$(EXESUF): tests/check-qint.o $(test-util-obj-y)
68
tests/check-qstring$(EXESUF): tests/check-qstring.o $(test-util-obj-y)
69
@@ -XXX,XX +XXX,XX @@ tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y)
70
tests/test-char$(EXESUF): tests/test-char.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) $(chardev-obj-y)
71
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
72
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
73
+tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
74
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
75
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
76
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
77
diff --git a/include/block/aio.h b/include/block/aio.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/block/aio.h
80
+++ b/include/block/aio.h
81
@@ -XXX,XX +XXX,XX @@ typedef void QEMUBHFunc(void *opaque);
82
typedef bool AioPollFn(void *opaque);
83
typedef void IOHandler(void *opaque);
84
85
+struct Coroutine;
86
struct ThreadPool;
87
struct LinuxAioState;
88
89
@@ -XXX,XX +XXX,XX @@ struct AioContext {
90
bool notified;
91
EventNotifier notifier;
92
93
+ QSLIST_HEAD(, Coroutine) scheduled_coroutines;
94
+ QEMUBH *co_schedule_bh;
95
+
96
/* Thread pool for performing work and receiving completion callbacks.
97
* Has its own locking.
98
*/
99
@@ -XXX,XX +XXX,XX @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
100
}
101
102
/**
103
+ * aio_co_schedule:
104
+ * @ctx: the aio context
105
+ * @co: the coroutine
106
+ *
107
+ * Start a coroutine on a remote AioContext.
108
+ *
109
+ * The coroutine must not be entered by anyone else while aio_co_schedule()
110
+ * is active. In addition the coroutine must have yielded unless ctx
111
+ * is the context in which the coroutine is running (i.e. the value of
112
+ * qemu_get_current_aio_context() from the coroutine itself).
113
+ */
114
+void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
115
+
116
+/**
117
+ * aio_co_wake:
118
+ * @co: the coroutine
119
+ *
120
+ * Restart a coroutine on the AioContext where it was running last, thus
121
+ * preventing coroutines from jumping from one context to another when they
122
+ * go to sleep.
123
+ *
124
+ * aio_co_wake may be executed either in coroutine or non-coroutine
125
+ * context. The coroutine must not be entered by anyone else while
126
+ * aio_co_wake() is active.
127
+ */
128
+void aio_co_wake(struct Coroutine *co);
129
+
130
+/**
131
* Return the AioContext whose event loop runs in the current thread.
132
*
133
* If called from an IOThread this will be the IOThread's AioContext. If
134
diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h
135
index XXXXXXX..XXXXXXX 100644
136
--- a/include/qemu/coroutine_int.h
137
+++ b/include/qemu/coroutine_int.h
138
@@ -XXX,XX +XXX,XX @@ struct Coroutine {
139
CoroutineEntry *entry;
140
void *entry_arg;
141
Coroutine *caller;
142
+
143
+ /* Only used when the coroutine has terminated. */
144
QSLIST_ENTRY(Coroutine) pool_next;
145
+
146
size_t locks_held;
147
148
- /* Coroutines that should be woken up when we yield or terminate */
149
+ /* Coroutines that should be woken up when we yield or terminate.
150
+ * Only used when the coroutine is running.
151
+ */
152
QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
153
+
154
+ /* Only used when the coroutine has yielded. */
155
+ AioContext *ctx;
156
QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
157
+ QSLIST_ENTRY(Coroutine) co_scheduled_next;
158
};
159
160
Coroutine *qemu_coroutine_new(void);
161
diff --git a/tests/iothread.h b/tests/iothread.h
162
new file mode 100644
163
index XXXXXXX..XXXXXXX
164
--- /dev/null
165
+++ b/tests/iothread.h
166
@@ -XXX,XX +XXX,XX @@
167
+/*
168
+ * Event loop thread implementation for unit tests
169
+ *
170
+ * Copyright Red Hat Inc., 2013, 2016
171
+ *
172
+ * Authors:
173
+ * Stefan Hajnoczi <stefanha@redhat.com>
174
+ * Paolo Bonzini <pbonzini@redhat.com>
175
+ *
176
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
177
+ * See the COPYING file in the top-level directory.
178
+ */
179
+#ifndef TEST_IOTHREAD_H
180
+#define TEST_IOTHREAD_H
181
+
182
+#include "block/aio.h"
183
+#include "qemu/thread.h"
184
+
185
+typedef struct IOThread IOThread;
186
+
187
+IOThread *iothread_new(void);
188
+void iothread_join(IOThread *iothread);
189
+AioContext *iothread_get_aio_context(IOThread *iothread);
190
+
191
+#endif
192
diff --git a/tests/iothread.c b/tests/iothread.c
193
new file mode 100644
194
index XXXXXXX..XXXXXXX
195
--- /dev/null
196
+++ b/tests/iothread.c
197
@@ -XXX,XX +XXX,XX @@
198
+/*
199
+ * Event loop thread implementation for unit tests
200
+ *
201
+ * Copyright Red Hat Inc., 2013, 2016
202
+ *
203
+ * Authors:
204
+ * Stefan Hajnoczi <stefanha@redhat.com>
205
+ * Paolo Bonzini <pbonzini@redhat.com>
206
+ *
207
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
208
+ * See the COPYING file in the top-level directory.
209
+ *
210
+ */
211
+
212
+#include "qemu/osdep.h"
213
+#include "qapi/error.h"
214
+#include "block/aio.h"
215
+#include "qemu/main-loop.h"
216
+#include "qemu/rcu.h"
217
+#include "iothread.h"
218
+
219
+struct IOThread {
220
+ AioContext *ctx;
221
+
222
+ QemuThread thread;
223
+ QemuMutex init_done_lock;
224
+ QemuCond init_done_cond; /* is thread initialization done? */
225
+ bool stopping;
226
+};
227
+
228
+static __thread IOThread *my_iothread;
229
+
230
+AioContext *qemu_get_current_aio_context(void)
231
+{
232
+ return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
233
+}
234
+
235
+static void *iothread_run(void *opaque)
236
+{
237
+ IOThread *iothread = opaque;
238
+
239
+ rcu_register_thread();
240
+
241
+ my_iothread = iothread;
242
+ qemu_mutex_lock(&iothread->init_done_lock);
243
+ iothread->ctx = aio_context_new(&error_abort);
244
+ qemu_cond_signal(&iothread->init_done_cond);
245
+ qemu_mutex_unlock(&iothread->init_done_lock);
246
+
247
+ while (!atomic_read(&iothread->stopping)) {
248
+ aio_poll(iothread->ctx, true);
249
+ }
250
+
251
+ rcu_unregister_thread();
252
+ return NULL;
253
+}
254
+
255
+void iothread_join(IOThread *iothread)
256
+{
257
+ iothread->stopping = true;
258
+ aio_notify(iothread->ctx);
259
+ qemu_thread_join(&iothread->thread);
260
+ qemu_cond_destroy(&iothread->init_done_cond);
261
+ qemu_mutex_destroy(&iothread->init_done_lock);
262
+ aio_context_unref(iothread->ctx);
263
+ g_free(iothread);
264
+}
265
+
266
+IOThread *iothread_new(void)
267
+{
268
+ IOThread *iothread = g_new0(IOThread, 1);
269
+
270
+ qemu_mutex_init(&iothread->init_done_lock);
271
+ qemu_cond_init(&iothread->init_done_cond);
272
+ qemu_thread_create(&iothread->thread, NULL, iothread_run,
273
+ iothread, QEMU_THREAD_JOINABLE);
274
+
275
+ /* Wait for initialization to complete */
276
+ qemu_mutex_lock(&iothread->init_done_lock);
277
+ while (iothread->ctx == NULL) {
278
+ qemu_cond_wait(&iothread->init_done_cond,
279
+ &iothread->init_done_lock);
280
+ }
281
+ qemu_mutex_unlock(&iothread->init_done_lock);
282
+ return iothread;
283
+}
284
+
285
+AioContext *iothread_get_aio_context(IOThread *iothread)
286
+{
287
+ return iothread->ctx;
288
+}
289
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
290
new file mode 100644
291
index XXXXXXX..XXXXXXX
292
--- /dev/null
293
+++ b/tests/test-aio-multithread.c
294
@@ -XXX,XX +XXX,XX @@
295
+/*
296
+ * AioContext multithreading tests
297
+ *
298
+ * Copyright Red Hat, Inc. 2016
299
+ *
300
+ * Authors:
301
+ * Paolo Bonzini <pbonzini@redhat.com>
302
+ *
303
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
304
+ * See the COPYING.LIB file in the top-level directory.
305
+ */
306
+
307
+#include "qemu/osdep.h"
308
+#include <glib.h>
309
+#include "block/aio.h"
310
+#include "qapi/error.h"
311
+#include "qemu/coroutine.h"
312
+#include "qemu/thread.h"
313
+#include "qemu/error-report.h"
314
+#include "iothread.h"
315
+
316
+/* AioContext management */
317
+
318
+#define NUM_CONTEXTS 5
319
+
320
+static IOThread *threads[NUM_CONTEXTS];
321
+static AioContext *ctx[NUM_CONTEXTS];
322
+static __thread int id = -1;
323
+
324
+static QemuEvent done_event;
325
+
326
+/* Run a function synchronously on a remote iothread. */
327
+
328
+typedef struct CtxRunData {
329
+ QEMUBHFunc *cb;
330
+ void *arg;
331
+} CtxRunData;
332
+
333
+static void ctx_run_bh_cb(void *opaque)
334
+{
335
+ CtxRunData *data = opaque;
336
+
337
+ data->cb(data->arg);
338
+ qemu_event_set(&done_event);
339
+}
340
+
341
+static void ctx_run(int i, QEMUBHFunc *cb, void *opaque)
342
+{
343
+ CtxRunData data = {
344
+ .cb = cb,
345
+ .arg = opaque
346
+ };
347
+
348
+ qemu_event_reset(&done_event);
349
+ aio_bh_schedule_oneshot(ctx[i], ctx_run_bh_cb, &data);
350
+ qemu_event_wait(&done_event);
351
+}
352
+
353
+/* Starting the iothreads. */
354
+
355
+static void set_id_cb(void *opaque)
356
+{
357
+ int *i = opaque;
358
+
359
+ id = *i;
360
+}
361
+
362
+static void create_aio_contexts(void)
363
+{
364
+ int i;
365
+
366
+ for (i = 0; i < NUM_CONTEXTS; i++) {
367
+ threads[i] = iothread_new();
368
+ ctx[i] = iothread_get_aio_context(threads[i]);
369
+ }
370
+
371
+ qemu_event_init(&done_event, false);
372
+ for (i = 0; i < NUM_CONTEXTS; i++) {
373
+ ctx_run(i, set_id_cb, &i);
374
+ }
375
+}
376
+
377
+/* Stopping the iothreads. */
378
+
379
+static void join_aio_contexts(void)
380
+{
381
+ int i;
382
+
383
+ for (i = 0; i < NUM_CONTEXTS; i++) {
384
+ aio_context_ref(ctx[i]);
385
+ }
386
+ for (i = 0; i < NUM_CONTEXTS; i++) {
387
+ iothread_join(threads[i]);
388
+ }
389
+ for (i = 0; i < NUM_CONTEXTS; i++) {
390
+ aio_context_unref(ctx[i]);
391
+ }
392
+ qemu_event_destroy(&done_event);
393
+}
394
+
395
+/* Basic test for the stuff above. */
396
+
397
+static void test_lifecycle(void)
398
+{
399
+ create_aio_contexts();
400
+ join_aio_contexts();
401
+}
402
+
403
+/* aio_co_schedule test. */
404
+
405
+static Coroutine *to_schedule[NUM_CONTEXTS];
406
+
407
+static bool now_stopping;
408
+
409
+static int count_retry;
410
+static int count_here;
411
+static int count_other;
412
+
413
+static bool schedule_next(int n)
414
+{
415
+ Coroutine *co;
416
+
417
+ co = atomic_xchg(&to_schedule[n], NULL);
418
+ if (!co) {
419
+ atomic_inc(&count_retry);
420
+ return false;
421
+ }
422
+
423
+ if (n == id) {
424
+ atomic_inc(&count_here);
425
+ } else {
426
+ atomic_inc(&count_other);
427
+ }
428
+
429
+ aio_co_schedule(ctx[n], co);
430
+ return true;
431
+}
432
+
433
+static void finish_cb(void *opaque)
434
+{
435
+ schedule_next(id);
436
+}
437
+
438
+static coroutine_fn void test_multi_co_schedule_entry(void *opaque)
439
+{
440
+ g_assert(to_schedule[id] == NULL);
441
+ atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
442
+
443
+ while (!atomic_mb_read(&now_stopping)) {
444
+ int n;
445
+
446
+ n = g_test_rand_int_range(0, NUM_CONTEXTS);
447
+ schedule_next(n);
448
+ qemu_coroutine_yield();
449
+
450
+ g_assert(to_schedule[id] == NULL);
451
+ atomic_mb_set(&to_schedule[id], qemu_coroutine_self());
452
+ }
453
+}
454
+
455
+
456
+static void test_multi_co_schedule(int seconds)
457
+{
458
+ int i;
459
+
460
+ count_here = count_other = count_retry = 0;
461
+ now_stopping = false;
462
+
463
+ create_aio_contexts();
464
+ for (i = 0; i < NUM_CONTEXTS; i++) {
465
+ Coroutine *co1 = qemu_coroutine_create(test_multi_co_schedule_entry, NULL);
466
+ aio_co_schedule(ctx[i], co1);
467
+ }
468
+
469
+ g_usleep(seconds * 1000000);
470
+
471
+ atomic_mb_set(&now_stopping, true);
472
+ for (i = 0; i < NUM_CONTEXTS; i++) {
473
+ ctx_run(i, finish_cb, NULL);
474
+ to_schedule[i] = NULL;
475
+ }
476
+
477
+ join_aio_contexts();
478
+ g_test_message("scheduled %d, queued %d, retry %d, total %d\n",
479
+ count_other, count_here, count_retry,
480
+ count_here + count_other + count_retry);
481
+}
482
+
483
+static void test_multi_co_schedule_1(void)
484
+{
485
+ test_multi_co_schedule(1);
486
+}
487
+
488
+static void test_multi_co_schedule_10(void)
489
+{
490
+ test_multi_co_schedule(10);
491
+}
492
+
493
+/* End of tests. */
494
+
495
+int main(int argc, char **argv)
496
+{
497
+ init_clocks();
498
+
499
+ g_test_init(&argc, &argv, NULL);
500
+ g_test_add_func("/aio/multi/lifecycle", test_lifecycle);
501
+ if (g_test_quick()) {
502
+ g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
503
+ } else {
504
+ g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
505
+ }
506
+ return g_test_run();
507
+}
508
diff --git a/util/async.c b/util/async.c
13
diff --git a/util/async.c b/util/async.c
509
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
510
--- a/util/async.c
15
--- a/util/async.c
511
+++ b/util/async.c
16
+++ b/util/async.c
512
@@ -XXX,XX +XXX,XX @@
513
#include "qemu/main-loop.h"
514
#include "qemu/atomic.h"
515
#include "block/raw-aio.h"
516
+#include "qemu/coroutine_int.h"
517
+#include "trace.h"
518
519
/***********************************************************/
520
/* bottom halves (can be seen as timers which expire ASAP) */
521
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
17
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
522
}
18
}
523
#endif
19
#endif
524
20
525
+ assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
21
+#ifdef CONFIG_LINUX_IO_URING
526
+ qemu_bh_delete(ctx->co_schedule_bh);
22
+ if (ctx->linux_io_uring) {
23
+ luring_detach_aio_context(ctx->linux_io_uring, ctx);
24
+ luring_cleanup(ctx->linux_io_uring);
25
+ ctx->linux_io_uring = NULL;
26
+ }
27
+#endif
527
+
28
+
528
qemu_lockcnt_lock(&ctx->list_lock);
29
assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
529
assert(!qemu_lockcnt_count(&ctx->list_lock));
30
qemu_bh_delete(ctx->co_schedule_bh);
530
while (ctx->first_bh) {
31
531
@@ -XXX,XX +XXX,XX @@ static bool event_notifier_poll(void *opaque)
32
@@ -XXX,XX +XXX,XX @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
532
return atomic_read(&ctx->notified);
533
}
33
}
534
34
#endif
535
+static void co_schedule_bh_cb(void *opaque)
35
36
+#ifdef CONFIG_LINUX_IO_URING
37
+LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
536
+{
38
+{
537
+ AioContext *ctx = opaque;
39
+ if (ctx->linux_io_uring) {
538
+ QSLIST_HEAD(, Coroutine) straight, reversed;
40
+ return ctx->linux_io_uring;
539
+
540
+ QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
541
+ QSLIST_INIT(&straight);
542
+
543
+ while (!QSLIST_EMPTY(&reversed)) {
544
+ Coroutine *co = QSLIST_FIRST(&reversed);
545
+ QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
546
+ QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
547
+ }
41
+ }
548
+
42
+
549
+ while (!QSLIST_EMPTY(&straight)) {
43
+ ctx->linux_io_uring = luring_init(errp);
550
+ Coroutine *co = QSLIST_FIRST(&straight);
44
+ if (!ctx->linux_io_uring) {
551
+ QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
45
+ return NULL;
552
+ trace_aio_co_schedule_bh_cb(ctx, co);
553
+ qemu_coroutine_enter(co);
554
+ }
46
+ }
47
+
48
+ luring_attach_aio_context(ctx->linux_io_uring, ctx);
49
+ return ctx->linux_io_uring;
555
+}
50
+}
556
+
51
+
557
AioContext *aio_context_new(Error **errp)
52
+LuringState *aio_get_linux_io_uring(AioContext *ctx)
53
+{
54
+ assert(ctx->linux_io_uring);
55
+ return ctx->linux_io_uring;
56
+}
57
+#endif
58
+
59
void aio_notify(AioContext *ctx)
558
{
60
{
559
int ret;
61
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
560
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
62
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
561
}
63
#ifdef CONFIG_LINUX_AIO
562
g_source_set_can_recurse(&ctx->source, true);
64
ctx->linux_aio = NULL;
563
qemu_lockcnt_init(&ctx->list_lock);
65
#endif
564
+
66
+
565
+ ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
67
+#ifdef CONFIG_LINUX_IO_URING
566
+ QSLIST_INIT(&ctx->scheduled_coroutines);
68
+ ctx->linux_io_uring = NULL;
69
+#endif
567
+
70
+
568
aio_set_event_notifier(ctx, &ctx->notifier,
71
ctx->thread_pool = NULL;
569
false,
72
qemu_rec_mutex_init(&ctx->lock);
570
(EventNotifierHandler *)
73
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
571
@@ -XXX,XX +XXX,XX @@ fail:
572
return NULL;
573
}
574
575
+void aio_co_schedule(AioContext *ctx, Coroutine *co)
576
+{
577
+ trace_aio_co_schedule(ctx, co);
578
+ QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
579
+ co, co_scheduled_next);
580
+ qemu_bh_schedule(ctx->co_schedule_bh);
581
+}
582
+
583
+void aio_co_wake(struct Coroutine *co)
584
+{
585
+ AioContext *ctx;
586
+
587
+ /* Read coroutine before co->ctx. Matches smp_wmb in
588
+ * qemu_coroutine_enter.
589
+ */
590
+ smp_read_barrier_depends();
591
+ ctx = atomic_read(&co->ctx);
592
+
593
+ if (ctx != qemu_get_current_aio_context()) {
594
+ aio_co_schedule(ctx, co);
595
+ return;
596
+ }
597
+
598
+ if (qemu_in_coroutine()) {
599
+ Coroutine *self = qemu_coroutine_self();
600
+ assert(self != co);
601
+ QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
602
+ } else {
603
+ aio_context_acquire(ctx);
604
+ qemu_coroutine_enter(co);
605
+ aio_context_release(ctx);
606
+ }
607
+}
608
+
609
void aio_context_ref(AioContext *ctx)
610
{
611
g_source_ref(&ctx->source);
612
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
613
index XXXXXXX..XXXXXXX 100644
614
--- a/util/qemu-coroutine.c
615
+++ b/util/qemu-coroutine.c
616
@@ -XXX,XX +XXX,XX @@
617
#include "qemu/atomic.h"
618
#include "qemu/coroutine.h"
619
#include "qemu/coroutine_int.h"
620
+#include "block/aio.h"
621
622
enum {
623
POOL_BATCH_SIZE = 64,
624
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_enter(Coroutine *co)
625
}
626
627
co->caller = self;
628
+ co->ctx = qemu_get_current_aio_context();
629
+
630
+ /* Store co->ctx before anything that stores co. Matches
631
+ * barrier in aio_co_wake.
632
+ */
633
+ smp_wmb();
634
+
635
ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
636
637
qemu_co_queue_run_restart(co);
638
diff --git a/util/trace-events b/util/trace-events
639
index XXXXXXX..XXXXXXX 100644
640
--- a/util/trace-events
641
+++ b/util/trace-events
642
@@ -XXX,XX +XXX,XX @@ run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
643
poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
644
poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
645
646
+# util/async.c
647
+aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
648
+aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
649
+
650
# util/thread-pool.c
651
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
652
thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
653
--
74
--
654
2.9.3
75
2.24.1
655
76
656
77
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
qcow2_create2 calls this. Do not run a nested event loop, as that
4
breaks when aio_co_wake tries to queue the coroutine on the co_queue_wakeup
5
list of the currently running one.
6
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Message-id: 20170213135235.12274-4-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/block-backend.c | 12 ++++++++----
14
1 file changed, 8 insertions(+), 4 deletions(-)
15
16
diff --git a/block/block-backend.c b/block/block-backend.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/block-backend.c
19
+++ b/block/block-backend.c
20
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
21
{
22
QEMUIOVector qiov;
23
struct iovec iov;
24
- Coroutine *co;
25
BlkRwCo rwco;
26
27
iov = (struct iovec) {
28
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
29
.ret = NOT_DONE,
30
};
31
32
- co = qemu_coroutine_create(co_entry, &rwco);
33
- qemu_coroutine_enter(co);
34
- BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
35
+ if (qemu_in_coroutine()) {
36
+ /* Fast-path if already in coroutine context */
37
+ co_entry(&rwco);
38
+ } else {
39
+ Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
40
+ qemu_coroutine_enter(co);
41
+ BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
42
+ }
43
44
return rwco.ret;
45
}
46
--
47
2.9.3
48
49
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
This covers both file descriptor callbacks and polling callbacks,
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
since they execute related code.
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-8-stefanha@redhat.com
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Message-Id: <20200120141858.587874-8-stefanha@redhat.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
10
Message-id: 20170213135235.12274-14-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
9
---
13
block/curl.c | 16 +++++++++++++---
10
block.c | 22 ++++++++++++++++++++++
14
block/iscsi.c | 4 ++++
11
blockdev.c | 12 ++++--------
15
block/linux-aio.c | 4 ++++
12
include/block/block.h | 1 +
16
block/nfs.c | 6 ++++++
13
3 files changed, 27 insertions(+), 8 deletions(-)
17
block/sheepdog.c | 29 +++++++++++++++--------------
18
block/ssh.c | 29 +++++++++--------------------
19
block/win32-aio.c | 10 ++++++----
20
hw/block/virtio-blk.c | 5 ++++-
21
hw/scsi/virtio-scsi.c | 6 ++++++
22
util/aio-posix.c | 7 -------
23
util/aio-win32.c | 6 ------
24
11 files changed, 67 insertions(+), 55 deletions(-)
25
14
26
diff --git a/block/curl.c b/block/curl.c
15
diff --git a/block.c b/block.c
27
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
28
--- a/block/curl.c
17
--- a/block.c
29
+++ b/block/curl.c
18
+++ b/block.c
30
@@ -XXX,XX +XXX,XX @@ static void curl_multi_check_completion(BDRVCURLState *s)
19
@@ -XXX,XX +XXX,XX @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
31
}
20
return detect_zeroes;
32
}
21
}
33
22
34
-static void curl_multi_do(void *arg)
23
+/**
35
+static void curl_multi_do_locked(CURLState *s)
24
+ * Set open flags for aio engine
36
{
25
+ *
37
- CURLState *s = (CURLState *)arg;
26
+ * Return 0 on success, -1 if the engine specified is invalid
38
CURLSocket *socket, *next_socket;
27
+ */
39
int running;
28
+int bdrv_parse_aio(const char *mode, int *flags)
40
int r;
41
@@ -XXX,XX +XXX,XX @@ static void curl_multi_do(void *arg)
42
}
43
}
44
45
+static void curl_multi_do(void *arg)
46
+{
29
+{
47
+ CURLState *s = (CURLState *)arg;
30
+ if (!strcmp(mode, "threads")) {
31
+ /* do nothing, default */
32
+ } else if (!strcmp(mode, "native")) {
33
+ *flags |= BDRV_O_NATIVE_AIO;
34
+#ifdef CONFIG_LINUX_IO_URING
35
+ } else if (!strcmp(mode, "io_uring")) {
36
+ *flags |= BDRV_O_IO_URING;
37
+#endif
38
+ } else {
39
+ return -1;
40
+ }
48
+
41
+
49
+ aio_context_acquire(s->s->aio_context);
42
+ return 0;
50
+ curl_multi_do_locked(s);
51
+ aio_context_release(s->s->aio_context);
52
+}
43
+}
53
+
44
+
54
static void curl_multi_read(void *arg)
45
/**
55
{
46
* Set open flags for a given discard mode
56
CURLState *s = (CURLState *)arg;
47
*
57
48
diff --git a/blockdev.c b/blockdev.c
58
- curl_multi_do(arg);
59
+ aio_context_acquire(s->s->aio_context);
60
+ curl_multi_do_locked(s);
61
curl_multi_check_completion(s->s);
62
+ aio_context_release(s->s->aio_context);
63
}
64
65
static void curl_multi_timeout_do(void *arg)
66
diff --git a/block/iscsi.c b/block/iscsi.c
67
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
68
--- a/block/iscsi.c
50
--- a/blockdev.c
69
+++ b/block/iscsi.c
51
+++ b/blockdev.c
70
@@ -XXX,XX +XXX,XX @@ iscsi_process_read(void *arg)
52
@@ -XXX,XX +XXX,XX @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
71
IscsiLun *iscsilun = arg;
53
}
72
struct iscsi_context *iscsi = iscsilun->iscsi;
54
73
55
if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
74
+ aio_context_acquire(iscsilun->aio_context);
56
- if (!strcmp(aio, "native")) {
75
iscsi_service(iscsi, POLLIN);
57
- *bdrv_flags |= BDRV_O_NATIVE_AIO;
76
iscsi_set_events(iscsilun);
58
- } else if (!strcmp(aio, "threads")) {
77
+ aio_context_release(iscsilun->aio_context);
59
- /* this is the default */
78
}
60
- } else {
79
61
- error_setg(errp, "invalid aio option");
80
static void
62
- return;
81
@@ -XXX,XX +XXX,XX @@ iscsi_process_write(void *arg)
63
+ if (bdrv_parse_aio(aio, bdrv_flags) < 0) {
82
IscsiLun *iscsilun = arg;
64
+ error_setg(errp, "invalid aio option");
83
struct iscsi_context *iscsi = iscsilun->iscsi;
65
+ return;
84
66
}
85
+ aio_context_acquire(iscsilun->aio_context);
67
}
86
iscsi_service(iscsi, POLLOUT);
68
}
87
iscsi_set_events(iscsilun);
69
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_common_drive_opts = {
88
+ aio_context_release(iscsilun->aio_context);
70
},{
89
}
71
.name = "aio",
90
72
.type = QEMU_OPT_STRING,
91
static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
73
- .help = "host AIO implementation (threads, native)",
92
diff --git a/block/linux-aio.c b/block/linux-aio.c
74
+ .help = "host AIO implementation (threads, native, io_uring)",
75
},{
76
.name = BDRV_OPT_CACHE_WB,
77
.type = QEMU_OPT_BOOL,
78
diff --git a/include/block/block.h b/include/block/block.h
93
index XXXXXXX..XXXXXXX 100644
79
index XXXXXXX..XXXXXXX 100644
94
--- a/block/linux-aio.c
80
--- a/include/block/block.h
95
+++ b/block/linux-aio.c
81
+++ b/include/block/block.h
96
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_completion_cb(EventNotifier *e)
82
@@ -XXX,XX +XXX,XX @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
97
LinuxAioState *s = container_of(e, LinuxAioState, e);
83
void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
98
84
Error **errp);
99
if (event_notifier_test_and_clear(&s->e)) {
85
100
+ aio_context_acquire(s->aio_context);
86
+int bdrv_parse_aio(const char *mode, int *flags);
101
qemu_laio_process_completions_and_submit(s);
87
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
102
+ aio_context_release(s->aio_context);
88
int bdrv_parse_discard_flags(const char *mode, int *flags);
103
}
89
BdrvChild *bdrv_open_child(const char *filename,
104
}
105
106
@@ -XXX,XX +XXX,XX @@ static bool qemu_laio_poll_cb(void *opaque)
107
return false;
108
}
109
110
+ aio_context_acquire(s->aio_context);
111
qemu_laio_process_completions_and_submit(s);
112
+ aio_context_release(s->aio_context);
113
return true;
114
}
115
116
diff --git a/block/nfs.c b/block/nfs.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/block/nfs.c
119
+++ b/block/nfs.c
120
@@ -XXX,XX +XXX,XX @@ static void nfs_set_events(NFSClient *client)
121
static void nfs_process_read(void *arg)
122
{
123
NFSClient *client = arg;
124
+
125
+ aio_context_acquire(client->aio_context);
126
nfs_service(client->context, POLLIN);
127
nfs_set_events(client);
128
+ aio_context_release(client->aio_context);
129
}
130
131
static void nfs_process_write(void *arg)
132
{
133
NFSClient *client = arg;
134
+
135
+ aio_context_acquire(client->aio_context);
136
nfs_service(client->context, POLLOUT);
137
nfs_set_events(client);
138
+ aio_context_release(client->aio_context);
139
}
140
141
static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
142
diff --git a/block/sheepdog.c b/block/sheepdog.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/block/sheepdog.c
145
+++ b/block/sheepdog.c
146
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
147
return ret;
148
}
149
150
-static void restart_co_req(void *opaque)
151
-{
152
- Coroutine *co = opaque;
153
-
154
- qemu_coroutine_enter(co);
155
-}
156
-
157
typedef struct SheepdogReqCo {
158
int sockfd;
159
BlockDriverState *bs;
160
@@ -XXX,XX +XXX,XX @@ typedef struct SheepdogReqCo {
161
unsigned int *rlen;
162
int ret;
163
bool finished;
164
+ Coroutine *co;
165
} SheepdogReqCo;
166
167
+static void restart_co_req(void *opaque)
168
+{
169
+ SheepdogReqCo *srco = opaque;
170
+
171
+ aio_co_wake(srco->co);
172
+}
173
+
174
static coroutine_fn void do_co_req(void *opaque)
175
{
176
int ret;
177
- Coroutine *co;
178
SheepdogReqCo *srco = opaque;
179
int sockfd = srco->sockfd;
180
SheepdogReq *hdr = srco->hdr;
181
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void do_co_req(void *opaque)
182
unsigned int *wlen = srco->wlen;
183
unsigned int *rlen = srco->rlen;
184
185
- co = qemu_coroutine_self();
186
+ srco->co = qemu_coroutine_self();
187
aio_set_fd_handler(srco->aio_context, sockfd, false,
188
- NULL, restart_co_req, NULL, co);
189
+ NULL, restart_co_req, NULL, srco);
190
191
ret = send_co_req(sockfd, hdr, data, wlen);
192
if (ret < 0) {
193
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void do_co_req(void *opaque)
194
}
195
196
aio_set_fd_handler(srco->aio_context, sockfd, false,
197
- restart_co_req, NULL, NULL, co);
198
+ restart_co_req, NULL, NULL, srco);
199
200
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
201
if (ret != sizeof(*hdr)) {
202
@@ -XXX,XX +XXX,XX @@ out:
203
aio_set_fd_handler(srco->aio_context, sockfd, false,
204
NULL, NULL, NULL, NULL);
205
206
+ srco->co = NULL;
207
srco->ret = ret;
208
srco->finished = true;
209
if (srco->bs) {
210
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
211
* We've finished all requests which belong to the AIOCB, so
212
* we can switch back to sd_co_readv/writev now.
213
*/
214
- qemu_coroutine_enter(acb->coroutine);
215
+ aio_co_wake(acb->coroutine);
216
}
217
218
return;
219
@@ -XXX,XX +XXX,XX @@ static void co_read_response(void *opaque)
220
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
221
}
222
223
- qemu_coroutine_enter(s->co_recv);
224
+ aio_co_wake(s->co_recv);
225
}
226
227
static void co_write_request(void *opaque)
228
{
229
BDRVSheepdogState *s = opaque;
230
231
- qemu_coroutine_enter(s->co_send);
232
+ aio_co_wake(s->co_send);
233
}
234
235
/*
236
diff --git a/block/ssh.c b/block/ssh.c
237
index XXXXXXX..XXXXXXX 100644
238
--- a/block/ssh.c
239
+++ b/block/ssh.c
240
@@ -XXX,XX +XXX,XX @@ static void restart_coroutine(void *opaque)
241
242
DPRINTF("co=%p", co);
243
244
- qemu_coroutine_enter(co);
245
+ aio_co_wake(co);
246
}
247
248
-static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
249
+/* A non-blocking call returned EAGAIN, so yield, ensuring the
250
+ * handlers are set up so that we'll be rescheduled when there is an
251
+ * interesting event on the socket.
252
+ */
253
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
254
{
255
int r;
256
IOHandler *rd_handler = NULL, *wr_handler = NULL;
257
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
258
259
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
260
false, rd_handler, wr_handler, NULL, co);
261
-}
262
-
263
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
264
- BlockDriverState *bs)
265
-{
266
- DPRINTF("s->sock=%d", s->sock);
267
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
268
- false, NULL, NULL, NULL, NULL);
269
-}
270
-
271
-/* A non-blocking call returned EAGAIN, so yield, ensuring the
272
- * handlers are set up so that we'll be rescheduled when there is an
273
- * interesting event on the socket.
274
- */
275
-static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
276
-{
277
- set_fd_handler(s, bs);
278
qemu_coroutine_yield();
279
- clear_fd_handler(s, bs);
280
+ DPRINTF("s->sock=%d - back", s->sock);
281
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
282
+ NULL, NULL, NULL, NULL);
283
}
284
285
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
286
diff --git a/block/win32-aio.c b/block/win32-aio.c
287
index XXXXXXX..XXXXXXX 100644
288
--- a/block/win32-aio.c
289
+++ b/block/win32-aio.c
290
@@ -XXX,XX +XXX,XX @@ struct QEMUWin32AIOState {
291
HANDLE hIOCP;
292
EventNotifier e;
293
int count;
294
- bool is_aio_context_attached;
295
+ AioContext *aio_ctx;
296
};
297
298
typedef struct QEMUWin32AIOCB {
299
@@ -XXX,XX +XXX,XX @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
300
}
301
302
303
+ aio_context_acquire(s->aio_ctx);
304
waiocb->common.cb(waiocb->common.opaque, ret);
305
+ aio_context_release(s->aio_ctx);
306
qemu_aio_unref(waiocb);
307
}
308
309
@@ -XXX,XX +XXX,XX @@ void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
310
AioContext *old_context)
311
{
312
aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
313
- aio->is_aio_context_attached = false;
314
+ aio->aio_ctx = NULL;
315
}
316
317
void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
318
AioContext *new_context)
319
{
320
- aio->is_aio_context_attached = true;
321
+ aio->aio_ctx = new_context;
322
aio_set_event_notifier(new_context, &aio->e, false,
323
win32_aio_completion_cb, NULL);
324
}
325
@@ -XXX,XX +XXX,XX @@ out_free_state:
326
327
void win32_aio_cleanup(QEMUWin32AIOState *aio)
328
{
329
- assert(!aio->is_aio_context_attached);
330
+ assert(!aio->aio_ctx);
331
CloseHandle(aio->hIOCP);
332
event_notifier_cleanup(&aio->e);
333
g_free(aio);
334
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
335
index XXXXXXX..XXXXXXX 100644
336
--- a/hw/block/virtio-blk.c
337
+++ b/hw/block/virtio-blk.c
338
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
339
{
340
VirtIOBlockIoctlReq *ioctl_req = opaque;
341
VirtIOBlockReq *req = ioctl_req->req;
342
- VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
343
+ VirtIOBlock *s = req->dev;
344
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
345
struct virtio_scsi_inhdr *scsi;
346
struct sg_io_hdr *hdr;
347
348
@@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
349
VirtIOBlockReq *req;
350
MultiReqBuffer mrb = {};
351
352
+ aio_context_acquire(blk_get_aio_context(s->blk));
353
blk_io_plug(s->blk);
354
355
do {
356
@@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
357
}
358
359
blk_io_unplug(s->blk);
360
+ aio_context_release(blk_get_aio_context(s->blk));
361
}
362
363
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
364
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
365
index XXXXXXX..XXXXXXX 100644
366
--- a/hw/scsi/virtio-scsi.c
367
+++ b/hw/scsi/virtio-scsi.c
368
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
369
{
370
VirtIOSCSIReq *req;
371
372
+ virtio_scsi_acquire(s);
373
while ((req = virtio_scsi_pop_req(s, vq))) {
374
virtio_scsi_handle_ctrl_req(s, req);
375
}
376
+ virtio_scsi_release(s);
377
}
378
379
static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
380
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
381
382
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
383
384
+ virtio_scsi_acquire(s);
385
do {
386
virtio_queue_set_notification(vq, 0);
387
388
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
389
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
390
virtio_scsi_handle_cmd_req_submit(s, req);
391
}
392
+ virtio_scsi_release(s);
393
}
394
395
static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
396
@@ -XXX,XX +XXX,XX @@ out:
397
398
void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
399
{
400
+ virtio_scsi_acquire(s);
401
if (s->events_dropped) {
402
virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
403
}
404
+ virtio_scsi_release(s);
405
}
406
407
static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
408
diff --git a/util/aio-posix.c b/util/aio-posix.c
409
index XXXXXXX..XXXXXXX 100644
410
--- a/util/aio-posix.c
411
+++ b/util/aio-posix.c
412
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
413
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
414
aio_node_check(ctx, node->is_external) &&
415
node->io_read) {
416
- aio_context_acquire(ctx);
417
node->io_read(node->opaque);
418
- aio_context_release(ctx);
419
420
/* aio_notify() does not count as progress */
421
if (node->opaque != &ctx->notifier) {
422
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
423
(revents & (G_IO_OUT | G_IO_ERR)) &&
424
aio_node_check(ctx, node->is_external) &&
425
node->io_write) {
426
- aio_context_acquire(ctx);
427
node->io_write(node->opaque);
428
- aio_context_release(ctx);
429
progress = true;
430
}
431
432
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
433
start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
434
}
435
436
- aio_context_acquire(ctx);
437
progress = try_poll_mode(ctx, blocking);
438
- aio_context_release(ctx);
439
-
440
if (!progress) {
441
assert(npfd == 0);
442
443
diff --git a/util/aio-win32.c b/util/aio-win32.c
444
index XXXXXXX..XXXXXXX 100644
445
--- a/util/aio-win32.c
446
+++ b/util/aio-win32.c
447
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
448
(revents || event_notifier_get_handle(node->e) == event) &&
449
node->io_notify) {
450
node->pfd.revents = 0;
451
- aio_context_acquire(ctx);
452
node->io_notify(node->e);
453
- aio_context_release(ctx);
454
455
/* aio_notify() does not count as progress */
456
if (node->e != &ctx->notifier) {
457
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
458
(node->io_read || node->io_write)) {
459
node->pfd.revents = 0;
460
if ((revents & G_IO_IN) && node->io_read) {
461
- aio_context_acquire(ctx);
462
node->io_read(node->opaque);
463
- aio_context_release(ctx);
464
progress = true;
465
}
466
if ((revents & G_IO_OUT) && node->io_write) {
467
- aio_context_acquire(ctx);
468
node->io_write(node->opaque);
469
- aio_context_release(ctx);
470
progress = true;
471
}
472
473
--
90
--
474
2.9.3
91
2.24.1
475
92
476
93
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
Pull the increment/decrement pair out of aio_bh_poll and into the
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
callers.
4
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
5
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Message-id: 20200120141858.587874-9-stefanha@redhat.com
8
Reviewed-by: Fam Zheng <famz@redhat.com>
8
Message-Id: <20200120141858.587874-9-stefanha@redhat.com>
9
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
10
Message-id: 20170213135235.12274-18-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
10
---
13
util/aio-posix.c | 8 +++-----
11
block/file-posix.c | 98 +++++++++++++++++++++++++++++++++++++---------
14
util/aio-win32.c | 8 ++++----
12
1 file changed, 79 insertions(+), 19 deletions(-)
15
util/async.c | 12 ++++++------
13
16
3 files changed, 13 insertions(+), 15 deletions(-)
14
diff --git a/block/file-posix.c b/block/file-posix.c
17
18
diff --git a/util/aio-posix.c b/util/aio-posix.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/util/aio-posix.c
16
--- a/block/file-posix.c
21
+++ b/util/aio-posix.c
17
+++ b/block/file-posix.c
22
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
18
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState {
23
19
bool has_write_zeroes:1;
24
void aio_dispatch(AioContext *ctx)
20
bool discard_zeroes:1;
21
bool use_linux_aio:1;
22
+ bool use_linux_io_uring:1;
23
bool page_cache_inconsistent:1;
24
bool has_fallocate;
25
bool needs_alignment;
26
@@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = {
27
{
28
.name = "aio",
29
.type = QEMU_OPT_STRING,
30
- .help = "host AIO implementation (threads, native)",
31
+ .help = "host AIO implementation (threads, native, io_uring)",
32
},
33
{
34
.name = "locking",
35
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
36
goto fail;
37
}
38
39
- aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
40
- ? BLOCKDEV_AIO_OPTIONS_NATIVE
41
- : BLOCKDEV_AIO_OPTIONS_THREADS;
42
+ if (bdrv_flags & BDRV_O_NATIVE_AIO) {
43
+ aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE;
44
+#ifdef CONFIG_LINUX_IO_URING
45
+ } else if (bdrv_flags & BDRV_O_IO_URING) {
46
+ aio_default = BLOCKDEV_AIO_OPTIONS_IO_URING;
47
+#endif
48
+ } else {
49
+ aio_default = BLOCKDEV_AIO_OPTIONS_THREADS;
50
+ }
51
+
52
aio = qapi_enum_parse(&BlockdevAioOptions_lookup,
53
qemu_opt_get(opts, "aio"),
54
aio_default, &local_err);
55
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
56
ret = -EINVAL;
57
goto fail;
58
}
59
+
60
s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
61
+#ifdef CONFIG_LINUX_IO_URING
62
+ s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
63
+#endif
64
65
locking = qapi_enum_parse(&OnOffAuto_lookup,
66
qemu_opt_get(opts, "locking"),
67
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
68
}
69
#endif /* !defined(CONFIG_LINUX_AIO) */
70
71
+#ifdef CONFIG_LINUX_IO_URING
72
+ if (s->use_linux_io_uring) {
73
+ if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
74
+ error_prepend(errp, "Unable to use io_uring: ");
75
+ goto fail;
76
+ }
77
+ }
78
+#else
79
+ if (s->use_linux_io_uring) {
80
+ error_setg(errp, "aio=io_uring was specified, but is not supported "
81
+ "in this build.");
82
+ ret = -EINVAL;
83
+ goto fail;
84
+ }
85
+#endif /* !defined(CONFIG_LINUX_IO_URING) */
86
+
87
s->has_discard = true;
88
s->has_write_zeroes = true;
89
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
90
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
91
return -EIO;
92
93
/*
94
- * Check if the underlying device requires requests to be aligned,
95
- * and if the request we are trying to submit is aligned or not.
96
- * If this is the case tell the low-level driver that it needs
97
- * to copy the buffer.
98
+ * When using O_DIRECT, the request must be aligned to be able to use
99
+ * either libaio or io_uring interface. If not, fall back to regular thread
100
+ * pool read/write code which emulates this for us if we
101
+ * set QEMU_AIO_MISALIGNED.
102
*/
103
- if (s->needs_alignment) {
104
- if (!bdrv_qiov_is_aligned(bs, qiov)) {
105
- type |= QEMU_AIO_MISALIGNED;
106
+ if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
107
+ type |= QEMU_AIO_MISALIGNED;
108
+#ifdef CONFIG_LINUX_IO_URING
109
+ } else if (s->use_linux_io_uring) {
110
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
111
+ assert(qiov->size == bytes);
112
+ return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
113
+#endif
114
#ifdef CONFIG_LINUX_AIO
115
- } else if (s->use_linux_aio) {
116
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
117
- assert(qiov->size == bytes);
118
- return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
119
+ } else if (s->use_linux_aio) {
120
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
121
+ assert(qiov->size == bytes);
122
+ return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
123
#endif
124
- }
125
}
126
127
acb = (RawPosixAIOData) {
128
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
129
130
static void raw_aio_plug(BlockDriverState *bs)
25
{
131
{
26
+ qemu_lockcnt_inc(&ctx->list_lock);
132
+ BDRVRawState __attribute__((unused)) *s = bs->opaque;
27
aio_bh_poll(ctx);
133
#ifdef CONFIG_LINUX_AIO
28
-
134
- BDRVRawState *s = bs->opaque;
29
- qemu_lockcnt_inc(&ctx->list_lock);
135
if (s->use_linux_aio) {
30
aio_dispatch_handlers(ctx);
136
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
31
qemu_lockcnt_dec(&ctx->list_lock);
137
laio_io_plug(bs, aio);
32
138
}
33
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
139
#endif
34
}
140
+#ifdef CONFIG_LINUX_IO_URING
35
141
+ if (s->use_linux_io_uring) {
36
npfd = 0;
142
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
37
- qemu_lockcnt_dec(&ctx->list_lock);
143
+ luring_io_plug(bs, aio);
38
144
+ }
39
progress |= aio_bh_poll(ctx);
145
+#endif
40
146
}
41
if (ret > 0) {
147
42
- qemu_lockcnt_inc(&ctx->list_lock);
148
static void raw_aio_unplug(BlockDriverState *bs)
43
progress |= aio_dispatch_handlers(ctx);
149
{
44
- qemu_lockcnt_dec(&ctx->list_lock);
150
+ BDRVRawState __attribute__((unused)) *s = bs->opaque;
45
}
151
#ifdef CONFIG_LINUX_AIO
46
152
- BDRVRawState *s = bs->opaque;
47
+ qemu_lockcnt_dec(&ctx->list_lock);
153
if (s->use_linux_aio) {
48
+
154
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
49
progress |= timerlistgroup_run_timers(&ctx->tlg);
155
laio_io_unplug(bs, aio);
50
156
}
51
return progress;
157
#endif
52
diff --git a/util/aio-win32.c b/util/aio-win32.c
158
+#ifdef CONFIG_LINUX_IO_URING
53
index XXXXXXX..XXXXXXX 100644
159
+ if (s->use_linux_io_uring) {
54
--- a/util/aio-win32.c
160
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
55
+++ b/util/aio-win32.c
161
+ luring_io_unplug(bs, aio);
56
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
162
+ }
57
bool progress = false;
163
+#endif
58
AioHandler *tmp;
164
}
59
165
60
- qemu_lockcnt_inc(&ctx->list_lock);
166
static int raw_co_flush_to_disk(BlockDriverState *bs)
61
-
167
@@ -XXX,XX +XXX,XX @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
62
/*
168
.aio_type = QEMU_AIO_FLUSH,
63
* We have to walk very carefully in case aio_set_fd_handler is
169
};
64
* called while we're walking.
170
65
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
171
+#ifdef CONFIG_LINUX_IO_URING
172
+ if (s->use_linux_io_uring) {
173
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
174
+ return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
175
+ }
176
+#endif
177
return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
178
}
179
180
static void raw_aio_attach_aio_context(BlockDriverState *bs,
181
AioContext *new_context)
182
{
183
+ BDRVRawState __attribute__((unused)) *s = bs->opaque;
184
#ifdef CONFIG_LINUX_AIO
185
- BDRVRawState *s = bs->opaque;
186
if (s->use_linux_aio) {
187
Error *local_err = NULL;
188
if (!aio_setup_linux_aio(new_context, &local_err)) {
189
@@ -XXX,XX +XXX,XX @@ static void raw_aio_attach_aio_context(BlockDriverState *bs,
66
}
190
}
67
}
191
}
68
192
#endif
69
- qemu_lockcnt_dec(&ctx->list_lock);
193
+#ifdef CONFIG_LINUX_IO_URING
70
return progress;
194
+ if (s->use_linux_io_uring) {
71
}
195
+ Error *local_err;
72
196
+ if (!aio_setup_linux_io_uring(new_context, &local_err)) {
73
void aio_dispatch(AioContext *ctx)
197
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
74
{
198
+ "falling back to thread pool: ");
75
+ qemu_lockcnt_inc(&ctx->list_lock);
199
+ s->use_linux_io_uring = false;
76
aio_bh_poll(ctx);
200
+ }
77
aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
201
+ }
78
+ qemu_lockcnt_dec(&ctx->list_lock);
202
+#endif
79
timerlistgroup_run_timers(&ctx->tlg);
203
}
80
}
204
81
205
static void raw_close(BlockDriverState *bs)
82
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
83
}
84
}
85
86
- qemu_lockcnt_dec(&ctx->list_lock);
87
first = true;
88
89
/* ctx->notifier is always registered. */
90
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
91
progress |= aio_dispatch_handlers(ctx, event);
92
} while (count > 0);
93
94
+ qemu_lockcnt_dec(&ctx->list_lock);
95
+
96
progress |= timerlistgroup_run_timers(&ctx->tlg);
97
return progress;
98
}
99
diff --git a/util/async.c b/util/async.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/util/async.c
102
+++ b/util/async.c
103
@@ -XXX,XX +XXX,XX @@ void aio_bh_call(QEMUBH *bh)
104
bh->cb(bh->opaque);
105
}
106
107
-/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
108
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
109
+ * The count in ctx->list_lock is incremented before the call, and is
110
+ * not affected by the call.
111
+ */
112
int aio_bh_poll(AioContext *ctx)
113
{
114
QEMUBH *bh, **bhp, *next;
115
int ret;
116
bool deleted = false;
117
118
- qemu_lockcnt_inc(&ctx->list_lock);
119
-
120
ret = 0;
121
for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
122
next = atomic_rcu_read(&bh->next);
123
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
124
125
/* remove deleted bhs */
126
if (!deleted) {
127
- qemu_lockcnt_dec(&ctx->list_lock);
128
return ret;
129
}
130
131
- if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
132
+ if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
133
bhp = &ctx->first_bh;
134
while (*bhp) {
135
bh = *bhp;
136
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
137
bhp = &bh->next;
138
}
139
}
140
- qemu_lockcnt_unlock(&ctx->list_lock);
141
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
142
}
143
return ret;
144
}
145
--
206
--
146
2.9.3
207
2.24.1
147
208
148
209
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
This is in preparation for making qio_channel_yield work on
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
AioContexts other than the main one.
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
6
Message-id: 20200120141858.587874-10-stefanha@redhat.com
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-Id: <20200120141858.587874-10-stefanha@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Message-id: 20170213135235.12274-6-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
9
---
13
include/io/channel.h | 25 +++++++++++++++++++++++++
10
block/io_uring.c | 23 ++++++++++++++++++++---
14
io/channel-command.c | 13 +++++++++++++
11
block/trace-events | 12 ++++++++++++
15
io/channel-file.c | 11 +++++++++++
12
2 files changed, 32 insertions(+), 3 deletions(-)
16
io/channel-socket.c | 16 +++++++++++-----
17
io/channel-tls.c | 12 ++++++++++++
18
io/channel-watch.c | 6 ++++++
19
io/channel.c | 11 +++++++++++
20
7 files changed, 89 insertions(+), 5 deletions(-)
21
13
22
diff --git a/include/io/channel.h b/include/io/channel.h
14
diff --git a/block/io_uring.c b/block/io_uring.c
23
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
24
--- a/include/io/channel.h
16
--- a/block/io_uring.c
25
+++ b/include/io/channel.h
17
+++ b/block/io_uring.c
26
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@
27
19
#include "block/raw-aio.h"
28
#include "qemu-common.h"
20
#include "qemu/coroutine.h"
29
#include "qom/object.h"
21
#include "qapi/error.h"
30
+#include "block/aio.h"
22
+#include "trace.h"
31
23
32
#define TYPE_QIO_CHANNEL "qio-channel"
24
/* io_uring ring size */
33
#define QIO_CHANNEL(obj) \
25
#define MAX_ENTRIES 128
34
@@ -XXX,XX +XXX,XX @@ struct QIOChannelClass {
26
@@ -XXX,XX +XXX,XX @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
35
off_t offset,
27
QEMUIOVector *resubmit_qiov;
36
int whence,
28
size_t remaining;
37
Error **errp);
29
38
+ void (*io_set_aio_fd_handler)(QIOChannel *ioc,
30
+ trace_luring_resubmit_short_read(s, luringcb, nread);
39
+ AioContext *ctx,
40
+ IOHandler *io_read,
41
+ IOHandler *io_write,
42
+ void *opaque);
43
};
44
45
/* General I/O handling functions */
46
@@ -XXX,XX +XXX,XX @@ void qio_channel_yield(QIOChannel *ioc,
47
void qio_channel_wait(QIOChannel *ioc,
48
GIOCondition condition);
49
50
+/**
51
+ * qio_channel_set_aio_fd_handler:
52
+ * @ioc: the channel object
53
+ * @ctx: the AioContext to set the handlers on
54
+ * @io_read: the read handler
55
+ * @io_write: the write handler
56
+ * @opaque: the opaque value passed to the handler
57
+ *
58
+ * This is used internally by qio_channel_yield(). It can
59
+ * be used by channel implementations to forward the handlers
60
+ * to another channel (e.g. from #QIOChannelTLS to the
61
+ * underlying socket).
62
+ */
63
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
64
+ AioContext *ctx,
65
+ IOHandler *io_read,
66
+ IOHandler *io_write,
67
+ void *opaque);
68
+
31
+
69
#endif /* QIO_CHANNEL_H */
32
/* Update read position */
70
diff --git a/io/channel-command.c b/io/channel-command.c
33
luringcb->total_read = nread;
71
index XXXXXXX..XXXXXXX 100644
34
remaining = luringcb->qiov->size - luringcb->total_read;
72
--- a/io/channel-command.c
35
@@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s)
73
+++ b/io/channel-command.c
36
74
@@ -XXX,XX +XXX,XX @@ static int qio_channel_command_close(QIOChannel *ioc,
37
/* Change counters one-by-one because we can be nested. */
38
s->io_q.in_flight--;
39
+ trace_luring_process_completion(s, luringcb, ret);
40
41
/* total_read is non-zero only for resubmitted read requests */
42
total_bytes = ret + luringcb->total_read;
43
@@ -XXX,XX +XXX,XX @@ static int ioq_submit(LuringState *s)
44
QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next);
45
}
46
ret = io_uring_submit(&s->ring);
47
+ trace_luring_io_uring_submit(s, ret);
48
/* Prevent infinite loop if submission is refused */
49
if (ret <= 0) {
50
if (ret == -EAGAIN) {
51
@@ -XXX,XX +XXX,XX @@ static void ioq_init(LuringQueue *io_q)
52
53
void luring_io_plug(BlockDriverState *bs, LuringState *s)
54
{
55
+ trace_luring_io_plug(s);
56
s->io_q.plugged++;
75
}
57
}
76
58
77
59
void luring_io_unplug(BlockDriverState *bs, LuringState *s)
78
+static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
79
+ AioContext *ctx,
80
+ IOHandler *io_read,
81
+ IOHandler *io_write,
82
+ void *opaque)
83
+{
84
+ QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
85
+ aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque);
86
+ aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, opaque);
87
+}
88
+
89
+
90
static GSource *qio_channel_command_create_watch(QIOChannel *ioc,
91
GIOCondition condition)
92
{
60
{
93
@@ -XXX,XX +XXX,XX @@ static void qio_channel_command_class_init(ObjectClass *klass,
61
assert(s->io_q.plugged);
94
ioc_klass->io_set_blocking = qio_channel_command_set_blocking;
62
+ trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
95
ioc_klass->io_close = qio_channel_command_close;
63
+ s->io_q.in_queue, s->io_q.in_flight);
96
ioc_klass->io_create_watch = qio_channel_command_create_watch;
64
if (--s->io_q.plugged == 0 &&
97
+ ioc_klass->io_set_aio_fd_handler = qio_channel_command_set_aio_fd_handler;
65
!s->io_q.blocked && s->io_q.in_queue > 0) {
98
}
66
ioq_submit(s);
99
67
@@ -XXX,XX +XXX,XX @@ void luring_io_unplug(BlockDriverState *bs, LuringState *s)
100
static const TypeInfo qio_channel_command_info = {
68
static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
101
diff --git a/io/channel-file.c b/io/channel-file.c
69
uint64_t offset, int type)
102
index XXXXXXX..XXXXXXX 100644
103
--- a/io/channel-file.c
104
+++ b/io/channel-file.c
105
@@ -XXX,XX +XXX,XX @@ static int qio_channel_file_close(QIOChannel *ioc,
106
}
107
108
109
+static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
110
+ AioContext *ctx,
111
+ IOHandler *io_read,
112
+ IOHandler *io_write,
113
+ void *opaque)
114
+{
115
+ QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
116
+ aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque);
117
+}
118
+
119
static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
120
GIOCondition condition)
121
{
70
{
122
@@ -XXX,XX +XXX,XX @@ static void qio_channel_file_class_init(ObjectClass *klass,
71
+ int ret;
123
ioc_klass->io_seek = qio_channel_file_seek;
72
struct io_uring_sqe *sqes = &luringcb->sqeq;
124
ioc_klass->io_close = qio_channel_file_close;
73
125
ioc_klass->io_create_watch = qio_channel_file_create_watch;
74
switch (type) {
126
+ ioc_klass->io_set_aio_fd_handler = qio_channel_file_set_aio_fd_handler;
75
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
127
}
76
128
77
QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
129
static const TypeInfo qio_channel_file_info = {
78
s->io_q.in_queue++;
130
diff --git a/io/channel-socket.c b/io/channel-socket.c
79
-
131
index XXXXXXX..XXXXXXX 100644
80
+ trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged,
132
--- a/io/channel-socket.c
81
+ s->io_q.in_queue, s->io_q.in_flight);
133
+++ b/io/channel-socket.c
82
if (!s->io_q.blocked &&
134
@@ -XXX,XX +XXX,XX @@ qio_channel_socket_set_blocking(QIOChannel *ioc,
83
(!s->io_q.plugged ||
135
qemu_set_block(sioc->fd);
84
s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
136
} else {
85
- return ioq_submit(s);
137
qemu_set_nonblock(sioc->fd);
86
+ ret = ioq_submit(s);
138
-#ifdef WIN32
87
+ trace_luring_do_submit_done(s, ret);
139
- WSAEventSelect(sioc->fd, ioc->event,
88
+ return ret;
140
- FD_READ | FD_ACCEPT | FD_CLOSE |
141
- FD_CONNECT | FD_WRITE | FD_OOB);
142
-#endif
143
}
89
}
144
return 0;
90
return 0;
145
}
91
}
146
@@ -XXX,XX +XXX,XX @@ qio_channel_socket_shutdown(QIOChannel *ioc,
92
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
147
return 0;
93
.qiov = qiov,
94
.is_read = (type == QEMU_AIO_READ),
95
};
96
-
97
+ trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
98
+ type);
99
ret = luring_do_submit(fd, &luringcb, s, offset, type);
100
+
101
if (ret < 0) {
102
return ret;
103
}
104
@@ -XXX,XX +XXX,XX @@ LuringState *luring_init(Error **errp)
105
LuringState *s = g_new0(LuringState, 1);
106
struct io_uring *ring = &s->ring;
107
108
+ trace_luring_init_state(s, sizeof(*s));
109
+
110
rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
111
if (rc < 0) {
112
error_setg_errno(errp, errno, "failed to init linux io_uring ring");
113
@@ -XXX,XX +XXX,XX @@ void luring_cleanup(LuringState *s)
114
{
115
io_uring_queue_exit(&s->ring);
116
g_free(s);
117
+ trace_luring_cleanup_state(s);
148
}
118
}
149
119
diff --git a/block/trace-events b/block/trace-events
150
+static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
120
index XXXXXXX..XXXXXXX 100644
151
+ AioContext *ctx,
121
--- a/block/trace-events
152
+ IOHandler *io_read,
122
+++ b/block/trace-events
153
+ IOHandler *io_write,
123
@@ -XXX,XX +XXX,XX @@ qmp_block_stream(void *bs) "bs %p"
154
+ void *opaque)
124
file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
155
+{
125
file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64
156
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
126
157
+ aio_set_fd_handler(ctx, sioc->fd, false, io_read, io_write, NULL, opaque);
127
+#io_uring.c
158
+}
128
+luring_init_state(void *s, size_t size) "s %p size %zu"
129
+luring_cleanup_state(void *s) "%p freed"
130
+luring_io_plug(void *s) "LuringState %p plug"
131
+luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
132
+luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
133
+luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d"
134
+luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d"
135
+luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d"
136
+luring_io_uring_submit(void *s, int ret) "LuringState %p ret %d"
137
+luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p luringcb %p nread %d"
159
+
138
+
160
static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
139
# qcow2.c
161
GIOCondition condition)
140
qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
162
{
141
qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
163
@@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_class_init(ObjectClass *klass,
164
ioc_klass->io_set_cork = qio_channel_socket_set_cork;
165
ioc_klass->io_set_delay = qio_channel_socket_set_delay;
166
ioc_klass->io_create_watch = qio_channel_socket_create_watch;
167
+ ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
168
}
169
170
static const TypeInfo qio_channel_socket_info = {
171
diff --git a/io/channel-tls.c b/io/channel-tls.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/io/channel-tls.c
174
+++ b/io/channel-tls.c
175
@@ -XXX,XX +XXX,XX @@ static int qio_channel_tls_close(QIOChannel *ioc,
176
return qio_channel_close(tioc->master, errp);
177
}
178
179
+static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc,
180
+ AioContext *ctx,
181
+ IOHandler *io_read,
182
+ IOHandler *io_write,
183
+ void *opaque)
184
+{
185
+ QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
186
+
187
+ qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
188
+}
189
+
190
static GSource *qio_channel_tls_create_watch(QIOChannel *ioc,
191
GIOCondition condition)
192
{
193
@@ -XXX,XX +XXX,XX @@ static void qio_channel_tls_class_init(ObjectClass *klass,
194
ioc_klass->io_close = qio_channel_tls_close;
195
ioc_klass->io_shutdown = qio_channel_tls_shutdown;
196
ioc_klass->io_create_watch = qio_channel_tls_create_watch;
197
+ ioc_klass->io_set_aio_fd_handler = qio_channel_tls_set_aio_fd_handler;
198
}
199
200
static const TypeInfo qio_channel_tls_info = {
201
diff --git a/io/channel-watch.c b/io/channel-watch.c
202
index XXXXXXX..XXXXXXX 100644
203
--- a/io/channel-watch.c
204
+++ b/io/channel-watch.c
205
@@ -XXX,XX +XXX,XX @@ GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
206
GSource *source;
207
QIOChannelSocketSource *ssource;
208
209
+#ifdef WIN32
210
+ WSAEventSelect(socket, ioc->event,
211
+ FD_READ | FD_ACCEPT | FD_CLOSE |
212
+ FD_CONNECT | FD_WRITE | FD_OOB);
213
+#endif
214
+
215
source = g_source_new(&qio_channel_socket_source_funcs,
216
sizeof(QIOChannelSocketSource));
217
ssource = (QIOChannelSocketSource *)source;
218
diff --git a/io/channel.c b/io/channel.c
219
index XXXXXXX..XXXXXXX 100644
220
--- a/io/channel.c
221
+++ b/io/channel.c
222
@@ -XXX,XX +XXX,XX @@ GSource *qio_channel_create_watch(QIOChannel *ioc,
223
}
224
225
226
+void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
227
+ AioContext *ctx,
228
+ IOHandler *io_read,
229
+ IOHandler *io_write,
230
+ void *opaque)
231
+{
232
+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
233
+
234
+ klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque);
235
+}
236
+
237
guint qio_channel_add_watch(QIOChannel *ioc,
238
GIOCondition condition,
239
QIOChannelFunc func,
240
--
142
--
241
2.9.3
143
2.24.1
242
144
243
145
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
Add two implementations of the same benchmark as the previous patch,
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
but using pthreads. One uses a normal QemuMutex, the other is Linux
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
only and implements a fair mutex based on MCS locks and futexes.
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
This shows that the slower performance of the 5-thread case is due to
6
Message-id: 20200120141858.587874-11-stefanha@redhat.com
7
the fairness of CoMutex, rather than to coroutines. If fairness does
7
Message-Id: <20200120141858.587874-11-stefanha@redhat.com>
8
not matter, as is the case with two threads, CoMutex can actually be
9
faster than pthreads.
10
11
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Message-id: 20170213181244.16297-4-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
9
---
16
tests/test-aio-multithread.c | 164 +++++++++++++++++++++++++++++++++++++++++++
10
block/io_uring.c | 17 ++++++++++++++++-
17
1 file changed, 164 insertions(+)
11
1 file changed, 16 insertions(+), 1 deletion(-)
18
12
19
diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c
13
diff --git a/block/io_uring.c b/block/io_uring.c
20
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
21
--- a/tests/test-aio-multithread.c
15
--- a/block/io_uring.c
22
+++ b/tests/test-aio-multithread.c
16
+++ b/block/io_uring.c
23
@@ -XXX,XX +XXX,XX @@ static void test_multi_co_mutex_2_30(void)
17
@@ -XXX,XX +XXX,XX @@ static void qemu_luring_completion_cb(void *opaque)
24
test_multi_co_mutex(2, 30);
18
luring_process_completions_and_submit(s);
25
}
19
}
26
20
27
+/* Same test with fair mutexes, for performance comparison. */
21
+static bool qemu_luring_poll_cb(void *opaque)
22
+{
23
+ LuringState *s = opaque;
24
+ struct io_uring_cqe *cqes;
28
+
25
+
29
+#ifdef CONFIG_LINUX
26
+ if (io_uring_peek_cqe(&s->ring, &cqes) == 0) {
30
+#include "qemu/futex.h"
27
+ if (cqes) {
31
+
28
+ luring_process_completions_and_submit(s);
32
+/* The nodes for the mutex reside in this structure (on which we try to avoid
29
+ return true;
33
+ * false sharing). The head of the mutex is in the "mutex_head" variable.
34
+ */
35
+static struct {
36
+ int next, locked;
37
+ int padding[14];
38
+} nodes[NUM_CONTEXTS] __attribute__((__aligned__(64)));
39
+
40
+static int mutex_head = -1;
41
+
42
+static void mcs_mutex_lock(void)
43
+{
44
+ int prev;
45
+
46
+ nodes[id].next = -1;
47
+ nodes[id].locked = 1;
48
+ prev = atomic_xchg(&mutex_head, id);
49
+ if (prev != -1) {
50
+ atomic_set(&nodes[prev].next, id);
51
+ qemu_futex_wait(&nodes[id].locked, 1);
52
+ }
53
+}
54
+
55
+static void mcs_mutex_unlock(void)
56
+{
57
+ int next;
58
+ if (nodes[id].next == -1) {
59
+ if (atomic_read(&mutex_head) == id &&
60
+ atomic_cmpxchg(&mutex_head, id, -1) == id) {
61
+ /* Last item in the list, exit. */
62
+ return;
63
+ }
64
+ while (atomic_read(&nodes[id].next) == -1) {
65
+ /* mcs_mutex_lock did the xchg, but has not updated
66
+ * nodes[prev].next yet.
67
+ */
68
+ }
30
+ }
69
+ }
31
+ }
70
+
32
+
71
+ /* Wake up the next in line. */
33
+ return false;
72
+ next = nodes[id].next;
73
+ nodes[next].locked = 0;
74
+ qemu_futex_wake(&nodes[next].locked, 1);
75
+}
34
+}
76
+
35
+
77
+static void test_multi_fair_mutex_entry(void *opaque)
36
static void ioq_init(LuringQueue *io_q)
78
+{
37
{
79
+ while (!atomic_mb_read(&now_stopping)) {
38
QSIMPLEQ_INIT(&io_q->submit_queue);
80
+ mcs_mutex_lock();
39
@@ -XXX,XX +XXX,XX @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
81
+ counter++;
40
s->aio_context = new_context;
82
+ mcs_mutex_unlock();
41
s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
83
+ atomic_inc(&atomic_counter);
42
aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
84
+ }
43
- qemu_luring_completion_cb, NULL, NULL, s);
85
+ atomic_dec(&running);
44
+ qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s);
86
+}
87
+
88
+static void test_multi_fair_mutex(int threads, int seconds)
89
+{
90
+ int i;
91
+
92
+ assert(mutex_head == -1);
93
+ counter = 0;
94
+ atomic_counter = 0;
95
+ now_stopping = false;
96
+
97
+ create_aio_contexts();
98
+ assert(threads <= NUM_CONTEXTS);
99
+ running = threads;
100
+ for (i = 0; i < threads; i++) {
101
+ Coroutine *co1 = qemu_coroutine_create(test_multi_fair_mutex_entry, NULL);
102
+ aio_co_schedule(ctx[i], co1);
103
+ }
104
+
105
+ g_usleep(seconds * 1000000);
106
+
107
+ atomic_mb_set(&now_stopping, true);
108
+ while (running > 0) {
109
+ g_usleep(100000);
110
+ }
111
+
112
+ join_aio_contexts();
113
+ g_test_message("%d iterations/second\n", counter / seconds);
114
+ g_assert_cmpint(counter, ==, atomic_counter);
115
+}
116
+
117
+static void test_multi_fair_mutex_1(void)
118
+{
119
+ test_multi_fair_mutex(NUM_CONTEXTS, 1);
120
+}
121
+
122
+static void test_multi_fair_mutex_10(void)
123
+{
124
+ test_multi_fair_mutex(NUM_CONTEXTS, 10);
125
+}
126
+#endif
127
+
128
+/* Same test with pthread mutexes, for performance comparison and
129
+ * portability. */
130
+
131
+static QemuMutex mutex;
132
+
133
+static void test_multi_mutex_entry(void *opaque)
134
+{
135
+ while (!atomic_mb_read(&now_stopping)) {
136
+ qemu_mutex_lock(&mutex);
137
+ counter++;
138
+ qemu_mutex_unlock(&mutex);
139
+ atomic_inc(&atomic_counter);
140
+ }
141
+ atomic_dec(&running);
142
+}
143
+
144
+static void test_multi_mutex(int threads, int seconds)
145
+{
146
+ int i;
147
+
148
+ qemu_mutex_init(&mutex);
149
+ counter = 0;
150
+ atomic_counter = 0;
151
+ now_stopping = false;
152
+
153
+ create_aio_contexts();
154
+ assert(threads <= NUM_CONTEXTS);
155
+ running = threads;
156
+ for (i = 0; i < threads; i++) {
157
+ Coroutine *co1 = qemu_coroutine_create(test_multi_mutex_entry, NULL);
158
+ aio_co_schedule(ctx[i], co1);
159
+ }
160
+
161
+ g_usleep(seconds * 1000000);
162
+
163
+ atomic_mb_set(&now_stopping, true);
164
+ while (running > 0) {
165
+ g_usleep(100000);
166
+ }
167
+
168
+ join_aio_contexts();
169
+ g_test_message("%d iterations/second\n", counter / seconds);
170
+ g_assert_cmpint(counter, ==, atomic_counter);
171
+}
172
+
173
+static void test_multi_mutex_1(void)
174
+{
175
+ test_multi_mutex(NUM_CONTEXTS, 1);
176
+}
177
+
178
+static void test_multi_mutex_10(void)
179
+{
180
+ test_multi_mutex(NUM_CONTEXTS, 10);
181
+}
182
+
183
/* End of tests. */
184
185
int main(int argc, char **argv)
186
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
187
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_1);
188
g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_1);
189
g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_3);
190
+#ifdef CONFIG_LINUX
191
+ g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_1);
192
+#endif
193
+ g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_1);
194
} else {
195
g_test_add_func("/aio/multi/schedule", test_multi_co_schedule_10);
196
g_test_add_func("/aio/multi/mutex/contended", test_multi_co_mutex_10);
197
g_test_add_func("/aio/multi/mutex/handoff", test_multi_co_mutex_2_30);
198
+#ifdef CONFIG_LINUX
199
+ g_test_add_func("/aio/multi/mutex/mcs", test_multi_fair_mutex_10);
200
+#endif
201
+ g_test_add_func("/aio/multi/mutex/pthread", test_multi_mutex_10);
202
}
203
return g_test_run();
204
}
45
}
46
47
LuringState *luring_init(Error **errp)
205
--
48
--
206
2.9.3
49
2.24.1
207
50
208
51
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
Once the thread pool starts using aio_co_wake, it will also need
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
qemu_get_current_aio_context(). Make test-thread-pool create
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
an AioContext with qemu_init_main_loop, so that stubs/iothread.c
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
and tests/iothread.c can provide the rest.
6
Message-id: 20200120141858.587874-12-stefanha@redhat.com
7
7
Message-Id: <20200120141858.587874-12-stefanha@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Message-id: 20170213135235.12274-5-pbonzini@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
9
---
14
tests/test-thread-pool.c | 12 +++---------
10
qemu-io.c | 25 +++++++++++++++++++++----
15
1 file changed, 3 insertions(+), 9 deletions(-)
11
1 file changed, 21 insertions(+), 4 deletions(-)
16
12
17
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
13
diff --git a/qemu-io.c b/qemu-io.c
18
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/test-thread-pool.c
15
--- a/qemu-io.c
20
+++ b/tests/test-thread-pool.c
16
+++ b/qemu-io.c
21
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static void open_help(void)
22
#include "qapi/error.h"
18
" -C, -- use copy-on-read\n"
23
#include "qemu/timer.h"
19
" -n, -- disable host cache, short for -t none\n"
24
#include "qemu/error-report.h"
20
" -U, -- force shared permissions\n"
25
+#include "qemu/main-loop.h"
21
-" -k, -- use kernel AIO implementation (on Linux only)\n"
26
22
+" -k, -- use kernel AIO implementation (Linux only, prefer use of -i)\n"
27
static AioContext *ctx;
23
+" -i, -- use AIO mode (threads, native or io_uring)\n"
28
static ThreadPool *pool;
24
" -t, -- use the given cache mode for the image\n"
29
@@ -XXX,XX +XXX,XX @@ static void test_cancel_async(void)
25
" -d, -- use the given discard mode for the image\n"
26
" -o, -- options to be given to the block driver"
27
@@ -XXX,XX +XXX,XX @@ static int open_f(BlockBackend *blk, int argc, char **argv)
28
QDict *opts;
29
bool force_share = false;
30
31
- while ((c = getopt(argc, argv, "snCro:kt:d:U")) != -1) {
32
+ while ((c = getopt(argc, argv, "snCro:ki:t:d:U")) != -1) {
33
switch (c) {
34
case 's':
35
flags |= BDRV_O_SNAPSHOT;
36
@@ -XXX,XX +XXX,XX @@ static int open_f(BlockBackend *blk, int argc, char **argv)
37
return -EINVAL;
38
}
39
break;
40
+ case 'i':
41
+ if (bdrv_parse_aio(optarg, &flags) < 0) {
42
+ error_report("Invalid aio option: %s", optarg);
43
+ qemu_opts_reset(&empty_opts);
44
+ return -EINVAL;
45
+ }
46
+ break;
47
case 'o':
48
if (imageOpts) {
49
printf("--image-opts and 'open -o' are mutually exclusive\n");
50
@@ -XXX,XX +XXX,XX @@ static void usage(const char *name)
51
" -n, --nocache disable host cache, short for -t none\n"
52
" -C, --copy-on-read enable copy-on-read\n"
53
" -m, --misalign misalign allocations for O_DIRECT\n"
54
-" -k, --native-aio use kernel AIO implementation (on Linux only)\n"
55
+" -k, --native-aio use kernel AIO implementation\n"
56
+" (Linux only, prefer use of -i)\n"
57
+" -i, --aio=MODE use AIO mode (threads, native or io_uring)\n"
58
" -t, --cache=MODE use the given cache mode for the image\n"
59
" -d, --discard=MODE use the given discard mode for the image\n"
60
" -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
61
@@ -XXX,XX +XXX,XX @@ static QemuOptsList file_opts = {
30
int main(int argc, char **argv)
62
int main(int argc, char **argv)
31
{
63
{
32
int ret;
64
int readonly = 0;
33
- Error *local_error = NULL;
65
- const char *sopt = "hVc:d:f:rsnCmkt:T:U";
34
66
+ const char *sopt = "hVc:d:f:rsnCmki:t:T:U";
35
- init_clocks();
67
const struct option lopt[] = {
36
-
68
{ "help", no_argument, NULL, 'h' },
37
- ctx = aio_context_new(&local_error);
69
{ "version", no_argument, NULL, 'V' },
38
- if (!ctx) {
39
- error_reportf_err(local_error, "Failed to create AIO Context: ");
40
- exit(1);
41
- }
42
+ qemu_init_main_loop(&error_abort);
43
+ ctx = qemu_get_current_aio_context();
44
pool = aio_get_thread_pool(ctx);
45
46
g_test_init(&argc, &argv, NULL);
47
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
70
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
48
71
{ "copy-on-read", no_argument, NULL, 'C' },
49
ret = g_test_run();
72
{ "misalign", no_argument, NULL, 'm' },
50
73
{ "native-aio", no_argument, NULL, 'k' },
51
- aio_context_unref(ctx);
74
+ { "aio", required_argument, NULL, 'i' },
52
return ret;
75
{ "discard", required_argument, NULL, 'd' },
53
}
76
{ "cache", required_argument, NULL, 't' },
77
{ "trace", required_argument, NULL, 'T' },
78
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
79
case 'k':
80
flags |= BDRV_O_NATIVE_AIO;
81
break;
82
+ case 'i':
83
+ if (bdrv_parse_aio(optarg, &flags) < 0) {
84
+ error_report("Invalid aio option: %s", optarg);
85
+ exit(1);
86
+ }
87
+ break;
88
case 't':
89
if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) < 0) {
90
error_report("Invalid cache option: %s", optarg);
54
--
91
--
55
2.9.3
92
2.24.1
56
93
57
94
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
Support separate coroutines for reading and writing, and place the
4
read/write handlers on the AioContext that the QIOChannel is registered
5
with.
6
7
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Message-id: 20170213135235.12274-7-pbonzini@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
include/io/channel.h | 47 ++++++++++++++++++++++++++--
15
io/channel.c | 86 +++++++++++++++++++++++++++++++++++++++-------------
16
2 files changed, 109 insertions(+), 24 deletions(-)
17
18
diff --git a/include/io/channel.h b/include/io/channel.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/io/channel.h
21
+++ b/include/io/channel.h
22
@@ -XXX,XX +XXX,XX @@
23
24
#include "qemu-common.h"
25
#include "qom/object.h"
26
+#include "qemu/coroutine.h"
27
#include "block/aio.h"
28
29
#define TYPE_QIO_CHANNEL "qio-channel"
30
@@ -XXX,XX +XXX,XX @@ struct QIOChannel {
31
Object parent;
32
unsigned int features; /* bitmask of QIOChannelFeatures */
33
char *name;
34
+ AioContext *ctx;
35
+ Coroutine *read_coroutine;
36
+ Coroutine *write_coroutine;
37
#ifdef _WIN32
38
HANDLE event; /* For use with GSource on Win32 */
39
#endif
40
@@ -XXX,XX +XXX,XX @@ guint qio_channel_add_watch(QIOChannel *ioc,
41
42
43
/**
44
+ * qio_channel_attach_aio_context:
45
+ * @ioc: the channel object
46
+ * @ctx: the #AioContext to set the handlers on
47
+ *
48
+ * Request that qio_channel_yield() sets I/O handlers on
49
+ * the given #AioContext. If @ctx is %NULL, qio_channel_yield()
50
+ * uses QEMU's main thread event loop.
51
+ *
52
+ * You can move a #QIOChannel from one #AioContext to another even if
53
+ * I/O handlers are set for a coroutine. However, #QIOChannel provides
54
+ * no synchronization between the calls to qio_channel_yield() and
55
+ * qio_channel_attach_aio_context().
56
+ *
57
+ * Therefore you should first call qio_channel_detach_aio_context()
58
+ * to ensure that the coroutine is not entered concurrently. Then,
59
+ * while the coroutine has yielded, call qio_channel_attach_aio_context(),
60
+ * and then aio_co_schedule() to place the coroutine on the new
61
+ * #AioContext. The calls to qio_channel_detach_aio_context()
62
+ * and qio_channel_attach_aio_context() should be protected with
63
+ * aio_context_acquire() and aio_context_release().
64
+ */
65
+void qio_channel_attach_aio_context(QIOChannel *ioc,
66
+ AioContext *ctx);
67
+
68
+/**
69
+ * qio_channel_detach_aio_context:
70
+ * @ioc: the channel object
71
+ *
72
+ * Disable any I/O handlers set by qio_channel_yield(). With the
73
+ * help of aio_co_schedule(), this allows moving a coroutine that was
74
+ * paused by qio_channel_yield() to another context.
75
+ */
76
+void qio_channel_detach_aio_context(QIOChannel *ioc);
77
+
78
+/**
79
* qio_channel_yield:
80
* @ioc: the channel object
81
* @condition: the I/O condition to wait for
82
*
83
- * Yields execution from the current coroutine until
84
- * the condition indicated by @condition becomes
85
- * available.
86
+ * Yields execution from the current coroutine until the condition
87
+ * indicated by @condition becomes available. @condition must
88
+ * be either %G_IO_IN or %G_IO_OUT; it cannot contain both. In
89
+ * addition, no two coroutines can be waiting on the same condition
90
+ * and channel at the same time.
91
*
92
* This must only be called from coroutine context
93
*/
94
diff --git a/io/channel.c b/io/channel.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/io/channel.c
97
+++ b/io/channel.c
98
@@ -XXX,XX +XXX,XX @@
99
#include "qemu/osdep.h"
100
#include "io/channel.h"
101
#include "qapi/error.h"
102
-#include "qemu/coroutine.h"
103
+#include "qemu/main-loop.h"
104
105
bool qio_channel_has_feature(QIOChannel *ioc,
106
QIOChannelFeature feature)
107
@@ -XXX,XX +XXX,XX @@ off_t qio_channel_io_seek(QIOChannel *ioc,
108
}
109
110
111
-typedef struct QIOChannelYieldData QIOChannelYieldData;
112
-struct QIOChannelYieldData {
113
- QIOChannel *ioc;
114
- Coroutine *co;
115
-};
116
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc);
117
118
+static void qio_channel_restart_read(void *opaque)
119
+{
120
+ QIOChannel *ioc = opaque;
121
+ Coroutine *co = ioc->read_coroutine;
122
+
123
+ ioc->read_coroutine = NULL;
124
+ qio_channel_set_aio_fd_handlers(ioc);
125
+ aio_co_wake(co);
126
+}
127
128
-static gboolean qio_channel_yield_enter(QIOChannel *ioc,
129
- GIOCondition condition,
130
- gpointer opaque)
131
+static void qio_channel_restart_write(void *opaque)
132
{
133
- QIOChannelYieldData *data = opaque;
134
- qemu_coroutine_enter(data->co);
135
- return FALSE;
136
+ QIOChannel *ioc = opaque;
137
+ Coroutine *co = ioc->write_coroutine;
138
+
139
+ ioc->write_coroutine = NULL;
140
+ qio_channel_set_aio_fd_handlers(ioc);
141
+ aio_co_wake(co);
142
}
143
144
+static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
145
+{
146
+ IOHandler *rd_handler = NULL, *wr_handler = NULL;
147
+ AioContext *ctx;
148
+
149
+ if (ioc->read_coroutine) {
150
+ rd_handler = qio_channel_restart_read;
151
+ }
152
+ if (ioc->write_coroutine) {
153
+ wr_handler = qio_channel_restart_write;
154
+ }
155
+
156
+ ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
157
+ qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
158
+}
159
+
160
+void qio_channel_attach_aio_context(QIOChannel *ioc,
161
+ AioContext *ctx)
162
+{
163
+ AioContext *old_ctx;
164
+ if (ioc->ctx == ctx) {
165
+ return;
166
+ }
167
+
168
+ old_ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
169
+ qio_channel_set_aio_fd_handler(ioc, old_ctx, NULL, NULL, NULL);
170
+ ioc->ctx = ctx;
171
+ qio_channel_set_aio_fd_handlers(ioc);
172
+}
173
+
174
+void qio_channel_detach_aio_context(QIOChannel *ioc)
175
+{
176
+ ioc->read_coroutine = NULL;
177
+ ioc->write_coroutine = NULL;
178
+ qio_channel_set_aio_fd_handlers(ioc);
179
+ ioc->ctx = NULL;
180
+}
181
182
void coroutine_fn qio_channel_yield(QIOChannel *ioc,
183
GIOCondition condition)
184
{
185
- QIOChannelYieldData data;
186
-
187
assert(qemu_in_coroutine());
188
- data.ioc = ioc;
189
- data.co = qemu_coroutine_self();
190
- qio_channel_add_watch(ioc,
191
- condition,
192
- qio_channel_yield_enter,
193
- &data,
194
- NULL);
195
+ if (condition == G_IO_IN) {
196
+ assert(!ioc->read_coroutine);
197
+ ioc->read_coroutine = qemu_coroutine_self();
198
+ } else if (condition == G_IO_OUT) {
199
+ assert(!ioc->write_coroutine);
200
+ ioc->write_coroutine = qemu_coroutine_self();
201
+ } else {
202
+ abort();
203
+ }
204
+ qio_channel_set_aio_fd_handlers(ioc);
205
qemu_coroutine_yield();
206
}
207
208
--
209
2.9.3
210
211
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
In the client, read the reply headers from a coroutine, switching the
4
read side between the "read header" coroutine and the I/O coroutine that
5
reads the body of the reply.
6
7
In the server, if the server can read more requests it will create a new
8
"read request" coroutine as soon as a request has been read. Otherwise,
9
the new coroutine is created in nbd_request_put.
10
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
14
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
15
Message-id: 20170213135235.12274-8-pbonzini@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
18
block/nbd-client.h | 2 +-
19
block/nbd-client.c | 117 ++++++++++++++++++++++++-----------------------------
20
nbd/client.c | 2 +-
21
nbd/common.c | 9 +----
22
nbd/server.c | 94 +++++++++++++-----------------------------
23
5 files changed, 83 insertions(+), 141 deletions(-)
24
25
diff --git a/block/nbd-client.h b/block/nbd-client.h
26
index XXXXXXX..XXXXXXX 100644
27
--- a/block/nbd-client.h
28
+++ b/block/nbd-client.h
29
@@ -XXX,XX +XXX,XX @@ typedef struct NBDClientSession {
30
31
CoMutex send_mutex;
32
CoQueue free_sema;
33
- Coroutine *send_coroutine;
34
+ Coroutine *read_reply_co;
35
int in_flight;
36
37
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
38
diff --git a/block/nbd-client.c b/block/nbd-client.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/block/nbd-client.c
41
+++ b/block/nbd-client.c
42
@@ -XXX,XX +XXX,XX @@
43
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
44
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
45
46
-static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
47
+static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
48
{
49
+ NBDClientSession *s = nbd_get_client_session(bs);
50
int i;
51
52
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
53
@@ -XXX,XX +XXX,XX @@ static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
54
qemu_coroutine_enter(s->recv_coroutine[i]);
55
}
56
}
57
+ BDRV_POLL_WHILE(bs, s->read_reply_co);
58
}
59
60
static void nbd_teardown_connection(BlockDriverState *bs)
61
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
62
qio_channel_shutdown(client->ioc,
63
QIO_CHANNEL_SHUTDOWN_BOTH,
64
NULL);
65
- nbd_recv_coroutines_enter_all(client);
66
+ nbd_recv_coroutines_enter_all(bs);
67
68
nbd_client_detach_aio_context(bs);
69
object_unref(OBJECT(client->sioc));
70
@@ -XXX,XX +XXX,XX @@ static void nbd_teardown_connection(BlockDriverState *bs)
71
client->ioc = NULL;
72
}
73
74
-static void nbd_reply_ready(void *opaque)
75
+static coroutine_fn void nbd_read_reply_entry(void *opaque)
76
{
77
- BlockDriverState *bs = opaque;
78
- NBDClientSession *s = nbd_get_client_session(bs);
79
+ NBDClientSession *s = opaque;
80
uint64_t i;
81
int ret;
82
83
- if (!s->ioc) { /* Already closed */
84
- return;
85
- }
86
-
87
- if (s->reply.handle == 0) {
88
- /* No reply already in flight. Fetch a header. It is possible
89
- * that another thread has done the same thing in parallel, so
90
- * the socket is not readable anymore.
91
- */
92
+ for (;;) {
93
+ assert(s->reply.handle == 0);
94
ret = nbd_receive_reply(s->ioc, &s->reply);
95
- if (ret == -EAGAIN) {
96
- return;
97
- }
98
if (ret < 0) {
99
- s->reply.handle = 0;
100
- goto fail;
101
+ break;
102
}
103
- }
104
105
- /* There's no need for a mutex on the receive side, because the
106
- * handler acts as a synchronization point and ensures that only
107
- * one coroutine is called until the reply finishes. */
108
- i = HANDLE_TO_INDEX(s, s->reply.handle);
109
- if (i >= MAX_NBD_REQUESTS) {
110
- goto fail;
111
- }
112
+ /* There's no need for a mutex on the receive side, because the
113
+ * handler acts as a synchronization point and ensures that only
114
+ * one coroutine is called until the reply finishes.
115
+ */
116
+ i = HANDLE_TO_INDEX(s, s->reply.handle);
117
+ if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
118
+ break;
119
+ }
120
121
- if (s->recv_coroutine[i]) {
122
- qemu_coroutine_enter(s->recv_coroutine[i]);
123
- return;
124
+ /* We're woken up by the recv_coroutine itself. Note that there
125
+ * is no race between yielding and reentering read_reply_co. This
126
+ * is because:
127
+ *
128
+ * - if recv_coroutine[i] runs on the same AioContext, it is only
129
+ * entered after we yield
130
+ *
131
+ * - if recv_coroutine[i] runs on a different AioContext, reentering
132
+ * read_reply_co happens through a bottom half, which can only
133
+ * run after we yield.
134
+ */
135
+ aio_co_wake(s->recv_coroutine[i]);
136
+ qemu_coroutine_yield();
137
}
138
-
139
-fail:
140
- nbd_teardown_connection(bs);
141
-}
142
-
143
-static void nbd_restart_write(void *opaque)
144
-{
145
- BlockDriverState *bs = opaque;
146
-
147
- qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
148
+ s->read_reply_co = NULL;
149
}
150
151
static int nbd_co_send_request(BlockDriverState *bs,
152
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
153
QEMUIOVector *qiov)
154
{
155
NBDClientSession *s = nbd_get_client_session(bs);
156
- AioContext *aio_context;
157
int rc, ret, i;
158
159
qemu_co_mutex_lock(&s->send_mutex);
160
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
161
return -EPIPE;
162
}
163
164
- s->send_coroutine = qemu_coroutine_self();
165
- aio_context = bdrv_get_aio_context(bs);
166
-
167
- aio_set_fd_handler(aio_context, s->sioc->fd, false,
168
- nbd_reply_ready, nbd_restart_write, NULL, bs);
169
if (qiov) {
170
qio_channel_set_cork(s->ioc, true);
171
rc = nbd_send_request(s->ioc, request);
172
@@ -XXX,XX +XXX,XX @@ static int nbd_co_send_request(BlockDriverState *bs,
173
} else {
174
rc = nbd_send_request(s->ioc, request);
175
}
176
- aio_set_fd_handler(aio_context, s->sioc->fd, false,
177
- nbd_reply_ready, NULL, NULL, bs);
178
- s->send_coroutine = NULL;
179
qemu_co_mutex_unlock(&s->send_mutex);
180
return rc;
181
}
182
@@ -XXX,XX +XXX,XX @@ static void nbd_co_receive_reply(NBDClientSession *s,
183
{
184
int ret;
185
186
- /* Wait until we're woken up by the read handler. TODO: perhaps
187
- * peek at the next reply and avoid yielding if it's ours? */
188
+ /* Wait until we're woken up by nbd_read_reply_entry. */
189
qemu_coroutine_yield();
190
*reply = s->reply;
191
if (reply->handle != request->handle ||
192
@@ -XXX,XX +XXX,XX @@ static void nbd_coroutine_start(NBDClientSession *s,
193
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
194
}
195
196
-static void nbd_coroutine_end(NBDClientSession *s,
197
+static void nbd_coroutine_end(BlockDriverState *bs,
198
NBDRequest *request)
199
{
200
+ NBDClientSession *s = nbd_get_client_session(bs);
201
int i = HANDLE_TO_INDEX(s, request->handle);
202
+
203
s->recv_coroutine[i] = NULL;
204
- if (s->in_flight-- == MAX_NBD_REQUESTS) {
205
- qemu_co_queue_next(&s->free_sema);
206
+ s->in_flight--;
207
+ qemu_co_queue_next(&s->free_sema);
208
+
209
+ /* Kick the read_reply_co to get the next reply. */
210
+ if (s->read_reply_co) {
211
+ aio_co_wake(s->read_reply_co);
212
}
213
}
214
215
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
216
} else {
217
nbd_co_receive_reply(client, &request, &reply, qiov);
218
}
219
- nbd_coroutine_end(client, &request);
220
+ nbd_coroutine_end(bs, &request);
221
return -reply.error;
222
}
223
224
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
225
} else {
226
nbd_co_receive_reply(client, &request, &reply, NULL);
227
}
228
- nbd_coroutine_end(client, &request);
229
+ nbd_coroutine_end(bs, &request);
230
return -reply.error;
231
}
232
233
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
234
} else {
235
nbd_co_receive_reply(client, &request, &reply, NULL);
236
}
237
- nbd_coroutine_end(client, &request);
238
+ nbd_coroutine_end(bs, &request);
239
return -reply.error;
240
}
241
242
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_flush(BlockDriverState *bs)
243
} else {
244
nbd_co_receive_reply(client, &request, &reply, NULL);
245
}
246
- nbd_coroutine_end(client, &request);
247
+ nbd_coroutine_end(bs, &request);
248
return -reply.error;
249
}
250
251
@@ -XXX,XX +XXX,XX @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
252
} else {
253
nbd_co_receive_reply(client, &request, &reply, NULL);
254
}
255
- nbd_coroutine_end(client, &request);
256
+ nbd_coroutine_end(bs, &request);
257
return -reply.error;
258
259
}
260
261
void nbd_client_detach_aio_context(BlockDriverState *bs)
262
{
263
- aio_set_fd_handler(bdrv_get_aio_context(bs),
264
- nbd_get_client_session(bs)->sioc->fd,
265
- false, NULL, NULL, NULL, NULL);
266
+ NBDClientSession *client = nbd_get_client_session(bs);
267
+ qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
268
}
269
270
void nbd_client_attach_aio_context(BlockDriverState *bs,
271
AioContext *new_context)
272
{
273
- aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
274
- false, nbd_reply_ready, NULL, NULL, bs);
275
+ NBDClientSession *client = nbd_get_client_session(bs);
276
+ qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
277
+ aio_co_schedule(new_context, client->read_reply_co);
278
}
279
280
void nbd_client_close(BlockDriverState *bs)
281
@@ -XXX,XX +XXX,XX @@ int nbd_client_init(BlockDriverState *bs,
282
/* Now that we're connected, set the socket to be non-blocking and
283
* kick the reply mechanism. */
284
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
285
-
286
+ client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
287
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
288
289
logout("Established connection with NBD server\n");
290
diff --git a/nbd/client.c b/nbd/client.c
291
index XXXXXXX..XXXXXXX 100644
292
--- a/nbd/client.c
293
+++ b/nbd/client.c
294
@@ -XXX,XX +XXX,XX @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
295
ssize_t ret;
296
297
ret = read_sync(ioc, buf, sizeof(buf));
298
- if (ret < 0) {
299
+ if (ret <= 0) {
300
return ret;
301
}
302
303
diff --git a/nbd/common.c b/nbd/common.c
304
index XXXXXXX..XXXXXXX 100644
305
--- a/nbd/common.c
306
+++ b/nbd/common.c
307
@@ -XXX,XX +XXX,XX @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
308
}
309
if (len == QIO_CHANNEL_ERR_BLOCK) {
310
if (qemu_in_coroutine()) {
311
- /* XXX figure out if we can create a variant on
312
- * qio_channel_yield() that works with AIO contexts
313
- * and consider using that in this branch */
314
- qemu_coroutine_yield();
315
- } else if (done) {
316
- /* XXX this is needed by nbd_reply_ready. */
317
- qio_channel_wait(ioc,
318
- do_read ? G_IO_IN : G_IO_OUT);
319
+ qio_channel_yield(ioc, do_read ? G_IO_IN : G_IO_OUT);
320
} else {
321
return -EAGAIN;
322
}
323
diff --git a/nbd/server.c b/nbd/server.c
324
index XXXXXXX..XXXXXXX 100644
325
--- a/nbd/server.c
326
+++ b/nbd/server.c
327
@@ -XXX,XX +XXX,XX @@ struct NBDClient {
328
CoMutex send_lock;
329
Coroutine *send_coroutine;
330
331
- bool can_read;
332
-
333
QTAILQ_ENTRY(NBDClient) next;
334
int nb_requests;
335
bool closing;
336
@@ -XXX,XX +XXX,XX @@ struct NBDClient {
337
338
/* That's all folks */
339
340
-static void nbd_set_handlers(NBDClient *client);
341
-static void nbd_unset_handlers(NBDClient *client);
342
-static void nbd_update_can_read(NBDClient *client);
343
+static void nbd_client_receive_next_request(NBDClient *client);
344
345
static gboolean nbd_negotiate_continue(QIOChannel *ioc,
346
GIOCondition condition,
347
@@ -XXX,XX +XXX,XX @@ void nbd_client_put(NBDClient *client)
348
*/
349
assert(client->closing);
350
351
- nbd_unset_handlers(client);
352
+ qio_channel_detach_aio_context(client->ioc);
353
object_unref(OBJECT(client->sioc));
354
object_unref(OBJECT(client->ioc));
355
if (client->tlscreds) {
356
@@ -XXX,XX +XXX,XX @@ static NBDRequestData *nbd_request_get(NBDClient *client)
357
358
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
359
client->nb_requests++;
360
- nbd_update_can_read(client);
361
362
req = g_new0(NBDRequestData, 1);
363
nbd_client_get(client);
364
@@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req)
365
g_free(req);
366
367
client->nb_requests--;
368
- nbd_update_can_read(client);
369
+ nbd_client_receive_next_request(client);
370
+
371
nbd_client_put(client);
372
}
373
374
@@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
375
exp->ctx = ctx;
376
377
QTAILQ_FOREACH(client, &exp->clients, next) {
378
- nbd_set_handlers(client);
379
+ qio_channel_attach_aio_context(client->ioc, ctx);
380
+ if (client->recv_coroutine) {
381
+ aio_co_schedule(ctx, client->recv_coroutine);
382
+ }
383
+ if (client->send_coroutine) {
384
+ aio_co_schedule(ctx, client->send_coroutine);
385
+ }
386
}
387
}
388
389
@@ -XXX,XX +XXX,XX @@ static void blk_aio_detach(void *opaque)
390
TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
391
392
QTAILQ_FOREACH(client, &exp->clients, next) {
393
- nbd_unset_handlers(client);
394
+ qio_channel_detach_aio_context(client->ioc);
395
}
396
397
exp->ctx = NULL;
398
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
399
g_assert(qemu_in_coroutine());
400
qemu_co_mutex_lock(&client->send_lock);
401
client->send_coroutine = qemu_coroutine_self();
402
- nbd_set_handlers(client);
403
404
if (!len) {
405
rc = nbd_send_reply(client->ioc, reply);
406
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
407
}
408
409
client->send_coroutine = NULL;
410
- nbd_set_handlers(client);
411
qemu_co_mutex_unlock(&client->send_lock);
412
return rc;
413
}
414
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
415
ssize_t rc;
416
417
g_assert(qemu_in_coroutine());
418
- client->recv_coroutine = qemu_coroutine_self();
419
- nbd_update_can_read(client);
420
-
421
+ assert(client->recv_coroutine == qemu_coroutine_self());
422
rc = nbd_receive_request(client->ioc, request);
423
if (rc < 0) {
424
if (rc != -EAGAIN) {
425
@@ -XXX,XX +XXX,XX @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
426
427
out:
428
client->recv_coroutine = NULL;
429
- nbd_update_can_read(client);
430
+ nbd_client_receive_next_request(client);
431
432
return rc;
433
}
434
435
-static void nbd_trip(void *opaque)
436
+/* Owns a reference to the NBDClient passed as opaque. */
437
+static coroutine_fn void nbd_trip(void *opaque)
438
{
439
NBDClient *client = opaque;
440
NBDExport *exp = client->exp;
441
NBDRequestData *req;
442
- NBDRequest request;
443
+ NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
444
NBDReply reply;
445
ssize_t ret;
446
int flags;
447
448
TRACE("Reading request.");
449
if (client->closing) {
450
+ nbd_client_put(client);
451
return;
452
}
453
454
@@ -XXX,XX +XXX,XX @@ static void nbd_trip(void *opaque)
455
456
done:
457
nbd_request_put(req);
458
+ nbd_client_put(client);
459
return;
460
461
out:
462
nbd_request_put(req);
463
client_close(client);
464
+ nbd_client_put(client);
465
}
466
467
-static void nbd_read(void *opaque)
468
+static void nbd_client_receive_next_request(NBDClient *client)
469
{
470
- NBDClient *client = opaque;
471
-
472
- if (client->recv_coroutine) {
473
- qemu_coroutine_enter(client->recv_coroutine);
474
- } else {
475
- qemu_coroutine_enter(qemu_coroutine_create(nbd_trip, client));
476
- }
477
-}
478
-
479
-static void nbd_restart_write(void *opaque)
480
-{
481
- NBDClient *client = opaque;
482
-
483
- qemu_coroutine_enter(client->send_coroutine);
484
-}
485
-
486
-static void nbd_set_handlers(NBDClient *client)
487
-{
488
- if (client->exp && client->exp->ctx) {
489
- aio_set_fd_handler(client->exp->ctx, client->sioc->fd, true,
490
- client->can_read ? nbd_read : NULL,
491
- client->send_coroutine ? nbd_restart_write : NULL,
492
- NULL, client);
493
- }
494
-}
495
-
496
-static void nbd_unset_handlers(NBDClient *client)
497
-{
498
- if (client->exp && client->exp->ctx) {
499
- aio_set_fd_handler(client->exp->ctx, client->sioc->fd, true, NULL,
500
- NULL, NULL, NULL);
501
- }
502
-}
503
-
504
-static void nbd_update_can_read(NBDClient *client)
505
-{
506
- bool can_read = client->recv_coroutine ||
507
- client->nb_requests < MAX_NBD_REQUESTS;
508
-
509
- if (can_read != client->can_read) {
510
- client->can_read = can_read;
511
- nbd_set_handlers(client);
512
-
513
- /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
514
- * in nbd_set_handlers() will have taken care of that */
515
+ if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
516
+ nbd_client_get(client);
517
+ client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
518
+ aio_co_schedule(client->exp->ctx, client->recv_coroutine);
519
}
520
}
521
522
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void nbd_co_client_start(void *opaque)
523
goto out;
524
}
525
qemu_co_mutex_init(&client->send_lock);
526
- nbd_set_handlers(client);
527
528
if (exp) {
529
QTAILQ_INSERT_TAIL(&exp->clients, client, next);
530
}
531
+
532
+ nbd_client_receive_next_request(client);
533
+
534
out:
535
g_free(data);
536
}
537
@@ -XXX,XX +XXX,XX @@ void nbd_client_new(NBDExport *exp,
538
object_ref(OBJECT(client->sioc));
539
client->ioc = QIO_CHANNEL(sioc);
540
object_ref(OBJECT(client->ioc));
541
- client->can_read = true;
542
client->close = close_fn;
543
544
data->client = client;
545
--
546
2.9.3
547
548
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
As a small step towards the introduction of multiqueue, we want
4
coroutines to remain on the same AioContext that started them,
5
unless they are moved explicitly with e.g. aio_co_schedule. This patch
6
keeps coroutines from switching AioContext when they use a CoMutex.
7
For now it does not make much of a difference, because the CoMutex
8
is not thread-safe and the AioContext itself is used to protect the
9
CoMutex from concurrent access. However, this is going to change.
10
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
14
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
15
Message-id: 20170213135235.12274-9-pbonzini@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
18
util/qemu-coroutine-lock.c | 5 ++---
19
util/trace-events | 1 -
20
2 files changed, 2 insertions(+), 4 deletions(-)
21
22
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/util/qemu-coroutine-lock.c
25
+++ b/util/qemu-coroutine-lock.c
26
@@ -XXX,XX +XXX,XX @@
27
#include "qemu/coroutine.h"
28
#include "qemu/coroutine_int.h"
29
#include "qemu/queue.h"
30
+#include "block/aio.h"
31
#include "trace.h"
32
33
void qemu_co_queue_init(CoQueue *queue)
34
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_run_restart(Coroutine *co)
35
36
static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
37
{
38
- Coroutine *self = qemu_coroutine_self();
39
Coroutine *next;
40
41
if (QSIMPLEQ_EMPTY(&queue->entries)) {
42
@@ -XXX,XX +XXX,XX @@ static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
43
44
while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
45
QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
46
- QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
47
- trace_qemu_co_queue_next(next);
48
+ aio_co_wake(next);
49
if (single) {
50
break;
51
}
52
diff --git a/util/trace-events b/util/trace-events
53
index XXXXXXX..XXXXXXX 100644
54
--- a/util/trace-events
55
+++ b/util/trace-events
56
@@ -XXX,XX +XXX,XX @@ qemu_coroutine_terminate(void *co) "self %p"
57
58
# util/qemu-coroutine-lock.c
59
qemu_co_queue_run_restart(void *co) "co %p"
60
-qemu_co_queue_next(void *nxt) "next %p"
61
qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p"
62
qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"
63
qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p"
64
--
65
2.9.3
66
67
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
Keep the coroutine on the same AioContext. Without this change,
4
there would be a race between yielding the coroutine and reentering it.
5
While the race cannot happen now, because the code only runs from a single
6
AioContext, this will change with multiqueue support in the block layer.
7
8
While doing the change, replace custom bottom half with aio_co_schedule.
9
10
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Fam Zheng <famz@redhat.com>
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
14
Message-id: 20170213135235.12274-10-pbonzini@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
block/blkdebug.c | 9 +--------
18
1 file changed, 1 insertion(+), 8 deletions(-)
19
20
diff --git a/block/blkdebug.c b/block/blkdebug.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/blkdebug.c
23
+++ b/block/blkdebug.c
24
@@ -XXX,XX +XXX,XX @@ out:
25
return ret;
26
}
27
28
-static void error_callback_bh(void *opaque)
29
-{
30
- Coroutine *co = opaque;
31
- qemu_coroutine_enter(co);
32
-}
33
-
34
static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
35
{
36
BDRVBlkdebugState *s = bs->opaque;
37
@@ -XXX,XX +XXX,XX @@ static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
38
}
39
40
if (!immediately) {
41
- aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
42
- qemu_coroutine_self());
43
+ aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
44
qemu_coroutine_yield();
45
}
46
47
--
48
2.9.3
49
50
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
Running a very small critical section on pthread_mutex_t and CoMutex
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
shows that pthread_mutex_t is much faster because it doesn't actually
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
go to sleep. What happens is that the critical section is shorter
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
than the latency of entering the kernel and thus FUTEX_WAIT always
6
Message-id: 20200120141858.587874-13-stefanha@redhat.com
7
fails. With CoMutex there is no such latency but you still want to
7
Message-Id: <20200120141858.587874-13-stefanha@redhat.com>
8
avoid wait and wakeup. So introduce it artificially.
9
10
This only works with one waiter; because CoMutex is fair, it will
11
always have more waits and wakeups than a pthread_mutex_t.
12
13
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
14
Reviewed-by: Fam Zheng <famz@redhat.com>
15
Message-id: 20170213181244.16297-3-pbonzini@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
9
---
18
include/qemu/coroutine.h | 5 +++++
10
qemu-img-cmds.hx | 4 ++--
19
util/qemu-coroutine-lock.c | 51 ++++++++++++++++++++++++++++++++++++++++------
11
qemu-img.c | 11 ++++++++++-
20
util/qemu-coroutine.c | 2 +-
12
qemu-img.texi | 5 ++++-
21
3 files changed, 51 insertions(+), 7 deletions(-)
13
3 files changed, 16 insertions(+), 4 deletions(-)
22
14
23
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
15
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
24
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
25
--- a/include/qemu/coroutine.h
17
--- a/qemu-img-cmds.hx
26
+++ b/include/qemu/coroutine.h
18
+++ b/qemu-img-cmds.hx
27
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex {
19
@@ -XXX,XX +XXX,XX @@ STEXI
28
*/
20
ETEXI
29
unsigned locked;
21
30
22
DEF("bench", img_bench,
31
+ /* Context that is holding the lock. Useful to avoid spinning
23
- "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename")
32
+ * when two coroutines on the same AioContext try to get the lock. :)
24
+ "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-i aio] [-w] [-U] filename")
33
+ */
25
STEXI
34
+ AioContext *ctx;
26
-@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
35
+
27
+@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-i @var{aio}] [-w] [-U] @var{filename}
36
/* A queue of waiters. Elements are added atomically in front of
28
ETEXI
37
* from_push. to_pop is only populated, and popped from, by whoever
29
38
* is in charge of the next wakeup. This can be an unlocker or,
30
DEF("check", img_check,
39
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
31
diff --git a/qemu-img.c b/qemu-img.c
40
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
41
--- a/util/qemu-coroutine-lock.c
33
--- a/qemu-img.c
42
+++ b/util/qemu-coroutine-lock.c
34
+++ b/qemu-img.c
43
@@ -XXX,XX +XXX,XX @@
35
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
44
#include "qemu-common.h"
36
{"force-share", no_argument, 0, 'U'},
45
#include "qemu/coroutine.h"
37
{0, 0, 0, 0}
46
#include "qemu/coroutine_int.h"
38
};
47
+#include "qemu/processor.h"
39
- c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
48
#include "qemu/queue.h"
40
+ c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
49
#include "block/aio.h"
41
+ NULL);
50
#include "trace.h"
42
if (c == -1) {
51
@@ -XXX,XX +XXX,XX @@ void qemu_co_mutex_init(CoMutex *mutex)
52
memset(mutex, 0, sizeof(*mutex));
53
}
54
55
-static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
56
+static void coroutine_fn qemu_co_mutex_wake(CoMutex *mutex, Coroutine *co)
57
+{
58
+ /* Read co before co->ctx; pairs with smp_wmb() in
59
+ * qemu_coroutine_enter().
60
+ */
61
+ smp_read_barrier_depends();
62
+ mutex->ctx = co->ctx;
63
+ aio_co_wake(co);
64
+}
65
+
66
+static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
67
+ CoMutex *mutex)
68
{
69
Coroutine *self = qemu_coroutine_self();
70
CoWaitRecord w;
71
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
72
if (co == self) {
73
/* We got the lock ourselves! */
74
assert(to_wake == &w);
75
+ mutex->ctx = ctx;
76
return;
77
}
78
79
- aio_co_wake(co);
80
+ qemu_co_mutex_wake(mutex, co);
81
}
82
83
qemu_coroutine_yield();
84
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(CoMutex *mutex)
85
86
void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
87
{
88
+ AioContext *ctx = qemu_get_current_aio_context();
89
Coroutine *self = qemu_coroutine_self();
90
+ int waiters, i;
91
92
- if (atomic_fetch_inc(&mutex->locked) == 0) {
93
+ /* Running a very small critical section on pthread_mutex_t and CoMutex
94
+ * shows that pthread_mutex_t is much faster because it doesn't actually
95
+ * go to sleep. What happens is that the critical section is shorter
96
+ * than the latency of entering the kernel and thus FUTEX_WAIT always
97
+ * fails. With CoMutex there is no such latency but you still want to
98
+ * avoid wait and wakeup. So introduce it artificially.
99
+ */
100
+ i = 0;
101
+retry_fast_path:
102
+ waiters = atomic_cmpxchg(&mutex->locked, 0, 1);
103
+ if (waiters != 0) {
104
+ while (waiters == 1 && ++i < 1000) {
105
+ if (atomic_read(&mutex->ctx) == ctx) {
106
+ break;
107
+ }
108
+ if (atomic_read(&mutex->locked) == 0) {
109
+ goto retry_fast_path;
110
+ }
111
+ cpu_relax();
112
+ }
113
+ waiters = atomic_fetch_inc(&mutex->locked);
114
+ }
115
+
116
+ if (waiters == 0) {
117
/* Uncontended. */
118
trace_qemu_co_mutex_lock_uncontended(mutex, self);
119
+ mutex->ctx = ctx;
120
} else {
121
- qemu_co_mutex_lock_slowpath(mutex);
122
+ qemu_co_mutex_lock_slowpath(ctx, mutex);
123
}
124
mutex->holder = self;
125
self->locks_held++;
126
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
127
assert(mutex->holder == self);
128
assert(qemu_in_coroutine());
129
130
+ mutex->ctx = NULL;
131
mutex->holder = NULL;
132
self->locks_held--;
133
if (atomic_fetch_dec(&mutex->locked) == 1) {
134
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
135
unsigned our_handoff;
136
137
if (to_wake) {
138
- Coroutine *co = to_wake->co;
139
- aio_co_wake(co);
140
+ qemu_co_mutex_wake(mutex, to_wake->co);
141
break;
43
break;
142
}
44
}
143
45
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
144
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
46
case 'n':
47
flags |= BDRV_O_NATIVE_AIO;
48
break;
49
+ case 'i':
50
+ ret = bdrv_parse_aio(optarg, &flags);
51
+ if (ret < 0) {
52
+ error_report("Invalid aio option: %s", optarg);
53
+ ret = -1;
54
+ goto out;
55
+ }
56
+ break;
57
case 'o':
58
{
59
offset = cvtnum(optarg);
60
diff --git a/qemu-img.texi b/qemu-img.texi
145
index XXXXXXX..XXXXXXX 100644
61
index XXXXXXX..XXXXXXX 100644
146
--- a/util/qemu-coroutine.c
62
--- a/qemu-img.texi
147
+++ b/util/qemu-coroutine.c
63
+++ b/qemu-img.texi
148
@@ -XXX,XX +XXX,XX @@ void qemu_coroutine_enter(Coroutine *co)
64
@@ -XXX,XX +XXX,XX @@ Command description:
149
co->ctx = qemu_get_current_aio_context();
65
Amends the image format specific @var{options} for the image file
150
66
@var{filename}. Not all file formats support this operation.
151
/* Store co->ctx before anything that stores co. Matches
67
152
- * barrier in aio_co_wake.
68
-@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
153
+ * barrier in aio_co_wake and qemu_co_mutex_wake.
69
+@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [-i @var{aio}] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
154
*/
70
155
smp_wmb();
71
Run a simple sequential I/O benchmark on the specified image. If @code{-w} is
72
specified, a write test is performed, otherwise a read test is performed.
73
@@ -XXX,XX +XXX,XX @@ If @code{-n} is specified, the native AIO backend is used if possible. On
74
Linux, this option only works if @code{-t none} or @code{-t directsync} is
75
specified as well.
76
77
+If @code{-i} is specified, aio option can be used to specify different AIO
78
+backends: @var{threads}, @var{native} or @var{io_uring}.
79
+
80
For write tests, by default a buffer filled with zeros is written. This can be
81
overridden with a pattern byte specified by @var{pattern}.
156
82
157
--
83
--
158
2.9.3
84
2.24.1
159
85
160
86
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4
Acked-by: Eric Blake <eblake@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20170213135235.12274-19-pbonzini@redhat.com
7
Message-id: 20200120141858.587874-14-stefanha@redhat.com
8
Message-Id: <20200120141858.587874-14-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
---
10
include/block/block_int.h | 64 +++++++++++++++++++++++++-----------------
11
docs/interop/qemu-nbd.rst | 4 ++--
11
include/sysemu/block-backend.h | 14 ++++++---
12
qemu-nbd.c | 12 ++++--------
12
2 files changed, 49 insertions(+), 29 deletions(-)
13
2 files changed, 6 insertions(+), 10 deletions(-)
13
14
14
diff --git a/include/block/block_int.h b/include/block/block_int.h
15
diff --git a/docs/interop/qemu-nbd.rst b/docs/interop/qemu-nbd.rst
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/include/block/block_int.h
17
--- a/docs/interop/qemu-nbd.rst
17
+++ b/include/block/block_int.h
18
+++ b/docs/interop/qemu-nbd.rst
18
@@ -XXX,XX +XXX,XX @@ struct BdrvChild {
19
@@ -XXX,XX +XXX,XX @@ driver options if ``--image-opts`` is specified.
19
* copied as well.
20
20
*/
21
.. option:: --aio=AIO
21
struct BlockDriverState {
22
22
- int64_t total_sectors; /* if we are reading a disk image, give its
23
- Set the asynchronous I/O mode between ``threads`` (the default)
23
- size in sectors */
24
- and ``native`` (Linux only).
24
+ /* Protected by big QEMU lock or read-only after opening. No special
25
+ Set the asynchronous I/O mode between ``threads`` (the default),
25
+ * locking needed during I/O...
26
+ ``native`` (Linux only), and ``io_uring`` (Linux 5.1+).
26
+ */
27
27
int open_flags; /* flags used to open the file, re-used for re-open */
28
.. option:: --discard=DISCARD
28
bool read_only; /* if true, the media is read only */
29
29
bool encrypted; /* if true, the media is encrypted */
30
diff --git a/qemu-nbd.c b/qemu-nbd.c
30
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
31
bool sg; /* if true, the device is a /dev/sg* */
32
bool probed; /* if true, format was probed rather than specified */
33
34
- int copy_on_read; /* if nonzero, copy read backing sectors into image.
35
- note this is a reference count */
36
-
37
- CoQueue flush_queue; /* Serializing flush queue */
38
- bool active_flush_req; /* Flush request in flight? */
39
- unsigned int write_gen; /* Current data generation */
40
- unsigned int flushed_gen; /* Flushed write generation */
41
-
42
BlockDriver *drv; /* NULL means no media */
43
void *opaque;
44
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
46
BdrvChild *backing;
47
BdrvChild *file;
48
49
- /* Callback before write request is processed */
50
- NotifierWithReturnList before_write_notifiers;
51
-
52
- /* number of in-flight requests; overall and serialising */
53
- unsigned int in_flight;
54
- unsigned int serialising_in_flight;
55
-
56
- bool wakeup;
57
-
58
- /* Offset after the highest byte written to */
59
- uint64_t wr_highest_offset;
60
-
61
/* I/O Limits */
62
BlockLimits bl;
63
64
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
65
QTAILQ_ENTRY(BlockDriverState) bs_list;
66
/* element of the list of monitor-owned BDS */
67
QTAILQ_ENTRY(BlockDriverState) monitor_list;
68
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
69
int refcnt;
70
71
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
72
-
73
/* operation blockers */
74
QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
75
76
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
77
/* The error object in use for blocking operations on backing_hd */
78
Error *backing_blocker;
79
80
+ /* Protected by AioContext lock */
81
+
82
+ /* If true, copy read backing sectors into image. Can be >1 if more
83
+ * than one client has requested copy-on-read.
84
+ */
85
+ int copy_on_read;
86
+
87
+ /* If we are reading a disk image, give its size in sectors.
88
+ * Generally read-only; it is written to by load_vmstate and save_vmstate,
89
+ * but the block layer is quiescent during those.
90
+ */
91
+ int64_t total_sectors;
92
+
93
+ /* Callback before write request is processed */
94
+ NotifierWithReturnList before_write_notifiers;
95
+
96
+ /* number of in-flight requests; overall and serialising */
97
+ unsigned int in_flight;
98
+ unsigned int serialising_in_flight;
99
+
100
+ bool wakeup;
101
+
102
+ /* Offset after the highest byte written to */
103
+ uint64_t wr_highest_offset;
104
+
105
/* threshold limit for writes, in bytes. "High water mark". */
106
uint64_t write_threshold_offset;
107
NotifierWithReturn write_threshold_notifier;
108
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
109
/* counter for nested bdrv_io_plug */
110
unsigned io_plugged;
111
112
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
113
+ CoQueue flush_queue; /* Serializing flush queue */
114
+ bool active_flush_req; /* Flush request in flight? */
115
+ unsigned int write_gen; /* Current data generation */
116
+ unsigned int flushed_gen; /* Flushed write generation */
117
+
118
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
119
+
120
+ /* do we need to tell the guest if we have a volatile write cache? */
121
+ int enable_write_cache;
122
+
123
int quiesce_counter;
124
};
125
126
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
127
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
128
--- a/include/sysemu/block-backend.h
32
--- a/qemu-nbd.c
129
+++ b/include/sysemu/block-backend.h
33
+++ b/qemu-nbd.c
130
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
34
@@ -XXX,XX +XXX,XX @@ static void usage(const char *name)
131
* fields that must be public. This is in particular for QLIST_ENTRY() and
35
" '[ID_OR_NAME]'\n"
132
* friends so that BlockBackends can be kept in lists outside block-backend.c */
36
" -n, --nocache disable host cache\n"
133
typedef struct BlockBackendPublic {
37
" --cache=MODE set cache mode (none, writeback, ...)\n"
134
- /* I/O throttling.
38
-" --aio=MODE set AIO mode (native or threads)\n"
135
- * throttle_state tells us if this BlockBackend has I/O limits configured.
39
+" --aio=MODE set AIO mode (native, io_uring or threads)\n"
136
- * io_limits_disabled tells us if they are currently being enforced */
40
" --discard=MODE set discard mode (ignore, unmap)\n"
137
+ /* I/O throttling has its own locking, but also some fields are
41
" --detect-zeroes=MODE set detect-zeroes mode (off, on, unmap)\n"
138
+ * protected by the AioContext lock.
42
" --image-opts treat FILE as a full set of image options\n"
139
+ */
43
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
140
+
44
exit(EXIT_FAILURE);
141
+ /* Protected by AioContext lock. */
45
}
142
CoQueue throttled_reqs[2];
46
seen_aio = true;
143
+
47
- if (!strcmp(optarg, "native")) {
144
+ /* Nonzero if the I/O limits are currently being ignored; generally
48
- flags |= BDRV_O_NATIVE_AIO;
145
+ * it is zero. */
49
- } else if (!strcmp(optarg, "threads")) {
146
unsigned int io_limits_disabled;
50
- /* this is the default */
147
51
- } else {
148
/* The following fields are protected by the ThrottleGroup lock.
52
- error_report("invalid aio mode `%s'", optarg);
149
- * See the ThrottleGroup documentation for details. */
53
- exit(EXIT_FAILURE);
150
+ * See the ThrottleGroup documentation for details.
54
+ if (bdrv_parse_aio(optarg, &flags) < 0) {
151
+ * throttle_state tells us if I/O limits are configured. */
55
+ error_report("Invalid aio mode '%s'", optarg);
152
ThrottleState *throttle_state;
56
+ exit(EXIT_FAILURE);
153
ThrottleTimers throttle_timers;
57
}
154
unsigned pending_reqs[2];
58
break;
59
case QEMU_NBD_OPT_DISCARD:
155
--
60
--
156
2.9.3
61
2.24.1
157
62
158
63
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Reviewed-by: Fam Zheng <famz@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
6
Message-id: 20200120141858.587874-15-stefanha@redhat.com
7
Message-id: 20170213135235.12274-13-pbonzini@redhat.com
7
Message-Id: <20200120141858.587874-15-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
9
---
10
block/qed.h | 3 +++
10
tests/qemu-iotests/check | 15 ++++++++++++++-
11
block/curl.c | 2 ++
11
tests/qemu-iotests/common.rc | 14 ++++++++++++++
12
block/io.c | 5 +++++
12
tests/qemu-iotests/iotests.py | 12 ++++++++++--
13
block/iscsi.c | 8 ++++++--
13
3 files changed, 38 insertions(+), 3 deletions(-)
14
block/null.c | 4 ++++
15
block/qed.c | 12 ++++++++++++
16
block/throttle-groups.c | 2 ++
17
util/aio-posix.c | 2 --
18
util/aio-win32.c | 2 --
19
util/qemu-coroutine-sleep.c | 2 +-
20
10 files changed, 35 insertions(+), 7 deletions(-)
21
14
22
diff --git a/block/qed.h b/block/qed.h
15
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
16
index XXXXXXX..XXXXXXX 100755
17
--- a/tests/qemu-iotests/check
18
+++ b/tests/qemu-iotests/check
19
@@ -XXX,XX +XXX,XX @@ sortme=false
20
expunge=true
21
have_test_arg=false
22
cachemode=false
23
+aiomode=false
24
25
tmp="${TEST_DIR}"/$$
26
rm -f $tmp.list $tmp.tmp $tmp.sed
27
@@ -XXX,XX +XXX,XX @@ export IMGFMT_GENERIC=true
28
export IMGPROTO=file
29
export IMGOPTS=""
30
export CACHEMODE="writeback"
31
+export AIOMODE="threads"
32
export QEMU_IO_OPTIONS=""
33
export QEMU_IO_OPTIONS_NO_FMT=""
34
export CACHEMODE_IS_DEFAULT=true
35
@@ -XXX,XX +XXX,XX @@ s/ .*//p
36
CACHEMODE_IS_DEFAULT=false
37
cachemode=false
38
continue
39
+ elif $aiomode
40
+ then
41
+ AIOMODE="$r"
42
+ aiomode=false
43
+ continue
44
fi
45
46
xpand=true
47
@@ -XXX,XX +XXX,XX @@ other options
48
-n show me, do not run tests
49
-o options -o options to pass to qemu-img create/convert
50
-c mode cache mode
51
+ -i mode AIO mode
52
-makecheck pretty print output for make check
53
54
testlist options
55
@@ -XXX,XX +XXX,XX @@ testlist options
56
cachemode=true
57
xpand=false
58
;;
59
+ -i)
60
+ aiomode=true
61
+ xpand=false
62
+ ;;
63
-T) # deprecated timestamp option
64
xpand=false
65
;;
66
-
67
-v)
68
verbose=true
69
xpand=false
70
@@ -XXX,XX +XXX,XX @@ done
71
72
# Set qemu-io cache mode with $CACHEMODE we have
73
QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --cache $CACHEMODE"
74
+# Set qemu-io aio mode with $AIOMODE we have
75
+QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --aio $AIOMODE"
76
77
QEMU_IO_OPTIONS_NO_FMT="$QEMU_IO_OPTIONS"
78
if [ "$IMGOPTSSYNTAX" != "true" ]; then
79
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
23
index XXXXXXX..XXXXXXX 100644
80
index XXXXXXX..XXXXXXX 100644
24
--- a/block/qed.h
81
--- a/tests/qemu-iotests/common.rc
25
+++ b/block/qed.h
82
+++ b/tests/qemu-iotests/common.rc
26
@@ -XXX,XX +XXX,XX @@ enum {
83
@@ -XXX,XX +XXX,XX @@ _default_cache_mode()
27
*/
84
return
28
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
85
fi
29
86
}
30
+void qed_acquire(BDRVQEDState *s);
87
+_supported_aio_modes()
31
+void qed_release(BDRVQEDState *s);
88
+{
89
+ for mode; do
90
+ if [ "$mode" = "$AIOMODE" ]; then
91
+ return
92
+ fi
93
+ done
94
+ _notrun "not suitable for aio mode: $AIOMODE"
95
+}
96
+_default_aio_mode()
97
+{
98
+ AIOMODE="$1"
99
+ QEMU_IO="$QEMU_IO --aio $1"
100
+}
101
102
_unsupported_imgopts()
103
{
104
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
105
index XXXXXXX..XXXXXXX 100644
106
--- a/tests/qemu-iotests/iotests.py
107
+++ b/tests/qemu-iotests/iotests.py
108
@@ -XXX,XX +XXX,XX @@ test_dir = os.environ.get('TEST_DIR')
109
sock_dir = os.environ.get('SOCK_DIR')
110
output_dir = os.environ.get('OUTPUT_DIR', '.')
111
cachemode = os.environ.get('CACHEMODE')
112
+aiomode = os.environ.get('AIOMODE')
113
qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE')
114
115
socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper')
116
@@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine):
117
options.append('file=%s' % path)
118
options.append('format=%s' % format)
119
options.append('cache=%s' % cachemode)
120
+ options.append('aio=%s' % aiomode)
121
122
if opts:
123
options.append(opts)
124
@@ -XXX,XX +XXX,XX @@ def verify_cache_mode(supported_cache_modes=[]):
125
if supported_cache_modes and (cachemode not in supported_cache_modes):
126
notrun('not suitable for this cache mode: %s' % cachemode)
127
128
+def verify_aio_mode(supported_aio_modes=[]):
129
+ if supported_aio_modes and (aiomode not in supported_aio_modes):
130
+ notrun('not suitable for this aio mode: %s' % aiomode)
32
+
131
+
33
/**
132
def supports_quorum():
34
* Generic callback for chaining async callbacks
133
return 'quorum' in qemu_img_pipe('--help')
35
*/
134
36
diff --git a/block/curl.c b/block/curl.c
135
@@ -XXX,XX +XXX,XX @@ def execute_unittest(output, verbosity, debug):
37
index XXXXXXX..XXXXXXX 100644
136
38
--- a/block/curl.c
137
def execute_test(test_function=None,
39
+++ b/block/curl.c
138
supported_fmts=[], supported_oses=['linux'],
40
@@ -XXX,XX +XXX,XX @@ static void curl_multi_timeout_do(void *arg)
139
- supported_cache_modes=[], unsupported_fmts=[],
41
return;
140
- supported_protocols=[], unsupported_protocols=[]):
42
}
141
+ supported_cache_modes=[], supported_aio_modes={},
43
142
+ unsupported_fmts=[], supported_protocols=[],
44
+ aio_context_acquire(s->aio_context);
143
+ unsupported_protocols=[]):
45
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
144
"""Run either unittest or script-style tests."""
46
145
47
curl_multi_check_completion(s);
146
# We are using TEST_DIR and QEMU_DEFAULT_MACHINE as proxies to
48
+ aio_context_release(s->aio_context);
147
@@ -XXX,XX +XXX,XX @@ def execute_test(test_function=None,
49
#else
148
verify_protocol(supported_protocols, unsupported_protocols)
50
abort();
149
verify_platform(supported_oses)
51
#endif
150
verify_cache_mode(supported_cache_modes)
52
diff --git a/block/io.c b/block/io.c
151
+ verify_aio_mode(supported_aio_modes)
53
index XXXXXXX..XXXXXXX 100644
152
54
--- a/block/io.c
153
if debug:
55
+++ b/block/io.c
154
output = sys.stdout
56
@@ -XXX,XX +XXX,XX @@ void bdrv_aio_cancel(BlockAIOCB *acb)
57
if (acb->aiocb_info->get_aio_context) {
58
aio_poll(acb->aiocb_info->get_aio_context(acb), true);
59
} else if (acb->bs) {
60
+ /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
61
+ * assert that we're not using an I/O thread. Thread-safe
62
+ * code should use bdrv_aio_cancel_async exclusively.
63
+ */
64
+ assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
65
aio_poll(bdrv_get_aio_context(acb->bs), true);
66
} else {
67
abort();
68
diff --git a/block/iscsi.c b/block/iscsi.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/block/iscsi.c
71
+++ b/block/iscsi.c
72
@@ -XXX,XX +XXX,XX @@ static void iscsi_retry_timer_expired(void *opaque)
73
struct IscsiTask *iTask = opaque;
74
iTask->complete = 1;
75
if (iTask->co) {
76
- qemu_coroutine_enter(iTask->co);
77
+ aio_co_wake(iTask->co);
78
}
79
}
80
81
@@ -XXX,XX +XXX,XX @@ static void iscsi_nop_timed_event(void *opaque)
82
{
83
IscsiLun *iscsilun = opaque;
84
85
+ aio_context_acquire(iscsilun->aio_context);
86
if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
87
error_report("iSCSI: NOP timeout. Reconnecting...");
88
iscsilun->request_timed_out = true;
89
} else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
90
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
91
- return;
92
+ goto out;
93
}
94
95
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
96
iscsi_set_events(iscsilun);
97
+
98
+out:
99
+ aio_context_release(iscsilun->aio_context);
100
}
101
102
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
103
diff --git a/block/null.c b/block/null.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/block/null.c
106
+++ b/block/null.c
107
@@ -XXX,XX +XXX,XX @@ static void null_bh_cb(void *opaque)
108
static void null_timer_cb(void *opaque)
109
{
110
NullAIOCB *acb = opaque;
111
+ AioContext *ctx = bdrv_get_aio_context(acb->common.bs);
112
+
113
+ aio_context_acquire(ctx);
114
acb->common.cb(acb->common.opaque, 0);
115
+ aio_context_release(ctx);
116
timer_deinit(&acb->timer);
117
qemu_aio_unref(acb);
118
}
119
diff --git a/block/qed.c b/block/qed.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/block/qed.c
122
+++ b/block/qed.c
123
@@ -XXX,XX +XXX,XX @@ static void qed_need_check_timer_cb(void *opaque)
124
125
trace_qed_need_check_timer_cb(s);
126
127
+ qed_acquire(s);
128
qed_plug_allocating_write_reqs(s);
129
130
/* Ensure writes are on disk before clearing flag */
131
bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s);
132
+ qed_release(s);
133
+}
134
+
135
+void qed_acquire(BDRVQEDState *s)
136
+{
137
+ aio_context_acquire(bdrv_get_aio_context(s->bs));
138
+}
139
+
140
+void qed_release(BDRVQEDState *s)
141
+{
142
+ aio_context_release(bdrv_get_aio_context(s->bs));
143
}
144
145
static void qed_start_need_check_timer(BDRVQEDState *s)
146
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/block/throttle-groups.c
149
+++ b/block/throttle-groups.c
150
@@ -XXX,XX +XXX,XX @@ static void timer_cb(BlockBackend *blk, bool is_write)
151
qemu_mutex_unlock(&tg->lock);
152
153
/* Run the request that was waiting for this timer */
154
+ aio_context_acquire(blk_get_aio_context(blk));
155
empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
156
+ aio_context_release(blk_get_aio_context(blk));
157
158
/* If the request queue was empty then we have to take care of
159
* scheduling the next one */
160
diff --git a/util/aio-posix.c b/util/aio-posix.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/util/aio-posix.c
163
+++ b/util/aio-posix.c
164
@@ -XXX,XX +XXX,XX @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
165
}
166
167
/* Run our timers */
168
- aio_context_acquire(ctx);
169
progress |= timerlistgroup_run_timers(&ctx->tlg);
170
- aio_context_release(ctx);
171
172
return progress;
173
}
174
diff --git a/util/aio-win32.c b/util/aio-win32.c
175
index XXXXXXX..XXXXXXX 100644
176
--- a/util/aio-win32.c
177
+++ b/util/aio-win32.c
178
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
179
progress |= aio_dispatch_handlers(ctx, event);
180
} while (count > 0);
181
182
- aio_context_acquire(ctx);
183
progress |= timerlistgroup_run_timers(&ctx->tlg);
184
- aio_context_release(ctx);
185
return progress;
186
}
187
188
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
189
index XXXXXXX..XXXXXXX 100644
190
--- a/util/qemu-coroutine-sleep.c
191
+++ b/util/qemu-coroutine-sleep.c
192
@@ -XXX,XX +XXX,XX @@ static void co_sleep_cb(void *opaque)
193
{
194
CoSleepCB *sleep_cb = opaque;
195
196
- qemu_coroutine_enter(sleep_cb->co);
197
+ aio_co_wake(sleep_cb->co);
198
}
199
200
void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
201
--
155
--
202
2.9.3
156
2.24.1
203
157
204
158
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
2
3
qed_aio_start_io and qed_aio_next_io will not have to acquire/release
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
the AioContext, while qed_aio_next_io_cb will. Split the functionality
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
and gain a little type-safety in the process.
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
6
Message-id: 20200120141858.587874-16-stefanha@redhat.com
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-Id: <20200120141858.587874-16-stefanha@redhat.com>
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
11
Message-id: 20170213135235.12274-11-pbonzini@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
9
---
14
block/qed.c | 39 +++++++++++++++++++++++++--------------
10
tests/qemu-iotests/028 | 2 +-
15
1 file changed, 25 insertions(+), 14 deletions(-)
11
tests/qemu-iotests/058 | 2 +-
16
12
tests/qemu-iotests/089 | 4 ++--
17
diff --git a/block/qed.c b/block/qed.c
13
tests/qemu-iotests/091 | 4 ++--
18
index XXXXXXX..XXXXXXX 100644
14
tests/qemu-iotests/109 | 2 +-
19
--- a/block/qed.c
15
tests/qemu-iotests/147 | 5 +++--
20
+++ b/block/qed.c
16
tests/qemu-iotests/181 | 8 ++++----
21
@@ -XXX,XX +XXX,XX @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
17
tests/qemu-iotests/183 | 4 ++--
22
return l2_table;
18
tests/qemu-iotests/185 | 10 +++++-----
23
}
19
tests/qemu-iotests/200 | 2 +-
24
20
tests/qemu-iotests/201 | 8 ++++----
25
-static void qed_aio_next_io(void *opaque, int ret);
21
11 files changed, 26 insertions(+), 25 deletions(-)
26
+static void qed_aio_next_io(QEDAIOCB *acb, int ret);
22
27
+
23
diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028
28
+static void qed_aio_start_io(QEDAIOCB *acb)
24
index XXXXXXX..XXXXXXX 100755
29
+{
25
--- a/tests/qemu-iotests/028
30
+ qed_aio_next_io(acb, 0);
26
+++ b/tests/qemu-iotests/028
31
+}
27
@@ -XXX,XX +XXX,XX @@ echo block-backup
32
+
28
echo
33
+static void qed_aio_next_io_cb(void *opaque, int ret)
29
34
+{
30
qemu_comm_method="monitor"
35
+ QEDAIOCB *acb = opaque;
31
-_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
36
+
32
+_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk
37
+ qed_aio_next_io(acb, ret);
33
h=$QEMU_HANDLE
38
+}
34
if [ "${VALGRIND_QEMU}" == "y" ]; then
39
35
QEMU_COMM_TIMEOUT=7
40
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
36
diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
41
{
37
index XXXXXXX..XXXXXXX 100755
42
@@ -XXX,XX +XXX,XX @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
38
--- a/tests/qemu-iotests/058
43
39
+++ b/tests/qemu-iotests/058
44
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
40
@@ -XXX,XX +XXX,XX @@ nbd_snapshot_img="nbd:unix:$nbd_unix_socket"
45
if (acb) {
41
converted_image=$TEST_IMG.converted
46
- qed_aio_next_io(acb, 0);
42
47
+ qed_aio_start_io(acb);
43
# Use -f raw instead of -f $IMGFMT for the NBD connection
48
}
44
-QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE"
49
}
45
+QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE --aio=$AIOMODE"
50
46
51
@@ -XXX,XX +XXX,XX @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
47
echo
52
QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
48
echo "== preparing image =="
53
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
49
diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089
54
if (acb) {
50
index XXXXXXX..XXXXXXX 100755
55
- qed_aio_next_io(acb, 0);
51
--- a/tests/qemu-iotests/089
56
+ qed_aio_start_io(acb);
52
+++ b/tests/qemu-iotests/089
57
} else if (s->header.features & QED_F_NEED_CHECK) {
53
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \
58
qed_start_need_check_timer(s);
54
59
}
55
$QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG"
60
@@ -XXX,XX +XXX,XX @@ static void qed_commit_l2_update(void *opaque, int ret)
56
61
acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
57
-$QEMU_IO_PROG --cache $CACHEMODE \
62
assert(acb->request.l2_table != NULL);
58
+$QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \
63
59
-c 'read -P 42 0 512' -c 'read -P 23 512 512' \
64
- qed_aio_next_io(opaque, ret);
60
-c 'read -P 66 1024 512' "json:{
65
+ qed_aio_next_io(acb, ret);
61
\"driver\": \"$IMGFMT\",
66
}
62
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'write -P 42 0x38000 512' "$TEST_IMG" | _filter_qemu_io
67
63
68
/**
64
# The "image.filename" part tests whether "a": { "b": "c" } and "a.b": "c" do
69
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
65
# the same (which they should).
70
if (need_alloc) {
66
-$QEMU_IO_PROG --cache $CACHEMODE \
71
/* Write out the whole new L2 table */
67
+$QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \
72
qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
68
-c 'read -P 42 0x38000 512' "json:{
73
- qed_aio_write_l1_update, acb);
69
\"driver\": \"$IMGFMT\",
74
+ qed_aio_write_l1_update, acb);
70
\"file\": {
75
} else {
71
diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091
76
/* Write out only the updated part of the L2 table */
72
index XXXXXXX..XXXXXXX 100755
77
qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
73
--- a/tests/qemu-iotests/091
78
- qed_aio_next_io, acb);
74
+++ b/tests/qemu-iotests/091
79
+ qed_aio_next_io_cb, acb);
75
@@ -XXX,XX +XXX,XX @@ echo === Starting QEMU VM1 ===
80
}
76
echo
81
return;
77
82
78
qemu_comm_method="monitor"
83
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_main(void *opaque, int ret)
79
-_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
84
}
80
+_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk
85
81
h1=$QEMU_HANDLE
86
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
82
87
- next_fn = qed_aio_next_io;
83
echo
88
+ next_fn = qed_aio_next_io_cb;
84
echo === Starting QEMU VM2 ===
89
} else {
85
echo
90
if (s->bs->backing) {
86
-_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk \
91
next_fn = qed_aio_write_flush_before_l2_update;
87
+_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk \
92
@@ -XXX,XX +XXX,XX @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
88
-incoming "exec: cat '${MIG_FIFO}'"
93
if (acb->flags & QED_AIOCB_ZERO) {
89
h2=$QEMU_HANDLE
94
/* Skip ahead if the clusters are already zero */
90
95
if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
91
diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109
96
- qed_aio_next_io(acb, 0);
92
index XXXXXXX..XXXXXXX 100755
97
+ qed_aio_start_io(acb);
93
--- a/tests/qemu-iotests/109
98
return;
94
+++ b/tests/qemu-iotests/109
99
}
95
@@ -XXX,XX +XXX,XX @@ run_qemu()
100
96
local qmp_format="$3"
101
@@ -XXX,XX +XXX,XX @@ static void qed_aio_read_data(void *opaque, int ret,
97
local qmp_event="$4"
102
/* Handle zero cluster and backing file reads */
98
103
if (ret == QED_CLUSTER_ZERO) {
99
- _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},id=src
104
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
100
+ _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src
105
- qed_aio_next_io(acb, 0);
101
_send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return"
106
+ qed_aio_start_io(acb);
102
107
return;
103
_send_qemu_cmd $QEMU_HANDLE \
108
} else if (ret != QED_CLUSTER_FOUND) {
104
diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147
109
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
105
index XXXXXXX..XXXXXXX 100755
110
- &acb->backing_qiov, qed_aio_next_io, acb);
106
--- a/tests/qemu-iotests/147
111
+ &acb->backing_qiov, qed_aio_next_io_cb, acb);
107
+++ b/tests/qemu-iotests/147
112
return;
108
@@ -XXX,XX +XXX,XX @@ import socket
113
}
109
import stat
114
110
import time
115
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
111
import iotests
116
bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
112
-from iotests import cachemode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe
117
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
113
+from iotests import cachemode, aiomode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe
118
- qed_aio_next_io, acb);
114
119
+ qed_aio_next_io_cb, acb);
115
NBD_PORT_START = 32768
120
return;
116
NBD_PORT_END = NBD_PORT_START + 1024
121
117
@@ -XXX,XX +XXX,XX @@ class BuiltinNBD(NBDBlockdevAddBase):
122
err:
118
self.server.add_drive_raw('if=none,id=nbd-export,' +
123
@@ -XXX,XX +XXX,XX @@ err:
119
'file=%s,' % test_img +
124
/**
120
'format=%s,' % imgfmt +
125
* Begin next I/O or complete the request
121
- 'cache=%s' % cachemode)
126
*/
122
+ 'cache=%s' % cachemode +
127
-static void qed_aio_next_io(void *opaque, int ret)
123
+ 'aio=%s' % aiomode)
128
+static void qed_aio_next_io(QEDAIOCB *acb, int ret)
124
self.server.launch()
129
{
125
130
- QEDAIOCB *acb = opaque;
126
def tearDown(self):
131
BDRVQEDState *s = acb_to_s(acb);
127
diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181
132
QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
128
index XXXXXXX..XXXXXXX 100755
133
qed_aio_write_data : qed_aio_read_data;
129
--- a/tests/qemu-iotests/181
134
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
130
+++ b/tests/qemu-iotests/181
135
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
131
@@ -XXX,XX +XXX,XX @@ qemu_comm_method="monitor"
136
132
137
/* Start request */
133
if [ "$IMGOPTSSYNTAX" = "true" ]; then
138
- qed_aio_next_io(acb, 0);
134
_launch_qemu \
139
+ qed_aio_start_io(acb);
135
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk
140
return &acb->common;
136
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk
141
}
137
else
142
138
_launch_qemu \
139
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk
140
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk
141
fi
142
src=$QEMU_HANDLE
143
144
if [ "$IMGOPTSSYNTAX" = "true" ]; then
145
_launch_qemu \
146
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk \
147
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk \
148
-incoming "unix:${MIG_SOCKET}"
149
else
150
_launch_qemu \
151
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk \
152
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk \
153
-incoming "unix:${MIG_SOCKET}"
154
fi
155
dest=$QEMU_HANDLE
156
diff --git a/tests/qemu-iotests/183 b/tests/qemu-iotests/183
157
index XXXXXXX..XXXXXXX 100755
158
--- a/tests/qemu-iotests/183
159
+++ b/tests/qemu-iotests/183
160
@@ -XXX,XX +XXX,XX @@ echo
161
qemu_comm_method="qmp"
162
163
_launch_qemu \
164
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
165
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
166
src=$QEMU_HANDLE
167
_send_qemu_cmd $src "{ 'execute': 'qmp_capabilities' }" 'return'
168
169
_launch_qemu \
170
- -drive file="${TEST_IMG}.dest",cache=$CACHEMODE,driver=$IMGFMT,id=disk \
171
+ -drive file="${TEST_IMG}.dest",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk \
172
-incoming "unix:${MIG_SOCKET}"
173
dest=$QEMU_HANDLE
174
_send_qemu_cmd $dest "{ 'execute': 'qmp_capabilities' }" 'return'
175
diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
176
index XXXXXXX..XXXXXXX 100755
177
--- a/tests/qemu-iotests/185
178
+++ b/tests/qemu-iotests/185
179
@@ -XXX,XX +XXX,XX @@ echo
180
qemu_comm_method="qmp"
181
182
_launch_qemu \
183
- -drive file="${TEST_IMG}.base",cache=$CACHEMODE,driver=$IMGFMT,id=disk
184
+ -drive file="${TEST_IMG}.base",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
185
h=$QEMU_HANDLE
186
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
187
188
@@ -XXX,XX +XXX,XX @@ echo === Start active commit job and exit qemu ===
189
echo
190
191
_launch_qemu \
192
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
193
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
194
h=$QEMU_HANDLE
195
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
196
197
@@ -XXX,XX +XXX,XX @@ echo === Start mirror job and exit qemu ===
198
echo
199
200
_launch_qemu \
201
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
202
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
203
h=$QEMU_HANDLE
204
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
205
206
@@ -XXX,XX +XXX,XX @@ echo === Start backup job and exit qemu ===
207
echo
208
209
_launch_qemu \
210
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
211
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
212
h=$QEMU_HANDLE
213
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
214
215
@@ -XXX,XX +XXX,XX @@ echo === Start streaming job and exit qemu ===
216
echo
217
218
_launch_qemu \
219
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
220
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
221
h=$QEMU_HANDLE
222
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
223
224
diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200
225
index XXXXXXX..XXXXXXX 100755
226
--- a/tests/qemu-iotests/200
227
+++ b/tests/qemu-iotests/200
228
@@ -XXX,XX +XXX,XX @@ echo === Starting QEMU VM ===
229
echo
230
qemu_comm_method="qmp"
231
_launch_qemu -object iothread,id=iothread0 $virtio_scsi \
232
- -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,id=drive_sysdisk,format=$IMGFMT \
233
+ -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,aio=$AIOMODE,id=drive_sysdisk,format=$IMGFMT \
234
-device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0
235
h1=$QEMU_HANDLE
236
237
diff --git a/tests/qemu-iotests/201 b/tests/qemu-iotests/201
238
index XXXXXXX..XXXXXXX 100755
239
--- a/tests/qemu-iotests/201
240
+++ b/tests/qemu-iotests/201
241
@@ -XXX,XX +XXX,XX @@ qemu_comm_method="monitor"
242
243
if [ "$IMGOPTSSYNTAX" = "true" ]; then
244
_launch_qemu \
245
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk
246
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk
247
else
248
_launch_qemu \
249
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk
250
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk
251
fi
252
src=$QEMU_HANDLE
253
254
if [ "$IMGOPTSSYNTAX" = "true" ]; then
255
_launch_qemu \
256
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk \
257
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk \
258
-incoming "unix:${MIG_SOCKET}"
259
else
260
_launch_qemu \
261
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk \
262
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk \
263
-incoming "unix:${MIG_SOCKET}"
264
fi
265
dest=$QEMU_HANDLE
143
--
266
--
144
2.9.3
267
2.24.1
145
268
146
269
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
The AioContext data structures are now protected by list_lock and/or
4
they are walked with FOREACH_RCU primitives. There is no need anymore
5
to acquire the AioContext for the entire duration of aio_dispatch.
6
Instead, just acquire it before and after invoking the callbacks.
7
The next step is then to push it further down.
8
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Fam Zheng <famz@redhat.com>
12
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
13
Message-id: 20170213135235.12274-12-pbonzini@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
util/aio-posix.c | 25 +++++++++++--------------
17
util/aio-win32.c | 15 +++++++--------
18
util/async.c | 2 ++
19
3 files changed, 20 insertions(+), 22 deletions(-)
20
21
diff --git a/util/aio-posix.c b/util/aio-posix.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/util/aio-posix.c
24
+++ b/util/aio-posix.c
25
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
26
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
27
aio_node_check(ctx, node->is_external) &&
28
node->io_read) {
29
+ aio_context_acquire(ctx);
30
node->io_read(node->opaque);
31
+ aio_context_release(ctx);
32
33
/* aio_notify() does not count as progress */
34
if (node->opaque != &ctx->notifier) {
35
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx)
36
(revents & (G_IO_OUT | G_IO_ERR)) &&
37
aio_node_check(ctx, node->is_external) &&
38
node->io_write) {
39
+ aio_context_acquire(ctx);
40
node->io_write(node->opaque);
41
+ aio_context_release(ctx);
42
progress = true;
43
}
44
45
@@ -XXX,XX +XXX,XX @@ bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
46
}
47
48
/* Run our timers */
49
+ aio_context_acquire(ctx);
50
progress |= timerlistgroup_run_timers(&ctx->tlg);
51
+ aio_context_release(ctx);
52
53
return progress;
54
}
55
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
56
int64_t timeout;
57
int64_t start = 0;
58
59
- aio_context_acquire(ctx);
60
- progress = false;
61
-
62
/* aio_notify can avoid the expensive event_notifier_set if
63
* everything (file descriptors, bottom halves, timers) will
64
* be re-evaluated before the next blocking poll(). This is
65
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
66
start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
67
}
68
69
- if (try_poll_mode(ctx, blocking)) {
70
- progress = true;
71
- } else {
72
+ aio_context_acquire(ctx);
73
+ progress = try_poll_mode(ctx, blocking);
74
+ aio_context_release(ctx);
75
+
76
+ if (!progress) {
77
assert(npfd == 0);
78
79
/* fill pollfds */
80
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
81
timeout = blocking ? aio_compute_timeout(ctx) : 0;
82
83
/* wait until next event */
84
- if (timeout) {
85
- aio_context_release(ctx);
86
- }
87
if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
88
AioHandler epoll_handler;
89
90
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
91
} else {
92
ret = qemu_poll_ns(pollfds, npfd, timeout);
93
}
94
- if (timeout) {
95
- aio_context_acquire(ctx);
96
- }
97
}
98
99
if (blocking) {
100
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
101
progress = true;
102
}
103
104
- aio_context_release(ctx);
105
-
106
return progress;
107
}
108
109
diff --git a/util/aio-win32.c b/util/aio-win32.c
110
index XXXXXXX..XXXXXXX 100644
111
--- a/util/aio-win32.c
112
+++ b/util/aio-win32.c
113
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
114
(revents || event_notifier_get_handle(node->e) == event) &&
115
node->io_notify) {
116
node->pfd.revents = 0;
117
+ aio_context_acquire(ctx);
118
node->io_notify(node->e);
119
+ aio_context_release(ctx);
120
121
/* aio_notify() does not count as progress */
122
if (node->e != &ctx->notifier) {
123
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
124
(node->io_read || node->io_write)) {
125
node->pfd.revents = 0;
126
if ((revents & G_IO_IN) && node->io_read) {
127
+ aio_context_acquire(ctx);
128
node->io_read(node->opaque);
129
+ aio_context_release(ctx);
130
progress = true;
131
}
132
if ((revents & G_IO_OUT) && node->io_write) {
133
+ aio_context_acquire(ctx);
134
node->io_write(node->opaque);
135
+ aio_context_release(ctx);
136
progress = true;
137
}
138
139
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
140
int count;
141
int timeout;
142
143
- aio_context_acquire(ctx);
144
progress = false;
145
146
/* aio_notify can avoid the expensive event_notifier_set if
147
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
148
149
timeout = blocking && !have_select_revents
150
? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
151
- if (timeout) {
152
- aio_context_release(ctx);
153
- }
154
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
155
if (blocking) {
156
assert(first);
157
atomic_sub(&ctx->notify_me, 2);
158
}
159
- if (timeout) {
160
- aio_context_acquire(ctx);
161
- }
162
163
if (first) {
164
aio_notify_accept(ctx);
165
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
166
progress |= aio_dispatch_handlers(ctx, event);
167
} while (count > 0);
168
169
+ aio_context_acquire(ctx);
170
progress |= timerlistgroup_run_timers(&ctx->tlg);
171
-
172
aio_context_release(ctx);
173
return progress;
174
}
175
diff --git a/util/async.c b/util/async.c
176
index XXXXXXX..XXXXXXX 100644
177
--- a/util/async.c
178
+++ b/util/async.c
179
@@ -XXX,XX +XXX,XX @@ int aio_bh_poll(AioContext *ctx)
180
ret = 1;
181
}
182
bh->idle = 0;
183
+ aio_context_acquire(ctx);
184
aio_bh_call(bh);
185
+ aio_context_release(ctx);
186
}
187
if (bh->deleted) {
188
deleted = true;
189
--
190
2.9.3
191
192
diff view generated by jsdifflib