The following changes since commit 6d40ce00c1166c317e298ad82ecf10e650c4f87d:

  Update version for v6.0.0-rc1 release (2021-03-30 18:19:07 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to b6489ac06695e257ea0a9841364577e247fdee30:

  test-coroutine: Add rwlock downgrade test (2021-03-31 10:44:21 +0100)

----------------------------------------------------------------
Pull request

A fix for VDI image files and more generally for CoRwlock.

----------------------------------------------------------------

David Edmondson (4):
  block/vdi: When writing new bmap entry fails, don't leak the buffer
  block/vdi: Don't assume that blocks are larger than VdiHeader
  coroutine-lock: Store the coroutine in the CoWaitRecord only once
  test-coroutine: Add rwlock downgrade test

Paolo Bonzini (2):
  coroutine-lock: Reimplement CoRwlock to fix downgrade bug
  test-coroutine: Add rwlock upgrade test

 include/qemu/coroutine.h    |  17 ++--
 block/vdi.c                 |  11 ++-
 tests/unit/test-coroutine.c | 161 +++++++++++++++++++++++++++++++++++
 util/qemu-coroutine-lock.c  | 165 +++++++++++++++++++++++-------------
 4 files changed, 282 insertions(+), 72 deletions(-)

--
2.30.2

From: David Edmondson <david.edmondson@oracle.com>

If a new bitmap entry is allocated, requiring the entire block to be
written, avoid leaking the buffer allocated for the block should the
write fail.
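
A minimal, self-contained sketch of the rule being enforced (not the QEMU
code; write_block() is a hypothetical stand-in for the bdrv_pwrite() call
chain): once the block buffer is allocated, every exit path must release
it, including the failure path.

    #include <glib.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the real write path; it may fail. */
    static int write_block(const uint8_t *buf, size_t len)
    {
        (void)buf;
        return len > 0 ? 0 : -1;
    }

    static int allocate_and_write(size_t block_size)
    {
        uint8_t *block = g_malloc0(block_size);
        int ret = write_block(block, block_size);

        if (ret < 0) {
            g_free(block);  /* the fix: also free on the error path */
            return ret;
        }
        g_free(block);
        return 0;
    }

    int main(void)
    {
        return allocate_and_write(512) == 0 ? 0 : 1;
    }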

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Message-id: 20210325112941.365238-2-pbonzini@redhat.com
Message-Id: <20210309144015.557477-2-david.edmondson@oracle.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vdi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ nonallocating_write:
 
     logout("finished data write\n");
     if (ret < 0) {
+        g_free(block);
         return ret;
     }
 
--
2.30.2

From: David Edmondson <david.edmondson@oracle.com>

Given that the block size is read from the header of the VDI file, a
wide variety of sizes might be seen. Rather than reusing a block-sized
memory region when writing the VDI header, allocate an appropriately
sized buffer.
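
A sketch of the sizing rule (hypothetical DiskHeader type, not the QEMU
code): the buffer passed to the header write is sized from the header
type itself, so it stays correct no matter what block size the image
declares.

    #include <glib.h>
    #include <stdint.h>
    #include <string.h>

    typedef struct {
        uint8_t bytes[512];    /* hypothetical fixed-size on-disk header */
    } DiskHeader;

    static void write_header(const DiskHeader *src)
    {
        /* Size the buffer from the type, not from the image's block size. */
        DiskHeader *header = g_malloc(sizeof(*header));

        memcpy(header, src, sizeof(*header));
        /* ... byte-swap fields and write sizeof(*header) bytes here ... */
        g_free(header);
    }

    int main(void)
    {
        DiskHeader h = { { 0 } };
        write_header(&h);
        return 0;
    }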

Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Message-id: 20210325112941.365238-3-pbonzini@redhat.com
Message-Id: <20210309144015.557477-3-david.edmondson@oracle.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vdi.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ nonallocating_write:
 
     if (block) {
         /* One or more new blocks were allocated. */
-        VdiHeader *header = (VdiHeader *) block;
+        VdiHeader *header;
         uint8_t *base;
         uint64_t offset;
         uint32_t n_sectors;
 
+        g_free(block);
+        header = g_malloc(sizeof(*header));
+
         logout("now writing modified header\n");
         assert(VDI_IS_ALLOCATED(bmap_first));
         *header = s->header;
         vdi_header_to_le(header);
-        ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader));
-        g_free(block);
-        block = NULL;
+        ret = bdrv_pwrite(bs->file, 0, header, sizeof(*header));
+        g_free(header);
 
         if (ret < 0) {
             return ret;
--
2.30.2

From: David Edmondson <david.edmondson@oracle.com>

When taking the slow path for mutex acquisition, set the coroutine
value in the CoWaitRecord in push_waiter(), rather than both there and
in the caller.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-4-pbonzini@redhat.com
Message-Id: <20210309144015.557477-4-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/qemu-coroutine-lock.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
     unsigned old_handoff;
 
     trace_qemu_co_mutex_lock_entry(mutex, self);
-    w.co = self;
     push_waiter(mutex, &w);
 
     /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
--
2.30.2

From: Paolo Bonzini <pbonzini@redhat.com>

An invariant of the current rwlock is that if multiple coroutines hold a
reader lock, all must be runnable. The unlock implementation relies on
this, choosing to wake a single coroutine when the final read lock
holder exits the critical section, assuming that it will wake a
coroutine attempting to acquire a write lock.

The downgrade implementation violates this assumption by creating a
read lock owning coroutine that is exclusively runnable - any other
coroutines that are waiting to acquire a read lock are *not* made
runnable when the write lock holder converts its ownership to read
only.

More generally, the old implementation had many other fairness bugs.
Their root cause was that CoQueue would wake up readers even if there
were pending writers, and would wake up writers even if there were
readers. In that case, the woken coroutine would go back to sleep *at
the end* of the CoQueue, losing its place at the head of the line.

To fix this, keep the queue of waiters explicitly in the CoRwlock
instead of using CoQueue, and store for each waiter whether it is a
potential reader or a writer. This way, downgrade can look at the
first queued coroutine and wake it only if it is a reader, causing
all other readers in line to be released in turn.
Reported-by: David Edmondson <david.edmondson@oracle.com>
28
Reviewed-by: David Edmondson <david.edmondson@oracle.com>
29
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
30
Message-id: 20210325112941.365238-5-pbonzini@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
32
---
13
include/block/block_int.h | 11 +++++++++--
33
include/qemu/coroutine.h | 17 ++--
14
block/io.c | 48 +++++++++++++++++++++++++++++++++--------------
34
util/qemu-coroutine-lock.c | 164 +++++++++++++++++++++++--------------
15
2 files changed, 43 insertions(+), 16 deletions(-)
35
2 files changed, 114 insertions(+), 67 deletions(-)
16
36
17
diff --git a/include/block/block_int.h b/include/block/block_int.h
37
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
18
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
19
--- a/include/block/block_int.h
39
--- a/include/qemu/coroutine.h
20
+++ b/include/block/block_int.h
40
+++ b/include/qemu/coroutine.h
21
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
41
@@ -XXX,XX +XXX,XX @@ bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock);
22
int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
42
bool qemu_co_queue_empty(CoQueue *queue);
23
43
24
/**
44
25
- * Drain and stop any internal sources of requests in the driver, and
45
+typedef struct CoRwTicket CoRwTicket;
26
- * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
46
typedef struct CoRwlock {
27
+ * bdrv_co_drain is called if implemented in the beginning of a
47
- int pending_writer;
28
+ * drain operation to drain and stop any internal sources of requests in
48
- int reader;
29
+ * the driver.
49
CoMutex mutex;
30
+ * bdrv_co_drain_end is called if implemented at the end of the drain.
50
- CoQueue queue;
31
+ *
51
+
32
+ * They should be used by the driver to e.g. manage scheduled I/O
52
+ /* Number of readers, or -1 if owned for writing. */
33
+ * requests, or toggle an internal state. After the end of the drain new
53
+ int owners;
34
+ * requests will continue normally.
54
+
35
*/
55
+ /* Waiting coroutines. */
36
void coroutine_fn (*bdrv_co_drain)(BlockDriverState *bs);
56
+ QSIMPLEQ_HEAD(, CoRwTicket) tickets;
37
+ void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
57
} CoRwlock;
38
58
39
void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
59
/**
40
Error **errp);
60
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock);
41
diff --git a/block/io.c b/block/io.c
61
/**
62
* Write Locks the CoRwlock from a reader. This is a bit more efficient than
63
* @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock.
64
- * However, if the lock cannot be upgraded immediately, control is transferred
65
- * to the caller of the current coroutine. Also, @qemu_co_rwlock_upgrade
66
- * only overrides CoRwlock fairness if there are no concurrent readers, so
67
- * another writer might run while @qemu_co_rwlock_upgrade blocks.
68
+ * Note that if the lock cannot be upgraded immediately, control is transferred
69
+ * to the caller of the current coroutine; another writer might run while
70
+ * @qemu_co_rwlock_upgrade blocks.
71
*/
72
void qemu_co_rwlock_upgrade(CoRwlock *lock);
73
74
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
42
index XXXXXXX..XXXXXXX 100644
75
index XXXXXXX..XXXXXXX 100644
43
--- a/block/io.c
76
--- a/util/qemu-coroutine-lock.c
44
+++ b/block/io.c
77
+++ b/util/qemu-coroutine-lock.c
45
@@ -XXX,XX +XXX,XX @@ typedef struct {
78
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
46
Coroutine *co;
79
trace_qemu_co_mutex_unlock_return(mutex, self);
47
BlockDriverState *bs;
80
}
48
bool done;
81
49
+ bool begin;
82
+struct CoRwTicket {
50
} BdrvCoDrainData;
83
+ bool read;
51
84
+ Coroutine *co;
52
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
85
+ QSIMPLEQ_ENTRY(CoRwTicket) next;
53
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
86
+};
54
BdrvCoDrainData *data = opaque;
87
+
55
BlockDriverState *bs = data->bs;
88
void qemu_co_rwlock_init(CoRwlock *lock)
56
89
{
57
- bs->drv->bdrv_co_drain(bs);
90
- memset(lock, 0, sizeof(*lock));
58
+ if (data->begin) {
91
- qemu_co_queue_init(&lock->queue);
59
+ bs->drv->bdrv_co_drain(bs);
92
qemu_co_mutex_init(&lock->mutex);
60
+ } else {
93
+ lock->owners = 0;
61
+ bs->drv->bdrv_co_drain_end(bs);
94
+ QSIMPLEQ_INIT(&lock->tickets);
95
+}
96
+
97
+/* Releases the internal CoMutex. */
98
+static void qemu_co_rwlock_maybe_wake_one(CoRwlock *lock)
99
+{
100
+ CoRwTicket *tkt = QSIMPLEQ_FIRST(&lock->tickets);
101
+ Coroutine *co = NULL;
102
+
103
+ /*
104
+ * Setting lock->owners here prevents rdlock and wrlock from
105
+ * sneaking in between unlock and wake.
106
+ */
107
+
108
+ if (tkt) {
109
+ if (tkt->read) {
110
+ if (lock->owners >= 0) {
111
+ lock->owners++;
112
+ co = tkt->co;
113
+ }
114
+ } else {
115
+ if (lock->owners == 0) {
116
+ lock->owners = -1;
117
+ co = tkt->co;
118
+ }
119
+ }
62
+ }
120
+ }
63
121
+
64
/* Set data->done before reading bs->wakeup. */
122
+ if (co) {
65
atomic_mb_set(&data->done, true);
123
+ QSIMPLEQ_REMOVE_HEAD(&lock->tickets, next);
66
bdrv_wakeup(bs);
124
+ qemu_co_mutex_unlock(&lock->mutex);
67
}
125
+ aio_co_wake(co);
68
126
+ } else {
69
-static void bdrv_drain_invoke(BlockDriverState *bs)
127
+ qemu_co_mutex_unlock(&lock->mutex);
70
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
128
+ }
71
{
129
}
72
- BdrvCoDrainData data = { .bs = bs, .done = false };
130
73
+ BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
131
void qemu_co_rwlock_rdlock(CoRwlock *lock)
74
132
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock)
75
- if (!bs->drv || !bs->drv->bdrv_co_drain) {
133
76
+ if (!bs->drv || (begin && !bs->drv->bdrv_co_drain) ||
134
qemu_co_mutex_lock(&lock->mutex);
77
+ (!begin && !bs->drv->bdrv_co_drain_end)) {
135
/* For fairness, wait if a writer is in line. */
78
return;
136
- while (lock->pending_writer) {
137
- qemu_co_queue_wait(&lock->queue, &lock->mutex);
138
- }
139
- lock->reader++;
140
- qemu_co_mutex_unlock(&lock->mutex);
141
-
142
- /* The rest of the read-side critical section is run without the mutex. */
143
- self->locks_held++;
144
-}
145
-
146
-void qemu_co_rwlock_unlock(CoRwlock *lock)
147
-{
148
- Coroutine *self = qemu_coroutine_self();
149
-
150
- assert(qemu_in_coroutine());
151
- if (!lock->reader) {
152
- /* The critical section started in qemu_co_rwlock_wrlock. */
153
- qemu_co_queue_restart_all(&lock->queue);
154
+ if (lock->owners == 0 || (lock->owners > 0 && QSIMPLEQ_EMPTY(&lock->tickets))) {
155
+ lock->owners++;
156
+ qemu_co_mutex_unlock(&lock->mutex);
157
} else {
158
- self->locks_held--;
159
+ CoRwTicket my_ticket = { true, self };
160
161
+ QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
162
+ qemu_co_mutex_unlock(&lock->mutex);
163
+ qemu_coroutine_yield();
164
+ assert(lock->owners >= 1);
165
+
166
+ /* Possibly wake another reader, which will wake the next in line. */
167
qemu_co_mutex_lock(&lock->mutex);
168
- lock->reader--;
169
- assert(lock->reader >= 0);
170
- /* Wakeup only one waiting writer */
171
- if (!lock->reader) {
172
- qemu_co_queue_next(&lock->queue);
173
- }
174
+ qemu_co_rwlock_maybe_wake_one(lock);
79
}
175
}
80
176
- qemu_co_mutex_unlock(&lock->mutex);
81
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs)
177
+
82
BDRV_POLL_WHILE(bs, !data.done);
178
+ self->locks_held++;
83
}
179
+}
84
180
+
85
-static bool bdrv_drain_recurse(BlockDriverState *bs)
181
+void qemu_co_rwlock_unlock(CoRwlock *lock)
86
+static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
182
+{
87
{
183
+ Coroutine *self = qemu_coroutine_self();
88
BdrvChild *child, *tmp;
184
+
89
bool waited;
185
+ assert(qemu_in_coroutine());
90
186
+ self->locks_held--;
91
- waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
187
+
92
-
188
+ qemu_co_mutex_lock(&lock->mutex);
93
/* Ensure any pending metadata writes are submitted to bs->file. */
189
+ if (lock->owners > 0) {
94
- bdrv_drain_invoke(bs);
190
+ lock->owners--;
95
+ bdrv_drain_invoke(bs, begin);
191
+ } else {
96
+
192
+ assert(lock->owners == -1);
97
+ /* Wait for drained requests to finish */
193
+ lock->owners = 0;
98
+ waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
99
100
QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
101
BlockDriverState *bs = child->bs;
102
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
103
*/
104
bdrv_ref(bs);
105
}
106
- waited |= bdrv_drain_recurse(bs);
107
+ waited |= bdrv_drain_recurse(bs, begin);
108
if (in_main_loop) {
109
bdrv_unref(bs);
110
}
111
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
112
BlockDriverState *bs = data->bs;
113
114
bdrv_dec_in_flight(bs);
115
- bdrv_drained_begin(bs);
116
+ if (data->begin) {
117
+ bdrv_drained_begin(bs);
118
+ } else {
119
+ bdrv_drained_end(bs);
120
+ }
194
+ }
121
+
195
+
122
data->done = true;
196
+ qemu_co_rwlock_maybe_wake_one(lock);
123
aio_co_wake(co);
197
}
124
}
198
125
199
void qemu_co_rwlock_downgrade(CoRwlock *lock)
126
-static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
200
{
127
+static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
201
- Coroutine *self = qemu_coroutine_self();
128
+ bool begin)
202
+ qemu_co_mutex_lock(&lock->mutex);
129
{
203
+ assert(lock->owners == -1);
130
BdrvCoDrainData data;
204
+ lock->owners = 1;
131
205
132
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
206
- /* lock->mutex critical section started in qemu_co_rwlock_wrlock or
133
.co = qemu_coroutine_self(),
207
- * qemu_co_rwlock_upgrade.
134
.bs = bs,
208
- */
135
.done = false,
209
- assert(lock->reader == 0);
136
+ .begin = begin,
210
- lock->reader++;
137
};
211
- qemu_co_mutex_unlock(&lock->mutex);
138
bdrv_inc_in_flight(bs);
212
-
139
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
213
- /* The rest of the read-side critical section is run without the mutex. */
140
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
214
- self->locks_held++;
141
void bdrv_drained_begin(BlockDriverState *bs)
215
+ /* Possibly wake another reader, which will wake the next in line. */
142
{
216
+ qemu_co_rwlock_maybe_wake_one(lock);
143
if (qemu_in_coroutine()) {
217
}
144
- bdrv_co_yield_to_drain(bs);
218
145
+ bdrv_co_yield_to_drain(bs, true);
219
void qemu_co_rwlock_wrlock(CoRwlock *lock)
146
return;
220
{
221
+ Coroutine *self = qemu_coroutine_self();
222
+
223
qemu_co_mutex_lock(&lock->mutex);
224
- lock->pending_writer++;
225
- while (lock->reader) {
226
- qemu_co_queue_wait(&lock->queue, &lock->mutex);
227
+ if (lock->owners == 0) {
228
+ lock->owners = -1;
229
+ qemu_co_mutex_unlock(&lock->mutex);
230
+ } else {
231
+ CoRwTicket my_ticket = { false, qemu_coroutine_self() };
232
+
233
+ QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
234
+ qemu_co_mutex_unlock(&lock->mutex);
235
+ qemu_coroutine_yield();
236
+ assert(lock->owners == -1);
147
}
237
}
148
238
- lock->pending_writer--;
149
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
239
150
bdrv_parent_drained_begin(bs);
240
- /* The rest of the write-side critical section is run with
241
- * the mutex taken, so that lock->reader remains zero.
242
- * There is no need to update self->locks_held.
243
- */
244
+ self->locks_held++;
245
}
246
247
void qemu_co_rwlock_upgrade(CoRwlock *lock)
248
{
249
- Coroutine *self = qemu_coroutine_self();
250
-
251
qemu_co_mutex_lock(&lock->mutex);
252
- assert(lock->reader > 0);
253
- lock->reader--;
254
- lock->pending_writer++;
255
- while (lock->reader) {
256
- qemu_co_queue_wait(&lock->queue, &lock->mutex);
257
+ assert(lock->owners > 0);
258
+ /* For fairness, wait if a writer is in line. */
259
+ if (lock->owners == 1 && QSIMPLEQ_EMPTY(&lock->tickets)) {
260
+ lock->owners = -1;
261
+ qemu_co_mutex_unlock(&lock->mutex);
262
+ } else {
263
+ CoRwTicket my_ticket = { false, qemu_coroutine_self() };
264
+
265
+ lock->owners--;
266
+ QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
267
+ qemu_co_rwlock_maybe_wake_one(lock);
268
+ qemu_coroutine_yield();
269
+ assert(lock->owners == -1);
151
}
270
}
152
271
- lock->pending_writer--;
153
- bdrv_drain_recurse(bs);
272
-
154
+ bdrv_drain_recurse(bs, true);
273
- /* The rest of the write-side critical section is run with
155
}
274
- * the mutex taken, similar to qemu_co_rwlock_wrlock. Do
156
275
- * not account for the lock twice in self->locks_held.
157
void bdrv_drained_end(BlockDriverState *bs)
276
- */
158
{
277
- self->locks_held--;
159
+ if (qemu_in_coroutine()) {
278
}
160
+ bdrv_co_yield_to_drain(bs, false);
161
+ return;
162
+ }
163
assert(bs->quiesce_counter > 0);
164
if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
165
return;
166
}
167
168
bdrv_parent_drained_end(bs);
169
+ bdrv_drain_recurse(bs, false);
170
aio_enable_external(bdrv_get_aio_context(bs));
171
}
172
173
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
174
aio_context_acquire(aio_context);
175
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
176
if (aio_context == bdrv_get_aio_context(bs)) {
177
- waited |= bdrv_drain_recurse(bs);
178
+ waited |= bdrv_drain_recurse(bs, true);
179
}
180
}
181
aio_context_release(aio_context);
182
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
183
aio_context_acquire(aio_context);
184
aio_enable_external(aio_context);
185
bdrv_parent_drained_end(bs);
186
+ bdrv_drain_recurse(bs, false);
187
aio_context_release(aio_context);
188
}
189
190
--
279
--
191
2.13.6
280
2.30.2
192
281
193

From: Paolo Bonzini <pbonzini@redhat.com>

Test that rwlock upgrade is fair, and that readers go back to sleep if
a writer is in line.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/unit/test-coroutine.c | 62 +++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-coroutine.c
+++ b/tests/unit/test-coroutine.c
@@ -XXX,XX +XXX,XX @@ static void test_co_mutex_lockable(void)
     g_assert(QEMU_MAKE_LOCKABLE(null_pointer) == NULL);
 }
 
+static CoRwlock rwlock;
+
+/* Test that readers are properly sent back to the queue when upgrading,
+ * even if they are the sole readers.  The test scenario is as follows:
+ *
+ *
+ * | c1           | c2         |
+ * |--------------+------------+
+ * | rdlock       |            |
+ * | yield        |            |
+ * |              | wrlock     |
+ * |              | <queued>   |
+ * | upgrade      |            |
+ * | <queued>     | <dequeued> |
+ * |              | unlock     |
+ * | <dequeued>   |            |
+ * | unlock       |            |
+ */
+
+static void coroutine_fn rwlock_yield_upgrade(void *opaque)
+{
+    qemu_co_rwlock_rdlock(&rwlock);
+    qemu_coroutine_yield();
+
+    qemu_co_rwlock_upgrade(&rwlock);
+    qemu_co_rwlock_unlock(&rwlock);
+
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_wrlock_yield(void *opaque)
+{
+    qemu_co_rwlock_wrlock(&rwlock);
+    qemu_coroutine_yield();
+
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+static void test_co_rwlock_upgrade(void)
+{
+    bool c1_done = false;
+    bool c2_done = false;
+    Coroutine *c1, *c2;
+
+    qemu_co_rwlock_init(&rwlock);
+    c1 = qemu_coroutine_create(rwlock_yield_upgrade, &c1_done);
+    c2 = qemu_coroutine_create(rwlock_wrlock_yield, &c2_done);
+
+    qemu_coroutine_enter(c1);
+    qemu_coroutine_enter(c2);
+
+    /* c1 now should go to sleep.  */
+    qemu_coroutine_enter(c1);
+    g_assert(!c1_done);
+
+    qemu_coroutine_enter(c2);
+    g_assert(c1_done);
+    g_assert(c2_done);
+}
+
 /*
  * Check that creation, enter, and return work
  */
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/basic/order", test_order);
     g_test_add_func("/locking/co-mutex", test_co_mutex);
     g_test_add_func("/locking/co-mutex/lockable", test_co_mutex_lockable);
+    g_test_add_func("/locking/co-rwlock/upgrade", test_co_rwlock_upgrade);
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
         g_test_add_func("/perf/nesting", perf_nesting);
--
2.30.2

From: David Edmondson <david.edmondson@oracle.com>

Test that downgrading an rwlock does not result in a failure to
schedule coroutines queued on the rwlock.

The diagram associated with test_co_rwlock_downgrade() describes the
intended behaviour, but what was observed previously corresponds to:

| c1     | c2         | c3         | c4       |
|--------+------------+------------+----------|
| rdlock |            |            |          |
| yield  |            |            |          |
|        | wrlock     |            |          |
|        | <queued>   |            |          |
|        |            | rdlock     |          |
|        |            | <queued>   |          |
|        |            |            | wrlock   |
|        |            |            | <queued> |
| unlock |            |            |          |
| yield  |            |            |          |
|        | <dequeued> |            |          |
|        | downgrade  |            |          |
|        | ...        |            |          |
|        | unlock     |            |          |
|        |            | <dequeued> |          |
|        |            | <queued>   |          |

This results in a failure...

ERROR:../tests/test-coroutine.c:369:test_co_rwlock_downgrade: assertion failed: (c3_done)
Bail out! ERROR:../tests/test-coroutine.c:369:test_co_rwlock_downgrade: assertion failed: (c3_done)

...as a result of the c3 coroutine failing to run to completion.

Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-7-pbonzini@redhat.com
Message-Id: <20210309144015.557477-5-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/unit/test-coroutine.c | 99 +++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-coroutine.c
+++ b/tests/unit/test-coroutine.c
@@ -XXX,XX +XXX,XX @@ static void test_co_rwlock_upgrade(void)
     g_assert(c2_done);
 }
 
+static void coroutine_fn rwlock_rdlock_yield(void *opaque)
+{
+    qemu_co_rwlock_rdlock(&rwlock);
+    qemu_coroutine_yield();
+
+    qemu_co_rwlock_unlock(&rwlock);
+    qemu_coroutine_yield();
+
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_wrlock_downgrade(void *opaque)
+{
+    qemu_co_rwlock_wrlock(&rwlock);
+
+    qemu_co_rwlock_downgrade(&rwlock);
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_rdlock(void *opaque)
+{
+    qemu_co_rwlock_rdlock(&rwlock);
+
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_wrlock(void *opaque)
+{
+    qemu_co_rwlock_wrlock(&rwlock);
+
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+/*
+ * Check that downgrading a reader-writer lock does not cause a hang.
+ *
+ * Four coroutines are used to produce a situation where there are
+ * both reader and writer hopefuls waiting to acquire an rwlock that
+ * is held by a reader.
+ *
+ * The correct sequence of operations we aim to provoke can be
+ * represented as:
+ *
+ * | c1     | c2         | c3         | c4         |
+ * |--------+------------+------------+------------|
+ * | rdlock |            |            |            |
+ * | yield  |            |            |            |
+ * |        | wrlock     |            |            |
+ * |        | <queued>   |            |            |
+ * |        |            | rdlock     |            |
+ * |        |            | <queued>   |            |
+ * |        |            |            | wrlock     |
+ * |        |            |            | <queued>   |
+ * | unlock |            |            |            |
+ * | yield  |            |            |            |
+ * |        | <dequeued> |            |            |
+ * |        | downgrade  |            |            |
+ * |        |            | <dequeued> |            |
+ * |        |            | unlock     |            |
+ * |        | ...        |            |            |
+ * |        | unlock     |            |            |
+ * |        |            |            | <dequeued> |
+ * |        |            |            | unlock     |
+ */
+static void test_co_rwlock_downgrade(void)
+{
+    bool c1_done = false;
+    bool c2_done = false;
+    bool c3_done = false;
+    bool c4_done = false;
+    Coroutine *c1, *c2, *c3, *c4;
+
+    qemu_co_rwlock_init(&rwlock);
+
+    c1 = qemu_coroutine_create(rwlock_rdlock_yield, &c1_done);
+    c2 = qemu_coroutine_create(rwlock_wrlock_downgrade, &c2_done);
+    c3 = qemu_coroutine_create(rwlock_rdlock, &c3_done);
+    c4 = qemu_coroutine_create(rwlock_wrlock, &c4_done);
+
+    qemu_coroutine_enter(c1);
+    qemu_coroutine_enter(c2);
+    qemu_coroutine_enter(c3);
+    qemu_coroutine_enter(c4);
+
+    qemu_coroutine_enter(c1);
+
+    g_assert(c2_done);
+    g_assert(c3_done);
+    g_assert(c4_done);
+
+    qemu_coroutine_enter(c1);
+
+    g_assert(c1_done);
+}
+
 /*
  * Check that creation, enter, and return work
  */
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/locking/co-mutex", test_co_mutex);
     g_test_add_func("/locking/co-mutex/lockable", test_co_mutex_lockable);
     g_test_add_func("/locking/co-rwlock/upgrade", test_co_rwlock_upgrade);
+    g_test_add_func("/locking/co-rwlock/downgrade", test_co_rwlock_downgrade);
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
         g_test_add_func("/perf/nesting", perf_nesting);
--
2.30.2