The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:

  configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:

  virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400)

----------------------------------------------------------------
Pull request

----------------------------------------------------------------

Stefan Hajnoczi (1):
  virtio-blk: fix host notifier issues during dataplane start/stop

 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

--
2.40.1
The main loop thread can consume 100% CPU when using --device
virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
are stale and remain registered with the AioContext because of bugs in
the virtio-blk dataplane start/stop code.

The problem is that the dataplane start/stop code involves drain
operations, which call virtio_blk_drained_begin() and
virtio_blk_drained_end() at points where the host notifier is not
operational (see the sketch after this list):
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
  vblk->dataplane_started has been set to true but the host notifier has
  not been attached yet.
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
  drain after the host notifier has already been detached but with
  vblk->dataplane_started still set to true.

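As a condensed sketch of the two buggy orderings (this is not the actual
QEMU source; attach_host_notifiers()/detach_host_notifiers() and the
simplified argument lists are hypothetical stand-ins for the
virtio_bus_set_host_notifier() loops):

    /* virtio_blk_data_plane_start(), before this patch (sketch) */
    vblk->dataplane_started = true;           /* set too early */
    blk_set_aio_context(blk, iothread_ctx, NULL);
                                              /* drains: the drain callbacks
                                               * touch a host notifier that
                                               * has not been attached yet */
    attach_host_notifiers();

    /* virtio_blk_data_plane_stop(), before this patch (sketch) */
    detach_host_notifiers();
    blk_drain(blk);                           /* drained_end() re-attaches
                                               * the notifier we just
                                               * detached */
    vblk->dataplane_started = false;          /* cleared too late */
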
I would like to simplify ->ioeventfd_start/stop() to avoid interactions
with drain entirely, but couldn't find a way to do that. Instead, this
patch accepts the fragile nature of the code and reorders it so that
vblk->dataplane_started is false during drain operations. This way the
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
touch the host notifier. The result is that
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
complete control over the host notifier and stale file descriptors are
no longer left in the AioContext.

This patch fixes the 100% CPU consumption in the main loop thread and
correctly moves host notifier processing to the IOThread.

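In the same condensed form as above, the reordered code keeps
vblk->dataplane_started false whenever a drain can run (again a
hypothetical sketch, not the actual function bodies):

    /* virtio_blk_data_plane_start(), after this patch (sketch) */
    blk_set_aio_context(blk, iothread_ctx, NULL);
                                              /* drains while
                                               * dataplane_started is still
                                               * false, so the drain
                                               * callbacks do nothing */
    attach_host_notifiers();
    vblk->dataplane_started = true;           /* publish only once the host
                                               * notifier is fully set up */

    /* virtio_blk_data_plane_stop(), after this patch (sketch) */
    detach_host_notifiers();
    vblk->dataplane_started = false;          /* clear before draining */
    blk_drain(blk);                           /* drain callbacks leave the
                                               * host notifier alone */
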
Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Lukas Doktor <ldoktor@redhat.com>
Message-id: 20230704151527.193586-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)

     memory_region_transaction_commit();

-    /*
-     * These fields are visible to the IOThread so we rely on implicit barriers
-     * in aio_context_acquire() on the write side and aio_notify_accept() on
-     * the read side.
-     */
-    s->starting = false;
-    vblk->dataplane_started = true;
     trace_virtio_blk_data_plane_start(s);

     old_context = blk_get_aio_context(s->conf->conf.blk);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
         event_notifier_set(virtio_queue_get_host_notifier(vq));
     }

+    /*
+     * These fields must be visible to the IOThread when it processes the
+     * virtqueue, otherwise it will think dataplane has not started yet.
+     *
+     * Make sure ->dataplane_started is false when blk_set_aio_context() is
+     * called above so that draining does not cause the host notifier to be
+     * detached/attached prematurely.
+     */
+    s->starting = false;
+    vblk->dataplane_started = true;
+    smp_wmb(); /* paired with aio_notify_accept() on the read side */
+
     /* Get this show started by hooking up our callbacks */
     if (!blk_in_drain(s->conf->conf.blk)) {
         aio_context_acquire(s->ctx);
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
   fail_guest_notifiers:
     vblk->dataplane_disabled = true;
     s->starting = false;
-    vblk->dataplane_started = true;
     return -ENOSYS;
 }

@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
         aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
     }

+    /*
+     * Batch all the host notifiers in a single transaction to avoid
+     * quadratic time complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
+    }
+
+    /*
+     * The transaction expects the ioeventfds to be open when it
+     * commits. Do it now, before the cleanup loop.
+     */
+    memory_region_transaction_commit();
+
+    for (i = 0; i < nvqs; i++) {
+        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
+    }
+
+    /*
+     * Set ->dataplane_started to false before draining so that host notifiers
+     * are not detached/attached anymore.
+     */
+    vblk->dataplane_started = false;
+
     aio_context_acquire(s->ctx);

     /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)

     aio_context_release(s->ctx);

-    /*
-     * Batch all the host notifiers in a single transaction to avoid
-     * quadratic time complexity in address_space_update_ioeventfds().
-     */
-    memory_region_transaction_begin();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
-    }
-
-    /*
-     * The transaction expects the ioeventfds to be open when it
-     * commits. Do it now, before the cleanup loop.
-     */
-    memory_region_transaction_commit();
-
-    for (i = 0; i < nvqs; i++) {
-        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
-    }
-
     qemu_bh_cancel(s->bh);
     notify_guest_bh(s); /* final chance to notify guest */

     /* Clean up guest notifier (irq) */
     k->set_guest_notifiers(qbus->parent, nvqs, false);

-    vblk->dataplane_started = false;
     s->stopping = false;
 }
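
A note on the smp_wmb() added above: it publishes s->starting and
vblk->dataplane_started to the IOThread, whose read side is the implicit
barrier in aio_notify_accept(). The following self-contained C11 analogy
of that publish/consume pattern is illustrative only; it is not QEMU
code (QEMU's smp_wmb() corresponds to the release fence, and the acquire
fence stands in for aio_notify_accept()):

    #include <stdatomic.h>
    #include <stdbool.h>

    static int payload;            /* stands in for dataplane state */
    static atomic_bool started;    /* stands in for dataplane_started */

    static void publish(void)      /* writer: main loop thread */
    {
        payload = 42;                              /* plain stores... */
        atomic_thread_fence(memory_order_release); /* ...ordered before */
        atomic_store_explicit(&started, true, memory_order_relaxed);
    }

    static bool consume(int *out)  /* reader: IOThread */
    {
        if (!atomic_load_explicit(&started, memory_order_relaxed)) {
            return false;
        }
        atomic_thread_fence(memory_order_acquire); /* pairs with the
                                                    * release fence */
        *out = payload;            /* guaranteed to observe 42 */
        return true;
    }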
--
2.40.1
