The following changes since commit 6d40ce00c1166c317e298ad82ecf10e650c4f87d:

  Update version for v6.0.0-rc1 release (2021-03-30 18:19:07 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to b6489ac06695e257ea0a9841364577e247fdee30:

  test-coroutine: Add rwlock downgrade test (2021-03-31 10:44:21 +0100)

----------------------------------------------------------------
Pull request

A fix for VDI image files and more generally for CoRwlock.

----------------------------------------------------------------

David Edmondson (4):
      block/vdi: When writing new bmap entry fails, don't leak the buffer
      block/vdi: Don't assume that blocks are larger than VdiHeader
      coroutine-lock: Store the coroutine in the CoWaitRecord only once
      test-coroutine: Add rwlock downgrade test

Paolo Bonzini (2):
      coroutine-lock: Reimplement CoRwlock to fix downgrade bug
      test-coroutine: Add rwlock upgrade test

 include/qemu/coroutine.h    |  17 ++--
 block/vdi.c                 |  11 ++-
 tests/unit/test-coroutine.c | 161 +++++++++++++++++++++++++++++++++++
 util/qemu-coroutine-lock.c  | 165 +++++++++++++++++++++++------------
 4 files changed, 282 insertions(+), 72 deletions(-)

--
2.30.2

From: David Edmondson <david.edmondson@oracle.com>

If a new bitmap entry is allocated, requiring the entire block to be
written, avoid leaking the buffer allocated for the block should the
write fail.

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Message-id: 20210325112941.365238-2-pbonzini@redhat.com
Message-Id: <20210309144015.557477-2-david.edmondson@oracle.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vdi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ nonallocating_write:

     logout("finished data write\n");
     if (ret < 0) {
+        g_free(block);
         return ret;
     }

--
2.30.2

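The fix above is a single line, but the shape of the bug is worth spelling out: a scratch buffer allocated for the slow path was freed on the success path only, so the early error return leaked it. The following is an illustration of that shape, not the vdi code; do_write() and the buffer size are made up (block/vdi.c uses g_malloc()/g_free() rather than the standard allocator, but the error-path cleanup is the same idea).

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for the real I/O call; always fails here. */
    static int do_write(const void *buf, size_t len)
    {
        (void)buf;
        (void)len;
        return -1;
    }

    static int write_with_scratch_buffer(size_t len)
    {
        unsigned char *block = calloc(1, len);
        int ret;

        if (!block) {
            return -1;
        }

        ret = do_write(block, len);
        if (ret < 0) {
            free(block);   /* without this, the early return leaks the buffer */
            return ret;
        }

        free(block);
        return 0;
    }

    int main(void)
    {
        printf("ret = %d\n", write_with_scratch_buffer(4096));
        return 0;
    }
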
From: David Edmondson <david.edmondson@oracle.com>

Given that the block size is read from the header of the VDI file, a
wide variety of sizes might be seen. Rather than re-using a block
sized memory region when writing the VDI header, allocate an
appropriately sized buffer.

Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Message-id: 20210325112941.365238-3-pbonzini@redhat.com
Message-Id: <20210309144015.557477-3-david.edmondson@oracle.com>
Acked-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/vdi.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -XXX,XX +XXX,XX @@ nonallocating_write:

     if (block) {
         /* One or more new blocks were allocated. */
-        VdiHeader *header = (VdiHeader *) block;
+        VdiHeader *header;
         uint8_t *base;
         uint64_t offset;
         uint32_t n_sectors;

+        g_free(block);
+        header = g_malloc(sizeof(*header));
+
         logout("now writing modified header\n");
         assert(VDI_IS_ALLOCATED(bmap_first));
         *header = s->header;
         vdi_header_to_le(header);
-        ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader));
-        g_free(block);
-        block = NULL;
+        ret = bdrv_pwrite(bs->file, 0, header, sizeof(*header));
+        g_free(header);

         if (ret < 0) {
             return ret;
--
2.30.2

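The reasoning behind this change: the block buffer is sized by the image's block size, which is read from the VDI file itself, while the header write always stores sizeof(VdiHeader) bytes. Nothing guarantees the first is at least as large as the second, so reusing the block buffer for the header write can read past the end of the allocation. Below is a standalone illustration of that hazard with made-up types and sizes; it is not the vdi code.

    #include <stdio.h>
    #include <stdlib.h>

    /* Made-up 68-byte header standing in for VdiHeader. */
    typedef struct {
        char     magic[64];
        unsigned version;
    } Header;

    int main(void)
    {
        size_t block_size = 32;   /* file-controlled value, smaller than the header */
        unsigned char *block = calloc(1, block_size);
        Header *header = calloc(1, sizeof(*header));   /* sized by the header itself */

        /* Unsafe (old shape): treating "block" as a Header and copying
         * sizeof(Header) bytes reads beyond the block_size allocation:
         *
         *     memcpy(out, (Header *)block, sizeof(Header));
         *
         * Safe (new shape): build the header in a dedicated buffer, as the
         * patch above does with g_malloc(sizeof(*header)). */
        printf("block_size=%zu, sizeof(Header)=%zu\n", block_size, sizeof(Header));

        free(header);
        free(block);
        return 0;
    }
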
From: David Edmondson <david.edmondson@oracle.com>

When taking the slow path for mutex acquisition, set the coroutine
value in the CoWaitRecord in push_waiter(), rather than both there and
in the caller.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-4-pbonzini@redhat.com
Message-Id: <20210309144015.557477-4-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/qemu-coroutine-lock.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
     unsigned old_handoff;

     trace_qemu_co_mutex_lock_entry(mutex, self);
-    w.co = self;
    push_waiter(mutex, &w);

     /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
--
2.30.2

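For readers who do not have util/qemu-coroutine-lock.c at hand: the assignment can be dropped because push_waiter() itself records the current coroutine in the wait record before queuing it. The sketch below is a simplified standalone model of that helper, written from memory; the real helper uses QEMU's types and inserts into the lock's list atomically, so treat this as an assumption about its shape, not a verbatim copy.

    #include <stdio.h>

    /* Simplified stand-ins for QEMU's Coroutine, CoWaitRecord and CoMutex. */
    typedef struct Coroutine {
        int id;
    } Coroutine;

    typedef struct CoWaitRecord {
        Coroutine *co;
        struct CoWaitRecord *next;
    } CoWaitRecord;

    typedef struct {
        CoWaitRecord *from_push;   /* head of the list of queued waiters */
    } CoMutex;

    static Coroutine current = { 1 };

    static Coroutine *qemu_coroutine_self(void)
    {
        return &current;
    }

    /* The helper fills in w->co itself, so a caller-side "w.co = self"
     * before calling it is redundant, which is what the patch removes. */
    static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
    {
        w->co = qemu_coroutine_self();
        w->next = mutex->from_push;
        mutex->from_push = w;
    }

    int main(void)
    {
        CoMutex m = { NULL };
        CoWaitRecord w = { NULL, NULL };

        push_waiter(&m, &w);
        printf("queued waiter for coroutine %d\n", m.from_push->co->id);
        return 0;
    }
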
From: Paolo Bonzini <pbonzini@redhat.com>

An invariant of the current rwlock is that if multiple coroutines hold a
reader lock, all must be runnable. The unlock implementation relies on
this, choosing to wake a single coroutine when the final read lock
holder exits the critical section, assuming that it will wake a
coroutine attempting to acquire a write lock.

The downgrade implementation violates this assumption by creating a
read lock owning coroutine that is exclusively runnable - any other
coroutines that are waiting to acquire a read lock are *not* made
runnable when the write lock holder converts its ownership to read
only.

More generally, the old implementation had lots of other fairness bugs.
The root cause of the bugs was that CoQueue would wake up readers even
if there were pending writers, and would wake up writers even if there
were readers. In that case, the coroutine would go back to sleep *at
the end* of the CoQueue, losing its place at the head of the line.

To fix this, keep the queue of waiters explicitly in the CoRwlock
instead of using CoQueue, and store for each whether it is a
potential reader or a writer. This way, downgrade can look at the
first queued coroutine and wake it only if it is a reader, causing
all other readers in line to be released in turn.

Reported-by: David Edmondson <david.edmondson@oracle.com>
Reviewed-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/qemu/coroutine.h   |  17 ++--
 util/qemu-coroutine-lock.c | 164 +++++++++++++++++++++++--------------
 2 files changed, 114 insertions(+), 67 deletions(-)

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -XXX,XX +XXX,XX @@ bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock);
 bool qemu_co_queue_empty(CoQueue *queue);


+typedef struct CoRwTicket CoRwTicket;
 typedef struct CoRwlock {
-    int pending_writer;
-    int reader;
     CoMutex mutex;
-    CoQueue queue;
+
+    /* Number of readers, or -1 if owned for writing. */
+    int owners;
+
+    /* Waiting coroutines. */
+    QSIMPLEQ_HEAD(, CoRwTicket) tickets;
 } CoRwlock;

 /**
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock);
 /**
  * Write Locks the CoRwlock from a reader. This is a bit more efficient than
  * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock.
- * However, if the lock cannot be upgraded immediately, control is transferred
- * to the caller of the current coroutine. Also, @qemu_co_rwlock_upgrade
- * only overrides CoRwlock fairness if there are no concurrent readers, so
- * another writer might run while @qemu_co_rwlock_upgrade blocks.
+ * Note that if the lock cannot be upgraded immediately, control is transferred
+ * to the caller of the current coroutine; another writer might run while
+ * @qemu_co_rwlock_upgrade blocks.
  */
 void qemu_co_rwlock_upgrade(CoRwlock *lock);

diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index XXXXXXX..XXXXXXX 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
     trace_qemu_co_mutex_unlock_return(mutex, self);
 }

+struct CoRwTicket {
+    bool read;
+    Coroutine *co;
+    QSIMPLEQ_ENTRY(CoRwTicket) next;
+};
+
 void qemu_co_rwlock_init(CoRwlock *lock)
 {
-    memset(lock, 0, sizeof(*lock));
-    qemu_co_queue_init(&lock->queue);
     qemu_co_mutex_init(&lock->mutex);
+    lock->owners = 0;
+    QSIMPLEQ_INIT(&lock->tickets);
+}
+
+/* Releases the internal CoMutex. */
+static void qemu_co_rwlock_maybe_wake_one(CoRwlock *lock)
+{
+    CoRwTicket *tkt = QSIMPLEQ_FIRST(&lock->tickets);
+    Coroutine *co = NULL;
+
+    /*
+     * Setting lock->owners here prevents rdlock and wrlock from
+     * sneaking in between unlock and wake.
+     */
+
+    if (tkt) {
+        if (tkt->read) {
+            if (lock->owners >= 0) {
+                lock->owners++;
+                co = tkt->co;
+            }
+        } else {
+            if (lock->owners == 0) {
+                lock->owners = -1;
+                co = tkt->co;
+            }
+        }
+    }
+
+    if (co) {
+        QSIMPLEQ_REMOVE_HEAD(&lock->tickets, next);
+        qemu_co_mutex_unlock(&lock->mutex);
+        aio_co_wake(co);
+    } else {
+        qemu_co_mutex_unlock(&lock->mutex);
+    }
 }

 void qemu_co_rwlock_rdlock(CoRwlock *lock)
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_rdlock(CoRwlock *lock)

     qemu_co_mutex_lock(&lock->mutex);
     /* For fairness, wait if a writer is in line. */
-    while (lock->pending_writer) {
-        qemu_co_queue_wait(&lock->queue, &lock->mutex);
-    }
-    lock->reader++;
-    qemu_co_mutex_unlock(&lock->mutex);
-
-    /* The rest of the read-side critical section is run without the mutex. */
-    self->locks_held++;
-}
-
-void qemu_co_rwlock_unlock(CoRwlock *lock)
-{
-    Coroutine *self = qemu_coroutine_self();
-
-    assert(qemu_in_coroutine());
-    if (!lock->reader) {
-        /* The critical section started in qemu_co_rwlock_wrlock. */
-        qemu_co_queue_restart_all(&lock->queue);
+    if (lock->owners == 0 || (lock->owners > 0 && QSIMPLEQ_EMPTY(&lock->tickets))) {
+        lock->owners++;
+        qemu_co_mutex_unlock(&lock->mutex);
     } else {
-        self->locks_held--;
+        CoRwTicket my_ticket = { true, self };

+        QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
+        qemu_co_mutex_unlock(&lock->mutex);
+        qemu_coroutine_yield();
+        assert(lock->owners >= 1);
+
+        /* Possibly wake another reader, which will wake the next in line. */
         qemu_co_mutex_lock(&lock->mutex);
-        lock->reader--;
-        assert(lock->reader >= 0);
-        /* Wakeup only one waiting writer */
-        if (!lock->reader) {
-            qemu_co_queue_next(&lock->queue);
-        }
+        qemu_co_rwlock_maybe_wake_one(lock);
     }
-    qemu_co_mutex_unlock(&lock->mutex);
+
+    self->locks_held++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    assert(qemu_in_coroutine());
+    self->locks_held--;
+
+    qemu_co_mutex_lock(&lock->mutex);
+    if (lock->owners > 0) {
+        lock->owners--;
+    } else {
+        assert(lock->owners == -1);
+        lock->owners = 0;
+    }
+
+    qemu_co_rwlock_maybe_wake_one(lock);
 }

 void qemu_co_rwlock_downgrade(CoRwlock *lock)
 {
-    Coroutine *self = qemu_coroutine_self();
+    qemu_co_mutex_lock(&lock->mutex);
+    assert(lock->owners == -1);
+    lock->owners = 1;

-    /* lock->mutex critical section started in qemu_co_rwlock_wrlock or
-     * qemu_co_rwlock_upgrade.
-     */
-    assert(lock->reader == 0);
-    lock->reader++;
-    qemu_co_mutex_unlock(&lock->mutex);
-
-    /* The rest of the read-side critical section is run without the mutex. */
-    self->locks_held++;
+    /* Possibly wake another reader, which will wake the next in line. */
+    qemu_co_rwlock_maybe_wake_one(lock);
 }

 void qemu_co_rwlock_wrlock(CoRwlock *lock)
 {
+    Coroutine *self = qemu_coroutine_self();
+
     qemu_co_mutex_lock(&lock->mutex);
-    lock->pending_writer++;
-    while (lock->reader) {
-        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    if (lock->owners == 0) {
+        lock->owners = -1;
+        qemu_co_mutex_unlock(&lock->mutex);
+    } else {
+        CoRwTicket my_ticket = { false, qemu_coroutine_self() };
+
+        QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
+        qemu_co_mutex_unlock(&lock->mutex);
+        qemu_coroutine_yield();
+        assert(lock->owners == -1);
     }
-    lock->pending_writer--;

-    /* The rest of the write-side critical section is run with
-     * the mutex taken, so that lock->reader remains zero.
-     * There is no need to update self->locks_held.
-     */
+    self->locks_held++;
 }

 void qemu_co_rwlock_upgrade(CoRwlock *lock)
 {
-    Coroutine *self = qemu_coroutine_self();
-
     qemu_co_mutex_lock(&lock->mutex);
-    assert(lock->reader > 0);
-    lock->reader--;
-    lock->pending_writer++;
-    while (lock->reader) {
-        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    assert(lock->owners > 0);
+    /* For fairness, wait if a writer is in line. */
+    if (lock->owners == 1 && QSIMPLEQ_EMPTY(&lock->tickets)) {
+        lock->owners = -1;
+        qemu_co_mutex_unlock(&lock->mutex);
+    } else {
+        CoRwTicket my_ticket = { false, qemu_coroutine_self() };
+
+        lock->owners--;
+        QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
+        qemu_co_rwlock_maybe_wake_one(lock);
+        qemu_coroutine_yield();
+        assert(lock->owners == -1);
     }
-    lock->pending_writer--;
-
-    /* The rest of the write-side critical section is run with
-     * the mutex taken, similar to qemu_co_rwlock_wrlock.  Do
-     * not account for the lock twice in self->locks_held.
-     */
-    self->locks_held--;
 }
--
2.30.2

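As a usage illustration of the semantics the patch above guarantees, the fragment below shows a writer that grows a shared structure and then downgrades. It is an invented coroutine_fn sketch meant to be read inside the QEMU tree: SharedState and both functions are made up, and only the qemu_co_rwlock_* and glib calls are real APIs. The property of interest is noted at the downgrade: the first queued waiter is woken only if it is a reader, and each woken reader wakes the next, so readers already in line all become runnable instead of being stranded behind the downgraded owner. The two test patches that follow exercise exactly these wake-ups, first for upgrade and then for downgrade.

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"

    /* Invented example state protected by a CoRwlock. */
    typedef struct {
        CoRwlock lock;
        unsigned long *bmap;
        size_t bmap_len;
    } SharedState;

    /* Readers take the lock shared; any number may hold it at once. */
    static bool coroutine_fn example_read(SharedState *s, size_t idx)
    {
        bool set;

        qemu_co_rwlock_rdlock(&s->lock);
        set = idx < s->bmap_len && s->bmap[idx];
        qemu_co_rwlock_unlock(&s->lock);
        return set;
    }

    /* A writer grows the map exclusively, then downgrades and continues as a
     * reader.  With this patch the downgrade wakes queued readers in turn,
     * rather than leaving them asleep behind the downgraded owner. */
    static void coroutine_fn example_grow_then_read(SharedState *s, size_t new_len)
    {
        qemu_co_rwlock_wrlock(&s->lock);
        s->bmap = g_renew(unsigned long, s->bmap, new_len);
        if (new_len > s->bmap_len) {
            memset(s->bmap + s->bmap_len, 0,
                   (new_len - s->bmap_len) * sizeof(unsigned long));
        }
        s->bmap_len = new_len;
        qemu_co_rwlock_downgrade(&s->lock);

        /* ...read-side work under the shared lock... */
        qemu_co_rwlock_unlock(&s->lock);
    }
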
From: Paolo Bonzini <pbonzini@redhat.com>

Test that rwlock upgrade is fair, and that readers go back to sleep if
a writer is in line.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-6-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/unit/test-coroutine.c | 62 +++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-coroutine.c
+++ b/tests/unit/test-coroutine.c
@@ -XXX,XX +XXX,XX @@ static void test_co_mutex_lockable(void)
     g_assert(QEMU_MAKE_LOCKABLE(null_pointer) == NULL);
 }

+static CoRwlock rwlock;
+
+/* Test that readers are properly sent back to the queue when upgrading,
+ * even if they are the sole readers. The test scenario is as follows:
+ *
+ *
+ * | c1           | c2         |
+ * |--------------+------------+
+ * | rdlock       |            |
+ * | yield        |            |
+ * |              | wrlock     |
+ * |              | <queued>   |
+ * | upgrade      |            |
+ * | <queued>     | <dequeued> |
+ * |              | unlock     |
+ * | <dequeued>   |            |
+ * | unlock       |            |
+ */
+
+static void coroutine_fn rwlock_yield_upgrade(void *opaque)
+{
+    qemu_co_rwlock_rdlock(&rwlock);
+    qemu_coroutine_yield();
+
+    qemu_co_rwlock_upgrade(&rwlock);
+    qemu_co_rwlock_unlock(&rwlock);
+
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_wrlock_yield(void *opaque)
+{
+    qemu_co_rwlock_wrlock(&rwlock);
+    qemu_coroutine_yield();
+
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+static void test_co_rwlock_upgrade(void)
+{
+    bool c1_done = false;
+    bool c2_done = false;
+    Coroutine *c1, *c2;
+
+    qemu_co_rwlock_init(&rwlock);
+    c1 = qemu_coroutine_create(rwlock_yield_upgrade, &c1_done);
+    c2 = qemu_coroutine_create(rwlock_wrlock_yield, &c2_done);
+
+    qemu_coroutine_enter(c1);
+    qemu_coroutine_enter(c2);
+
+    /* c1 now should go to sleep. */
+    qemu_coroutine_enter(c1);
+    g_assert(!c1_done);
+
+    qemu_coroutine_enter(c2);
+    g_assert(c1_done);
+    g_assert(c2_done);
+}
+
 /*
  * Check that creation, enter, and return work
  */
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/basic/order", test_order);
     g_test_add_func("/locking/co-mutex", test_co_mutex);
     g_test_add_func("/locking/co-mutex/lockable", test_co_mutex_lockable);
+    g_test_add_func("/locking/co-rwlock/upgrade", test_co_rwlock_upgrade);
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
         g_test_add_func("/perf/nesting", perf_nesting);
--
2.30.2

From: David Edmondson <david.edmondson@oracle.com>

Test that downgrading an rwlock does not result in a failure to
schedule coroutines queued on the rwlock.

The diagram associated with test_co_rwlock_downgrade() describes the
intended behaviour, but what was observed previously corresponds to:

| c1     | c2         | c3         | c4       |
|--------+------------+------------+----------|
| rdlock |            |            |          |
| yield  |            |            |          |
|        | wrlock     |            |          |
|        | <queued>   |            |          |
|        |            | rdlock     |          |
|        |            | <queued>   |          |
|        |            |            | wrlock   |
|        |            |            | <queued> |
| unlock |            |            |          |
| yield  |            |            |          |
|        | <dequeued> |            |          |
|        | downgrade  |            |          |
|        | ...        |            |          |
|        | unlock     |            |          |
|        |            | <dequeued> |          |
|        |            | <queued>   |          |

This results in a failure...

ERROR:../tests/test-coroutine.c:369:test_co_rwlock_downgrade: assertion failed: (c3_done)
Bail out! ERROR:../tests/test-coroutine.c:369:test_co_rwlock_downgrade: assertion failed: (c3_done)

...as a result of the c3 coroutine failing to run to completion.

Signed-off-by: David Edmondson <david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20210325112941.365238-7-pbonzini@redhat.com
Message-Id: <20210309144015.557477-5-david.edmondson@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/unit/test-coroutine.c | 99 +++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-coroutine.c
+++ b/tests/unit/test-coroutine.c
@@ -XXX,XX +XXX,XX @@ static void test_co_rwlock_upgrade(void)
     g_assert(c2_done);
 }

+static void coroutine_fn rwlock_rdlock_yield(void *opaque)
+{
+    qemu_co_rwlock_rdlock(&rwlock);
+    qemu_coroutine_yield();
+
+    qemu_co_rwlock_unlock(&rwlock);
+    qemu_coroutine_yield();
+
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_wrlock_downgrade(void *opaque)
+{
+    qemu_co_rwlock_wrlock(&rwlock);
+
+    qemu_co_rwlock_downgrade(&rwlock);
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_rdlock(void *opaque)
+{
+    qemu_co_rwlock_rdlock(&rwlock);
+
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+static void coroutine_fn rwlock_wrlock(void *opaque)
+{
+    qemu_co_rwlock_wrlock(&rwlock);
+
+    qemu_co_rwlock_unlock(&rwlock);
+    *(bool *)opaque = true;
+}
+
+/*
+ * Check that downgrading a reader-writer lock does not cause a hang.
+ *
+ * Four coroutines are used to produce a situation where there are
+ * both reader and writer hopefuls waiting to acquire an rwlock that
+ * is held by a reader.
+ *
+ * The correct sequence of operations we aim to provoke can be
+ * represented as:
+ *
+ * | c1     | c2         | c3         | c4         |
+ * |--------+------------+------------+------------|
+ * | rdlock |            |            |            |
+ * | yield  |            |            |            |
+ * |        | wrlock     |            |            |
+ * |        | <queued>   |            |            |
+ * |        |            | rdlock     |            |
+ * |        |            | <queued>   |            |
+ * |        |            |            | wrlock     |
+ * |        |            |            | <queued>   |
+ * | unlock |            |            |            |
+ * | yield  |            |            |            |
+ * |        | <dequeued> |            |            |
+ * |        | downgrade  |            |            |
+ * |        |            | <dequeued> |            |
+ * |        |            | unlock     |            |
+ * |        | ...        |            |            |
+ * |        | unlock     |            |            |
+ * |        |            |            | <dequeued> |
+ * |        |            |            | unlock     |
+ */
+static void test_co_rwlock_downgrade(void)
+{
+    bool c1_done = false;
+    bool c2_done = false;
+    bool c3_done = false;
+    bool c4_done = false;
+    Coroutine *c1, *c2, *c3, *c4;
+
+    qemu_co_rwlock_init(&rwlock);
+
+    c1 = qemu_coroutine_create(rwlock_rdlock_yield, &c1_done);
+    c2 = qemu_coroutine_create(rwlock_wrlock_downgrade, &c2_done);
+    c3 = qemu_coroutine_create(rwlock_rdlock, &c3_done);
+    c4 = qemu_coroutine_create(rwlock_wrlock, &c4_done);
+
+    qemu_coroutine_enter(c1);
+    qemu_coroutine_enter(c2);
+    qemu_coroutine_enter(c3);
+    qemu_coroutine_enter(c4);
+
+    qemu_coroutine_enter(c1);
+
+    g_assert(c2_done);
+    g_assert(c3_done);
+    g_assert(c4_done);
+
+    qemu_coroutine_enter(c1);
+
+    g_assert(c1_done);
+}
+
 /*
  * Check that creation, enter, and return work
  */
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
     g_test_add_func("/locking/co-mutex", test_co_mutex);
     g_test_add_func("/locking/co-mutex/lockable", test_co_mutex_lockable);
     g_test_add_func("/locking/co-rwlock/upgrade", test_co_rwlock_upgrade);
+    g_test_add_func("/locking/co-rwlock/downgrade", test_co_rwlock_downgrade);
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
         g_test_add_func("/perf/nesting", perf_nesting);
--
2.30.2