The following changes since commit 8507c9d5c9a62de2a0e281b640f995e26eac46af:

  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2020-11-03 15:59:44 +0000)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to fc107d86840b3364e922c26cf7631b7fd38ce523:

  util/vfio-helpers: Assert offset is aligned to page size (2020-11-03 19:06:23 +0000)

----------------------------------------------------------------
Pull request for 5.2

NVMe fixes to solve IOMMU issues on non-x86 and error message/tracing
improvements. Elena Afanasova's ioeventfd fixes are also included.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

----------------------------------------------------------------

Elena Afanasova (2):
  accel/kvm: add PIO ioeventfds only in case kvm_eventfds_allowed is
    true
  softmmu/memory: fix memory_region_ioeventfd_equal()

Eric Auger (4):
  block/nvme: Change size and alignment of IDENTIFY response buffer
  block/nvme: Change size and alignment of queue
  block/nvme: Change size and alignment of prp_list_pages
  block/nvme: Align iov's va and size on host page size

Philippe Mathieu-Daudé (27):
  MAINTAINERS: Cover "block/nvme.h" file
  block/nvme: Use hex format to display offset in trace events
  block/nvme: Report warning with warn_report()
  block/nvme: Trace controller capabilities
  block/nvme: Trace nvme_poll_queue() per queue
  block/nvme: Improve nvme_free_req_queue_wait() trace information
  block/nvme: Trace queue pair creation/deletion
  block/nvme: Move definitions before structure declarations
  block/nvme: Use unsigned integer for queue counter/size
  block/nvme: Make nvme_identify() return boolean indicating error
  block/nvme: Make nvme_init_queue() return boolean indicating error
  block/nvme: Introduce Completion Queue definitions
  block/nvme: Use definitions instead of magic values in add_io_queue()
  block/nvme: Correctly initialize Admin Queue Attributes
  block/nvme: Simplify ADMIN queue access
  block/nvme: Simplify nvme_cmd_sync()
  block/nvme: Set request_alignment at initialization
  block/nvme: Correct minimum device page size
  block/nvme: Fix use of write-only doorbells page on Aarch64 arch
  block/nvme: Fix nvme_submit_command() on big-endian host
  util/vfio-helpers: Improve reporting unsupported IOMMU type
  util/vfio-helpers: Trace PCI I/O config accesses
  util/vfio-helpers: Trace PCI BAR region info
  util/vfio-helpers: Trace where BARs are mapped
  util/vfio-helpers: Improve DMA trace events
  util/vfio-helpers: Convert vfio_dump_mapping to trace events
  util/vfio-helpers: Assert offset is aligned to page size

 MAINTAINERS          |   2 +
 include/block/nvme.h |  18 ++--
 accel/kvm/kvm-all.c  |   6 +-
 block/nvme.c         | 209 ++++++++++++++++++++++++-------------------
 softmmu/memory.c     |  11 ++-
 util/vfio-helpers.c  |  43 +++++----
 block/trace-events   |  30 ++++---
 util/trace-events    |  10 ++-
 8 files changed, 195 insertions(+), 134 deletions(-)

--
2.28.0

From: Elena Afanasova <eafanasova@gmail.com>

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
Message-Id: <20201017210102.26036-1-eafanasova@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 accel/kvm/kvm-all.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms)

     kvm_memory_listener_register(s, &s->memory_listener,
                                  &address_space_memory, 0);
-    memory_listener_register(&kvm_io_listener,
-                             &address_space_io);
+    if (kvm_eventfds_allowed) {
+        memory_listener_register(&kvm_io_listener,
+                                 &address_space_io);
+    }
     memory_listener_register(&kvm_coalesced_pio_listener,
                              &address_space_io);

--
2.28.0

From: Elena Afanasova <eafanasova@gmail.com>

Eventfd can be registered with a zero length when fast_mmio is true.
Handle this case properly when dispatching through QEMU.

Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
Message-id: cf71a62eb04e61932ff8ffdd02e0b2aab4f495a0.camel@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 softmmu/memory.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -XXX,XX +XXX,XX @@ static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd *a,
 static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd *a,
                                           MemoryRegionIoeventfd *b)
 {
-    return !memory_region_ioeventfd_before(a, b)
-        && !memory_region_ioeventfd_before(b, a);
+    if (int128_eq(a->addr.start, b->addr.start) &&
+        (!int128_nz(a->addr.size) || !int128_nz(b->addr.size) ||
+         (int128_eq(a->addr.size, b->addr.size) &&
+          (a->match_data == b->match_data) &&
+          ((a->match_data && (a->data == b->data)) || !a->match_data) &&
+          (a->e == b->e))))
+        return true;
+
+    return false;
 }

 /* Range of memory in the global map. Addresses are absolute. */
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

The "block/nvme.h" header is shared by both the NVMe block
driver and the NVMe emulated device. Add the 'F:' entry on
both sections, so all maintainers/reviewers are notified
when it is changed.

Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Message-Id: <20200701140634.25994-1-philmd@redhat.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ M: Klaus Jensen <its@irrelevant.dk>
 L: qemu-block@nongnu.org
 S: Supported
 F: hw/block/nvme*
+F: include/block/nvme.h
 F: tests/qtest/nvme-test.c
 F: docs/specs/nvme.txt
 T: git git://git.infradead.org/qemu-nvme.git nvme-next
@@ -XXX,XX +XXX,XX @@ R: Fam Zheng <fam@euphon.net>
 L: qemu-block@nongnu.org
 S: Supported
 F: block/nvme*
+F: include/block/nvme.h
 T: git https://github.com/stefanha/qemu.git block

 Bootdevice
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

Use the same format used for the hw/vfio/ trace events.

Suggested-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-3-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/trace-events | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
 nvme_handle_event(void *s) "s %p"
 nvme_poll_cb(void *s) "s %p"
-nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset %"PRId64" bytes %"PRId64" flags %d niov %d"
-nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset %"PRId64" bytes %"PRId64" flags %d"
+nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d"
+nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d"
 nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
-nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d"
-nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d"
-nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64""
-nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d"
+nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d"
+nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d"
+nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
+nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
 nvme_free_req_queue_wait(void *q) "q %p"
 nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

Instead of displaying the warning on stderr, use warn_report(),
which also displays it on the monitor.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-4-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
         }
         cid = le16_to_cpu(c->cid);
         if (cid == 0 || cid > NVME_QUEUE_SIZE) {
-            fprintf(stderr, "Unexpected CID in completion queue: %" PRIu32 "\n",
-                    cid);
+            warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
+                        "queue size: %u", cid, NVME_QUEUE_SIZE);
             continue;
         }
         trace_nvme_complete_command(s, q->index, cid);
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

Controllers have different capabilities and report them in the
CAP register. We are particularly interested in the page size
limits.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-5-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c       | 13 +++++++++++++
 block/trace-events |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
      * Initialization". */

     cap = le64_to_cpu(regs->cap);
+    trace_nvme_controller_capability_raw(cap);
+    trace_nvme_controller_capability("Maximum Queue Entries Supported",
+                                     1 + NVME_CAP_MQES(cap));
+    trace_nvme_controller_capability("Contiguous Queues Required",
+                                     NVME_CAP_CQR(cap));
+    trace_nvme_controller_capability("Doorbell Stride",
+                                     2 << (2 + NVME_CAP_DSTRD(cap)));
+    trace_nvme_controller_capability("Subsystem Reset Supported",
+                                     NVME_CAP_NSSRS(cap));
+    trace_nvme_controller_capability("Memory Page Size Minimum",
+                                     1 << (12 + NVME_CAP_MPSMIN(cap)));
+    trace_nvme_controller_capability("Memory Page Size Maximum",
+                                     1 << (12 + NVME_CAP_MPSMAX(cap)));
     if (!NVME_CAP_CSS(cap)) {
         error_setg(errp, "Device doesn't support NVMe command set");
         ret = -EINVAL;
diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"

 # nvme.c
+nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
+nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
 nvme_kick(void *s, int queue) "s %p queue %d"
 nvme_dma_flush_queue_wait(void *s) "s %p"
 nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

As we want to enable multiple queues, report the event
in each nvme_poll_queue() call, rather than once in
the callback calling nvme_poll_queues().

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-6-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c       | 2 +-
 block/trace-events | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queue(NVMeQueuePair *q)
     const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
     NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];

+    trace_nvme_poll_queue(q->s, q->index);
     /*
      * Do an early check for completions. q->lock isn't needed because
      * nvme_process_completion() only runs in the event loop thread and
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_cb(void *opaque)
     BDRVNVMeState *s = container_of(e, BDRVNVMeState,
                                     irq_notifier[MSIX_SHARED_IRQ_IDX]);

-    trace_nvme_poll_cb(s);
     return nvme_poll_queues(s);
 }

diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
 nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
 nvme_handle_event(void *s) "s %p"
-nvme_poll_cb(void *s) "s %p"
+nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u"
 nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d"
 nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d"
 nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

What we want to trace is the block driver state and the queue index.

Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-7-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c       | 2 +-
 block/trace-events | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)

     while (q->free_req_head == -1) {
         if (qemu_in_coroutine()) {
-            trace_nvme_free_req_queue_wait(q);
+            trace_nvme_free_req_queue_wait(q->s, q->index);
             qemu_co_queue_wait(&q->free_req_queue, &q->lock);
         } else {
             qemu_mutex_unlock(&q->lock);
diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s
 nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
 nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
-nvme_free_req_queue_wait(void *q) "q %p"
+nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
 nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
 nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-8-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c       | 3 +++
 block/trace-events | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,

 static void nvme_free_queue_pair(NVMeQueuePair *q)
 {
+    trace_nvme_free_queue_pair(q->index, q);
     if (q->completion_bh) {
         qemu_bh_delete(q->completion_bh);
     }
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
     if (!q) {
         return NULL;
     }
+    trace_nvme_create_queue_pair(idx, q, size, aio_context,
+                                 event_notifier_get_fd(s->irq_notifier));
     q->prp_list_pages = qemu_try_memalign(s->page_size,
                                           s->page_size * NVME_NUM_REQS);
     if (!q->prp_list_pages) {
diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" byte
 nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
 nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
+nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d"
+nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
 nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
 nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

To be able to use some definitions in structure declarations,
move them earlier. No logical change.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-9-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@

 typedef struct BDRVNVMeState BDRVNVMeState;

+/* Same index is used for queues and IRQs */
+#define INDEX_ADMIN 0
+#define INDEX_IO(n) (1 + n)
+
+/* This driver shares a single MSIX IRQ for the admin and I/O queues */
+enum {
+    MSIX_SHARED_IRQ_IDX = 0,
+    MSIX_IRQ_COUNT = 1
+};
+
 typedef struct {
     int32_t head, tail;
     uint8_t *queue;
@@ -XXX,XX +XXX,XX @@ typedef struct {
     QEMUBH *completion_bh;
 } NVMeQueuePair;

-#define INDEX_ADMIN 0
-#define INDEX_IO(n) (1 + n)
-
-/* This driver shares a single MSIX IRQ for the admin and I/O queues */
-enum {
-    MSIX_SHARED_IRQ_IDX = 0,
-    MSIX_IRQ_COUNT = 1
-};
-
 struct BDRVNVMeState {
     AioContext *aio_context;
     QEMUVFIOState *vfio;
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

We cannot have a negative queue count/size/index, so use an unsigned
type. Rename 'nr_queues' to 'queue_count' to match the spec naming.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20201029093306.1063879-10-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c       | 38 ++++++++++++++++++--------------------
 block/trace-events | 10 +++++-----
 2 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
      * [1..]: io queues.
      */
     NVMeQueuePair **queues;
-    int nr_queues;
+    unsigned queue_count;
     size_t page_size;
     /* How many uint32_t elements does each doorbell entry take. */
     size_t doorbell_scale;
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
 };

 static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
-                            int nentries, int entry_bytes, Error **errp)
+                            unsigned nentries, size_t entry_bytes, Error **errp)
 {
     size_t bytes;
     int r;
@@ -XXX,XX +XXX,XX @@ static void nvme_free_req_queue_cb(void *opaque)

 static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
                                              AioContext *aio_context,
-                                             int idx, int size,
+                                             unsigned idx, size_t size,
                                              Error **errp)
 {
     int i, r;
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queues(BDRVNVMeState *s)
     bool progress = false;
     int i;

-    for (i = 0; i < s->nr_queues; i++) {
+    for (i = 0; i < s->queue_count; i++) {
         if (nvme_poll_queue(s->queues[i])) {
             progress = true;
         }
@@ -XXX,XX +XXX,XX @@ static void nvme_handle_event(EventNotifier *n)
 static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
 {
     BDRVNVMeState *s = bs->opaque;
-    int n = s->nr_queues;
+    unsigned n = s->queue_count;
     NVMeQueuePair *q;
     NvmeCmd cmd;
-    int queue_size = NVME_QUEUE_SIZE;
+    unsigned queue_size = NVME_QUEUE_SIZE;

     q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
                                n, queue_size, errp);
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
         .cdw11 = cpu_to_le32(0x3),
     };
     if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
-        error_setg(errp, "Failed to create CQ io queue [%d]", n);
+        error_setg(errp, "Failed to create CQ io queue [%u]", n);
         goto out_error;
     }
     cmd = (NvmeCmd) {
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
         .cdw11 = cpu_to_le32(0x1 | (n << 16)),
     };
     if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
-        error_setg(errp, "Failed to create SQ io queue [%d]", n);
+        error_setg(errp, "Failed to create SQ io queue [%u]", n);
         goto out_error;
     }
     s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
     s->queues[n] = q;
-    s->nr_queues++;
+    s->queue_count++;
     return true;
 out_error:
     nvme_free_queue_pair(q);
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
         ret = -EINVAL;
         goto out;
     }
-    s->nr_queues = 1;
+    s->queue_count = 1;
     QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
     regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
                             (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
@@ -XXX,XX +XXX,XX @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable,

 static void nvme_close(BlockDriverState *bs)
 {
-    int i;
     BDRVNVMeState *s = bs->opaque;

-    for (i = 0; i < s->nr_queues; ++i) {
+    for (unsigned i = 0; i < s->queue_count; ++i) {
         nvme_free_queue_pair(s->queues[i]);
     }
     g_free(s->queues);
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
     };

     trace_nvme_prw_aligned(s, is_write, offset, bytes, flags, qiov->niov);
-    assert(s->nr_queues > 1);
+    assert(s->queue_count > 1);
     req = nvme_get_free_req(ioq);
     assert(req);

@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
         .ret = -EINPROGRESS,
     };

-    assert(s->nr_queues > 1);
+    assert(s->queue_count > 1);
     req = nvme_get_free_req(ioq);
     assert(req);
     nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
     cmd.cdw12 = cpu_to_le32(cdw12);

     trace_nvme_write_zeroes(s, offset, bytes, flags);
-    assert(s->nr_queues > 1);
+    assert(s->queue_count > 1);
     req = nvme_get_free_req(ioq);
     assert(req);

@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
         return -ENOTSUP;
     }

-    assert(s->nr_queues > 1);
+    assert(s->queue_count > 1);

     buf = qemu_try_memalign(s->page_size, s->page_size);
     if (!buf) {
@@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs)
 {
     BDRVNVMeState *s = bs->opaque;

-    for (int i = 0; i < s->nr_queues; i++) {
+    for (unsigned i = 0; i < s->queue_count; i++) {
         NVMeQueuePair *q = s->queues[i];

         qemu_bh_delete(q->completion_bh);
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,
     aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
                            false, nvme_handle_event, nvme_poll_cb);

-    for (int i = 0; i < s->nr_queues; i++) {
+    for (unsigned i = 0; i < s->queue_count; i++) {
         NVMeQueuePair *q = s->queues[i];

         q->completion_bh =
@@ -XXX,XX +XXX,XX @@ static void nvme_aio_plug(BlockDriverState *bs)

 static void nvme_aio_unplug(BlockDriverState *bs)
 {
-    int i;
     BDRVNVMeState *s = bs->opaque;
     assert(s->plugged);
     s->plugged = false;
-    for (i = INDEX_IO(0); i < s->nr_queues; i++) {
+    for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) {
         NVMeQueuePair *q = s->queues[i];
         qemu_mutex_lock(&q->lock);
         nvme_kick(q);
diff --git a/block/trace-events b/block/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -XXX,XX +XXX,XX @@ qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 # nvme.c
 nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
 nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
-nvme_kick(void *s, int queue) "s %p queue %d"
+nvme_kick(void *s, unsigned q_index) "s %p q #%u"
 nvme_dma_flush_queue_wait(void *s) "s %p"
 nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
-nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d"
-nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d"
-nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
-nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
+nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d"
+nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u"
+nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
+nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
 nvme_handle_event(void *s) "s %p"
 nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u"
--
2.28.0

From: Philippe Mathieu-Daudé <philmd@redhat.com>

Just for consistency, following the example documented since
commit e3fe3988d7 ("error: Document Error API usage rules"),
return a boolean value indicating an error is set or not.
Directly pass errp as the local_err is not requested in our
case.

Tested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20201029093306.1063879-11-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
---
 block/nvme.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
     return ret;
 }

-static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
+/* Returns true on success, false on failure. */
+static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
 {
     BDRVNVMeState *s = bs->opaque;
+    bool ret = false;
     union {
         NvmeIdCtrl ctrl;
         NvmeIdNs ns;
@@ -XXX,XX +XXX,XX @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
         goto out;
     }

+    ret = true;
     s->blkshift = lbaf->ds;
 out:
     qemu_vfio_dma_unmap(s->vfio, id);
     qemu_vfree(id);
+
+    return ret;
 }

 static bool nvme_poll_queue(NVMeQueuePair *q)
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     uint64_t cap;
     uint64_t timeout_ms;
     uint64_t deadline, now;
-    Error *local_err = NULL;
     volatile NvmeBar *regs = NULL;

     qemu_co_mutex_init(&s->dma_map_lock);
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
                            false, nvme_handle_event, nvme_poll_cb);

-    nvme_identify(bs, namespace, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!nvme_identify(bs, namespace, errp)) {
         ret = -EIO;
         goto out;
     }
--
2.28.0

1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Sheepdog's AIOCB are completely internal entities for a group of | 3 | Just for consistency, following the example documented since |
4 | requests and do not need dynamic allocation. | 4 | commit e3fe3988d7 ("error: Document Error API usage rules"), |
5 | return a boolean value indicating an error is set or not. | ||
6 | Directly pass errp as the local_err is not requested in our | ||
7 | case. This simplifies a bit nvme_create_queue_pair(). | ||
5 | 8 | ||
6 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
7 | Message-id: 20161129113245.32724-4-pbonzini@redhat.com | 10 | Tested-by: Eric Auger <eric.auger@redhat.com> |
8 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
12 | Message-id: 20201029093306.1063879-12-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
9 | --- | 15 | --- |
10 | block/sheepdog.c | 99 ++++++++++++++++++++++---------------------------------- | 16 | block/nvme.c | 16 +++++++--------- |
11 | 1 file changed, 39 insertions(+), 60 deletions(-) | 17 | 1 file changed, 7 insertions(+), 9 deletions(-) |
12 | 18 | ||
13 | diff --git a/block/sheepdog.c b/block/sheepdog.c | 19 | diff --git a/block/nvme.c b/block/nvme.c |
14 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block/sheepdog.c | 21 | --- a/block/nvme.c |
16 | +++ b/block/sheepdog.c | 22 | +++ b/block/nvme.c |
17 | @@ -XXX,XX +XXX,XX @@ static inline size_t count_data_objs(const struct SheepdogInode *inode) | 23 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = { |
18 | } while (0) | 24 | }, |
19 | |||
20 | typedef struct SheepdogAIOCB SheepdogAIOCB; | ||
21 | +typedef struct BDRVSheepdogState BDRVSheepdogState; | ||
22 | |||
23 | typedef struct AIOReq { | ||
24 | SheepdogAIOCB *aiocb; | ||
25 | @@ -XXX,XX +XXX,XX @@ enum AIOCBState { | ||
26 | || y->max_affect_data_idx < x->min_affect_data_idx)) | ||
27 | |||
28 | struct SheepdogAIOCB { | ||
29 | - BlockAIOCB common; | ||
30 | + BDRVSheepdogState *s; | ||
31 | |||
32 | QEMUIOVector *qiov; | ||
33 | |||
34 | @@ -XXX,XX +XXX,XX @@ struct SheepdogAIOCB { | ||
35 | QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; | ||
36 | }; | 25 | }; |
37 | 26 | ||
38 | -typedef struct BDRVSheepdogState { | 27 | -static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, |
39 | +struct BDRVSheepdogState { | 28 | +/* Returns true on success, false on failure. */ |
40 | BlockDriverState *bs; | 29 | +static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, |
41 | AioContext *aio_context; | 30 | unsigned nentries, size_t entry_bytes, Error **errp) |
42 | 31 | { | |
43 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVSheepdogState { | 32 | size_t bytes; |
44 | 33 | @@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, | |
45 | CoQueue overlapping_queue; | 34 | q->queue = qemu_try_memalign(s->page_size, bytes); |
46 | QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head; | 35 | if (!q->queue) { |
47 | -} BDRVSheepdogState; | 36 | error_setg(errp, "Cannot allocate queue"); |
48 | +}; | 37 | - return; |
49 | 38 | + return false; | |
50 | typedef struct BDRVSheepdogReopenState { | 39 | } |
51 | int fd; | 40 | memset(q->queue, 0, bytes); |
52 | @@ -XXX,XX +XXX,XX @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) | 41 | r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); |
53 | acb->nr_pending--; | 42 | if (r) { |
43 | error_setg(errp, "Cannot map queue"); | ||
44 | + return false; | ||
45 | } | ||
46 | + return true; | ||
54 | } | 47 | } |
55 | 48 | ||
56 | -static const AIOCBInfo sd_aiocb_info = { | 49 | static void nvme_free_queue_pair(NVMeQueuePair *q) |
57 | - .aiocb_size = sizeof(SheepdogAIOCB), | 50 | @@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, |
58 | -}; | 51 | Error **errp) |
59 | - | ||
60 | -static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, | ||
61 | - int64_t sector_num, int nb_sectors) | ||
62 | +static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, | ||
63 | + QEMUIOVector *qiov, int64_t sector_num, int nb_sectors, | ||
64 | + int type) | ||
65 | { | 52 | { |
66 | - SheepdogAIOCB *acb; | 53 | int i, r; |
67 | uint32_t object_size; | 54 | - Error *local_err = NULL; |
68 | - BDRVSheepdogState *s = bs->opaque; | 55 | NVMeQueuePair *q; |
69 | 56 | uint64_t prp_list_iova; | |
70 | object_size = (UINT32_C(1) << s->inode.block_size_shift); | 57 | |
71 | 58 | @@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, | |
72 | - acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL); | 59 | req->prp_list_iova = prp_list_iova + i * s->page_size; |
73 | + acb->s = s; | ||
74 | |||
75 | acb->qiov = qiov; | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, | ||
78 | |||
79 | acb->min_dirty_data_idx = UINT32_MAX; | ||
80 | acb->max_dirty_data_idx = 0; | ||
81 | - | ||
82 | - return acb; | ||
83 | + acb->aiocb_type = type; | ||
84 | } | ||
85 | |||
86 | /* Return -EIO in case of error, file descriptor on success */ | ||
87 | @@ -XXX,XX +XXX,XX @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) | ||
88 | */ | ||
89 | static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) | ||
90 | { | ||
91 | - BDRVSheepdogState *s = acb->common.bs->opaque; | ||
92 | + BDRVSheepdogState *s = acb->s; | ||
93 | struct iovec iov; | ||
94 | AIOReq *aio_req; | ||
95 | uint32_t offset, data_len, mn, mx; | ||
96 | @@ -XXX,XX +XXX,XX @@ out: | ||
97 | * Returns 1 when we need to wait a response, 0 when there is no sent | ||
98 | * request and -errno in error cases. | ||
99 | */ | ||
100 | -static void coroutine_fn sd_co_rw_vector(void *p) | ||
101 | +static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb) | ||
102 | { | ||
103 | - SheepdogAIOCB *acb = p; | ||
104 | int ret = 0; | ||
105 | unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE; | ||
106 | unsigned long idx; | ||
107 | uint32_t object_size; | ||
108 | uint64_t oid; | ||
109 | uint64_t offset; | ||
110 | - BDRVSheepdogState *s = acb->common.bs->opaque; | ||
111 | + BDRVSheepdogState *s = acb->s; | ||
112 | SheepdogInode *inode = &s->inode; | ||
113 | AIOReq *aio_req; | ||
114 | |||
115 | @@ -XXX,XX +XXX,XX @@ static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) | ||
116 | static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
117 | int nb_sectors, QEMUIOVector *qiov) | ||
118 | { | ||
119 | - SheepdogAIOCB *acb; | ||
120 | + SheepdogAIOCB acb; | ||
121 | int ret; | ||
122 | int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE; | ||
123 | BDRVSheepdogState *s = bs->opaque; | ||
124 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
125 | } | ||
126 | } | 60 | } |
127 | 61 | ||
128 | - acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); | 62 | - nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err); |
129 | - acb->aiocb_type = AIOCB_WRITE_UDATA; | 63 | - if (local_err) { |
130 | + sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA); | 64 | - error_propagate(errp, local_err); |
131 | 65 | + if (!nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, errp)) { | |
132 | retry: | 66 | goto fail; |
133 | - if (check_overlapping_aiocb(s, acb)) { | ||
134 | + if (check_overlapping_aiocb(s, &acb)) { | ||
135 | qemu_co_queue_wait(&s->overlapping_queue); | ||
136 | goto retry; | ||
137 | } | 67 | } |
138 | 68 | q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail; | |
139 | - sd_co_rw_vector(acb); | 69 | |
140 | - sd_write_done(acb); | 70 | - nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err); |
141 | + sd_co_rw_vector(&acb); | 71 | - if (local_err) { |
142 | + sd_write_done(&acb); | 72 | - error_propagate(errp, local_err); |
143 | 73 | + if (!nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, errp)) { | |
144 | - QLIST_REMOVE(acb, aiocb_siblings); | 74 | goto fail; |
145 | + QLIST_REMOVE(&acb, aiocb_siblings); | ||
146 | qemu_co_queue_restart_all(&s->overlapping_queue); | ||
147 | - ret = acb->ret; | ||
148 | - qemu_aio_unref(acb); | ||
149 | - return ret; | ||
150 | + return acb.ret; | ||
151 | } | ||
152 | |||
153 | static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
154 | int nb_sectors, QEMUIOVector *qiov) | ||
155 | { | ||
156 | - SheepdogAIOCB *acb; | ||
157 | - int ret; | ||
158 | + SheepdogAIOCB acb; | ||
159 | BDRVSheepdogState *s = bs->opaque; | ||
160 | |||
161 | - acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); | ||
162 | - acb->aiocb_type = AIOCB_READ_UDATA; | ||
163 | + sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA); | ||
164 | |||
165 | retry: | ||
166 | - if (check_overlapping_aiocb(s, acb)) { | ||
167 | + if (check_overlapping_aiocb(s, &acb)) { | ||
168 | qemu_co_queue_wait(&s->overlapping_queue); | ||
169 | goto retry; | ||
170 | } | 75 | } |
171 | 76 | q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head; | |
172 | - sd_co_rw_vector(acb); | ||
173 | + sd_co_rw_vector(&acb); | ||
174 | |||
175 | - QLIST_REMOVE(acb, aiocb_siblings); | ||
176 | + QLIST_REMOVE(&acb, aiocb_siblings); | ||
177 | qemu_co_queue_restart_all(&s->overlapping_queue); | ||
178 | - ret = acb->ret; | ||
179 | - qemu_aio_unref(acb); | ||
180 | - return ret; | ||
181 | + return acb.ret; | ||
182 | } | ||
183 | |||
184 | static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) | ||
185 | { | ||
186 | BDRVSheepdogState *s = bs->opaque; | ||
187 | - SheepdogAIOCB *acb; | ||
188 | - int ret; | ||
189 | + SheepdogAIOCB acb; | ||
190 | AIOReq *aio_req; | ||
191 | |||
192 | if (s->cache_flags != SD_FLAG_CMD_CACHE) { | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | - acb = sd_aio_setup(bs, NULL, 0, 0); | ||
197 | - acb->aiocb_type = AIOCB_FLUSH_CACHE; | ||
198 | + sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE); | ||
199 | |||
200 | - acb->nr_pending++; | ||
201 | - aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), | ||
202 | + acb.nr_pending++; | ||
203 | + aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id), | ||
204 | 0, 0, 0, false, 0, 0); | ||
205 | QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); | ||
206 | - add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type); | ||
207 | + add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type); | ||
208 | |||
209 | - if (--acb->nr_pending) { | ||
210 | + if (--acb.nr_pending) { | ||
211 | qemu_coroutine_yield(); | ||
212 | } | ||
213 | - ret = acb->ret; | ||
214 | - qemu_aio_unref(acb); | ||
215 | - return ret; | ||
216 | + return acb.ret; | ||
217 | } | ||
218 | |||
219 | static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) | ||
220 | @@ -XXX,XX +XXX,XX @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, | ||
221 | static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, | ||
222 | int count) | ||
223 | { | ||
224 | - SheepdogAIOCB *acb; | ||
225 | + SheepdogAIOCB acb; | ||
226 | BDRVSheepdogState *s = bs->opaque; | ||
227 | - int ret; | ||
228 | QEMUIOVector discard_iov; | ||
229 | struct iovec iov; | ||
230 | uint32_t zero = 0; | ||
231 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, | ||
232 | if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) { | ||
233 | return -ENOTSUP; | ||
234 | } | ||
235 | - acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS, | ||
236 | - count >> BDRV_SECTOR_BITS); | ||
237 | - acb->aiocb_type = AIOCB_DISCARD_OBJ; | ||
238 | + sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS, | ||
239 | + count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); | ||
240 | |||
241 | retry: | ||
242 | - if (check_overlapping_aiocb(s, acb)) { | ||
243 | + if (check_overlapping_aiocb(s, &acb)) { | ||
244 | qemu_co_queue_wait(&s->overlapping_queue); | ||
245 | goto retry; | ||
246 | } | ||
247 | |||
248 | - sd_co_rw_vector(acb); | ||
249 | + sd_co_rw_vector(&acb); | ||
250 | |||
251 | - QLIST_REMOVE(acb, aiocb_siblings); | ||
252 | + QLIST_REMOVE(&acb, aiocb_siblings); | ||
253 | qemu_co_queue_restart_all(&s->overlapping_queue); | ||
254 | - ret = acb->ret; | ||
255 | - qemu_aio_unref(acb); | ||
256 | - return ret; | ||
257 | + return acb.ret; | ||
258 | } | ||
259 | |||
260 | static coroutine_fn int64_t | ||
261 | -- | 77 | -- |
262 | 2.9.3 | 78 | 2.28.0 |
263 | 79 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | Rename Submission Queue flags with 'Sq' to differentiate | ||
4 | submission queue flags from completion queue flags, and introduce | ||
5 | Completion Queue flag definitions. | ||
6 | |||
7 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
8 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Message-id: 20201029093306.1063879-13-philmd@redhat.com | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
14 | --- | ||
15 | include/block/nvme.h | 18 ++++++++++++------ | ||
16 | 1 file changed, 12 insertions(+), 6 deletions(-) | ||
17 | |||
18 | diff --git a/include/block/nvme.h b/include/block/nvme.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/block/nvme.h | ||
21 | +++ b/include/block/nvme.h | ||
22 | @@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED NvmeCreateCq { | ||
23 | #define NVME_CQ_FLAGS_PC(cq_flags) (cq_flags & 0x1) | ||
24 | #define NVME_CQ_FLAGS_IEN(cq_flags) ((cq_flags >> 1) & 0x1) | ||
25 | |||
26 | +enum NvmeFlagsCq { | ||
27 | + NVME_CQ_PC = 1, | ||
28 | + NVME_CQ_IEN = 2, | ||
29 | +}; | ||
30 | + | ||
31 | typedef struct QEMU_PACKED NvmeCreateSq { | ||
32 | uint8_t opcode; | ||
33 | uint8_t flags; | ||
34 | @@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED NvmeCreateSq { | ||
35 | #define NVME_SQ_FLAGS_PC(sq_flags) (sq_flags & 0x1) | ||
36 | #define NVME_SQ_FLAGS_QPRIO(sq_flags) ((sq_flags >> 1) & 0x3) | ||
37 | |||
38 | -enum NvmeQueueFlags { | ||
39 | - NVME_Q_PC = 1, | ||
40 | - NVME_Q_PRIO_URGENT = 0, | ||
41 | - NVME_Q_PRIO_HIGH = 1, | ||
42 | - NVME_Q_PRIO_NORMAL = 2, | ||
43 | - NVME_Q_PRIO_LOW = 3, | ||
44 | +enum NvmeFlagsSq { | ||
45 | + NVME_SQ_PC = 1, | ||
46 | + | ||
47 | + NVME_SQ_PRIO_URGENT = 0, | ||
48 | + NVME_SQ_PRIO_HIGH = 1, | ||
49 | + NVME_SQ_PRIO_NORMAL = 2, | ||
50 | + NVME_SQ_PRIO_LOW = 3, | ||
51 | }; | ||
52 | |||
53 | typedef struct QEMU_PACKED NvmeIdentify { | ||
54 | -- | ||
55 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | Replace magic values with definitions, and simplify since the | ||
4 | number of queues will never reach 64K. | ||
5 | |||
6 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Message-id: 20201029093306.1063879-14-philmd@redhat.com | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
13 | --- | ||
14 | block/nvme.c | 9 +++++---- | ||
15 | 1 file changed, 5 insertions(+), 4 deletions(-) | ||
16 | |||
17 | diff --git a/block/nvme.c b/block/nvme.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/nvme.c | ||
20 | +++ b/block/nvme.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) | ||
22 | NvmeCmd cmd; | ||
23 | unsigned queue_size = NVME_QUEUE_SIZE; | ||
24 | |||
25 | + assert(n <= UINT16_MAX); | ||
26 | q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs), | ||
27 | n, queue_size, errp); | ||
28 | if (!q) { | ||
29 | @@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) | ||
30 | cmd = (NvmeCmd) { | ||
31 | .opcode = NVME_ADM_CMD_CREATE_CQ, | ||
32 | .dptr.prp1 = cpu_to_le64(q->cq.iova), | ||
33 | - .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), | ||
34 | - .cdw11 = cpu_to_le32(0x3), | ||
35 | + .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), | ||
36 | + .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC), | ||
37 | }; | ||
38 | if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { | ||
39 | error_setg(errp, "Failed to create CQ io queue [%u]", n); | ||
40 | @@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) | ||
41 | cmd = (NvmeCmd) { | ||
42 | .opcode = NVME_ADM_CMD_CREATE_SQ, | ||
43 | .dptr.prp1 = cpu_to_le64(q->sq.iova), | ||
44 | - .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), | ||
45 | - .cdw11 = cpu_to_le32(0x1 | (n << 16)), | ||
46 | + .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), | ||
47 | + .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)), | ||
48 | }; | ||
49 | if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { | ||
50 | error_setg(errp, "Failed to create SQ io queue [%u]", n); | ||
51 | -- | ||
52 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | From the specification, chapter 3.1.8 "AQA - Admin Queue Attributes", | ||
4 | the Admin Submission Queue Size field is a 0’s based value: | ||
5 | |||
6 | Admin Submission Queue Size (ASQS): | ||
7 | |||
8 | Defines the size of the Admin Submission Queue in entries. | ||
9 | Enabling a controller while this field is cleared to 00h | ||
10 | produces undefined results. The minimum size of the Admin | ||
11 | Submission Queue is two entries. The maximum size of the | ||
12 | Admin Submission Queue is 4096 entries. | ||
13 | This is a 0’s based value. | ||
14 | |||
15 | This bug has never been hit because the device initialization | ||
16 | uses a single command synchronously :) | ||
17 | |||
18 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
19 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
20 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
21 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
22 | Message-id: 20201029093306.1063879-15-philmd@redhat.com | ||
23 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
24 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
25 | --- | ||
26 | block/nvme.c | 6 +++--- | ||
27 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
28 | |||
29 | diff --git a/block/nvme.c b/block/nvme.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/block/nvme.c | ||
32 | +++ b/block/nvme.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
34 | goto out; | ||
35 | } | ||
36 | s->queue_count = 1; | ||
37 | - QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); | ||
38 | - regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) | | ||
39 | - (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT)); | ||
40 | + QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); | ||
41 | + regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | | ||
42 | + ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); | ||
43 | regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); | ||
44 | regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); | ||
45 | |||
46 | -- | ||
47 | 2.28.0 | ||
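The 0's based encoding is easy to get wrong, so here is a minimal standalone sketch of it (plain C, not QEMU code; the queue depth of 128 is hypothetical, and the shift values simply follow the NVMe AQA layout, ASQS in bits 11:0 and ACQS in bits 27:16):

    #include <stdint.h>
    #include <stdio.h>

    #define AQA_ASQS_SHIFT 0   /* Admin Submission Queue Size, bits 11:0 */
    #define AQA_ACQS_SHIFT 16  /* Admin Completion Queue Size, bits 27:16 */

    int main(void)
    {
        unsigned queue_size = 128; /* hypothetical queue depth for illustration */

        /* 0's based: a depth of 128 entries must be programmed as 127 (0x7f). */
        uint32_t aqa = ((queue_size - 1) << AQA_ACQS_SHIFT) |
                       ((queue_size - 1) << AQA_ASQS_SHIFT);

        printf("AQA = 0x%08x\n", (unsigned)aqa); /* prints AQA = 0x007f007f */
        return 0;
    }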
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | We don't need to dereference BDRVNVMeState each time. | ||
4 | Use an NVMeQueuePair pointer to the admin queue. | ||
5 | nvme_init() then becomes easier to review, matching the style | ||
6 | of nvme_add_io_queue(). | ||
7 | |||
8 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-16-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
15 | --- | ||
16 | block/nvme.c | 12 ++++++------ | ||
17 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
18 | |||
19 | diff --git a/block/nvme.c b/block/nvme.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/nvme.c | ||
22 | +++ b/block/nvme.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
24 | Error **errp) | ||
25 | { | ||
26 | BDRVNVMeState *s = bs->opaque; | ||
27 | + NVMeQueuePair *q; | ||
28 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
29 | int ret; | ||
30 | uint64_t cap; | ||
31 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
32 | |||
33 | /* Set up admin queue. */ | ||
34 | s->queues = g_new(NVMeQueuePair *, 1); | ||
35 | - s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0, | ||
36 | - NVME_QUEUE_SIZE, | ||
37 | - errp); | ||
38 | - if (!s->queues[INDEX_ADMIN]) { | ||
39 | + q = nvme_create_queue_pair(s, aio_context, 0, NVME_QUEUE_SIZE, errp); | ||
40 | + if (!q) { | ||
41 | ret = -EINVAL; | ||
42 | goto out; | ||
43 | } | ||
44 | + s->queues[INDEX_ADMIN] = q; | ||
45 | s->queue_count = 1; | ||
46 | QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); | ||
47 | regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | | ||
48 | ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); | ||
49 | - regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); | ||
50 | - regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); | ||
51 | + regs->asq = cpu_to_le64(q->sq.iova); | ||
52 | + regs->acq = cpu_to_le64(q->cq.iova); | ||
53 | |||
54 | /* After setting up all control registers we can enable device now. */ | ||
55 | regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | | ||
56 | -- | ||
57 | 2.28.0 | ||
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Wrap the code that was copied repeatedly in the two functions, | 3 | As all commands use the ADMIN queue, it is pointless to pass |
4 | sd_aio_setup and sd_aio_complete. | 4 | it as an argument each time. Remove the argument, and rename the |
5 | function as nvme_admin_cmd_sync() to make this new behavior | ||
6 | clearer. | ||
5 | 7 | ||
6 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 8 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
7 | Message-id: 20161129113245.32724-6-pbonzini@redhat.com | 9 | Tested-by: Eric Auger <eric.auger@redhat.com> |
8 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-17-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
9 | --- | 15 | --- |
10 | block/sheepdog.c | 66 ++++++++++++++++++++++++++------------------------------ | 16 | block/nvme.c | 19 ++++++++++--------- |
11 | 1 file changed, 30 insertions(+), 36 deletions(-) | 17 | 1 file changed, 10 insertions(+), 9 deletions(-) |
12 | 18 | ||
13 | diff --git a/block/sheepdog.c b/block/sheepdog.c | 19 | diff --git a/block/nvme.c b/block/nvme.c |
14 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block/sheepdog.c | 21 | --- a/block/nvme.c |
16 | +++ b/block/sheepdog.c | 22 | +++ b/block/nvme.c |
17 | @@ -XXX,XX +XXX,XX @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, | 23 | @@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, |
18 | return aio_req; | 24 | qemu_mutex_unlock(&q->lock); |
19 | } | 25 | } |
20 | 26 | ||
21 | +static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb) | 27 | -static void nvme_cmd_sync_cb(void *opaque, int ret) |
22 | +{ | 28 | +static void nvme_admin_cmd_sync_cb(void *opaque, int ret) |
23 | + SheepdogAIOCB *cb; | 29 | { |
24 | + | 30 | int *pret = opaque; |
25 | +retry: | 31 | *pret = ret; |
26 | + QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { | 32 | aio_wait_kick(); |
27 | + if (AIOCBOverlapping(acb, cb)) { | ||
28 | + qemu_co_queue_wait(&s->overlapping_queue); | ||
29 | + goto retry; | ||
30 | + } | ||
31 | + } | ||
32 | +} | ||
33 | + | ||
34 | static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, | ||
35 | QEMUIOVector *qiov, int64_t sector_num, int nb_sectors, | ||
36 | int type) | ||
37 | @@ -XXX,XX +XXX,XX @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, | ||
38 | acb->min_dirty_data_idx = UINT32_MAX; | ||
39 | acb->max_dirty_data_idx = 0; | ||
40 | acb->aiocb_type = type; | ||
41 | + | ||
42 | + if (type == AIOCB_FLUSH_CACHE) { | ||
43 | + return; | ||
44 | + } | ||
45 | + | ||
46 | + wait_for_overlapping_aiocb(s, acb); | ||
47 | + QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings); | ||
48 | } | 33 | } |
49 | 34 | ||
50 | /* Return -EIO in case of error, file descriptor on success */ | 35 | -static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, |
51 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb) | 36 | - NvmeCmd *cmd) |
37 | +static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd) | ||
38 | { | ||
39 | + BDRVNVMeState *s = bs->opaque; | ||
40 | + NVMeQueuePair *q = s->queues[INDEX_ADMIN]; | ||
41 | AioContext *aio_context = bdrv_get_aio_context(bs); | ||
42 | NVMeRequest *req; | ||
43 | int ret = -EINPROGRESS; | ||
44 | @@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, | ||
45 | if (!req) { | ||
46 | return -EBUSY; | ||
52 | } | 47 | } |
53 | } | 48 | - nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret); |
54 | 49 | + nvme_submit_command(q, req, cmd, nvme_admin_cmd_sync_cb, &ret); | |
55 | -static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) | 50 | |
56 | +static void sd_aio_complete(SheepdogAIOCB *acb) | 51 | AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS); |
57 | { | 52 | return ret; |
58 | - SheepdogAIOCB *cb; | 53 | @@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) |
59 | - | 54 | |
60 | - QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { | 55 | memset(id, 0, sizeof(*id)); |
61 | - if (AIOCBOverlapping(aiocb, cb)) { | 56 | cmd.dptr.prp1 = cpu_to_le64(iova); |
62 | - return true; | 57 | - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { |
63 | - } | 58 | + if (nvme_admin_cmd_sync(bs, &cmd)) { |
64 | + if (acb->aiocb_type == AIOCB_FLUSH_CACHE) { | 59 | error_setg(errp, "Failed to identify controller"); |
65 | + return; | 60 | goto out; |
66 | } | 61 | } |
67 | 62 | @@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) | |
68 | - QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings); | 63 | memset(id, 0, sizeof(*id)); |
69 | - return false; | 64 | cmd.cdw10 = 0; |
70 | + QLIST_REMOVE(acb, aiocb_siblings); | 65 | cmd.nsid = cpu_to_le32(namespace); |
71 | + qemu_co_queue_restart_all(&acb->s->overlapping_queue); | 66 | - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { |
72 | } | 67 | + if (nvme_admin_cmd_sync(bs, &cmd)) { |
73 | 68 | error_setg(errp, "Failed to identify namespace"); | |
74 | static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | 69 | goto out; |
75 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, | ||
76 | } | 70 | } |
77 | 71 | @@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) | |
78 | sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA); | 72 | .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), |
79 | - | 73 | .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC), |
80 | -retry: | 74 | }; |
81 | - if (check_overlapping_aiocb(s, &acb)) { | 75 | - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { |
82 | - qemu_co_queue_wait(&s->overlapping_queue); | 76 | + if (nvme_admin_cmd_sync(bs, &cmd)) { |
83 | - goto retry; | 77 | error_setg(errp, "Failed to create CQ io queue [%u]", n); |
84 | - } | 78 | goto out_error; |
85 | - | ||
86 | sd_co_rw_vector(&acb); | ||
87 | sd_write_done(&acb); | ||
88 | + sd_aio_complete(&acb); | ||
89 | |||
90 | - QLIST_REMOVE(&acb, aiocb_siblings); | ||
91 | - qemu_co_queue_restart_all(&s->overlapping_queue); | ||
92 | return acb.ret; | ||
93 | } | ||
94 | |||
95 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, | ||
96 | BDRVSheepdogState *s = bs->opaque; | ||
97 | |||
98 | sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA); | ||
99 | - | ||
100 | -retry: | ||
101 | - if (check_overlapping_aiocb(s, &acb)) { | ||
102 | - qemu_co_queue_wait(&s->overlapping_queue); | ||
103 | - goto retry; | ||
104 | - } | ||
105 | - | ||
106 | sd_co_rw_vector(&acb); | ||
107 | + sd_aio_complete(&acb); | ||
108 | |||
109 | - QLIST_REMOVE(&acb, aiocb_siblings); | ||
110 | - qemu_co_queue_restart_all(&s->overlapping_queue); | ||
111 | return acb.ret; | ||
112 | } | ||
113 | |||
114 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) | ||
115 | if (--acb.nr_pending) { | ||
116 | qemu_coroutine_yield(); | ||
117 | } | 79 | } |
118 | + | 80 | @@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) |
119 | + sd_aio_complete(&acb); | 81 | .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n), |
120 | return acb.ret; | 82 | .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)), |
121 | } | 83 | }; |
122 | 84 | - if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) { | |
123 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, | 85 | + if (nvme_admin_cmd_sync(bs, &cmd)) { |
86 | error_setg(errp, "Failed to create SQ io queue [%u]", n); | ||
87 | goto out_error; | ||
124 | } | 88 | } |
125 | sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS, | 89 | @@ -XXX,XX +XXX,XX @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable, |
126 | count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); | 90 | .cdw11 = cpu_to_le32(enable ? 0x01 : 0x00), |
127 | - | 91 | }; |
128 | -retry: | 92 | |
129 | - if (check_overlapping_aiocb(s, &acb)) { | 93 | - ret = nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd); |
130 | - qemu_co_queue_wait(&s->overlapping_queue); | 94 | + ret = nvme_admin_cmd_sync(bs, &cmd); |
131 | - goto retry; | 95 | if (ret) { |
132 | - } | 96 | error_setg(errp, "Failed to configure NVMe write cache"); |
133 | - | 97 | } |
134 | sd_co_rw_vector(&acb); | ||
135 | + sd_aio_complete(&acb); | ||
136 | |||
137 | - QLIST_REMOVE(&acb, aiocb_siblings); | ||
138 | - qemu_co_queue_restart_all(&s->overlapping_queue); | ||
139 | return acb.ret; | ||
140 | } | ||
141 | |||
142 | -- | 98 | -- |
143 | 2.9.3 | 99 | 2.28.0 |
144 | 100 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | Commit bdd6a90a9e5 ("block: Add VFIO based NVMe driver") | ||
4 | sets the request_alignment in nvme_refresh_limits(). | ||
5 | For consistency, also set it during initialization. | ||
6 | |||
7 | Reported-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-18-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
15 | --- | ||
16 | block/nvme.c | 1 + | ||
17 | 1 file changed, 1 insertion(+) | ||
18 | |||
19 | diff --git a/block/nvme.c b/block/nvme.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/nvme.c | ||
22 | +++ b/block/nvme.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
24 | s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap)); | ||
25 | s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); | ||
26 | bs->bl.opt_mem_alignment = s->page_size; | ||
27 | + bs->bl.request_alignment = s->page_size; | ||
28 | timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); | ||
29 | |||
30 | /* Reset device to get a clean state. */ | ||
31 | -- | ||
32 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | While trying to simplify the code using a macro, we forgot | ||
4 | the 12-bit shift... Correct that. | ||
5 | |||
6 | Fixes: fad1eb68862 ("block/nvme: Use register definitions from 'block/nvme.h'") | ||
7 | Reported-by: Eric Auger <eric.auger@redhat.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-19-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
15 | --- | ||
16 | block/nvme.c | 2 +- | ||
17 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/block/nvme.c b/block/nvme.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/nvme.c | ||
22 | +++ b/block/nvme.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
24 | goto out; | ||
25 | } | ||
26 | |||
27 | - s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap)); | ||
28 | + s->page_size = 1u << (12 + NVME_CAP_MPSMIN(cap)); | ||
29 | s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); | ||
30 | bs->bl.opt_mem_alignment = s->page_size; | ||
31 | bs->bl.request_alignment = s->page_size; | ||
32 | -- | ||
33 | 2.28.0 | ||
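To see why the missing shift stayed hidden on typical x86 hardware, here is a standalone sketch (not QEMU code) comparing the old and new computations for a few CAP.MPSMIN values; with the old formula every MPSMIN below 12 collapsed to the 4096 fallback:

    #include <stdio.h>

    int main(void)
    {
        for (unsigned mpsmin = 0; mpsmin <= 4; mpsmin++) {
            unsigned old_val  = 1u << mpsmin;                      /* missing "12 +" */
            unsigned old_page = old_val > 4096 ? old_val : 4096;   /* MAX(4096, ...) */
            unsigned new_page = 1u << (12 + mpsmin);               /* 2^(12 + MPSMIN) */

            printf("MPSMIN=%u old=%u new=%u\n", mpsmin, old_page, new_page);
        }
        return 0;
    }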
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | From: Eric Auger <eric.auger@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Add to the list in add_aio_request and, indirectly, resend_aioreq. Inline | 3 | In preparation for 64kB host page support, let's change the size |
4 | free_aio_req in the caller; it does not simply undo alloc_aio_req's job. | 4 | and alignment of the IDENTIFY command response buffer so that |
5 | the VFIO DMA MAP succeeds. We align on the host page size. | ||
5 | 6 | ||
6 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 7 | Signed-off-by: Eric Auger <eric.auger@redhat.com> |
7 | Message-id: 20161129113245.32724-5-pbonzini@redhat.com | 8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
8 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-20-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
9 | --- | 15 | --- |
10 | block/sheepdog.c | 23 ++++++----------------- | 16 | block/nvme.c | 9 +++++---- |
11 | 1 file changed, 6 insertions(+), 17 deletions(-) | 17 | 1 file changed, 5 insertions(+), 4 deletions(-) |
12 | 18 | ||
13 | diff --git a/block/sheepdog.c b/block/sheepdog.c | 19 | diff --git a/block/nvme.c b/block/nvme.c |
14 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block/sheepdog.c | 21 | --- a/block/nvme.c |
16 | +++ b/block/sheepdog.c | 22 | +++ b/block/nvme.c |
17 | @@ -XXX,XX +XXX,XX @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, | 23 | @@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) |
18 | return aio_req; | 24 | .opcode = NVME_ADM_CMD_IDENTIFY, |
19 | } | 25 | .cdw10 = cpu_to_le32(0x1), |
20 | 26 | }; | |
21 | -static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) | 27 | + size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size); |
22 | -{ | 28 | |
23 | - SheepdogAIOCB *acb = aio_req->aiocb; | 29 | - id = qemu_try_memalign(s->page_size, sizeof(*id)); |
24 | - | 30 | + id = qemu_try_memalign(qemu_real_host_page_size, id_size); |
25 | - QLIST_REMOVE(aio_req, aio_siblings); | 31 | if (!id) { |
26 | - g_free(aio_req); | 32 | error_setg(errp, "Cannot allocate buffer for identify response"); |
27 | - | 33 | goto out; |
28 | - acb->nr_pending--; | ||
29 | -} | ||
30 | - | ||
31 | static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, | ||
32 | QEMUIOVector *qiov, int64_t sector_num, int nb_sectors, | ||
33 | int type) | ||
34 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn void reconnect_to_sdog(void *opaque) | ||
35 | while (!QLIST_EMPTY(&s->failed_aio_head)) { | ||
36 | aio_req = QLIST_FIRST(&s->failed_aio_head); | ||
37 | QLIST_REMOVE(aio_req, aio_siblings); | ||
38 | - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); | ||
39 | resend_aioreq(s, aio_req); | ||
40 | } | 34 | } |
41 | } | 35 | - r = qemu_vfio_dma_map(s->vfio, id, sizeof(*id), true, &iova); |
42 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque) | 36 | + r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova); |
43 | */ | 37 | if (r) { |
44 | s->co_recv = NULL; | 38 | error_setg(errp, "Cannot map buffer for DMA"); |
45 | 39 | goto out; | |
46 | + QLIST_REMOVE(aio_req, aio_siblings); | ||
47 | switch (rsp.result) { | ||
48 | case SD_RES_SUCCESS: | ||
49 | break; | ||
50 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque) | ||
51 | break; | ||
52 | } | 40 | } |
53 | 41 | ||
54 | - free_aio_req(s, aio_req); | 42 | - memset(id, 0, sizeof(*id)); |
55 | - if (!acb->nr_pending) { | 43 | + memset(id, 0, id_size); |
56 | + g_free(aio_req); | 44 | cmd.dptr.prp1 = cpu_to_le64(iova); |
57 | + | 45 | if (nvme_admin_cmd_sync(bs, &cmd)) { |
58 | + if (!--acb->nr_pending) { | 46 | error_setg(errp, "Failed to identify controller"); |
59 | /* | 47 | @@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) |
60 | * We've finished all requests which belong to the AIOCB, so | 48 | s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES); |
61 | * we can switch back to sd_co_readv/writev now. | 49 | s->supports_discard = !!(oncs & NVME_ONCS_DSM); |
62 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, | 50 | |
63 | uint64_t old_oid = aio_req->base_oid; | 51 | - memset(id, 0, sizeof(*id)); |
64 | bool create = aio_req->create; | 52 | + memset(id, 0, id_size); |
65 | 53 | cmd.cdw10 = 0; | |
66 | + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); | 54 | cmd.nsid = cpu_to_le32(namespace); |
67 | + | 55 | if (nvme_admin_cmd_sync(bs, &cmd)) { |
68 | if (!nr_copies) { | ||
69 | error_report("bug"); | ||
70 | } | ||
71 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) | ||
72 | iov.iov_len = sizeof(s->inode); | ||
73 | aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), | ||
74 | data_len, offset, 0, false, 0, offset); | ||
75 | - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); | ||
76 | add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); | ||
77 | if (--acb->nr_pending) { | ||
78 | qemu_coroutine_yield(); | ||
79 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb) | ||
80 | old_oid, | ||
81 | acb->aiocb_type == AIOCB_DISCARD_OBJ ? | ||
82 | 0 : done); | ||
83 | - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); | ||
84 | - | ||
85 | add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, | ||
86 | acb->aiocb_type); | ||
87 | done: | ||
88 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) | ||
89 | acb.nr_pending++; | ||
90 | aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id), | ||
91 | 0, 0, 0, false, 0, 0); | ||
92 | - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); | ||
93 | add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type); | ||
94 | |||
95 | if (--acb.nr_pending) { | ||
96 | -- | 56 | -- |
97 | 2.9.3 | 57 | 2.28.0 |
98 | 58 | ||
New patch | |||
---|---|---|---|
1 | From: Eric Auger <eric.auger@redhat.com> | ||
1 | 2 | ||
3 | In preparation for 64kB host page support, let's change the size | ||
4 | and alignment of the queue so that the VFIO DMA MAP succeeds. | ||
5 | We align on the host page size. | ||
6 | |||
7 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
11 | Message-id: 20201029093306.1063879-21-philmd@redhat.com | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
14 | --- | ||
15 | block/nvme.c | 4 ++-- | ||
16 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
17 | |||
18 | diff --git a/block/nvme.c b/block/nvme.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/block/nvme.c | ||
21 | +++ b/block/nvme.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, | ||
23 | size_t bytes; | ||
24 | int r; | ||
25 | |||
26 | - bytes = ROUND_UP(nentries * entry_bytes, s->page_size); | ||
27 | + bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size); | ||
28 | q->head = q->tail = 0; | ||
29 | - q->queue = qemu_try_memalign(s->page_size, bytes); | ||
30 | + q->queue = qemu_try_memalign(qemu_real_host_page_size, bytes); | ||
31 | if (!q->queue) { | ||
32 | error_setg(errp, "Cannot allocate queue"); | ||
33 | return false; | ||
34 | -- | ||
35 | 2.28.0 | ||
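A standalone sketch of the rounding applied throughout this series (the round_up() helper is local to the example and merely mimics the behaviour of the macro used in the patch; the 64K page and the queue geometry are illustrative values):

    #include <stddef.h>
    #include <stdio.h>

    /* Round 'n' up to the next multiple of 'align' (align must be a power of two). */
    static size_t round_up(size_t n, size_t align)
    {
        return (n + align - 1) & ~(align - 1);
    }

    int main(void)
    {
        size_t entry_bytes = 64;     /* e.g. one submission queue entry */
        size_t nentries    = 128;    /* hypothetical queue depth */
        size_t host_page   = 65536;  /* 64K host page, as on some Aarch64 hosts */

        /* 8K worth of entries still needs a full 64K allocation to be DMA-mappable. */
        printf("raw=%zu rounded=%zu\n", nentries * entry_bytes,
               round_up(nentries * entry_bytes, host_page));
        return 0;
    }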
New patch | |||
---|---|---|---|
1 | From: Eric Auger <eric.auger@redhat.com> | ||
1 | 2 | ||
3 | In preparation for 64kB host page support, let's change the size | ||
4 | and alignment of the prp_list_pages so that the VFIO DMA MAP succeeds | ||
5 | with 64kB host page size. We align on the host page size. | ||
6 | |||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
8 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-22-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
15 | --- | ||
16 | block/nvme.c | 11 ++++++----- | ||
17 | 1 file changed, 6 insertions(+), 5 deletions(-) | ||
18 | |||
19 | diff --git a/block/nvme.c b/block/nvme.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/nvme.c | ||
22 | +++ b/block/nvme.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, | ||
24 | int i, r; | ||
25 | NVMeQueuePair *q; | ||
26 | uint64_t prp_list_iova; | ||
27 | + size_t bytes; | ||
28 | |||
29 | q = g_try_new0(NVMeQueuePair, 1); | ||
30 | if (!q) { | ||
31 | @@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, | ||
32 | } | ||
33 | trace_nvme_create_queue_pair(idx, q, size, aio_context, | ||
34 | event_notifier_get_fd(s->irq_notifier)); | ||
35 | - q->prp_list_pages = qemu_try_memalign(s->page_size, | ||
36 | - s->page_size * NVME_NUM_REQS); | ||
37 | + bytes = QEMU_ALIGN_UP(s->page_size * NVME_NUM_REQS, | ||
38 | + qemu_real_host_page_size); | ||
39 | + q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes); | ||
40 | if (!q->prp_list_pages) { | ||
41 | goto fail; | ||
42 | } | ||
43 | - memset(q->prp_list_pages, 0, s->page_size * NVME_NUM_REQS); | ||
44 | + memset(q->prp_list_pages, 0, bytes); | ||
45 | qemu_mutex_init(&q->lock); | ||
46 | q->s = s; | ||
47 | q->index = idx; | ||
48 | qemu_co_queue_init(&q->free_req_queue); | ||
49 | q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q); | ||
50 | - r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, | ||
51 | - s->page_size * NVME_NUM_REQS, | ||
52 | + r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes, | ||
53 | false, &prp_list_iova); | ||
54 | if (r) { | ||
55 | goto fail; | ||
56 | -- | ||
57 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Eric Auger <eric.auger@redhat.com> | ||
1 | 2 | ||
3 | Make sure iov's va and size are properly aligned on the | ||
4 | host page size. | ||
5 | |||
6 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
11 | Message-id: 20201029093306.1063879-23-philmd@redhat.com | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
14 | --- | ||
15 | block/nvme.c | 14 ++++++++------ | ||
16 | 1 file changed, 8 insertions(+), 6 deletions(-) | ||
17 | |||
18 | diff --git a/block/nvme.c b/block/nvme.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/block/nvme.c | ||
21 | +++ b/block/nvme.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, | ||
23 | for (i = 0; i < qiov->niov; ++i) { | ||
24 | bool retry = true; | ||
25 | uint64_t iova; | ||
26 | + size_t len = QEMU_ALIGN_UP(qiov->iov[i].iov_len, | ||
27 | + qemu_real_host_page_size); | ||
28 | try_map: | ||
29 | r = qemu_vfio_dma_map(s->vfio, | ||
30 | qiov->iov[i].iov_base, | ||
31 | - qiov->iov[i].iov_len, | ||
32 | - true, &iova); | ||
33 | + len, true, &iova); | ||
34 | if (r == -ENOMEM && retry) { | ||
35 | retry = false; | ||
36 | trace_nvme_dma_flush_queue_wait(s); | ||
37 | @@ -XXX,XX +XXX,XX @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs, | ||
38 | BDRVNVMeState *s = bs->opaque; | ||
39 | |||
40 | for (i = 0; i < qiov->niov; ++i) { | ||
41 | - if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, s->page_size) || | ||
42 | - !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, s->page_size)) { | ||
43 | + if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, | ||
44 | + qemu_real_host_page_size) || | ||
45 | + !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size)) { | ||
46 | trace_nvme_qiov_unaligned(qiov, i, qiov->iov[i].iov_base, | ||
47 | qiov->iov[i].iov_len, s->page_size); | ||
48 | return false; | ||
49 | @@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
50 | int r; | ||
51 | uint8_t *buf = NULL; | ||
52 | QEMUIOVector local_qiov; | ||
53 | - | ||
54 | + size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size); | ||
55 | assert(QEMU_IS_ALIGNED(offset, s->page_size)); | ||
56 | assert(QEMU_IS_ALIGNED(bytes, s->page_size)); | ||
57 | assert(bytes <= s->max_transfer); | ||
58 | @@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
59 | } | ||
60 | s->stats.unaligned_accesses++; | ||
61 | trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write); | ||
62 | - buf = qemu_try_memalign(s->page_size, bytes); | ||
63 | + buf = qemu_try_memalign(qemu_real_host_page_size, len); | ||
64 | |||
65 | if (!buf) { | ||
66 | return -ENOMEM; | ||
67 | -- | ||
68 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | qemu_vfio_pci_map_bar() calls mmap(), and mmap(2) states: | ||
4 | |||
5 | 'offset' must be a multiple of the page size as returned | ||
6 | by sysconf(_SC_PAGE_SIZE). | ||
7 | |||
8 | In commit f68453237b9 we started to use an offset of 4K which | ||
9 | broke this contract on Aarch64 arch. | ||
10 | |||
11 | Fix by mapping at offset 0 and accessing doorbells at offset 4K. | ||
12 | |||
13 | Fixes: f68453237b9 ("block/nvme: Map doorbells pages write-only") | ||
14 | Reported-by: Eric Auger <eric.auger@redhat.com> | ||
15 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
16 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
18 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
19 | Message-id: 20201029093306.1063879-24-philmd@redhat.com | ||
20 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
21 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
22 | --- | ||
23 | block/nvme.c | 11 +++++++---- | ||
24 | 1 file changed, 7 insertions(+), 4 deletions(-) | ||
25 | |||
26 | diff --git a/block/nvme.c b/block/nvme.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/block/nvme.c | ||
29 | +++ b/block/nvme.c | ||
30 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
31 | struct BDRVNVMeState { | ||
32 | AioContext *aio_context; | ||
33 | QEMUVFIOState *vfio; | ||
34 | + void *bar0_wo_map; | ||
35 | /* Memory mapped registers */ | ||
36 | volatile struct { | ||
37 | uint32_t sq_tail; | ||
38 | @@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, | ||
39 | } | ||
40 | } | ||
41 | |||
42 | - s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar), | ||
43 | - NVME_DOORBELL_SIZE, PROT_WRITE, errp); | ||
44 | + s->bar0_wo_map = qemu_vfio_pci_map_bar(s->vfio, 0, 0, | ||
45 | + sizeof(NvmeBar) + NVME_DOORBELL_SIZE, | ||
46 | + PROT_WRITE, errp); | ||
47 | + s->doorbells = (void *)((uintptr_t)s->bar0_wo_map + sizeof(NvmeBar)); | ||
48 | if (!s->doorbells) { | ||
49 | ret = -EINVAL; | ||
50 | goto out; | ||
51 | @@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs) | ||
52 | &s->irq_notifier[MSIX_SHARED_IRQ_IDX], | ||
53 | false, NULL, NULL); | ||
54 | event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); | ||
55 | - qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells, | ||
56 | - sizeof(NvmeBar), NVME_DOORBELL_SIZE); | ||
57 | + qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map, | ||
58 | + 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE); | ||
59 | qemu_vfio_close(s->vfio); | ||
60 | |||
61 | g_free(s->device); | ||
62 | -- | ||
63 | 2.28.0 | ||
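The mmap(2) constraint can be demonstrated with a trivial standalone sketch (4096 being the sizeof(NvmeBar) offset the old code passed; on a 64K-page host the check fails):

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        long page_size = sysconf(_SC_PAGE_SIZE);
        long offset = 4096; /* old mapping offset, sizeof(NvmeBar) */

        if (offset % page_size) {
            printf("offset %ld is not a multiple of the %ld-byte page: mmap() fails\n",
                   offset, page_size);
        } else {
            printf("offset %ld is page aligned here (page size %ld)\n",
                   offset, page_size);
        }
        return 0;
    }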
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | The Completion Queue Command Identifier is a 16-bit value, | ||
4 | so nvme_submit_command() is unlikely to work on big-endian | ||
5 | hosts, as the relevant bits are truncated. | ||
6 | Fix by using the correct byte-swap function. | ||
7 | |||
8 | Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver") | ||
9 | Reported-by: Keith Busch <kbusch@kernel.org> | ||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
11 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | Message-id: 20201029093306.1063879-25-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
15 | --- | ||
16 | block/nvme.c | 2 +- | ||
17 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/block/nvme.c b/block/nvme.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/block/nvme.c | ||
22 | +++ b/block/nvme.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, | ||
24 | assert(!req->cb); | ||
25 | req->cb = cb; | ||
26 | req->opaque = opaque; | ||
27 | - cmd->cid = cpu_to_le32(req->cid); | ||
28 | + cmd->cid = cpu_to_le16(req->cid); | ||
29 | |||
30 | trace_nvme_submit_command(q->s, q->index, req->cid); | ||
31 | nvme_trace_command(cmd); | ||
32 | -- | ||
33 | 2.28.0 | ||
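A standalone sketch of the truncation (GCC/Clang byte-swap builtins stand in for how cpu_to_le32()/cpu_to_le16() behave on a big-endian host):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t cid = 0x0001;

        /* 32-bit swap then store into the 16-bit field: the set bit is lost. */
        uint16_t wrong = (uint16_t)__builtin_bswap32(cid);
        /* 16-bit swap keeps both bytes, just reordered for little-endian. */
        uint16_t right = __builtin_bswap16(cid);

        printf("wrong=0x%04x right=0x%04x\n", wrong, right);
        return 0;
    }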
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | Change the confusing "VFIO IOMMU check failed" error message to | ||
4 | the explicit "VFIO IOMMU Type1 is not supported" one. | ||
5 | |||
6 | Example on POWER: | ||
7 | |||
8 | $ qemu-system-ppc64 -drive if=none,id=nvme0,file=nvme://0001:01:00.0/1,format=raw | ||
9 | qemu-system-ppc64: -drive if=none,id=nvme0,file=nvme://0001:01:00.0/1,format=raw: VFIO IOMMU Type1 is not supported | ||
10 | |||
11 | Suggested-by: Alex Williamson <alex.williamson@redhat.com> | ||
12 | Reviewed-by: Fam Zheng <fam@euphon.net> | ||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
15 | Message-id: 20201103020733.2303148-2-philmd@redhat.com | ||
16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
18 | --- | ||
19 | util/vfio-helpers.c | 2 +- | ||
20 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
21 | |||
22 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/util/vfio-helpers.c | ||
25 | +++ b/util/vfio-helpers.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device, | ||
27 | } | ||
28 | |||
29 | if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { | ||
30 | - error_setg_errno(errp, errno, "VFIO IOMMU check failed"); | ||
31 | + error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported"); | ||
32 | ret = -EINVAL; | ||
33 | goto fail_container; | ||
34 | } | ||
35 | -- | ||
36 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | We sometimes get a kernel panic with some devices on Aarch64 | ||
4 | hosts. Alex Williamson suggests it might be a broken PCIe | ||
5 | root complex. Add trace events to record the latest I/O | ||
6 | access before crashing. Just in case, assert that our accesses | ||
7 | are aligned. | ||
8 | |||
9 | Reviewed-by: Fam Zheng <fam@euphon.net> | ||
10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Message-id: 20201103020733.2303148-3-philmd@redhat.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
15 | --- | ||
16 | util/vfio-helpers.c | 8 ++++++++ | ||
17 | util/trace-events | 2 ++ | ||
18 | 2 files changed, 10 insertions(+) | ||
19 | |||
20 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/util/vfio-helpers.c | ||
23 | +++ b/util/vfio-helpers.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf, | ||
25 | { | ||
26 | int ret; | ||
27 | |||
28 | + trace_qemu_vfio_pci_read_config(buf, ofs, size, | ||
29 | + s->config_region_info.offset, | ||
30 | + s->config_region_info.size); | ||
31 | + assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size)); | ||
32 | do { | ||
33 | ret = pread(s->device, buf, size, s->config_region_info.offset + ofs); | ||
34 | } while (ret == -1 && errno == EINTR); | ||
35 | @@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int | ||
36 | { | ||
37 | int ret; | ||
38 | |||
39 | + trace_qemu_vfio_pci_write_config(buf, ofs, size, | ||
40 | + s->config_region_info.offset, | ||
41 | + s->config_region_info.size); | ||
42 | + assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size)); | ||
43 | do { | ||
44 | ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs); | ||
45 | } while (ret == -1 && errno == EINTR); | ||
46 | diff --git a/util/trace-events b/util/trace-events | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/util/trace-events | ||
49 | +++ b/util/trace-events | ||
50 | @@ -XXX,XX +XXX,XX @@ qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova | ||
51 | qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p host %p size 0x%zx iova 0x%"PRIx64 | ||
52 | qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d iova %p" | ||
53 | qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p" | ||
54 | +qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
55 | +qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
56 | -- | ||
57 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | For debugging purposes, trace BAR region info. | ||
4 | |||
5 | Reviewed-by: Fam Zheng <fam@euphon.net> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
8 | Message-id: 20201103020733.2303148-4-philmd@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | --- | ||
12 | util/vfio-helpers.c | 8 ++++++++ | ||
13 | util/trace-events | 1 + | ||
14 | 2 files changed, 9 insertions(+) | ||
15 | |||
16 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/util/vfio-helpers.c | ||
19 | +++ b/util/vfio-helpers.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static inline void assert_bar_index_valid(QEMUVFIOState *s, int index) | ||
21 | |||
22 | static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp) | ||
23 | { | ||
24 | + g_autofree char *barname = NULL; | ||
25 | assert_bar_index_valid(s, index); | ||
26 | s->bar_region_info[index] = (struct vfio_region_info) { | ||
27 | .index = VFIO_PCI_BAR0_REGION_INDEX + index, | ||
28 | @@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp) | ||
29 | error_setg_errno(errp, errno, "Failed to get BAR region info"); | ||
30 | return -errno; | ||
31 | } | ||
32 | + barname = g_strdup_printf("bar[%d]", index); | ||
33 | + trace_qemu_vfio_region_info(barname, s->bar_region_info[index].offset, | ||
34 | + s->bar_region_info[index].size, | ||
35 | + s->bar_region_info[index].cap_offset); | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | @@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device, | ||
40 | ret = -errno; | ||
41 | goto fail; | ||
42 | } | ||
43 | + trace_qemu_vfio_region_info("config", s->config_region_info.offset, | ||
44 | + s->config_region_info.size, | ||
45 | + s->config_region_info.cap_offset); | ||
46 | |||
47 | for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) { | ||
48 | ret = qemu_vfio_pci_init_bar(s, i, errp); | ||
49 | diff --git a/util/trace-events b/util/trace-events | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/util/trace-events | ||
52 | +++ b/util/trace-events | ||
53 | @@ -XXX,XX +XXX,XX @@ qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *io | ||
54 | qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p" | ||
55 | qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
56 | qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
57 | +qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32 | ||
58 | -- | ||
59 | 2.28.0 | ||
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | For debugging purposes, trace where a BAR is mapped. | ||
4 | |||
5 | Reviewed-by: Fam Zheng <fam@euphon.net> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
8 | Message-id: 20201103020733.2303148-5-philmd@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | --- | ||
12 | util/vfio-helpers.c | 2 ++ | ||
13 | util/trace-events | 1 + | ||
14 | 2 files changed, 3 insertions(+) | ||
15 | |||
16 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/util/vfio-helpers.c | ||
19 | +++ b/util/vfio-helpers.c | ||
20 | @@ -XXX,XX +XXX,XX @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index, | ||
21 | p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset), | ||
22 | prot, MAP_SHARED, | ||
23 | s->device, s->bar_region_info[index].offset + offset); | ||
24 | + trace_qemu_vfio_pci_map_bar(index, s->bar_region_info[index].offset , | ||
25 | + size, offset, p); | ||
26 | if (p == MAP_FAILED) { | ||
27 | error_setg_errno(errp, errno, "Failed to map BAR region"); | ||
28 | p = NULL; | ||
29 | diff --git a/util/trace-events b/util/trace-events | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/util/trace-events | ||
32 | +++ b/util/trace-events | ||
33 | @@ -XXX,XX +XXX,XX @@ qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p" | ||
34 | qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
35 | qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
36 | qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32 | ||
37 | +qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" | ||
38 | -- | ||
39 | 2.28.0 | ||
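A hypothetical caller sketch for the event added above (the BAR index, size and prot values are made up, and the offset/size/prot parameter order is assumed from the hunk, not quoted from block/nvme.c):

    /* Map the first 0x1000 bytes of BAR 0 read/write.  The new trace point
     * fires before the MAP_FAILED check, so it records the BAR's region
     * offset, the requested size, the offset inside the BAR and the host
     * pointer returned by mmap() even when the mapping fails. */
    void *regs = qemu_vfio_pci_map_bar(s, 0, 0, 0x1000,
                                       PROT_READ | PROT_WRITE, errp);
    if (!regs) {
        /* errp was set by qemu_vfio_pci_map_bar() */
    }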
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | For debugging purposes, trace where DMA regions are mapped (a caller sketch follows this patch). | ||
4 | |||
5 | Reviewed-by: Fam Zheng <fam@euphon.net> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
8 | Message-id: 20201103020733.2303148-6-philmd@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
11 | --- | ||
12 | util/vfio-helpers.c | 3 ++- | ||
13 | util/trace-events | 5 +++-- | ||
14 | 2 files changed, 5 insertions(+), 3 deletions(-) | ||
15 | |||
16 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/util/vfio-helpers.c | ||
19 | +++ b/util/vfio-helpers.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size, | ||
21 | .vaddr = (uintptr_t)host, | ||
22 | .size = size, | ||
23 | }; | ||
24 | - trace_qemu_vfio_do_mapping(s, host, size, iova); | ||
25 | + trace_qemu_vfio_do_mapping(s, host, iova, size); | ||
26 | |||
27 | if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) { | ||
28 | error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); | ||
29 | @@ -XXX,XX +XXX,XX @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, | ||
30 | } | ||
31 | } | ||
32 | } | ||
33 | + trace_qemu_vfio_dma_mapped(s, host, iova0, size); | ||
34 | if (iova) { | ||
35 | *iova = iova0; | ||
36 | } | ||
37 | diff --git a/util/trace-events b/util/trace-events | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/util/trace-events | ||
40 | +++ b/util/trace-events | ||
41 | @@ -XXX,XX +XXX,XX @@ qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%z | ||
42 | qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx" | ||
43 | qemu_vfio_find_mapping(void *s, void *p) "s %p host %p" | ||
44 | qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64 | ||
45 | -qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p host %p size 0x%zx iova 0x%"PRIx64 | ||
46 | -qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d iova %p" | ||
47 | +qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx" | ||
48 | +qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p" | ||
49 | +qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx" | ||
50 | qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p" | ||
51 | qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
52 | qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" | ||
53 | -- | ||
54 | 2.28.0 | ||
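A hypothetical caller sketch for the events touched above (the buffer and its size are made up; the qemu_vfio_dma_map signature is taken from the trace line in the hunk):

    /* Make a 2 MiB host buffer visible to the device.  qemu_vfio_do_mapping
     * is traced just before each VFIO_IOMMU_MAP_DMA ioctl, and the new
     * qemu_vfio_dma_mapped event fires once at the end with the IOVA the
     * whole buffer was mapped at. */
    uint64_t iova;
    if (qemu_vfio_dma_map(s, buf, 2 * 1024 * 1024, false, &iova) == 0) {
        /* the device can now DMA to/from buf at 'iova' */
    }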
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | SheepdogAIOCB is internal to sheepdog.c, hence it is never canceled. | 3 | The QEMU_VFIO_DEBUG definition is only modifiable at build-time. |
4 | Trace events can be enabled at run-time. As we prefer the latter, | ||
5 | convert qemu_vfio_dump_mappings() to use trace events instead | ||
6 | of printf(). | ||
4 | 7 | ||
5 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 8 | Reviewed-by: Fam Zheng <fam@euphon.net> |
6 | Message-id: 20161129113245.32724-2-pbonzini@redhat.com | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
7 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> |
11 | Message-id: 20201103020733.2303148-7-philmd@redhat.com | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
8 | --- | 14 | --- |
9 | block/sheepdog.c | 52 ---------------------------------------------------- | 15 | util/vfio-helpers.c | 19 ++++--------------- |
10 | 1 file changed, 52 deletions(-) | 16 | util/trace-events | 1 + |
17 | 2 files changed, 5 insertions(+), 15 deletions(-) | ||
11 | 18 | ||
12 | diff --git a/block/sheepdog.c b/block/sheepdog.c | 19 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c |
13 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/block/sheepdog.c | 21 | --- a/util/vfio-helpers.c |
15 | +++ b/block/sheepdog.c | 22 | +++ b/util/vfio-helpers.c |
16 | @@ -XXX,XX +XXX,XX @@ struct SheepdogAIOCB { | 23 | @@ -XXX,XX +XXX,XX @@ QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp) |
17 | Coroutine *coroutine; | 24 | return s; |
18 | void (*aio_done_func)(SheepdogAIOCB *); | ||
19 | |||
20 | - bool cancelable; | ||
21 | int nr_pending; | ||
22 | |||
23 | uint32_t min_affect_data_idx; | ||
24 | @@ -XXX,XX +XXX,XX @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) | ||
25 | { | ||
26 | SheepdogAIOCB *acb = aio_req->aiocb; | ||
27 | |||
28 | - acb->cancelable = false; | ||
29 | QLIST_REMOVE(aio_req, aio_siblings); | ||
30 | g_free(aio_req); | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) | ||
33 | qemu_aio_unref(acb); | ||
34 | } | 25 | } |
35 | 26 | ||
36 | -/* | 27 | -static void qemu_vfio_dump_mapping(IOVAMapping *m) |
37 | - * Check whether the specified acb can be canceled | ||
38 | - * | ||
39 | - * We can cancel aio when any request belonging to the acb is: | ||
40 | - * - Not processed by the sheepdog server. | ||
41 | - * - Not linked to the inflight queue. | ||
42 | - */ | ||
43 | -static bool sd_acb_cancelable(const SheepdogAIOCB *acb) | ||
44 | -{ | 28 | -{ |
45 | - BDRVSheepdogState *s = acb->common.bs->opaque; | 29 | - if (QEMU_VFIO_DEBUG) { |
46 | - AIOReq *aioreq; | 30 | - printf(" vfio mapping %p %" PRIx64 " to %" PRIx64 "\n", m->host, |
47 | - | 31 | - (uint64_t)m->size, (uint64_t)m->iova); |
48 | - if (!acb->cancelable) { | ||
49 | - return false; | ||
50 | - } | ||
51 | - | ||
52 | - QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) { | ||
53 | - if (aioreq->aiocb == acb) { | ||
54 | - return false; | ||
55 | - } | ||
56 | - } | ||
57 | - | ||
58 | - return true; | ||
59 | -} | ||
60 | - | ||
61 | -static void sd_aio_cancel(BlockAIOCB *blockacb) | ||
62 | -{ | ||
63 | - SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; | ||
64 | - BDRVSheepdogState *s = acb->common.bs->opaque; | ||
65 | - AIOReq *aioreq, *next; | ||
66 | - | ||
67 | - if (sd_acb_cancelable(acb)) { | ||
68 | - /* Remove outstanding requests from failed queue. */ | ||
69 | - QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, | ||
70 | - next) { | ||
71 | - if (aioreq->aiocb == acb) { | ||
72 | - free_aio_req(s, aioreq); | ||
73 | - } | ||
74 | - } | ||
75 | - | ||
76 | - assert(acb->nr_pending == 0); | ||
77 | - if (acb->common.cb) { | ||
78 | - acb->common.cb(acb->common.opaque, -ECANCELED); | ||
79 | - } | ||
80 | - sd_finish_aiocb(acb); | ||
81 | - } | 32 | - } |
82 | -} | 33 | -} |
83 | - | 34 | - |
84 | static const AIOCBInfo sd_aiocb_info = { | 35 | static void qemu_vfio_dump_mappings(QEMUVFIOState *s) |
85 | .aiocb_size = sizeof(SheepdogAIOCB), | 36 | { |
86 | - .cancel_async = sd_aio_cancel, | 37 | - int i; |
87 | }; | 38 | - |
88 | 39 | - if (QEMU_VFIO_DEBUG) { | |
89 | static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, | 40 | - printf("vfio mappings\n"); |
90 | @@ -XXX,XX +XXX,XX @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, | 41 | - for (i = 0; i < s->nr_mappings; ++i) { |
91 | acb->nb_sectors = nb_sectors; | 42 | - qemu_vfio_dump_mapping(&s->mappings[i]); |
92 | 43 | - } | |
93 | acb->aio_done_func = NULL; | 44 | + for (int i = 0; i < s->nr_mappings; ++i) { |
94 | - acb->cancelable = true; | 45 | + trace_qemu_vfio_dump_mapping(s->mappings[i].host, |
95 | acb->coroutine = qemu_coroutine_self(); | 46 | + s->mappings[i].iova, |
96 | acb->ret = 0; | 47 | + s->mappings[i].size); |
97 | acb->nr_pending = 0; | 48 | } |
49 | } | ||
50 | |||
51 | diff --git a/util/trace-events b/util/trace-events | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/util/trace-events | ||
54 | +++ b/util/trace-events | ||
55 | @@ -XXX,XX +XXX,XX @@ qemu_mutex_unlock(void *mutex, const char *file, const int line) "released mutex | ||
56 | qemu_vfio_dma_reset_temporary(void *s) "s %p" | ||
57 | qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%zx" | ||
58 | qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx" | ||
59 | +qemu_vfio_dump_mapping(void *host, uint64_t iova, size_t size) "vfio mapping %p to iova 0x%08" PRIx64 " size 0x%zx" | ||
60 | qemu_vfio_find_mapping(void *s, void *p) "s %p host %p" | ||
61 | qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64 | ||
62 | qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64 " size 0x%zx" | ||
98 | -- | 63 | -- |
99 | 2.9.3 | 64 | 2.28.0 |
100 | 65 | ||
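A usage note on the build-time/run-time point above: after this conversion the mapping dump no longer needs a rebuild with QEMU_VFIO_DEBUG set. Like any other trace event it can be enabled at run time, for example with --trace "qemu_vfio_dump_mapping" (or a wider pattern such as --trace "qemu_vfio_*") on the QEMU command line, assuming a trace backend such as "log" was selected at configure time.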
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
1 | 2 | ||
3 | mmap(2) states: | ||
4 | |||
5 | 'offset' must be a multiple of the page size as returned | ||
6 | by sysconf(_SC_PAGE_SIZE). | ||
7 | |||
8 | Add an assertion to be sure we don't break this contract. | ||
9 | |||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
11 | Message-id: 20201103020733.2303148-8-philmd@redhat.com | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Tested-by: Eric Auger <eric.auger@redhat.com> | ||
14 | --- | ||
15 | util/vfio-helpers.c | 1 + | ||
16 | 1 file changed, 1 insertion(+) | ||
17 | |||
18 | diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/util/vfio-helpers.c | ||
21 | +++ b/util/vfio-helpers.c | ||
22 | @@ -XXX,XX +XXX,XX @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index, | ||
23 | Error **errp) | ||
24 | { | ||
25 | void *p; | ||
26 | + assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size)); | ||
27 | assert_bar_index_valid(s, index); | ||
28 | p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset), | ||
29 | prot, MAP_SHARED, | ||
30 | -- | ||
31 | 2.28.0 | ||
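To make the quoted mmap(2) contract concrete, a small standalone sketch (not QEMU code; the 0x3000 offset is an arbitrary example) of the check the new assertion performs:

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        long page = sysconf(_SC_PAGE_SIZE);     /* host page size */
        /* Example offset: a multiple of 4 KiB, but not of a 64 KiB host page. */
        unsigned long long offset = 0x3000;

        /* QEMU_IS_ALIGNED(offset, qemu_real_host_page_size) boils down to
         * the same "multiple of the page size" test that mmap(2) requires. */
        printf("page size %ld, offset 0x%llx %s page-aligned\n",
               page, offset, offset % page == 0 ? "is" : "is not");
        return 0;
    }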