1
The following changes since commit a0def594286d9110a6035e02eef558cf3cf5d847:
1
The following changes since commit 8507c9d5c9a62de2a0e281b640f995e26eac46af:
2
2
3
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2017-01-30 10:23:20 +0000)
3
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2020-11-03 15:59:44 +0000)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to acf6e5f0962c4be670d4a93ede77423512521876:
9
for you to fetch changes up to fc107d86840b3364e922c26cf7631b7fd38ce523:
10
10
11
sheepdog: reorganize check for overlapping requests (2017-02-01 00:17:20 -0500)
11
util/vfio-helpers: Assert offset is aligned to page size (2020-11-03 19:06:23 +0000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches
14
Pull request for 5.2
15
16
NVMe fixes to solve IOMMU issues on non-x86 and error message/tracing
17
improvements. Elena Afanasova's ioeventfd fixes are also included.
18
19
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
20
15
----------------------------------------------------------------
21
----------------------------------------------------------------
16
22
17
Paolo Bonzini (5):
23
Elena Afanasova (2):
18
sheepdog: remove unused cancellation support
24
accel/kvm: add PIO ioeventfds only in case kvm_eventfds_allowed is
19
sheepdog: reorganize coroutine flow
25
true
20
sheepdog: do not use BlockAIOCB
26
softmmu/memory: fix memory_region_ioeventfd_equal()
21
sheepdog: simplify inflight_aio_head management
22
sheepdog: reorganize check for overlapping requests
23
27
24
block/sheepdog.c | 289 ++++++++++++++++---------------------------------------
28
Eric Auger (4):
25
1 file changed, 84 insertions(+), 205 deletions(-)
29
block/nvme: Change size and alignment of IDENTIFY response buffer
30
block/nvme: Change size and alignment of queue
31
block/nvme: Change size and alignment of prp_list_pages
32
block/nvme: Align iov's va and size on host page size
33
34
Philippe Mathieu-Daudé (27):
35
MAINTAINERS: Cover "block/nvme.h" file
36
block/nvme: Use hex format to display offset in trace events
37
block/nvme: Report warning with warn_report()
38
block/nvme: Trace controller capabilities
39
block/nvme: Trace nvme_poll_queue() per queue
40
block/nvme: Improve nvme_free_req_queue_wait() trace information
41
block/nvme: Trace queue pair creation/deletion
42
block/nvme: Move definitions before structure declarations
43
block/nvme: Use unsigned integer for queue counter/size
44
block/nvme: Make nvme_identify() return boolean indicating error
45
block/nvme: Make nvme_init_queue() return boolean indicating error
46
block/nvme: Introduce Completion Queue definitions
47
block/nvme: Use definitions instead of magic values in add_io_queue()
48
block/nvme: Correctly initialize Admin Queue Attributes
49
block/nvme: Simplify ADMIN queue access
50
block/nvme: Simplify nvme_cmd_sync()
51
block/nvme: Set request_alignment at initialization
52
block/nvme: Correct minimum device page size
53
block/nvme: Fix use of write-only doorbells page on Aarch64 arch
54
block/nvme: Fix nvme_submit_command() on big-endian host
55
util/vfio-helpers: Improve reporting unsupported IOMMU type
56
util/vfio-helpers: Trace PCI I/O config accesses
57
util/vfio-helpers: Trace PCI BAR region info
58
util/vfio-helpers: Trace where BARs are mapped
59
util/vfio-helpers: Improve DMA trace events
60
util/vfio-helpers: Convert vfio_dump_mapping to trace events
61
util/vfio-helpers: Assert offset is aligned to page size
62
63
MAINTAINERS | 2 +
64
include/block/nvme.h | 18 ++--
65
accel/kvm/kvm-all.c | 6 +-
66
block/nvme.c | 209 ++++++++++++++++++++++++-------------------
67
softmmu/memory.c | 11 ++-
68
util/vfio-helpers.c | 43 +++++----
69
block/trace-events | 30 ++++---
70
util/trace-events | 10 ++-
71
8 files changed, 195 insertions(+), 134 deletions(-)
26
72
27
--
73
--
28
2.9.3
74
2.28.0
29
75
30
diff view generated by jsdifflib
New patch
1
From: Elena Afanasova <eafanasova@gmail.com>
1
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
5
Message-Id: <20201017210102.26036-1-eafanasova@gmail.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
accel/kvm/kvm-all.c | 6 ++++--
9
1 file changed, 4 insertions(+), 2 deletions(-)
10
11
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/kvm/kvm-all.c
14
+++ b/accel/kvm/kvm-all.c
15
@@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms)
16
17
kvm_memory_listener_register(s, &s->memory_listener,
18
&address_space_memory, 0);
19
- memory_listener_register(&kvm_io_listener,
20
- &address_space_io);
21
+ if (kvm_eventfds_allowed) {
22
+ memory_listener_register(&kvm_io_listener,
23
+ &address_space_io);
24
+ }
25
memory_listener_register(&kvm_coalesced_pio_listener,
26
&address_space_io);
27
28
--
29
2.28.0
30
diff view generated by jsdifflib
New patch
1
From: Elena Afanasova <eafanasova@gmail.com>
1
2
3
Eventfd can be registered with a zero length when fast_mmio is true.
4
Handle this case properly when dispatching through QEMU.
5
6
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
7
Message-id: cf71a62eb04e61932ff8ffdd02e0b2aab4f495a0.camel@gmail.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
softmmu/memory.c | 11 +++++++++--
11
1 file changed, 9 insertions(+), 2 deletions(-)
12
13
diff --git a/softmmu/memory.c b/softmmu/memory.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/softmmu/memory.c
16
+++ b/softmmu/memory.c
17
@@ -XXX,XX +XXX,XX @@ static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd *a,
18
static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd *a,
19
MemoryRegionIoeventfd *b)
20
{
21
- return !memory_region_ioeventfd_before(a, b)
22
- && !memory_region_ioeventfd_before(b, a);
23
+ if (int128_eq(a->addr.start, b->addr.start) &&
24
+ (!int128_nz(a->addr.size) || !int128_nz(b->addr.size) ||
25
+ (int128_eq(a->addr.size, b->addr.size) &&
26
+ (a->match_data == b->match_data) &&
27
+ ((a->match_data && (a->data == b->data)) || !a->match_data) &&
28
+ (a->e == b->e))))
29
+ return true;
30
+
31
+ return false;
32
}
33
34
/* Range of memory in the global map. Addresses are absolute. */
35
--
36
2.28.0
37
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
The "block/nvme.h" header is shared by both the NVMe block
4
driver and the NVMe emulated device. Add the 'F:' entry on
5
both sections, so all maintainers/reviewers are notified
6
when it is changed.
7
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
11
Message-Id: <20200701140634.25994-1-philmd@redhat.com>
12
---
13
MAINTAINERS | 2 ++
14
1 file changed, 2 insertions(+)
15
16
diff --git a/MAINTAINERS b/MAINTAINERS
17
index XXXXXXX..XXXXXXX 100644
18
--- a/MAINTAINERS
19
+++ b/MAINTAINERS
20
@@ -XXX,XX +XXX,XX @@ M: Klaus Jensen <its@irrelevant.dk>
21
L: qemu-block@nongnu.org
22
S: Supported
23
F: hw/block/nvme*
24
+F: include/block/nvme.h
25
F: tests/qtest/nvme-test.c
26
F: docs/specs/nvme.txt
27
T: git git://git.infradead.org/qemu-nvme.git nvme-next
28
@@ -XXX,XX +XXX,XX @@ R: Fam Zheng <fam@euphon.net>
29
L: qemu-block@nongnu.org
30
S: Supported
31
F: block/nvme*
32
+F: include/block/nvme.h
33
T: git https://github.com/stefanha/qemu.git block
34
35
Bootdevice
36
--
37
2.28.0
38
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Use the same format used for the hw/vfio/ trace events.
4
5
Suggested-by: Eric Auger <eric.auger@redhat.com>
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-3-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/trace-events | 12 ++++++------
15
1 file changed, 6 insertions(+), 6 deletions(-)
16
17
diff --git a/block/trace-events b/block/trace-events
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/trace-events
20
+++ b/block/trace-events
21
@@ -XXX,XX +XXX,XX @@ nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
22
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
23
nvme_handle_event(void *s) "s %p"
24
nvme_poll_cb(void *s) "s %p"
25
-nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset %"PRId64" bytes %"PRId64" flags %d niov %d"
26
-nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset %"PRId64" bytes %"PRId64" flags %d"
27
+nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d"
28
+nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d"
29
nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
30
-nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d"
31
-nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d"
32
-nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64""
33
-nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d"
34
+nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d"
35
+nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d"
36
+nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
37
+nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
38
nvme_dma_map_flush(void *s) "s %p"
39
nvme_free_req_queue_wait(void *q) "q %p"
40
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
41
--
42
2.28.0
43
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Instead of displaying the warning on stderr, use warn_report()
4
which also displays it on the monitor.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-4-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 4 ++--
15
1 file changed, 2 insertions(+), 2 deletions(-)
16
17
diff --git a/block/nvme.c b/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/nvme.c
20
+++ b/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
22
}
23
cid = le16_to_cpu(c->cid);
24
if (cid == 0 || cid > NVME_QUEUE_SIZE) {
25
- fprintf(stderr, "Unexpected CID in completion queue: %" PRIu32 "\n",
26
- cid);
27
+ warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
28
+ "queue size: %u", cid, NVME_QUEUE_SIZE);
29
continue;
30
}
31
trace_nvme_complete_command(s, q->index, cid);
32
--
33
2.28.0
34
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Controllers have different capabilities and report them in the
4
CAP register. We are particularly interested in the page size
5
limits.
6
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-5-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 13 +++++++++++++
16
block/trace-events | 2 ++
17
2 files changed, 15 insertions(+)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
* Initialization". */
25
26
cap = le64_to_cpu(regs->cap);
27
+ trace_nvme_controller_capability_raw(cap);
28
+ trace_nvme_controller_capability("Maximum Queue Entries Supported",
29
+ 1 + NVME_CAP_MQES(cap));
30
+ trace_nvme_controller_capability("Contiguous Queues Required",
31
+ NVME_CAP_CQR(cap));
32
+ trace_nvme_controller_capability("Doorbell Stride",
33
+ 2 << (2 + NVME_CAP_DSTRD(cap)));
34
+ trace_nvme_controller_capability("Subsystem Reset Supported",
35
+ NVME_CAP_NSSRS(cap));
36
+ trace_nvme_controller_capability("Memory Page Size Minimum",
37
+ 1 << (12 + NVME_CAP_MPSMIN(cap)));
38
+ trace_nvme_controller_capability("Memory Page Size Maximum",
39
+ 1 << (12 + NVME_CAP_MPSMAX(cap)));
40
if (!NVME_CAP_CSS(cap)) {
41
error_setg(errp, "Device doesn't support NVMe command set");
42
ret = -EINVAL;
43
diff --git a/block/trace-events b/block/trace-events
44
index XXXXXXX..XXXXXXX 100644
45
--- a/block/trace-events
46
+++ b/block/trace-events
47
@@ -XXX,XX +XXX,XX @@ qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t
48
qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
49
50
# nvme.c
51
+nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
52
+nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
53
nvme_kick(void *s, int queue) "s %p queue %d"
54
nvme_dma_flush_queue_wait(void *s) "s %p"
55
nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
56
--
57
2.28.0
58
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
As we want to enable multiple queues, report the event
4
in each nvme_poll_queue() call, rather than once in
5
the callback calling nvme_poll_queues().
6
7
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-6-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 2 +-
16
block/trace-events | 2 +-
17
2 files changed, 2 insertions(+), 2 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queue(NVMeQueuePair *q)
24
const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
25
NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
26
27
+ trace_nvme_poll_queue(q->s, q->index);
28
/*
29
* Do an early check for completions. q->lock isn't needed because
30
* nvme_process_completion() only runs in the event loop thread and
31
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_cb(void *opaque)
32
BDRVNVMeState *s = container_of(e, BDRVNVMeState,
33
irq_notifier[MSIX_SHARED_IRQ_IDX]);
34
35
- trace_nvme_poll_cb(s);
36
return nvme_poll_queues(s);
37
}
38
39
diff --git a/block/trace-events b/block/trace-events
40
index XXXXXXX..XXXXXXX 100644
41
--- a/block/trace-events
42
+++ b/block/trace-events
43
@@ -XXX,XX +XXX,XX @@ nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
44
nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
45
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
46
nvme_handle_event(void *s) "s %p"
47
-nvme_poll_cb(void *s) "s %p"
48
+nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u"
49
nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d"
50
nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d"
51
nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
52
--
53
2.28.0
54
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
What we want to trace is the block driver state and the queue index.
4
5
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-7-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 2 +-
15
block/trace-events | 2 +-
16
2 files changed, 2 insertions(+), 2 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
23
24
while (q->free_req_head == -1) {
25
if (qemu_in_coroutine()) {
26
- trace_nvme_free_req_queue_wait(q);
27
+ trace_nvme_free_req_queue_wait(q->s, q->index);
28
qemu_co_queue_wait(&q->free_req_queue, &q->lock);
29
} else {
30
qemu_mutex_unlock(&q->lock);
31
diff --git a/block/trace-events b/block/trace-events
32
index XXXXXXX..XXXXXXX 100644
33
--- a/block/trace-events
34
+++ b/block/trace-events
35
@@ -XXX,XX +XXX,XX @@ nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s
36
nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
37
nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
38
nvme_dma_map_flush(void *s) "s %p"
39
-nvme_free_req_queue_wait(void *q) "q %p"
40
+nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
41
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
42
nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
43
nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
44
--
45
2.28.0
46
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Reviewed-by: Eric Auger <eric.auger@redhat.com>
4
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Tested-by: Eric Auger <eric.auger@redhat.com>
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Message-id: 20201029093306.1063879-8-philmd@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
---
11
block/nvme.c | 3 +++
12
block/trace-events | 2 ++
13
2 files changed, 5 insertions(+)
14
15
diff --git a/block/nvme.c b/block/nvme.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/nvme.c
18
+++ b/block/nvme.c
19
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
20
21
static void nvme_free_queue_pair(NVMeQueuePair *q)
22
{
23
+ trace_nvme_free_queue_pair(q->index, q);
24
if (q->completion_bh) {
25
qemu_bh_delete(q->completion_bh);
26
}
27
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
28
if (!q) {
29
return NULL;
30
}
31
+ trace_nvme_create_queue_pair(idx, q, size, aio_context,
32
+ event_notifier_get_fd(s->irq_notifier));
33
q->prp_list_pages = qemu_try_memalign(s->page_size,
34
s->page_size * NVME_NUM_REQS);
35
if (!q->prp_list_pages) {
36
diff --git a/block/trace-events b/block/trace-events
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/trace-events
39
+++ b/block/trace-events
40
@@ -XXX,XX +XXX,XX @@ nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" byte
41
nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
42
nvme_dma_map_flush(void *s) "s %p"
43
nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
44
+nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d"
45
+nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
46
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
47
nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
48
nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
49
--
50
2.28.0
51
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
To be able to use some definitions in structure declarations,
4
move them earlier. No logical change.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-9-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 19 ++++++++++---------
15
1 file changed, 10 insertions(+), 9 deletions(-)
16
17
diff --git a/block/nvme.c b/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/nvme.c
20
+++ b/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@
22
23
typedef struct BDRVNVMeState BDRVNVMeState;
24
25
+/* Same index is used for queues and IRQs */
26
+#define INDEX_ADMIN 0
27
+#define INDEX_IO(n) (1 + n)
28
+
29
+/* This driver shares a single MSIX IRQ for the admin and I/O queues */
30
+enum {
31
+ MSIX_SHARED_IRQ_IDX = 0,
32
+ MSIX_IRQ_COUNT = 1
33
+};
34
+
35
typedef struct {
36
int32_t head, tail;
37
uint8_t *queue;
38
@@ -XXX,XX +XXX,XX @@ typedef struct {
39
QEMUBH *completion_bh;
40
} NVMeQueuePair;
41
42
-#define INDEX_ADMIN 0
43
-#define INDEX_IO(n) (1 + n)
44
-
45
-/* This driver shares a single MSIX IRQ for the admin and I/O queues */
46
-enum {
47
- MSIX_SHARED_IRQ_IDX = 0,
48
- MSIX_IRQ_COUNT = 1
49
-};
50
-
51
struct BDRVNVMeState {
52
AioContext *aio_context;
53
QEMUVFIOState *vfio;
54
--
55
2.28.0
56
diff view generated by jsdifflib
New patch
1
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
3
We cannot have a negative queue count/size/index; use unsigned types.
4
Rename 'nr_queues' as 'queue_count' to match the spec naming.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-10-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 38 ++++++++++++++++++--------------------
15
block/trace-events | 10 +++++-----
16
2 files changed, 23 insertions(+), 25 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
23
* [1..]: io queues.
24
*/
25
NVMeQueuePair **queues;
26
- int nr_queues;
27
+ unsigned queue_count;
28
size_t page_size;
29
/* How many uint32_t elements does each doorbell entry take. */
30
size_t doorbell_scale;
31
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
32
};
33
34
static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
35
- int nentries, int entry_bytes, Error **errp)
36
+ unsigned nentries, size_t entry_bytes, Error **errp)
37
{
38
size_t bytes;
39
int r;
40
@@ -XXX,XX +XXX,XX @@ static void nvme_free_req_queue_cb(void *opaque)
41
42
static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
43
AioContext *aio_context,
44
- int idx, int size,
45
+ unsigned idx, size_t size,
46
Error **errp)
47
{
48
int i, r;
49
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queues(BDRVNVMeState *s)
50
bool progress = false;
51
int i;
52
53
- for (i = 0; i < s->nr_queues; i++) {
54
+ for (i = 0; i < s->queue_count; i++) {
55
if (nvme_poll_queue(s->queues[i])) {
56
progress = true;
57
}
58
@@ -XXX,XX +XXX,XX @@ static void nvme_handle_event(EventNotifier *n)
59
static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
60
{
61
BDRVNVMeState *s = bs->opaque;
62
- int n = s->nr_queues;
63
+ unsigned n = s->queue_count;
64
NVMeQueuePair *q;
65
NvmeCmd cmd;
66
- int queue_size = NVME_QUEUE_SIZE;
67
+ unsigned queue_size = NVME_QUEUE_SIZE;
68
69
q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
70
n, queue_size, errp);
71
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
72
.cdw11 = cpu_to_le32(0x3),
73
};
74
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
75
- error_setg(errp, "Failed to create CQ io queue [%d]", n);
76
+ error_setg(errp, "Failed to create CQ io queue [%u]", n);
77
goto out_error;
78
}
79
cmd = (NvmeCmd) {
80
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
81
.cdw11 = cpu_to_le32(0x1 | (n << 16)),
82
};
83
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
84
- error_setg(errp, "Failed to create SQ io queue [%d]", n);
85
+ error_setg(errp, "Failed to create SQ io queue [%u]", n);
86
goto out_error;
87
}
88
s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
89
s->queues[n] = q;
90
- s->nr_queues++;
91
+ s->queue_count++;
92
return true;
93
out_error:
94
nvme_free_queue_pair(q);
95
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
96
ret = -EINVAL;
97
goto out;
98
}
99
- s->nr_queues = 1;
100
+ s->queue_count = 1;
101
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
102
regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
103
(NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
104
@@ -XXX,XX +XXX,XX @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable,
105
106
static void nvme_close(BlockDriverState *bs)
107
{
108
- int i;
109
BDRVNVMeState *s = bs->opaque;
110
111
- for (i = 0; i < s->nr_queues; ++i) {
112
+ for (unsigned i = 0; i < s->queue_count; ++i) {
113
nvme_free_queue_pair(s->queues[i]);
114
}
115
g_free(s->queues);
116
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
117
};
118
119
trace_nvme_prw_aligned(s, is_write, offset, bytes, flags, qiov->niov);
120
- assert(s->nr_queues > 1);
121
+ assert(s->queue_count > 1);
122
req = nvme_get_free_req(ioq);
123
assert(req);
124
125
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
126
.ret = -EINPROGRESS,
127
};
128
129
- assert(s->nr_queues > 1);
130
+ assert(s->queue_count > 1);
131
req = nvme_get_free_req(ioq);
132
assert(req);
133
nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
134
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
135
cmd.cdw12 = cpu_to_le32(cdw12);
136
137
trace_nvme_write_zeroes(s, offset, bytes, flags);
138
- assert(s->nr_queues > 1);
139
+ assert(s->queue_count > 1);
140
req = nvme_get_free_req(ioq);
141
assert(req);
142
143
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
144
return -ENOTSUP;
145
}
146
147
- assert(s->nr_queues > 1);
148
+ assert(s->queue_count > 1);
149
150
buf = qemu_try_memalign(s->page_size, s->page_size);
151
if (!buf) {
152
@@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs)
153
{
154
BDRVNVMeState *s = bs->opaque;
155
156
- for (int i = 0; i < s->nr_queues; i++) {
157
+ for (unsigned i = 0; i < s->queue_count; i++) {
158
NVMeQueuePair *q = s->queues[i];
159
160
qemu_bh_delete(q->completion_bh);
161
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,
162
aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
163
false, nvme_handle_event, nvme_poll_cb);
164
165
- for (int i = 0; i < s->nr_queues; i++) {
166
+ for (unsigned i = 0; i < s->queue_count; i++) {
167
NVMeQueuePair *q = s->queues[i];
168
169
q->completion_bh =
170
@@ -XXX,XX +XXX,XX @@ static void nvme_aio_plug(BlockDriverState *bs)
171
172
static void nvme_aio_unplug(BlockDriverState *bs)
173
{
174
- int i;
175
BDRVNVMeState *s = bs->opaque;
176
assert(s->plugged);
177
s->plugged = false;
178
- for (i = INDEX_IO(0); i < s->nr_queues; i++) {
179
+ for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) {
180
NVMeQueuePair *q = s->queues[i];
181
qemu_mutex_lock(&q->lock);
182
nvme_kick(q);
183
diff --git a/block/trace-events b/block/trace-events
184
index XXXXXXX..XXXXXXX 100644
185
--- a/block/trace-events
186
+++ b/block/trace-events
187
@@ -XXX,XX +XXX,XX @@ qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
188
# nvme.c
189
nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
190
nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
191
-nvme_kick(void *s, int queue) "s %p queue %d"
192
+nvme_kick(void *s, unsigned q_index) "s %p q #%u"
193
nvme_dma_flush_queue_wait(void *s) "s %p"
194
nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
195
-nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d"
196
-nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d"
197
-nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
198
-nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
199
+nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d"
200
+nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u"
201
+nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
202
+nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
203
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
204
nvme_handle_event(void *s) "s %p"
205
nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u"
206
--
207
2.28.0
208
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Delimit co_recv's lifetime clearly in aio_read_response.
3
Just for consistency, following the example documented since
4
commit e3fe3988d7 ("error: Document Error API usage rules"),
5
return a boolean value indicating whether an error is set or not.
6
Directly pass errp as the local_err is not requested in our
7
case.
4
8
5
Do a simple qemu_coroutine_enter in aio_read_response, letting
9
Tested-by: Eric Auger <eric.auger@redhat.com>
6
sd_co_writev call sd_write_done.
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-id: 20201029093306.1063879-11-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 12 +++++++-----
17
1 file changed, 7 insertions(+), 5 deletions(-)
7
18
8
Handle nr_pending in the same way in sd_co_rw_vector,
19
diff --git a/block/nvme.c b/block/nvme.c
9
sd_write_done and sd_co_flush_to_disk.
10
11
Remove sd_co_rw_vector's return value; just leave with no
12
pending requests.
13
14
[Jeff: added missing 'return' back, spotted by Paolo after
15
series was applied.]
16
17
Signed-off-by: Jeff Cody <jcody@redhat.com>
18
---
19
block/sheepdog.c | 115 ++++++++++++++++++++-----------------------------------
20
1 file changed, 42 insertions(+), 73 deletions(-)
21
22
diff --git a/block/sheepdog.c b/block/sheepdog.c
23
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
24
--- a/block/sheepdog.c
21
--- a/block/nvme.c
25
+++ b/block/sheepdog.c
22
+++ b/block/nvme.c
26
@@ -XXX,XX +XXX,XX @@ struct SheepdogAIOCB {
23
@@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
27
enum AIOCBState aiocb_type;
24
return ret;
28
29
Coroutine *coroutine;
30
- void (*aio_done_func)(SheepdogAIOCB *);
31
-
32
int nr_pending;
33
34
uint32_t min_affect_data_idx;
35
@@ -XXX,XX +XXX,XX @@ static const char * sd_strerror(int err)
36
*
37
* 1. In sd_co_rw_vector, we send the I/O requests to the server and
38
* link the requests to the inflight_list in the
39
- * BDRVSheepdogState. The function exits without waiting for
40
+ * BDRVSheepdogState. The function yields while waiting for
41
* receiving the response.
42
*
43
* 2. We receive the response in aio_read_response, the fd handler to
44
- * the sheepdog connection. If metadata update is needed, we send
45
- * the write request to the vdi object in sd_write_done, the write
46
- * completion function. We switch back to sd_co_readv/writev after
47
- * all the requests belonging to the AIOCB are finished.
48
+ * the sheepdog connection. We switch back to sd_co_readv/sd_co_writev
49
+ * after all the requests belonging to the AIOCB are finished. If
50
+ * needed, sd_co_writev will send another request for the vdi object.
51
*/
52
53
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
54
@@ -XXX,XX +XXX,XX @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
55
acb->nr_pending--;
56
}
25
}
57
26
58
-static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
27
-static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
59
-{
28
+/* Returns true on success, false on failure. */
60
- qemu_coroutine_enter(acb->coroutine);
29
+static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
61
- qemu_aio_unref(acb);
30
{
62
-}
31
BDRVNVMeState *s = bs->opaque;
63
-
32
+ bool ret = false;
64
static const AIOCBInfo sd_aiocb_info = {
33
union {
65
.aiocb_size = sizeof(SheepdogAIOCB),
34
NvmeIdCtrl ctrl;
66
};
35
NvmeIdNs ns;
67
@@ -XXX,XX +XXX,XX @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
36
@@ -XXX,XX +XXX,XX @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
68
acb->sector_num = sector_num;
37
goto out;
69
acb->nb_sectors = nb_sectors;
70
71
- acb->aio_done_func = NULL;
72
acb->coroutine = qemu_coroutine_self();
73
acb->ret = 0;
74
acb->nr_pending = 0;
75
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
76
77
switch (acb->aiocb_type) {
78
case AIOCB_WRITE_UDATA:
79
- /* this coroutine context is no longer suitable for co_recv
80
- * because we may send data to update vdi objects */
81
- s->co_recv = NULL;
82
if (!is_data_obj(aio_req->oid)) {
83
break;
84
}
85
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
86
}
87
}
38
}
88
39
89
+ /* No more data for this aio_req (reload_inode below uses its own file
40
+ ret = true;
90
+ * descriptor handler which doesn't use co_recv).
41
s->blkshift = lbaf->ds;
91
+ */
42
out:
92
+ s->co_recv = NULL;
43
qemu_vfio_dma_unmap(s->vfio, id);
44
qemu_vfree(id);
93
+
45
+
94
switch (rsp.result) {
95
case SD_RES_SUCCESS:
96
break;
97
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
98
aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
99
}
100
resend_aioreq(s, aio_req);
101
- goto out;
102
+ return;
103
default:
104
acb->ret = -EIO;
105
error_report("%s", sd_strerror(rsp.result));
106
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
107
* We've finished all requests which belong to the AIOCB, so
108
* we can switch back to sd_co_readv/writev now.
109
*/
110
- acb->aio_done_func(acb);
111
+ qemu_coroutine_enter(acb->coroutine);
112
}
113
-out:
114
- s->co_recv = NULL;
115
+
116
return;
117
+
118
err:
119
- s->co_recv = NULL;
120
reconnect_to_sdog(opaque);
121
}
122
123
@@ -XXX,XX +XXX,XX @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
124
/*
125
* This function is called after writing data objects. If we need to
126
* update metadata, this sends a write request to the vdi object.
127
- * Otherwise, this switches back to sd_co_readv/writev.
128
*/
129
static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
130
{
131
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
132
mx = acb->max_dirty_data_idx;
133
if (mn <= mx) {
134
/* we need to update the vdi object. */
135
+ ++acb->nr_pending;
136
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
137
mn * sizeof(s->inode.data_vdi_id[0]);
138
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
139
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
140
data_len, offset, 0, false, 0, offset);
141
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
142
add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
143
-
144
- acb->aio_done_func = sd_finish_aiocb;
145
- acb->aiocb_type = AIOCB_WRITE_UDATA;
146
- return;
147
+ if (--acb->nr_pending) {
148
+ qemu_coroutine_yield();
149
+ }
150
}
151
-
152
- sd_finish_aiocb(acb);
153
}
154
155
/* Delete current working VDI on the snapshot chain */
156
@@ -XXX,XX +XXX,XX @@ out:
157
* Returns 1 when we need to wait a response, 0 when there is no sent
158
* request and -errno in error cases.
159
*/
160
-static int coroutine_fn sd_co_rw_vector(void *p)
161
+static void coroutine_fn sd_co_rw_vector(void *p)
162
{
163
SheepdogAIOCB *acb = p;
164
int ret = 0;
165
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_rw_vector(void *p)
166
ret = sd_create_branch(s);
167
if (ret) {
168
acb->ret = -EIO;
169
- goto out;
170
+ return;
171
}
172
}
173
174
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_rw_vector(void *p)
175
idx++;
176
done += len;
177
}
178
-out:
179
- if (!--acb->nr_pending) {
180
- return acb->ret;
181
+ if (--acb->nr_pending) {
182
+ qemu_coroutine_yield();
183
}
184
- return 1;
185
}
186
187
static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
188
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
189
}
190
191
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
192
- acb->aio_done_func = sd_write_done;
193
acb->aiocb_type = AIOCB_WRITE_UDATA;
194
195
retry:
196
@@ -XXX,XX +XXX,XX @@ retry:
197
goto retry;
198
}
199
200
- ret = sd_co_rw_vector(acb);
201
- if (ret <= 0) {
202
- QLIST_REMOVE(acb, aiocb_siblings);
203
- qemu_co_queue_restart_all(&s->overlapping_queue);
204
- qemu_aio_unref(acb);
205
- return ret;
206
- }
207
-
208
- qemu_coroutine_yield();
209
+ sd_co_rw_vector(acb);
210
+ sd_write_done(acb);
211
212
QLIST_REMOVE(acb, aiocb_siblings);
213
qemu_co_queue_restart_all(&s->overlapping_queue);
214
-
215
- return acb->ret;
216
+ ret = acb->ret;
217
+ qemu_aio_unref(acb);
218
+ return ret;
46
+ return ret;
219
}
47
}
220
48
221
static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
49
static bool nvme_poll_queue(NVMeQueuePair *q)
222
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
50
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
223
51
uint64_t cap;
224
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
52
uint64_t timeout_ms;
225
acb->aiocb_type = AIOCB_READ_UDATA;
53
uint64_t deadline, now;
226
- acb->aio_done_func = sd_finish_aiocb;
54
- Error *local_err = NULL;
227
55
volatile NvmeBar *regs = NULL;
228
retry:
56
229
if (check_overlapping_aiocb(s, acb)) {
57
qemu_co_mutex_init(&s->dma_map_lock);
230
@@ -XXX,XX +XXX,XX @@ retry:
58
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
231
goto retry;
59
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
60
false, nvme_handle_event, nvme_poll_cb);
61
62
- nvme_identify(bs, namespace, &local_err);
63
- if (local_err) {
64
- error_propagate(errp, local_err);
65
+ if (!nvme_identify(bs, namespace, errp)) {
66
ret = -EIO;
67
goto out;
232
}
68
}
233
234
- ret = sd_co_rw_vector(acb);
235
- if (ret <= 0) {
236
- QLIST_REMOVE(acb, aiocb_siblings);
237
- qemu_co_queue_restart_all(&s->overlapping_queue);
238
- qemu_aio_unref(acb);
239
- return ret;
240
- }
241
-
242
- qemu_coroutine_yield();
243
+ sd_co_rw_vector(acb);
244
245
QLIST_REMOVE(acb, aiocb_siblings);
246
qemu_co_queue_restart_all(&s->overlapping_queue);
247
- return acb->ret;
248
+ ret = acb->ret;
249
+ qemu_aio_unref(acb);
250
+ return ret;
251
}
252
253
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
254
{
255
BDRVSheepdogState *s = bs->opaque;
256
SheepdogAIOCB *acb;
257
+ int ret;
258
AIOReq *aio_req;
259
260
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
261
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
262
263
acb = sd_aio_setup(bs, NULL, 0, 0);
264
acb->aiocb_type = AIOCB_FLUSH_CACHE;
265
- acb->aio_done_func = sd_finish_aiocb;
266
267
+ acb->nr_pending++;
268
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
269
0, 0, 0, false, 0, 0);
270
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
271
add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
272
273
- qemu_coroutine_yield();
274
- return acb->ret;
275
+ if (--acb->nr_pending) {
276
+ qemu_coroutine_yield();
277
+ }
278
+ ret = acb->ret;
279
+ qemu_aio_unref(acb);
280
+ return ret;
281
}
282
283
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
284
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
285
acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
286
count >> BDRV_SECTOR_BITS);
287
acb->aiocb_type = AIOCB_DISCARD_OBJ;
288
- acb->aio_done_func = sd_finish_aiocb;
289
290
retry:
291
if (check_overlapping_aiocb(s, acb)) {
292
@@ -XXX,XX +XXX,XX @@ retry:
293
goto retry;
294
}
295
296
- ret = sd_co_rw_vector(acb);
297
- if (ret <= 0) {
298
- QLIST_REMOVE(acb, aiocb_siblings);
299
- qemu_co_queue_restart_all(&s->overlapping_queue);
300
- qemu_aio_unref(acb);
301
- return ret;
302
- }
303
-
304
- qemu_coroutine_yield();
305
+ sd_co_rw_vector(acb);
306
307
QLIST_REMOVE(acb, aiocb_siblings);
308
qemu_co_queue_restart_all(&s->overlapping_queue);
309
-
310
- return acb->ret;
311
+ ret = acb->ret;
312
+ qemu_aio_unref(acb);
313
+ return ret;
314
}
315
316
static coroutine_fn int64_t
317
--
69
--
318
2.9.3
70
2.28.0
319
71
320
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Sheepdog's AIOCB are completely internal entities for a group of
3
Just for consistency, following the example documented since
4
requests and do not need dynamic allocation.
4
commit e3fe3988d7 ("error: Document Error API usage rules"),
5
return a boolean value indicating whether an error is set or not.
6
Directly pass errp as the local_err is not requested in our
7
case. This simplifies nvme_create_queue_pair() a bit.
5
8
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20161129113245.32724-4-pbonzini@redhat.com
10
Tested-by: Eric Auger <eric.auger@redhat.com>
8
Signed-off-by: Jeff Cody <jcody@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-12-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
9
---
15
---
10
block/sheepdog.c | 99 ++++++++++++++++++++++----------------------------------
16
block/nvme.c | 16 +++++++---------
11
1 file changed, 39 insertions(+), 60 deletions(-)
17
1 file changed, 7 insertions(+), 9 deletions(-)
12
18
13
diff --git a/block/sheepdog.c b/block/sheepdog.c
19
diff --git a/block/nvme.c b/block/nvme.c
14
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
15
--- a/block/sheepdog.c
21
--- a/block/nvme.c
16
+++ b/block/sheepdog.c
22
+++ b/block/nvme.c
17
@@ -XXX,XX +XXX,XX @@ static inline size_t count_data_objs(const struct SheepdogInode *inode)
23
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
18
} while (0)
24
},
19
20
typedef struct SheepdogAIOCB SheepdogAIOCB;
21
+typedef struct BDRVSheepdogState BDRVSheepdogState;
22
23
typedef struct AIOReq {
24
SheepdogAIOCB *aiocb;
25
@@ -XXX,XX +XXX,XX @@ enum AIOCBState {
26
|| y->max_affect_data_idx < x->min_affect_data_idx))
27
28
struct SheepdogAIOCB {
29
- BlockAIOCB common;
30
+ BDRVSheepdogState *s;
31
32
QEMUIOVector *qiov;
33
34
@@ -XXX,XX +XXX,XX @@ struct SheepdogAIOCB {
35
QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
36
};
25
};
37
26
38
-typedef struct BDRVSheepdogState {
27
-static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
39
+struct BDRVSheepdogState {
28
+/* Returns true on success, false on failure. */
40
BlockDriverState *bs;
29
+static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
41
AioContext *aio_context;
30
unsigned nentries, size_t entry_bytes, Error **errp)
42
31
{
43
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSheepdogState {
32
size_t bytes;
44
33
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
45
CoQueue overlapping_queue;
34
q->queue = qemu_try_memalign(s->page_size, bytes);
46
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
35
if (!q->queue) {
47
-} BDRVSheepdogState;
36
error_setg(errp, "Cannot allocate queue");
48
+};
37
- return;
49
38
+ return false;
50
typedef struct BDRVSheepdogReopenState {
39
}
51
int fd;
40
memset(q->queue, 0, bytes);
52
@@ -XXX,XX +XXX,XX @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
41
r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
53
acb->nr_pending--;
42
if (r) {
43
error_setg(errp, "Cannot map queue");
44
+ return false;
45
}
46
+ return true;
54
}
47
}
55
48
56
-static const AIOCBInfo sd_aiocb_info = {
49
static void nvme_free_queue_pair(NVMeQueuePair *q)
57
- .aiocb_size = sizeof(SheepdogAIOCB),
50
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
58
-};
51
Error **errp)
59
-
60
-static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
61
- int64_t sector_num, int nb_sectors)
62
+static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
63
+ QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
64
+ int type)
65
{
52
{
66
- SheepdogAIOCB *acb;
53
int i, r;
67
uint32_t object_size;
54
- Error *local_err = NULL;
68
- BDRVSheepdogState *s = bs->opaque;
55
NVMeQueuePair *q;
69
56
uint64_t prp_list_iova;
70
object_size = (UINT32_C(1) << s->inode.block_size_shift);
57
71
58
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
72
- acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
59
req->prp_list_iova = prp_list_iova + i * s->page_size;
73
+ acb->s = s;
74
75
acb->qiov = qiov;
76
77
@@ -XXX,XX +XXX,XX @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
78
79
acb->min_dirty_data_idx = UINT32_MAX;
80
acb->max_dirty_data_idx = 0;
81
-
82
- return acb;
83
+ acb->aiocb_type = type;
84
}
85
86
/* Return -EIO in case of error, file descriptor on success */
87
@@ -XXX,XX +XXX,XX @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
88
*/
89
static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
90
{
91
- BDRVSheepdogState *s = acb->common.bs->opaque;
92
+ BDRVSheepdogState *s = acb->s;
93
struct iovec iov;
94
AIOReq *aio_req;
95
uint32_t offset, data_len, mn, mx;
96
@@ -XXX,XX +XXX,XX @@ out:
97
* Returns 1 when we need to wait a response, 0 when there is no sent
98
* request and -errno in error cases.
99
*/
100
-static void coroutine_fn sd_co_rw_vector(void *p)
101
+static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
102
{
103
- SheepdogAIOCB *acb = p;
104
int ret = 0;
105
unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
106
unsigned long idx;
107
uint32_t object_size;
108
uint64_t oid;
109
uint64_t offset;
110
- BDRVSheepdogState *s = acb->common.bs->opaque;
111
+ BDRVSheepdogState *s = acb->s;
112
SheepdogInode *inode = &s->inode;
113
AIOReq *aio_req;
114
115
@@ -XXX,XX +XXX,XX @@ static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
116
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
117
int nb_sectors, QEMUIOVector *qiov)
118
{
119
- SheepdogAIOCB *acb;
120
+ SheepdogAIOCB acb;
121
int ret;
122
int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
123
BDRVSheepdogState *s = bs->opaque;
124
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
125
}
126
}
60
}
127
61
128
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
62
- nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
129
- acb->aiocb_type = AIOCB_WRITE_UDATA;
63
- if (local_err) {
130
+ sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA);
64
- error_propagate(errp, local_err);
131
65
+ if (!nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, errp)) {
132
retry:
66
goto fail;
133
- if (check_overlapping_aiocb(s, acb)) {
134
+ if (check_overlapping_aiocb(s, &acb)) {
135
qemu_co_queue_wait(&s->overlapping_queue);
136
goto retry;
137
}
67
}
138
68
q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail;
139
- sd_co_rw_vector(acb);
69
140
- sd_write_done(acb);
70
- nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
141
+ sd_co_rw_vector(&acb);
71
- if (local_err) {
142
+ sd_write_done(&acb);
72
- error_propagate(errp, local_err);
143
73
+ if (!nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, errp)) {
144
- QLIST_REMOVE(acb, aiocb_siblings);
74
goto fail;
145
+ QLIST_REMOVE(&acb, aiocb_siblings);
146
qemu_co_queue_restart_all(&s->overlapping_queue);
147
- ret = acb->ret;
148
- qemu_aio_unref(acb);
149
- return ret;
150
+ return acb.ret;
151
}
152
153
static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
154
int nb_sectors, QEMUIOVector *qiov)
155
{
156
- SheepdogAIOCB *acb;
157
- int ret;
158
+ SheepdogAIOCB acb;
159
BDRVSheepdogState *s = bs->opaque;
160
161
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
162
- acb->aiocb_type = AIOCB_READ_UDATA;
163
+ sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA);
164
165
retry:
166
- if (check_overlapping_aiocb(s, acb)) {
167
+ if (check_overlapping_aiocb(s, &acb)) {
168
qemu_co_queue_wait(&s->overlapping_queue);
169
goto retry;
170
}
75
}
171
76
q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head;
172
- sd_co_rw_vector(acb);
173
+ sd_co_rw_vector(&acb);
174
175
- QLIST_REMOVE(acb, aiocb_siblings);
176
+ QLIST_REMOVE(&acb, aiocb_siblings);
177
qemu_co_queue_restart_all(&s->overlapping_queue);
178
- ret = acb->ret;
179
- qemu_aio_unref(acb);
180
- return ret;
181
+ return acb.ret;
182
}
183
184
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
185
{
186
BDRVSheepdogState *s = bs->opaque;
187
- SheepdogAIOCB *acb;
188
- int ret;
189
+ SheepdogAIOCB acb;
190
AIOReq *aio_req;
191
192
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
193
return 0;
194
}
195
196
- acb = sd_aio_setup(bs, NULL, 0, 0);
197
- acb->aiocb_type = AIOCB_FLUSH_CACHE;
198
+ sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE);
199
200
- acb->nr_pending++;
201
- aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
202
+ acb.nr_pending++;
203
+ aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id),
204
0, 0, 0, false, 0, 0);
205
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
206
- add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
207
+ add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type);
208
209
- if (--acb->nr_pending) {
210
+ if (--acb.nr_pending) {
211
qemu_coroutine_yield();
212
}
213
- ret = acb->ret;
214
- qemu_aio_unref(acb);
215
- return ret;
216
+ return acb.ret;
217
}
218
219
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
220
@@ -XXX,XX +XXX,XX @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
221
static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
222
int count)
223
{
224
- SheepdogAIOCB *acb;
225
+ SheepdogAIOCB acb;
226
BDRVSheepdogState *s = bs->opaque;
227
- int ret;
228
QEMUIOVector discard_iov;
229
struct iovec iov;
230
uint32_t zero = 0;
231
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
232
if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
233
return -ENOTSUP;
234
}
235
- acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
236
- count >> BDRV_SECTOR_BITS);
237
- acb->aiocb_type = AIOCB_DISCARD_OBJ;
238
+ sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
239
+ count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
240
241
retry:
242
- if (check_overlapping_aiocb(s, acb)) {
243
+ if (check_overlapping_aiocb(s, &acb)) {
244
qemu_co_queue_wait(&s->overlapping_queue);
245
goto retry;
246
}
247
248
- sd_co_rw_vector(acb);
249
+ sd_co_rw_vector(&acb);
250
251
- QLIST_REMOVE(acb, aiocb_siblings);
252
+ QLIST_REMOVE(&acb, aiocb_siblings);
253
qemu_co_queue_restart_all(&s->overlapping_queue);
254
- ret = acb->ret;
255
- qemu_aio_unref(acb);
256
- return ret;
257
+ return acb.ret;
258
}
259
260
static coroutine_fn int64_t
261
--
77
--
262
2.9.3
78
2.28.0
263
79
264
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Rename Submission Queue flags with 'Sq' to differentiate
4
submission queue flags from command queue flags, and introduce
5
Completion Queue flag definitions.
6
7
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-id: 20201029093306.1063879-13-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
include/block/nvme.h | 18 ++++++++++++------
16
1 file changed, 12 insertions(+), 6 deletions(-)
17
18
diff --git a/include/block/nvme.h b/include/block/nvme.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/block/nvme.h
21
+++ b/include/block/nvme.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED NvmeCreateCq {
23
#define NVME_CQ_FLAGS_PC(cq_flags) (cq_flags & 0x1)
24
#define NVME_CQ_FLAGS_IEN(cq_flags) ((cq_flags >> 1) & 0x1)
25
26
+enum NvmeFlagsCq {
27
+ NVME_CQ_PC = 1,
28
+ NVME_CQ_IEN = 2,
29
+};
30
+
31
typedef struct QEMU_PACKED NvmeCreateSq {
32
uint8_t opcode;
33
uint8_t flags;
34
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED NvmeCreateSq {
35
#define NVME_SQ_FLAGS_PC(sq_flags) (sq_flags & 0x1)
36
#define NVME_SQ_FLAGS_QPRIO(sq_flags) ((sq_flags >> 1) & 0x3)
37
38
-enum NvmeQueueFlags {
39
- NVME_Q_PC = 1,
40
- NVME_Q_PRIO_URGENT = 0,
41
- NVME_Q_PRIO_HIGH = 1,
42
- NVME_Q_PRIO_NORMAL = 2,
43
- NVME_Q_PRIO_LOW = 3,
44
+enum NvmeFlagsSq {
45
+ NVME_SQ_PC = 1,
46
+
47
+ NVME_SQ_PRIO_URGENT = 0,
48
+ NVME_SQ_PRIO_HIGH = 1,
49
+ NVME_SQ_PRIO_NORMAL = 2,
50
+ NVME_SQ_PRIO_LOW = 3,
51
};
52
53
typedef struct QEMU_PACKED NvmeIdentify {
54
--
55
2.28.0
56
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Replace magic values by definitions, and simplify since the
4
number of queues will never reach 64K.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-14-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 9 +++++----
15
1 file changed, 5 insertions(+), 4 deletions(-)
16
17
diff --git a/block/nvme.c b/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/nvme.c
20
+++ b/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
22
NvmeCmd cmd;
23
unsigned queue_size = NVME_QUEUE_SIZE;
24
25
+ assert(n <= UINT16_MAX);
26
q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
27
n, queue_size, errp);
28
if (!q) {
29
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
30
cmd = (NvmeCmd) {
31
.opcode = NVME_ADM_CMD_CREATE_CQ,
32
.dptr.prp1 = cpu_to_le64(q->cq.iova),
33
- .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
34
- .cdw11 = cpu_to_le32(0x3),
35
+ .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
36
+ .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC),
37
};
38
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
39
error_setg(errp, "Failed to create CQ io queue [%u]", n);
40
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
41
cmd = (NvmeCmd) {
42
.opcode = NVME_ADM_CMD_CREATE_SQ,
43
.dptr.prp1 = cpu_to_le64(q->sq.iova),
44
- .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
45
- .cdw11 = cpu_to_le32(0x1 | (n << 16)),
46
+ .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
47
+ .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)),
48
};
49
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
50
error_setg(errp, "Failed to create SQ io queue [%u]", n);
51
--
52
2.28.0
53
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
From the specification chapter 3.1.8 "AQA - Admin Queue Attributes"
4
the Admin Submission Queue Size field is a 0’s based value:
5
6
Admin Submission Queue Size (ASQS):
7
8
Defines the size of the Admin Submission Queue in entries.
9
Enabling a controller while this field is cleared to 00h
10
produces undefined results. The minimum size of the Admin
11
Submission Queue is two entries. The maximum size of the
12
Admin Submission Queue is 4096 entries.
13
This is a 0’s based value.
14
15
This bug has never been hit because the device initialization
16
uses a single command synchronously :)
17
18
Reviewed-by: Eric Auger <eric.auger@redhat.com>
19
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
20
Tested-by: Eric Auger <eric.auger@redhat.com>
21
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
22
Message-id: 20201029093306.1063879-15-philmd@redhat.com
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
Tested-by: Eric Auger <eric.auger@redhat.com>
25
---
26
block/nvme.c | 6 +++---
27
1 file changed, 3 insertions(+), 3 deletions(-)
28
29
diff --git a/block/nvme.c b/block/nvme.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/block/nvme.c
32
+++ b/block/nvme.c
33
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
34
goto out;
35
}
36
s->queue_count = 1;
37
- QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
38
- regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
39
- (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
40
+ QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
41
+ regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
42
+ ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
43
regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
44
regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
45
46
--
47
2.28.0
48
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
We don't need to dereference from BDRVNVMeState each time.
4
Use a NVMeQueuePair pointer on the admin queue.
5
The nvme_init() becomes easier to review, matching the style
6
of nvme_add_io_queue().
7
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-16-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 12 ++++++------
17
1 file changed, 6 insertions(+), 6 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
Error **errp)
25
{
26
BDRVNVMeState *s = bs->opaque;
27
+ NVMeQueuePair *q;
28
AioContext *aio_context = bdrv_get_aio_context(bs);
29
int ret;
30
uint64_t cap;
31
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
32
33
/* Set up admin queue. */
34
s->queues = g_new(NVMeQueuePair *, 1);
35
- s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
36
- NVME_QUEUE_SIZE,
37
- errp);
38
- if (!s->queues[INDEX_ADMIN]) {
39
+ q = nvme_create_queue_pair(s, aio_context, 0, NVME_QUEUE_SIZE, errp);
40
+ if (!q) {
41
ret = -EINVAL;
42
goto out;
43
}
44
+ s->queues[INDEX_ADMIN] = q;
45
s->queue_count = 1;
46
QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
47
regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
48
((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
49
- regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
50
- regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
51
+ regs->asq = cpu_to_le64(q->sq.iova);
52
+ regs->acq = cpu_to_le64(q->cq.iova);
53
54
/* After setting up all control registers we can enable device now. */
55
regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
56
--
57
2.28.0
58
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Wrap the code that was copied repeatedly in the two functions,
3
As all commands use the ADMIN queue, it is pointless to pass
4
sd_aio_setup and sd_aio_complete.
4
it as argument each time. Remove the argument, and rename the
5
function as nvme_admin_cmd_sync() to make this new behavior
6
clearer.
5
7
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Message-id: 20161129113245.32724-6-pbonzini@redhat.com
9
Tested-by: Eric Auger <eric.auger@redhat.com>
8
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-id: 20201029093306.1063879-17-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
9
---
15
---
10
block/sheepdog.c | 66 ++++++++++++++++++++++++++------------------------------
16
block/nvme.c | 19 ++++++++++---------
11
1 file changed, 30 insertions(+), 36 deletions(-)
17
1 file changed, 10 insertions(+), 9 deletions(-)
12
18
13
diff --git a/block/sheepdog.c b/block/sheepdog.c
19
diff --git a/block/nvme.c b/block/nvme.c
14
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
15
--- a/block/sheepdog.c
21
--- a/block/nvme.c
16
+++ b/block/sheepdog.c
22
+++ b/block/nvme.c
17
@@ -XXX,XX +XXX,XX @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
23
@@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
18
return aio_req;
24
qemu_mutex_unlock(&q->lock);
19
}
25
}
20
26
21
+static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
27
-static void nvme_cmd_sync_cb(void *opaque, int ret)
22
+{
28
+static void nvme_admin_cmd_sync_cb(void *opaque, int ret)
23
+ SheepdogAIOCB *cb;
29
{
24
+
30
int *pret = opaque;
25
+retry:
31
*pret = ret;
26
+ QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
32
aio_wait_kick();
27
+ if (AIOCBOverlapping(acb, cb)) {
28
+ qemu_co_queue_wait(&s->overlapping_queue);
29
+ goto retry;
30
+ }
31
+ }
32
+}
33
+
34
static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
35
QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
36
int type)
37
@@ -XXX,XX +XXX,XX @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
38
acb->min_dirty_data_idx = UINT32_MAX;
39
acb->max_dirty_data_idx = 0;
40
acb->aiocb_type = type;
41
+
42
+ if (type == AIOCB_FLUSH_CACHE) {
43
+ return;
44
+ }
45
+
46
+ wait_for_overlapping_aiocb(s, acb);
47
+ QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
48
}
33
}
49
34
50
/* Return -EIO in case of error, file descriptor on success */
35
-static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
51
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
36
- NvmeCmd *cmd)
37
+static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd)
38
{
39
+ BDRVNVMeState *s = bs->opaque;
40
+ NVMeQueuePair *q = s->queues[INDEX_ADMIN];
41
AioContext *aio_context = bdrv_get_aio_context(bs);
42
NVMeRequest *req;
43
int ret = -EINPROGRESS;
44
@@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
45
if (!req) {
46
return -EBUSY;
52
}
47
}
53
}
48
- nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);
54
49
+ nvme_submit_command(q, req, cmd, nvme_admin_cmd_sync_cb, &ret);
55
-static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
50
56
+static void sd_aio_complete(SheepdogAIOCB *acb)
51
AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS);
57
{
52
return ret;
58
- SheepdogAIOCB *cb;
53
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
59
-
54
60
- QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
55
memset(id, 0, sizeof(*id));
61
- if (AIOCBOverlapping(aiocb, cb)) {
56
cmd.dptr.prp1 = cpu_to_le64(iova);
62
- return true;
57
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
63
- }
58
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
64
+ if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
59
error_setg(errp, "Failed to identify controller");
65
+ return;
60
goto out;
66
}
61
}
67
62
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
68
- QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
63
memset(id, 0, sizeof(*id));
69
- return false;
64
cmd.cdw10 = 0;
70
+ QLIST_REMOVE(acb, aiocb_siblings);
65
cmd.nsid = cpu_to_le32(namespace);
71
+ qemu_co_queue_restart_all(&acb->s->overlapping_queue);
66
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
72
}
67
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
73
68
error_setg(errp, "Failed to identify namespace");
74
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
69
goto out;
75
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
76
}
70
}
77
71
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
78
sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA);
72
.cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
79
-
73
.cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC),
80
-retry:
74
};
81
- if (check_overlapping_aiocb(s, &acb)) {
75
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
82
- qemu_co_queue_wait(&s->overlapping_queue);
76
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
83
- goto retry;
77
error_setg(errp, "Failed to create CQ io queue [%u]", n);
84
- }
78
goto out_error;
85
-
86
sd_co_rw_vector(&acb);
87
sd_write_done(&acb);
88
+ sd_aio_complete(&acb);
89
90
- QLIST_REMOVE(&acb, aiocb_siblings);
91
- qemu_co_queue_restart_all(&s->overlapping_queue);
92
return acb.ret;
93
}
94
95
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
96
BDRVSheepdogState *s = bs->opaque;
97
98
sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA);
99
-
100
-retry:
101
- if (check_overlapping_aiocb(s, &acb)) {
102
- qemu_co_queue_wait(&s->overlapping_queue);
103
- goto retry;
104
- }
105
-
106
sd_co_rw_vector(&acb);
107
+ sd_aio_complete(&acb);
108
109
- QLIST_REMOVE(&acb, aiocb_siblings);
110
- qemu_co_queue_restart_all(&s->overlapping_queue);
111
return acb.ret;
112
}
113
114
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
115
if (--acb.nr_pending) {
116
qemu_coroutine_yield();
117
}
79
}
118
+
80
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
119
+ sd_aio_complete(&acb);
81
.cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
120
return acb.ret;
82
.cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)),
121
}
83
};
122
84
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
123
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
85
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
86
error_setg(errp, "Failed to create SQ io queue [%u]", n);
87
goto out_error;
124
}
88
}
125
sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
89
@@ -XXX,XX +XXX,XX @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable,
126
count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
90
.cdw11 = cpu_to_le32(enable ? 0x01 : 0x00),
127
-
91
};
128
-retry:
92
129
- if (check_overlapping_aiocb(s, &acb)) {
93
- ret = nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd);
130
- qemu_co_queue_wait(&s->overlapping_queue);
94
+ ret = nvme_admin_cmd_sync(bs, &cmd);
131
- goto retry;
95
if (ret) {
132
- }
96
error_setg(errp, "Failed to configure NVMe write cache");
133
-
97
}
134
sd_co_rw_vector(&acb);
135
+ sd_aio_complete(&acb);
136
137
- QLIST_REMOVE(&acb, aiocb_siblings);
138
- qemu_co_queue_restart_all(&s->overlapping_queue);
139
return acb.ret;
140
}
141
142
--
98
--
143
2.9.3
99
2.28.0
144
100
145
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Commit bdd6a90a9e5 ("block: Add VFIO based NVMe driver")
4
sets the request_alignment in nvme_refresh_limits().
5
For consistency, also set it during initialization.
6
7
Reported-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-18-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 1 +
17
1 file changed, 1 insertion(+)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
25
s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
26
bs->bl.opt_mem_alignment = s->page_size;
27
+ bs->bl.request_alignment = s->page_size;
28
timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
29
30
/* Reset device to get a clean state. */
31
--
32
2.28.0
33
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
While trying to simplify the code using a macro, we forgot
4
the 12-bit shift... Correct that.
5
6
Fixes: fad1eb68862 ("block/nvme: Use register definitions from 'block/nvme.h'")
7
Reported-by: Eric Auger <eric.auger@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Eric Auger <eric.auger@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-19-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 2 +-
17
1 file changed, 1 insertion(+), 1 deletion(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
goto out;
25
}
26
27
- s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
28
+ s->page_size = 1u << (12 + NVME_CAP_MPSMIN(cap));
29
s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
30
bs->bl.opt_mem_alignment = s->page_size;
31
bs->bl.request_alignment = s->page_size;
32
--
33
2.28.0
34
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Eric Auger <eric.auger@redhat.com>
2
2
3
Add to the list in add_aio_request and, indirectly, resend_aioreq. Inline
3
In preparation for 64kB host page support, let's change the size
4
free_aio_req in the caller, it does not simply undo alloc_aio_req's job.
4
and alignment of the IDENTIFY command response buffer so that
5
the VFIO DMA MAP succeeds. We align on the host page size.
5
6
6
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Signed-off-by: Eric Auger <eric.auger@redhat.com>
7
Message-id: 20161129113245.32724-5-pbonzini@redhat.com
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Signed-off-by: Jeff Cody <jcody@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-20-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
9
---
15
---
10
block/sheepdog.c | 23 ++++++-----------------
16
block/nvme.c | 9 +++++----
11
1 file changed, 6 insertions(+), 17 deletions(-)
17
1 file changed, 5 insertions(+), 4 deletions(-)
12
18
13
diff --git a/block/sheepdog.c b/block/sheepdog.c
19
diff --git a/block/nvme.c b/block/nvme.c
14
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
15
--- a/block/sheepdog.c
21
--- a/block/nvme.c
16
+++ b/block/sheepdog.c
22
+++ b/block/nvme.c
17
@@ -XXX,XX +XXX,XX @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
23
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
18
return aio_req;
24
.opcode = NVME_ADM_CMD_IDENTIFY,
19
}
25
.cdw10 = cpu_to_le32(0x1),
20
26
};
21
-static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
27
+ size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size);
22
-{
28
23
- SheepdogAIOCB *acb = aio_req->aiocb;
29
- id = qemu_try_memalign(s->page_size, sizeof(*id));
24
-
30
+ id = qemu_try_memalign(qemu_real_host_page_size, id_size);
25
- QLIST_REMOVE(aio_req, aio_siblings);
31
if (!id) {
26
- g_free(aio_req);
32
error_setg(errp, "Cannot allocate buffer for identify response");
27
-
33
goto out;
28
- acb->nr_pending--;
29
-}
30
-
31
static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
32
QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
33
int type)
34
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
35
while (!QLIST_EMPTY(&s->failed_aio_head)) {
36
aio_req = QLIST_FIRST(&s->failed_aio_head);
37
QLIST_REMOVE(aio_req, aio_siblings);
38
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
39
resend_aioreq(s, aio_req);
40
}
34
}
41
}
35
- r = qemu_vfio_dma_map(s->vfio, id, sizeof(*id), true, &iova);
42
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
36
+ r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova);
43
*/
37
if (r) {
44
s->co_recv = NULL;
38
error_setg(errp, "Cannot map buffer for DMA");
45
39
goto out;
46
+ QLIST_REMOVE(aio_req, aio_siblings);
47
switch (rsp.result) {
48
case SD_RES_SUCCESS:
49
break;
50
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn aio_read_response(void *opaque)
51
break;
52
}
40
}
53
41
54
- free_aio_req(s, aio_req);
42
- memset(id, 0, sizeof(*id));
55
- if (!acb->nr_pending) {
43
+ memset(id, 0, id_size);
56
+ g_free(aio_req);
44
cmd.dptr.prp1 = cpu_to_le64(iova);
57
+
45
if (nvme_admin_cmd_sync(bs, &cmd)) {
58
+ if (!--acb->nr_pending) {
46
error_setg(errp, "Failed to identify controller");
59
/*
47
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
60
* We've finished all requests which belong to the AIOCB, so
48
s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES);
61
* we can switch back to sd_co_readv/writev now.
49
s->supports_discard = !!(oncs & NVME_ONCS_DSM);
62
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
50
63
uint64_t old_oid = aio_req->base_oid;
51
- memset(id, 0, sizeof(*id));
64
bool create = aio_req->create;
52
+ memset(id, 0, id_size);
65
53
cmd.cdw10 = 0;
66
+ QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
54
cmd.nsid = cpu_to_le32(namespace);
67
+
55
if (nvme_admin_cmd_sync(bs, &cmd)) {
68
if (!nr_copies) {
69
error_report("bug");
70
}
71
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
72
iov.iov_len = sizeof(s->inode);
73
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
74
data_len, offset, 0, false, 0, offset);
75
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
76
add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
77
if (--acb->nr_pending) {
78
qemu_coroutine_yield();
79
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
80
old_oid,
81
acb->aiocb_type == AIOCB_DISCARD_OBJ ?
82
0 : done);
83
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
84
-
85
add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
86
acb->aiocb_type);
87
done:
88
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
89
acb.nr_pending++;
90
aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id),
91
0, 0, 0, false, 0, 0);
92
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
93
add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type);
94
95
if (--acb.nr_pending) {
96
--
56
--
97
2.9.3
57
2.28.0
98
58
99
diff view generated by jsdifflib
New patch
1
From: Eric Auger <eric.auger@redhat.com>
1
2
3
In preparation for 64kB host page support, let's change the size
4
and alignment of the queue so that the VFIO DMA MAP succeeds.
5
We align on the host page size.
6
7
Signed-off-by: Eric Auger <eric.auger@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-21-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 4 ++--
16
1 file changed, 2 insertions(+), 2 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
23
size_t bytes;
24
int r;
25
26
- bytes = ROUND_UP(nentries * entry_bytes, s->page_size);
27
+ bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size);
28
q->head = q->tail = 0;
29
- q->queue = qemu_try_memalign(s->page_size, bytes);
30
+ q->queue = qemu_try_memalign(qemu_real_host_page_size, bytes);
31
if (!q->queue) {
32
error_setg(errp, "Cannot allocate queue");
33
return false;
34
--
35
2.28.0
36
diff view generated by jsdifflib
New patch
1
From: Eric Auger <eric.auger@redhat.com>
1
2
3
In preparation for 64kB host page support, let's change the size
4
and alignment of the prp_list_pages so that the VFIO DMA MAP succeeds
5
with 64kB host page size. We align on the host page size.
6
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Signed-off-by: Eric Auger <eric.auger@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-22-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 11 ++++++-----
17
1 file changed, 6 insertions(+), 5 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
24
int i, r;
25
NVMeQueuePair *q;
26
uint64_t prp_list_iova;
27
+ size_t bytes;
28
29
q = g_try_new0(NVMeQueuePair, 1);
30
if (!q) {
31
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
32
}
33
trace_nvme_create_queue_pair(idx, q, size, aio_context,
34
event_notifier_get_fd(s->irq_notifier));
35
- q->prp_list_pages = qemu_try_memalign(s->page_size,
36
- s->page_size * NVME_NUM_REQS);
37
+ bytes = QEMU_ALIGN_UP(s->page_size * NVME_NUM_REQS,
38
+ qemu_real_host_page_size);
39
+ q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes);
40
if (!q->prp_list_pages) {
41
goto fail;
42
}
43
- memset(q->prp_list_pages, 0, s->page_size * NVME_NUM_REQS);
44
+ memset(q->prp_list_pages, 0, bytes);
45
qemu_mutex_init(&q->lock);
46
q->s = s;
47
q->index = idx;
48
qemu_co_queue_init(&q->free_req_queue);
49
q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
50
- r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
51
- s->page_size * NVME_NUM_REQS,
52
+ r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
53
false, &prp_list_iova);
54
if (r) {
55
goto fail;
56
--
57
2.28.0
58
diff view generated by jsdifflib
New patch
1
From: Eric Auger <eric.auger@redhat.com>
1
2
3
Make sure iov's va and size are properly aligned on the
4
host page size.
5
6
Signed-off-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-23-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 14 ++++++++------
16
1 file changed, 8 insertions(+), 6 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
23
for (i = 0; i < qiov->niov; ++i) {
24
bool retry = true;
25
uint64_t iova;
26
+ size_t len = QEMU_ALIGN_UP(qiov->iov[i].iov_len,
27
+ qemu_real_host_page_size);
28
try_map:
29
r = qemu_vfio_dma_map(s->vfio,
30
qiov->iov[i].iov_base,
31
- qiov->iov[i].iov_len,
32
- true, &iova);
33
+ len, true, &iova);
34
if (r == -ENOMEM && retry) {
35
retry = false;
36
trace_nvme_dma_flush_queue_wait(s);
37
@@ -XXX,XX +XXX,XX @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs,
38
BDRVNVMeState *s = bs->opaque;
39
40
for (i = 0; i < qiov->niov; ++i) {
41
- if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, s->page_size) ||
42
- !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, s->page_size)) {
43
+ if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base,
44
+ qemu_real_host_page_size) ||
45
+ !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size)) {
46
trace_nvme_qiov_unaligned(qiov, i, qiov->iov[i].iov_base,
47
qiov->iov[i].iov_len, s->page_size);
48
return false;
49
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
50
int r;
51
uint8_t *buf = NULL;
52
QEMUIOVector local_qiov;
53
-
54
+ size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size);
55
assert(QEMU_IS_ALIGNED(offset, s->page_size));
56
assert(QEMU_IS_ALIGNED(bytes, s->page_size));
57
assert(bytes <= s->max_transfer);
58
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
59
}
60
s->stats.unaligned_accesses++;
61
trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
62
- buf = qemu_try_memalign(s->page_size, bytes);
63
+ buf = qemu_try_memalign(qemu_real_host_page_size, len);
64
65
if (!buf) {
66
return -ENOMEM;
67
--
68
2.28.0
69
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
qemu_vfio_pci_map_bar() calls mmap(), and mmap(2) states:
4
5
'offset' must be a multiple of the page size as returned
6
by sysconf(_SC_PAGE_SIZE).
7
8
In commit f68453237b9 we started to use an offset of 4K which
9
broke this contract on Aarch64 arch.
10
11
Fix by mapping at offset 0, and accessing doorbells at offset=4K.
12
13
Fixes: f68453237b9 ("block/nvme: Map doorbells pages write-only")
14
Reported-by: Eric Auger <eric.auger@redhat.com>
15
Reviewed-by: Eric Auger <eric.auger@redhat.com>
16
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Tested-by: Eric Auger <eric.auger@redhat.com>
18
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
19
Message-id: 20201029093306.1063879-24-philmd@redhat.com
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
Tested-by: Eric Auger <eric.auger@redhat.com>
22
---
23
block/nvme.c | 11 +++++++----
24
1 file changed, 7 insertions(+), 4 deletions(-)
25
26
diff --git a/block/nvme.c b/block/nvme.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/block/nvme.c
29
+++ b/block/nvme.c
30
@@ -XXX,XX +XXX,XX @@ typedef struct {
31
struct BDRVNVMeState {
32
AioContext *aio_context;
33
QEMUVFIOState *vfio;
34
+ void *bar0_wo_map;
35
/* Memory mapped registers */
36
volatile struct {
37
uint32_t sq_tail;
38
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
39
}
40
}
41
42
- s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
43
- NVME_DOORBELL_SIZE, PROT_WRITE, errp);
44
+ s->bar0_wo_map = qemu_vfio_pci_map_bar(s->vfio, 0, 0,
45
+ sizeof(NvmeBar) + NVME_DOORBELL_SIZE,
46
+ PROT_WRITE, errp);
47
+ s->doorbells = (void *)((uintptr_t)s->bar0_wo_map + sizeof(NvmeBar));
48
if (!s->doorbells) {
49
ret = -EINVAL;
50
goto out;
51
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
52
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
53
false, NULL, NULL);
54
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
55
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
56
- sizeof(NvmeBar), NVME_DOORBELL_SIZE);
57
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map,
58
+ 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE);
59
qemu_vfio_close(s->vfio);
60
61
g_free(s->device);
62
--
63
2.28.0
64
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
The Completion Queue Command Identifier is a 16-bit value,
4
so nvme_submit_command() is unlikely to work on big-endian
5
hosts, as the relevant bits are truncated.
6
Fix by using the correct byte-swap function.
7
8
Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver")
9
Reported-by: Keith Busch <kbusch@kernel.org>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-id: 20201029093306.1063879-25-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 2 +-
17
1 file changed, 1 insertion(+), 1 deletion(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
24
assert(!req->cb);
25
req->cb = cb;
26
req->opaque = opaque;
27
- cmd->cid = cpu_to_le32(req->cid);
28
+ cmd->cid = cpu_to_le16(req->cid);
29
30
trace_nvme_submit_command(q->s, q->index, req->cid);
31
nvme_trace_command(cmd);
32
--
33
2.28.0
34
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Replace the confusing "VFIO IOMMU check failed" error message with
4
the explicit "VFIO IOMMU Type1 is not supported" one.
5
6
Example on POWER:
7
8
$ qemu-system-ppc64 -drive if=none,id=nvme0,file=nvme://0001:01:00.0/1,format=raw
9
qemu-system-ppc64: -drive if=none,id=nvme0,file=nvme://0001:01:00.0/1,format=raw: VFIO IOMMU Type1 is not supported
10
11
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
12
Reviewed-by: Fam Zheng <fam@euphon.net>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
15
Message-id: 20201103020733.2303148-2-philmd@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Tested-by: Eric Auger <eric.auger@redhat.com>
18
---
19
util/vfio-helpers.c | 2 +-
20
1 file changed, 1 insertion(+), 1 deletion(-)
21
22
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/util/vfio-helpers.c
25
+++ b/util/vfio-helpers.c
26
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
27
}
28
29
if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
30
- error_setg_errno(errp, errno, "VFIO IOMMU check failed");
31
+ error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported");
32
ret = -EINVAL;
33
goto fail_container;
34
}
35
--
36
2.28.0
37
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
We sometimes get kernel panics with some devices on Aarch64
4
hosts. Alex Williamson suggests it might be a broken PCIe
5
root complex. Add trace event to record the latest I/O
6
access before crashing. Just in case, assert our accesses are
7
aligned.
8
9
Reviewed-by: Fam Zheng <fam@euphon.net>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201103020733.2303148-3-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
util/vfio-helpers.c | 8 ++++++++
17
util/trace-events | 2 ++
18
2 files changed, 10 insertions(+)
19
20
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/util/vfio-helpers.c
23
+++ b/util/vfio-helpers.c
24
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
25
{
26
int ret;
27
28
+ trace_qemu_vfio_pci_read_config(buf, ofs, size,
29
+ s->config_region_info.offset,
30
+ s->config_region_info.size);
31
+ assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
32
do {
33
ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
34
} while (ret == -1 && errno == EINTR);
35
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
36
{
37
int ret;
38
39
+ trace_qemu_vfio_pci_write_config(buf, ofs, size,
40
+ s->config_region_info.offset,
41
+ s->config_region_info.size);
42
+ assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
43
do {
44
ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
45
} while (ret == -1 && errno == EINTR);
46
diff --git a/util/trace-events b/util/trace-events
47
index XXXXXXX..XXXXXXX 100644
48
--- a/util/trace-events
49
+++ b/util/trace-events
50
@@ -XXX,XX +XXX,XX @@ qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova
51
qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p host %p size 0x%zx iova 0x%"PRIx64
52
qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d iova %p"
53
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
54
+qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
55
+qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
56
--
57
2.28.0
58
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
For debugging purposes, trace BAR region info.
4
5
Reviewed-by: Fam Zheng <fam@euphon.net>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Message-id: 20201103020733.2303148-4-philmd@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
---
12
util/vfio-helpers.c | 8 ++++++++
13
util/trace-events | 1 +
14
2 files changed, 9 insertions(+)
15
16
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vfio-helpers.c
19
+++ b/util/vfio-helpers.c
20
@@ -XXX,XX +XXX,XX @@ static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
21
22
static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
23
{
24
+ g_autofree char *barname = NULL;
25
assert_bar_index_valid(s, index);
26
s->bar_region_info[index] = (struct vfio_region_info) {
27
.index = VFIO_PCI_BAR0_REGION_INDEX + index,
28
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
29
error_setg_errno(errp, errno, "Failed to get BAR region info");
30
return -errno;
31
}
32
+ barname = g_strdup_printf("bar[%d]", index);
33
+ trace_qemu_vfio_region_info(barname, s->bar_region_info[index].offset,
34
+ s->bar_region_info[index].size,
35
+ s->bar_region_info[index].cap_offset);
36
37
return 0;
38
}
39
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
40
ret = -errno;
41
goto fail;
42
}
43
+ trace_qemu_vfio_region_info("config", s->config_region_info.offset,
44
+ s->config_region_info.size,
45
+ s->config_region_info.cap_offset);
46
47
for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
48
ret = qemu_vfio_pci_init_bar(s, i, errp);
49
diff --git a/util/trace-events b/util/trace-events
50
index XXXXXXX..XXXXXXX 100644
51
--- a/util/trace-events
52
+++ b/util/trace-events
53
@@ -XXX,XX +XXX,XX @@ qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *io
54
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
55
qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
56
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
57
+qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
58
--
59
2.28.0
60
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
For debugging purposes, trace where a BAR is mapped.
4
5
Reviewed-by: Fam Zheng <fam@euphon.net>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Message-id: 20201103020733.2303148-5-philmd@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
---
12
util/vfio-helpers.c | 2 ++
13
util/trace-events | 1 +
14
2 files changed, 3 insertions(+)
15
16
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vfio-helpers.c
19
+++ b/util/vfio-helpers.c
20
@@ -XXX,XX +XXX,XX @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
21
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
22
prot, MAP_SHARED,
23
s->device, s->bar_region_info[index].offset + offset);
24
+ trace_qemu_vfio_pci_map_bar(index, s->bar_region_info[index].offset ,
25
+ size, offset, p);
26
if (p == MAP_FAILED) {
27
error_setg_errno(errp, errno, "Failed to map BAR region");
28
p = NULL;
29
diff --git a/util/trace-events b/util/trace-events
30
index XXXXXXX..XXXXXXX 100644
31
--- a/util/trace-events
32
+++ b/util/trace-events
33
@@ -XXX,XX +XXX,XX @@ qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
34
qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
35
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
36
qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
37
+qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
38
--
39
2.28.0
40
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
For debugging purposes, trace where DMA regions are mapped.
4
5
Reviewed-by: Fam Zheng <fam@euphon.net>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Message-id: 20201103020733.2303148-6-philmd@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
---
12
util/vfio-helpers.c | 3 ++-
13
util/trace-events | 5 +++--
14
2 files changed, 5 insertions(+), 3 deletions(-)
15
16
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vfio-helpers.c
19
+++ b/util/vfio-helpers.c
20
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
21
.vaddr = (uintptr_t)host,
22
.size = size,
23
};
24
- trace_qemu_vfio_do_mapping(s, host, size, iova);
25
+ trace_qemu_vfio_do_mapping(s, host, iova, size);
26
27
if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
28
error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
29
@@ -XXX,XX +XXX,XX @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
30
}
31
}
32
}
33
+ trace_qemu_vfio_dma_mapped(s, host, iova0, size);
34
if (iova) {
35
*iova = iova0;
36
}
37
diff --git a/util/trace-events b/util/trace-events
38
index XXXXXXX..XXXXXXX 100644
39
--- a/util/trace-events
40
+++ b/util/trace-events
41
@@ -XXX,XX +XXX,XX @@ qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%z
42
qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
43
qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
44
qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64
45
-qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p host %p size 0x%zx iova 0x%"PRIx64
46
-qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d iova %p"
47
+qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64 " size 0x%zx"
48
+qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
49
+qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
50
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
51
qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
52
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
53
--
54
2.28.0
55
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
SheepdogAIOCB is internal to sheepdog.c, hence it is never canceled.
3
The QEMU_VFIO_DEBUG definition is only modifiable at build-time.
4
Trace events can be enabled at run-time. As we prefer the latter,
5
convert qemu_vfio_dump_mappings() to use trace events instead
6
of printf().
4
7
5
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
8
Reviewed-by: Fam Zheng <fam@euphon.net>
6
Message-id: 20161129113245.32724-2-pbonzini@redhat.com
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Jeff Cody <jcody@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201103020733.2303148-7-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
8
---
14
---
9
block/sheepdog.c | 52 ----------------------------------------------------
15
util/vfio-helpers.c | 19 ++++---------------
10
1 file changed, 52 deletions(-)
16
util/trace-events | 1 +
17
2 files changed, 5 insertions(+), 15 deletions(-)
11
18
12
diff --git a/block/sheepdog.c b/block/sheepdog.c
19
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
13
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
14
--- a/block/sheepdog.c
21
--- a/util/vfio-helpers.c
15
+++ b/block/sheepdog.c
22
+++ b/util/vfio-helpers.c
16
@@ -XXX,XX +XXX,XX @@ struct SheepdogAIOCB {
23
@@ -XXX,XX +XXX,XX @@ QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
17
Coroutine *coroutine;
24
return s;
18
void (*aio_done_func)(SheepdogAIOCB *);
19
20
- bool cancelable;
21
int nr_pending;
22
23
uint32_t min_affect_data_idx;
24
@@ -XXX,XX +XXX,XX @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
25
{
26
SheepdogAIOCB *acb = aio_req->aiocb;
27
28
- acb->cancelable = false;
29
QLIST_REMOVE(aio_req, aio_siblings);
30
g_free(aio_req);
31
32
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
33
qemu_aio_unref(acb);
34
}
25
}
35
26
36
-/*
27
-static void qemu_vfio_dump_mapping(IOVAMapping *m)
37
- * Check whether the specified acb can be canceled
38
- *
39
- * We can cancel aio when any request belonging to the acb is:
40
- * - Not processed by the sheepdog server.
41
- * - Not linked to the inflight queue.
42
- */
43
-static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
44
-{
28
-{
45
- BDRVSheepdogState *s = acb->common.bs->opaque;
29
- if (QEMU_VFIO_DEBUG) {
46
- AIOReq *aioreq;
30
- printf(" vfio mapping %p %" PRIx64 " to %" PRIx64 "\n", m->host,
47
-
31
- (uint64_t)m->size, (uint64_t)m->iova);
48
- if (!acb->cancelable) {
49
- return false;
50
- }
51
-
52
- QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
53
- if (aioreq->aiocb == acb) {
54
- return false;
55
- }
56
- }
57
-
58
- return true;
59
-}
60
-
61
-static void sd_aio_cancel(BlockAIOCB *blockacb)
62
-{
63
- SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
64
- BDRVSheepdogState *s = acb->common.bs->opaque;
65
- AIOReq *aioreq, *next;
66
-
67
- if (sd_acb_cancelable(acb)) {
68
- /* Remove outstanding requests from failed queue. */
69
- QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
70
- next) {
71
- if (aioreq->aiocb == acb) {
72
- free_aio_req(s, aioreq);
73
- }
74
- }
75
-
76
- assert(acb->nr_pending == 0);
77
- if (acb->common.cb) {
78
- acb->common.cb(acb->common.opaque, -ECANCELED);
79
- }
80
- sd_finish_aiocb(acb);
81
- }
32
- }
82
-}
33
-}
83
-
34
-
84
static const AIOCBInfo sd_aiocb_info = {
35
static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
85
.aiocb_size = sizeof(SheepdogAIOCB),
36
{
86
- .cancel_async = sd_aio_cancel,
37
- int i;
87
};
38
-
88
39
- if (QEMU_VFIO_DEBUG) {
89
static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
40
- printf("vfio mappings\n");
90
@@ -XXX,XX +XXX,XX @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
41
- for (i = 0; i < s->nr_mappings; ++i) {
91
acb->nb_sectors = nb_sectors;
42
- qemu_vfio_dump_mapping(&s->mappings[i]);
92
43
- }
93
acb->aio_done_func = NULL;
44
+ for (int i = 0; i < s->nr_mappings; ++i) {
94
- acb->cancelable = true;
45
+ trace_qemu_vfio_dump_mapping(s->mappings[i].host,
95
acb->coroutine = qemu_coroutine_self();
46
+ s->mappings[i].iova,
96
acb->ret = 0;
47
+ s->mappings[i].size);
97
acb->nr_pending = 0;
48
}
49
}
50
51
diff --git a/util/trace-events b/util/trace-events
52
index XXXXXXX..XXXXXXX 100644
53
--- a/util/trace-events
54
+++ b/util/trace-events
55
@@ -XXX,XX +XXX,XX @@ qemu_mutex_unlock(void *mutex, const char *file, const int line) "released mutex
56
qemu_vfio_dma_reset_temporary(void *s) "s %p"
57
qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
58
qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
59
+qemu_vfio_dump_mapping(void *host, uint64_t iova, size_t size) "vfio mapping %p to iova 0x%08" PRIx64 " size 0x%zx"
60
qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
61
qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64
62
qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64 " size 0x%zx"
98
--
63
--
99
2.9.3
64
2.28.0
100
65
101
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
mmap(2) states:
4
5
'offset' must be a multiple of the page size as returned
6
by sysconf(_SC_PAGE_SIZE).
7
8
Add an assertion to be sure we don't break this contract.
9
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201103020733.2303148-8-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
util/vfio-helpers.c | 1 +
16
1 file changed, 1 insertion(+)
17
18
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/util/vfio-helpers.c
21
+++ b/util/vfio-helpers.c
22
@@ -XXX,XX +XXX,XX @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
23
Error **errp)
24
{
25
void *p;
26
+ assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size));
27
assert_bar_index_valid(s, index);
28
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
29
prot, MAP_SHARED,
30
--
31
2.28.0
32
diff view generated by jsdifflib