1
The following changes since commit 8507c9d5c9a62de2a0e281b640f995e26eac46af:
1
The following changes since commit 474f3938d79ab36b9231c9ad3b5a9314c2aeacde:
2
2
3
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2020-11-03 15:59:44 +0000)
3
Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-jun-21-2019' into staging (2019-06-21 15:40:50 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
7
https://github.com/XanClic/qemu.git tags/pull-block-2019-06-24
8
8
9
for you to fetch changes up to fc107d86840b3364e922c26cf7631b7fd38ce523:
9
for you to fetch changes up to ab5d4a30f7f3803ca5106b370969c1b7b54136f8:
10
10
11
util/vfio-helpers: Assert offset is aligned to page size (2020-11-03 19:06:23 +0000)
11
iotests: Fix 205 for concurrent runs (2019-06-24 16:01:40 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request for 5.2
14
Block patches:
15
- The SSH block driver now uses libssh instead of libssh2
16
- The VMDK block driver gets read-only support for the seSparse
17
subformat
18
- Various fixes
15
19
16
NVMe fixes to solve IOMMU issues on non-x86 and error message/tracing
20
---
17
improvements. Elena Afanasova's ioeventfd fixes are also included.
18
21
19
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
v2:
23
- Squashed Pino's fix for pre-0.8 libssh into the libssh patch
20
24
21
----------------------------------------------------------------
25
----------------------------------------------------------------
26
Anton Nefedov (1):
27
iotest 134: test cluster-misaligned encrypted write
22
28
23
Elena Afanasova (2):
29
Klaus Birkelund Jensen (1):
24
accel/kvm: add PIO ioeventfds only in case kvm_eventfds_allowed is
30
nvme: do not advertise support for unsupported arbitration mechanism
25
true
26
softmmu/memory: fix memory_region_ioeventfd_equal()
27
31
28
Eric Auger (4):
32
Max Reitz (1):
29
block/nvme: Change size and alignment of IDENTIFY response buffer
33
iotests: Fix 205 for concurrent runs
30
block/nvme: Change size and alignment of queue
31
block/nvme: Change size and alignment of prp_list_pages
32
block/nvme: Align iov's va and size on host page size
33
34
34
Philippe Mathieu-Daudé (27):
35
Pino Toscano (1):
35
MAINTAINERS: Cover "block/nvme.h" file
36
ssh: switch from libssh2 to libssh
36
block/nvme: Use hex format to display offset in trace events
37
block/nvme: Report warning with warn_report()
38
block/nvme: Trace controller capabilities
39
block/nvme: Trace nvme_poll_queue() per queue
40
block/nvme: Improve nvme_free_req_queue_wait() trace information
41
block/nvme: Trace queue pair creation/deletion
42
block/nvme: Move definitions before structure declarations
43
block/nvme: Use unsigned integer for queue counter/size
44
block/nvme: Make nvme_identify() return boolean indicating error
45
block/nvme: Make nvme_init_queue() return boolean indicating error
46
block/nvme: Introduce Completion Queue definitions
47
block/nvme: Use definitions instead of magic values in add_io_queue()
48
block/nvme: Correctly initialize Admin Queue Attributes
49
block/nvme: Simplify ADMIN queue access
50
block/nvme: Simplify nvme_cmd_sync()
51
block/nvme: Set request_alignment at initialization
52
block/nvme: Correct minimum device page size
53
block/nvme: Fix use of write-only doorbells page on Aarch64 arch
54
block/nvme: Fix nvme_submit_command() on big-endian host
55
util/vfio-helpers: Improve reporting unsupported IOMMU type
56
util/vfio-helpers: Trace PCI I/O config accesses
57
util/vfio-helpers: Trace PCI BAR region info
58
util/vfio-helpers: Trace where BARs are mapped
59
util/vfio-helpers: Improve DMA trace events
60
util/vfio-helpers: Convert vfio_dump_mapping to trace events
61
util/vfio-helpers: Assert offset is aligned to page size
62
37
63
MAINTAINERS | 2 +
38
Sam Eiderman (3):
64
include/block/nvme.h | 18 ++--
39
vmdk: Fix comment regarding max l1_size coverage
65
accel/kvm/kvm-all.c | 6 +-
40
vmdk: Reduce the max bound for L1 table size
66
block/nvme.c | 209 ++++++++++++++++++++++++-------------------
41
vmdk: Add read-only support for seSparse snapshots
67
softmmu/memory.c | 11 ++-
42
68
util/vfio-helpers.c | 43 +++++----
43
Vladimir Sementsov-Ogievskiy (1):
69
block/trace-events | 30 ++++---
44
blockdev: enable non-root nodes for transaction drive-backup source
70
util/trace-events | 10 ++-
45
71
8 files changed, 195 insertions(+), 134 deletions(-)
46
configure | 65 +-
47
block/Makefile.objs | 6 +-
48
block/ssh.c | 652 ++++++++++--------
49
block/vmdk.c | 372 +++++++++-
50
blockdev.c | 2 +-
51
hw/block/nvme.c | 1 -
52
.travis.yml | 4 +-
53
block/trace-events | 14 +-
54
docs/qemu-block-drivers.texi | 2 +-
55
.../dockerfiles/debian-win32-cross.docker | 1 -
56
.../dockerfiles/debian-win64-cross.docker | 1 -
57
tests/docker/dockerfiles/fedora.docker | 4 +-
58
tests/docker/dockerfiles/ubuntu.docker | 2 +-
59
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
60
tests/qemu-iotests/059.out | 2 +-
61
tests/qemu-iotests/134 | 9 +
62
tests/qemu-iotests/134.out | 10 +
63
tests/qemu-iotests/205 | 2 +-
64
tests/qemu-iotests/207 | 54 +-
65
tests/qemu-iotests/207.out | 2 +-
66
20 files changed, 823 insertions(+), 384 deletions(-)
72
67
73
--
68
--
74
2.28.0
69
2.21.0
75
70
71
diff view generated by jsdifflib
Deleted patch
1
From: Elena Afanasova <eafanasova@gmail.com>
2
1
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
5
Message-Id: <20201017210102.26036-1-eafanasova@gmail.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
accel/kvm/kvm-all.c | 6 ++++--
9
1 file changed, 4 insertions(+), 2 deletions(-)
10
11
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/kvm/kvm-all.c
14
+++ b/accel/kvm/kvm-all.c
15
@@ -XXX,XX +XXX,XX @@ static int kvm_init(MachineState *ms)
16
17
kvm_memory_listener_register(s, &s->memory_listener,
18
&address_space_memory, 0);
19
- memory_listener_register(&kvm_io_listener,
20
- &address_space_io);
21
+ if (kvm_eventfds_allowed) {
22
+ memory_listener_register(&kvm_io_listener,
23
+ &address_space_io);
24
+ }
25
memory_listener_register(&kvm_coalesced_pio_listener,
26
&address_space_io);
27
28
--
29
2.28.0
30
diff view generated by jsdifflib
Deleted patch
1
From: Elena Afanasova <eafanasova@gmail.com>
2
1
3
Eventfd can be registered with a zero length when fast_mmio is true.
4
Handle this case properly when dispatching through QEMU.
5
6
Signed-off-by: Elena Afanasova <eafanasova@gmail.com>
7
Message-id: cf71a62eb04e61932ff8ffdd02e0b2aab4f495a0.camel@gmail.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
softmmu/memory.c | 11 +++++++++--
11
1 file changed, 9 insertions(+), 2 deletions(-)
12
13
diff --git a/softmmu/memory.c b/softmmu/memory.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/softmmu/memory.c
16
+++ b/softmmu/memory.c
17
@@ -XXX,XX +XXX,XX @@ static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd *a,
18
static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd *a,
19
MemoryRegionIoeventfd *b)
20
{
21
- return !memory_region_ioeventfd_before(a, b)
22
- && !memory_region_ioeventfd_before(b, a);
23
+ if (int128_eq(a->addr.start, b->addr.start) &&
24
+ (!int128_nz(a->addr.size) || !int128_nz(b->addr.size) ||
25
+ (int128_eq(a->addr.size, b->addr.size) &&
26
+ (a->match_data == b->match_data) &&
27
+ ((a->match_data && (a->data == b->data)) || !a->match_data) &&
28
+ (a->e == b->e))))
29
+ return true;
30
+
31
+ return false;
32
}
33
34
/* Range of memory in the global map. Addresses are absolute. */
35
--
36
2.28.0
37
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
The "block/nvme.h" header is shared by both the NVMe block
4
driver and the NVMe emulated device. Add the 'F:' entry on
5
both sections, so all maintainers/reviewers are notified
6
when it is changed.
7
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
11
Message-Id: <20200701140634.25994-1-philmd@redhat.com>
12
---
13
MAINTAINERS | 2 ++
14
1 file changed, 2 insertions(+)
15
16
diff --git a/MAINTAINERS b/MAINTAINERS
17
index XXXXXXX..XXXXXXX 100644
18
--- a/MAINTAINERS
19
+++ b/MAINTAINERS
20
@@ -XXX,XX +XXX,XX @@ M: Klaus Jensen <its@irrelevant.dk>
21
L: qemu-block@nongnu.org
22
S: Supported
23
F: hw/block/nvme*
24
+F: include/block/nvme.h
25
F: tests/qtest/nvme-test.c
26
F: docs/specs/nvme.txt
27
T: git git://git.infradead.org/qemu-nvme.git nvme-next
28
@@ -XXX,XX +XXX,XX @@ R: Fam Zheng <fam@euphon.net>
29
L: qemu-block@nongnu.org
30
S: Supported
31
F: block/nvme*
32
+F: include/block/nvme.h
33
T: git https://github.com/stefanha/qemu.git block
34
35
Bootdevice
36
--
37
2.28.0
38
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Use the same format used for the hw/vfio/ trace events.
4
5
Suggested-by: Eric Auger <eric.auger@redhat.com>
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-3-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/trace-events | 12 ++++++------
15
1 file changed, 6 insertions(+), 6 deletions(-)
16
17
diff --git a/block/trace-events b/block/trace-events
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/trace-events
20
+++ b/block/trace-events
21
@@ -XXX,XX +XXX,XX @@ nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
22
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
23
nvme_handle_event(void *s) "s %p"
24
nvme_poll_cb(void *s) "s %p"
25
-nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset %"PRId64" bytes %"PRId64" flags %d niov %d"
26
-nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset %"PRId64" bytes %"PRId64" flags %d"
27
+nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d"
28
+nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d"
29
nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
30
-nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d"
31
-nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d"
32
-nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64""
33
-nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d"
34
+nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d"
35
+nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d"
36
+nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
37
+nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
38
nvme_dma_map_flush(void *s) "s %p"
39
nvme_free_req_queue_wait(void *q) "q %p"
40
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
41
--
42
2.28.0
43
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Instead of displaying the warning on stderr, use warn_report()
4
which also displays it on the monitor.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-4-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 4 ++--
15
1 file changed, 2 insertions(+), 2 deletions(-)
16
17
diff --git a/block/nvme.c b/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/nvme.c
20
+++ b/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
22
}
23
cid = le16_to_cpu(c->cid);
24
if (cid == 0 || cid > NVME_QUEUE_SIZE) {
25
- fprintf(stderr, "Unexpected CID in completion queue: %" PRIu32 "\n",
26
- cid);
27
+ warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
28
+ "queue size: %u", cid, NVME_QUEUE_SIZE);
29
continue;
30
}
31
trace_nvme_complete_command(s, q->index, cid);
32
--
33
2.28.0
34
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Controllers have different capabilities and report them in the
4
CAP register. We are particularly interested in the page size
5
limits.
6
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-5-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 13 +++++++++++++
16
block/trace-events | 2 ++
17
2 files changed, 15 insertions(+)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
* Initialization". */
25
26
cap = le64_to_cpu(regs->cap);
27
+ trace_nvme_controller_capability_raw(cap);
28
+ trace_nvme_controller_capability("Maximum Queue Entries Supported",
29
+ 1 + NVME_CAP_MQES(cap));
30
+ trace_nvme_controller_capability("Contiguous Queues Required",
31
+ NVME_CAP_CQR(cap));
32
+ trace_nvme_controller_capability("Doorbell Stride",
33
+ 2 << (2 + NVME_CAP_DSTRD(cap)));
34
+ trace_nvme_controller_capability("Subsystem Reset Supported",
35
+ NVME_CAP_NSSRS(cap));
36
+ trace_nvme_controller_capability("Memory Page Size Minimum",
37
+ 1 << (12 + NVME_CAP_MPSMIN(cap)));
38
+ trace_nvme_controller_capability("Memory Page Size Maximum",
39
+ 1 << (12 + NVME_CAP_MPSMAX(cap)));
40
if (!NVME_CAP_CSS(cap)) {
41
error_setg(errp, "Device doesn't support NVMe command set");
42
ret = -EINVAL;
43
diff --git a/block/trace-events b/block/trace-events
44
index XXXXXXX..XXXXXXX 100644
45
--- a/block/trace-events
46
+++ b/block/trace-events
47
@@ -XXX,XX +XXX,XX @@ qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t
48
qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
49
50
# nvme.c
51
+nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
52
+nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
53
nvme_kick(void *s, int queue) "s %p queue %d"
54
nvme_dma_flush_queue_wait(void *s) "s %p"
55
nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
56
--
57
2.28.0
58
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
2
2
3
The QEMU_VFIO_DEBUG definition is only modifiable at build-time.
3
The device mistakenly reports that the Weighted Round Robin with Urgent
4
Trace events can be enabled at run-time. As we prefer the latter,
4
Priority Class arbitration mechanism is supported.
5
convert qemu_vfio_dump_mappings() to use trace events instead
6
of fprintf().
7
5
8
Reviewed-by: Fam Zheng <fam@euphon.net>
6
It is not.
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
11
Message-id: 20201103020733.2303148-7-philmd@redhat.com
9
Message-id: 20190606092530.14206-1-klaus@birkelund.eu
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
---
12
---
15
util/vfio-helpers.c | 19 ++++---------------
13
hw/block/nvme.c | 1 -
16
util/trace-events | 1 +
14
1 file changed, 1 deletion(-)
17
2 files changed, 5 insertions(+), 15 deletions(-)
18
15
19
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
16
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
21
--- a/util/vfio-helpers.c
18
--- a/hw/block/nvme.c
22
+++ b/util/vfio-helpers.c
19
+++ b/hw/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
20
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
24
return s;
21
n->bar.cap = 0;
25
}
22
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
26
23
NVME_CAP_SET_CQR(n->bar.cap, 1);
27
-static void qemu_vfio_dump_mapping(IOVAMapping *m)
24
- NVME_CAP_SET_AMS(n->bar.cap, 1);
28
-{
25
NVME_CAP_SET_TO(n->bar.cap, 0xf);
29
- if (QEMU_VFIO_DEBUG) {
26
NVME_CAP_SET_CSS(n->bar.cap, 1);
30
- printf(" vfio mapping %p %" PRIx64 " to %" PRIx64 "\n", m->host,
27
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
31
- (uint64_t)m->size, (uint64_t)m->iova);
32
- }
33
-}
34
-
35
static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
36
{
37
- int i;
38
-
39
- if (QEMU_VFIO_DEBUG) {
40
- printf("vfio mappings\n");
41
- for (i = 0; i < s->nr_mappings; ++i) {
42
- qemu_vfio_dump_mapping(&s->mappings[i]);
43
- }
44
+ for (int i = 0; i < s->nr_mappings; ++i) {
45
+ trace_qemu_vfio_dump_mapping(s->mappings[i].host,
46
+ s->mappings[i].iova,
47
+ s->mappings[i].size);
48
}
49
}
50
51
diff --git a/util/trace-events b/util/trace-events
52
index XXXXXXX..XXXXXXX 100644
53
--- a/util/trace-events
54
+++ b/util/trace-events
55
@@ -XXX,XX +XXX,XX @@ qemu_mutex_unlock(void *mutex, const char *file, const int line) "released mutex
56
qemu_vfio_dma_reset_temporary(void *s) "s %p"
57
qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
58
qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
59
+qemu_vfio_dump_mapping(void *host, uint64_t iova, size_t size) "vfio mapping %p to iova 0x%08" PRIx64 " size 0x%zx"
60
qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
61
qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64
62
qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64 " size 0x%zx"
63
--
28
--
64
2.28.0
29
2.21.0
65
30
31
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Replace the confusing "VFIO IOMMU check failed" error message by
3
We forgot to enable it for transaction .prepare, while it is already
4
the explicit "VFIO IOMMU Type1 is not supported" one.
4
enabled in do_drive_backup since commit a2d665c1bc362
5
"blockdev: loosen restrictions on drive-backup source node"
5
6
6
Example on POWER:
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
8
Message-id: 20190618140804.59214-1-vsementsov@virtuozzo.com
8
$ qemu-system-ppc64 -drive if=none,id=nvme0,file=nvme://0001:01:00.0/1,format=raw
9
Reviewed-by: John Snow <jsnow@redhat.com>
9
qemu-system-ppc64: -drive if=none,id=nvme0,file=nvme://0001:01:00.0/1,format=raw: VFIO IOMMU Type1 is not supported
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
11
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
12
Reviewed-by: Fam Zheng <fam@euphon.net>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
15
Message-id: 20201103020733.2303148-2-philmd@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Tested-by: Eric Auger <eric.auger@redhat.com>
18
---
11
---
19
util/vfio-helpers.c | 2 +-
12
blockdev.c | 2 +-
20
1 file changed, 1 insertion(+), 1 deletion(-)
13
1 file changed, 1 insertion(+), 1 deletion(-)
21
14
22
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
15
diff --git a/blockdev.c b/blockdev.c
23
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
24
--- a/util/vfio-helpers.c
17
--- a/blockdev.c
25
+++ b/util/vfio-helpers.c
18
+++ b/blockdev.c
26
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
19
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
27
}
20
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
28
21
backup = common->action->u.drive_backup.data;
29
if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
22
30
- error_setg_errno(errp, errno, "VFIO IOMMU check failed");
23
- bs = qmp_get_root_bs(backup->device, errp);
31
+ error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported");
24
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
32
ret = -EINVAL;
25
if (!bs) {
33
goto fail_container;
26
return;
34
}
27
}
35
--
28
--
36
2.28.0
29
2.21.0
37
30
31
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
2
2
3
For debugging purposes, trace where DMA regions are mapped.
3
COW (even empty/zero) areas require encryption too
4
4
5
Reviewed-by: Fam Zheng <fam@euphon.net>
5
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-id: 20201103020733.2303148-6-philmd@redhat.com
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 20190516143028.81155-1-anton.nefedov@virtuozzo.com
10
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
11
---
12
util/vfio-helpers.c | 3 ++-
12
tests/qemu-iotests/134 | 9 +++++++++
13
util/trace-events | 5 +++--
13
tests/qemu-iotests/134.out | 10 ++++++++++
14
2 files changed, 5 insertions(+), 3 deletions(-)
14
2 files changed, 19 insertions(+)
15
15
16
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
16
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/134
19
+++ b/tests/qemu-iotests/134
20
@@ -XXX,XX +XXX,XX @@ echo
21
echo "== reading whole image =="
22
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
23
24
+echo
25
+echo "== rewriting cluster part =="
26
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
27
+
28
+echo
29
+echo "== verify pattern =="
30
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
31
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
32
+
33
echo
34
echo "== rewriting whole image =="
35
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
36
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
17
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vfio-helpers.c
38
--- a/tests/qemu-iotests/134.out
19
+++ b/util/vfio-helpers.c
39
+++ b/tests/qemu-iotests/134.out
20
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
40
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
21
.vaddr = (uintptr_t)host,
41
read 134217728/134217728 bytes at offset 0
22
.size = size,
42
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
23
};
43
24
- trace_qemu_vfio_do_mapping(s, host, size, iova);
44
+== rewriting cluster part ==
25
+ trace_qemu_vfio_do_mapping(s, host, iova, size);
45
+wrote 512/512 bytes at offset 512
26
46
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
27
if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
47
+
28
error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
48
+== verify pattern ==
29
@@ -XXX,XX +XXX,XX @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
49
+read 512/512 bytes at offset 0
30
}
50
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
31
}
51
+read 512/512 bytes at offset 512
32
}
52
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
33
+ trace_qemu_vfio_dma_mapped(s, host, iova0, size);
53
+
34
if (iova) {
54
== rewriting whole image ==
35
*iova = iova0;
55
wrote 134217728/134217728 bytes at offset 0
36
}
56
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
37
diff --git a/util/trace-events b/util/trace-events
38
index XXXXXXX..XXXXXXX 100644
39
--- a/util/trace-events
40
+++ b/util/trace-events
41
@@ -XXX,XX +XXX,XX @@ qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 0x%z
42
qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 0x%zx"
43
qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
44
qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova) "s %p host %p size 0x%zx index %d iova 0x%"PRIx64
45
-qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p host %p size 0x%zx iova 0x%"PRIx64
46
-qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d iova %p"
47
+qemu_vfio_do_mapping(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64 " size 0x%zx"
48
+qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d &iova %p"
49
+qemu_vfio_dma_mapped(void *s, void *host, uint64_t iova, size_t size) "s %p host %p <-> iova 0x%"PRIx64" size 0x%zx"
50
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
51
qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
52
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
53
--
57
--
54
2.28.0
58
2.21.0
55
59
60
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
As all commands use the ADMIN queue, it is pointless to pass
3
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
4
it as argument each time. Remove the argument, and rename the
4
extended the l1_size check from VMDK4 to VMDK3 but did not update the
5
function as nvme_admin_cmd_sync() to make this new behavior
5
default coverage in the moved comment.
6
clearer.
7
6
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
The previous vmdk4 calculation:
9
Tested-by: Eric Auger <eric.auger@redhat.com>
8
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
12
Message-id: 20201029093306.1063879-17-philmd@redhat.com
11
The added vmdk3 calculation:
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
14
Tested-by: Eric Auger <eric.auger@redhat.com>
13
(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB
14
15
Adding the calculation of vmdk3 to the comment.
16
17
In any case, VMware does not offer virtual disks more than 2TB for
18
vmdk4/vmdk3 or 64TB for the new undocumented seSparse format which is
19
not implemented yet in qemu.
20
21
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
22
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
23
Reviewed-by: Liran Alon <liran.alon@oracle.com>
24
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
25
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
26
Message-id: 20190620091057.47441-2-shmuel.eiderman@oracle.com
27
Reviewed-by: yuchenlin <yuchenlin@synology.com>
28
Reviewed-by: Max Reitz <mreitz@redhat.com>
29
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
---
30
---
16
block/nvme.c | 19 ++++++++++---------
31
block/vmdk.c | 11 ++++++++---
17
1 file changed, 10 insertions(+), 9 deletions(-)
32
1 file changed, 8 insertions(+), 3 deletions(-)
18
33
19
diff --git a/block/nvme.c b/block/nvme.c
34
diff --git a/block/vmdk.c b/block/vmdk.c
20
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
36
--- a/block/vmdk.c
22
+++ b/block/nvme.c
37
+++ b/block/vmdk.c
23
@@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
38
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
24
qemu_mutex_unlock(&q->lock);
39
return -EFBIG;
25
}
26
27
-static void nvme_cmd_sync_cb(void *opaque, int ret)
28
+static void nvme_admin_cmd_sync_cb(void *opaque, int ret)
29
{
30
int *pret = opaque;
31
*pret = ret;
32
aio_wait_kick();
33
}
34
35
-static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
36
- NvmeCmd *cmd)
37
+static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd)
38
{
39
+ BDRVNVMeState *s = bs->opaque;
40
+ NVMeQueuePair *q = s->queues[INDEX_ADMIN];
41
AioContext *aio_context = bdrv_get_aio_context(bs);
42
NVMeRequest *req;
43
int ret = -EINPROGRESS;
44
@@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
45
if (!req) {
46
return -EBUSY;
47
}
40
}
48
- nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);
41
if (l1_size > 512 * 1024 * 1024) {
49
+ nvme_submit_command(q, req, cmd, nvme_admin_cmd_sync_cb, &ret);
42
- /* Although with big capacity and small l1_entry_sectors, we can get a
50
43
+ /*
51
AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS);
44
+ * Although with big capacity and small l1_entry_sectors, we can get a
52
return ret;
45
* big l1_size, we don't want unbounded value to allocate the table.
53
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
46
- * Limit it to 512M, which is 16PB for default cluster and L2 table
54
47
- * size */
55
memset(id, 0, sizeof(*id));
48
+ * Limit it to 512M, which is:
56
cmd.dptr.prp1 = cpu_to_le64(iova);
49
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
57
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
50
+ * cluster size: 64KB, L2 table size: 512 entries
58
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
51
+ * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
59
error_setg(errp, "Failed to identify controller");
52
+ * cluster size: 512B, L2 table size: 4096 entries
60
goto out;
53
+ */
61
}
54
error_setg(errp, "L1 size too big");
62
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
55
return -EFBIG;
63
memset(id, 0, sizeof(*id));
64
cmd.cdw10 = 0;
65
cmd.nsid = cpu_to_le32(namespace);
66
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
67
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
68
error_setg(errp, "Failed to identify namespace");
69
goto out;
70
}
71
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
72
.cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
73
.cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC),
74
};
75
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
76
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
77
error_setg(errp, "Failed to create CQ io queue [%u]", n);
78
goto out_error;
79
}
80
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
81
.cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
82
.cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)),
83
};
84
- if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
85
+ if (nvme_admin_cmd_sync(bs, &cmd)) {
86
error_setg(errp, "Failed to create SQ io queue [%u]", n);
87
goto out_error;
88
}
89
@@ -XXX,XX +XXX,XX @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable,
90
.cdw11 = cpu_to_le32(enable ? 0x01 : 0x00),
91
};
92
93
- ret = nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd);
94
+ ret = nvme_admin_cmd_sync(bs, &cmd);
95
if (ret) {
96
error_setg(errp, "Failed to configure NVMe write cache");
97
}
56
}
98
--
57
--
99
2.28.0
58
2.21.0
100
59
60
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
mmap(2) states:
3
512M of L1 entries is a very loose bound, only 32M are required to store
4
the maximal supported VMDK file size of 2TB.
4
5
5
'offset' must be a multiple of the page size as returned
6
Fixed qemu-iotest 59# - now failure occurs earlier on impossible L1
6
by sysconf(_SC_PAGE_SIZE).
7
table size.
7
8
8
Add an assertion to be sure we don't break this contract.
9
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
10
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
11
Reviewed-by: Liran Alon <liran.alon@oracle.com>
12
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
13
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
14
Message-id: 20190620091057.47441-3-shmuel.eiderman@oracle.com
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
17
---
18
block/vmdk.c | 13 +++++++------
19
tests/qemu-iotests/059.out | 2 +-
20
2 files changed, 8 insertions(+), 7 deletions(-)
9
21
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
22
diff --git a/block/vmdk.c b/block/vmdk.c
11
Message-id: 20201103020733.2303148-8-philmd@redhat.com
23
index XXXXXXX..XXXXXXX 100644
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
--- a/block/vmdk.c
13
Tested-by: Eric Auger <eric.auger@redhat.com>
25
+++ b/block/vmdk.c
14
---
26
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
15
util/vfio-helpers.c | 1 +
27
error_setg(errp, "Invalid granularity, image may be corrupt");
16
1 file changed, 1 insertion(+)
28
return -EFBIG;
29
}
30
- if (l1_size > 512 * 1024 * 1024) {
31
+ if (l1_size > 32 * 1024 * 1024) {
32
/*
33
* Although with big capacity and small l1_entry_sectors, we can get a
34
* big l1_size, we don't want unbounded value to allocate the table.
35
- * Limit it to 512M, which is:
36
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
37
- * cluster size: 64KB, L2 table size: 512 entries
38
- * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
39
- * cluster size: 512B, L2 table size: 4096 entries
40
+ * Limit it to 32M, which is enough to store:
41
+ * 8TB - for both VMDK3 & VMDK4 with
42
+ * minimal cluster size: 512B
43
+ * minimal L2 table size: 512 entries
44
+ * 8 TB is still more than the maximal value supported for
45
+ * VMDK3 & VMDK4 which is 2TB.
46
*/
47
error_setg(errp, "L1 size too big");
48
return -EFBIG;
49
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tests/qemu-iotests/059.out
52
+++ b/tests/qemu-iotests/059.out
53
@@ -XXX,XX +XXX,XX @@ Offset Length Mapped to File
54
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
55
56
=== Testing afl image with a very large capacity ===
57
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
58
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
59
*** done
60
--
61
2.21.0
17
62
18
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/util/vfio-helpers.c
21
+++ b/util/vfio-helpers.c
22
@@ -XXX,XX +XXX,XX @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
23
Error **errp)
24
{
25
void *p;
26
+ assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size));
27
assert_bar_index_valid(s, index);
28
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
29
prot, MAP_SHARED,
30
--
31
2.28.0
32
63
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Just for consistency, following the example documented since
3
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
4
commit e3fe3988d7 ("error: Document Error API usage rules"),
4
QEMU).
5
return a boolean value indicating an error is set or not.
5
6
Directly pass errp as the local_err is not requested in our
6
This format was lacking in the following:
7
case.
7
8
8
* Grain directory (L1) and grain table (L2) entries were 32-bit,
9
Tested-by: Eric Auger <eric.auger@redhat.com>
9
allowing access to only 2TB (slightly less) of data.
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
* The grain size (default) was 512 bytes - leading to data
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
fragmentation and many grain tables.
12
Message-id: 20201029093306.1063879-11-philmd@redhat.com
12
* For space reclamation purposes, it was necessary to find all the
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
grains which are not pointed to by any grain table - so a reverse
14
Tested-by: Eric Auger <eric.auger@redhat.com>
14
mapping of "offset of grain in vmdk" to "grain table" must be
15
constructed - which takes large amounts of CPU/RAM.
16
17
The format specification can be found in VMware's documentation:
18
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf
19
20
In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
21
introduced: SESparse (Space Efficient).
22
23
This format fixes the above issues:
24
25
* All entries are now 64-bit.
26
* The grain size (default) is 4KB.
27
* Grain directory and grain tables are now located at the beginning
28
of the file.
29
+ seSparse format reserves space for all grain tables.
30
+ Grain tables can be addressed using an index.
31
+ Grains are located in the end of the file and can also be
32
addressed with an index.
33
- seSparse vmdks of large disks (64TB) have huge preallocated
34
headers - mainly due to L2 tables, even for empty snapshots.
35
* The header contains a reverse mapping ("backmap") of "offset of
36
grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
37
specifies for each grain - whether it is allocated or not.
38
Using these data structures we can implement space reclamation
39
efficiently.
40
* Due to the fact that the header now maintains two mappings:
41
* The regular one (grain directory & grain tables)
42
* A reverse one (backmap and free bitmap)
43
These data structures can lose consistency upon crash and result
44
in a corrupted VMDK.
45
Therefore, a journal is also added to the VMDK and is replayed
46
when the VMware reopens the file after a crash.
47
48
Since ESXi 6.7 - SESparse is the only snapshot format available.
49
50
Unfortunately, VMware does not provide documentation regarding the new
51
seSparse format.
52
53
This commit is based on black-box research of the seSparse format.
54
Various in-guest block operations and their effect on the snapshot file
55
were tested.
56
57
The only VMware provided source of information (regarding the underlying
58
implementation) was a log file on the ESXi:
59
60
/var/log/hostd.log
61
62
Whenever an seSparse snapshot is created - the log is being populated
63
with seSparse records.
64
65
Relevant log records are of the form:
66
67
[...] Const Header:
68
[...] constMagic = 0xcafebabe
69
[...] version = 2.1
70
[...] capacity = 204800
71
[...] grainSize = 8
72
[...] grainTableSize = 64
73
[...] flags = 0
74
[...] Extents:
75
[...] Header : <1 : 1>
76
[...] JournalHdr : <2 : 2>
77
[...] Journal : <2048 : 2048>
78
[...] GrainDirectory : <4096 : 2048>
79
[...] GrainTables : <6144 : 2048>
80
[...] FreeBitmap : <8192 : 2048>
81
[...] BackMap : <10240 : 2048>
82
[...] Grain : <12288 : 204800>
83
[...] Volatile Header:
84
[...] volatileMagic = 0xcafecafe
85
[...] FreeGTNumber = 0
86
[...] nextTxnSeqNumber = 0
87
[...] replayJournal = 0
88
89
The sizes that are seen in the log file are in sectors.
90
Extents are of the following format: <offset : size>
91
92
This commit is a strict implementation which enforces:
93
* magics
94
* version number 2.1
95
* grain size of 8 sectors (4KB)
96
* grain table size of 64 sectors
97
* zero flags
98
* extent locations
99
100
Additionally, this commit provides only a subset of the functionality
101
offered by seSparse's format:
102
* Read-only
103
* No journal replay
104
* No space reclamation
105
* No unmap support
106
107
Hence, journal header, journal, free bitmap and backmap extents are
108
unused, only the "classic" (L1 -> L2 -> data) grain access is
109
implemented.
110
111
However there are several differences in the grain access itself.
112
Grain directory (L1):
113
* Grain directory entries are indexes (not offsets) to grain
114
tables.
115
* Valid grain directory entries have their highest nibble set to
116
0x1.
117
* Since grain tables are always located in the beginning of the
118
file - the index can fit into 32 bits - so we can use its low
119
part if it's valid.
120
Grain table (L2):
121
* Grain table entries are indexes (not offsets) to grains.
122
* If the highest nibble of the entry is:
123
0x0:
124
The grain is not allocated.
125
The rest of the bytes are 0.
126
0x1:
127
The grain is unmapped - guest sees a zero grain.
128
The rest of the bits point to the previously mapped grain,
129
see 0x3 case.
130
0x2:
131
The grain is zero.
132
0x3:
133
The grain is allocated - to get the index calculate:
134
((entry & 0x0fff000000000000) >> 48) |
135
((entry & 0x0000ffffffffffff) << 12)
136
* The difference between 0x1 and 0x2 is that 0x1 is an unallocated
137
grain which results from the guest using sg_unmap to unmap the
138
grain - but the grain itself still exists in the grain extent - a
139
space reclamation procedure should delete it.
140
Unmapping a zero grain has no effect (0x2 will not change to 0x1)
141
but unmapping an unallocated grain will (0x0 to 0x1) - naturally.
142
143
In order to implement seSparse some fields had to be changed to support
144
both 32-bit and 64-bit entry sizes.
145
146
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
147
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
148
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
149
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
150
Message-id: 20190620091057.47441-4-shmuel.eiderman@oracle.com
151
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
---
152
---
16
block/nvme.c | 12 +++++++-----
153
block/vmdk.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++---
17
1 file changed, 7 insertions(+), 5 deletions(-)
154
1 file changed, 342 insertions(+), 16 deletions(-)
18
155
19
diff --git a/block/nvme.c b/block/nvme.c
156
diff --git a/block/vmdk.c b/block/vmdk.c
20
index XXXXXXX..XXXXXXX 100644
157
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
158
--- a/block/vmdk.c
22
+++ b/block/nvme.c
159
+++ b/block/vmdk.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
160
@@ -XXX,XX +XXX,XX @@ typedef struct {
161
uint16_t compressAlgorithm;
162
} QEMU_PACKED VMDK4Header;
163
164
+typedef struct VMDKSESparseConstHeader {
165
+ uint64_t magic;
166
+ uint64_t version;
167
+ uint64_t capacity;
168
+ uint64_t grain_size;
169
+ uint64_t grain_table_size;
170
+ uint64_t flags;
171
+ uint64_t reserved1;
172
+ uint64_t reserved2;
173
+ uint64_t reserved3;
174
+ uint64_t reserved4;
175
+ uint64_t volatile_header_offset;
176
+ uint64_t volatile_header_size;
177
+ uint64_t journal_header_offset;
178
+ uint64_t journal_header_size;
179
+ uint64_t journal_offset;
180
+ uint64_t journal_size;
181
+ uint64_t grain_dir_offset;
182
+ uint64_t grain_dir_size;
183
+ uint64_t grain_tables_offset;
184
+ uint64_t grain_tables_size;
185
+ uint64_t free_bitmap_offset;
186
+ uint64_t free_bitmap_size;
187
+ uint64_t backmap_offset;
188
+ uint64_t backmap_size;
189
+ uint64_t grains_offset;
190
+ uint64_t grains_size;
191
+ uint8_t pad[304];
192
+} QEMU_PACKED VMDKSESparseConstHeader;
193
+
194
+typedef struct VMDKSESparseVolatileHeader {
195
+ uint64_t magic;
196
+ uint64_t free_gt_number;
197
+ uint64_t next_txn_seq_number;
198
+ uint64_t replay_journal;
199
+ uint8_t pad[480];
200
+} QEMU_PACKED VMDKSESparseVolatileHeader;
201
+
202
#define L2_CACHE_SIZE 16
203
204
typedef struct VmdkExtent {
205
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkExtent {
206
bool compressed;
207
bool has_marker;
208
bool has_zero_grain;
209
+ bool sesparse;
210
+ uint64_t sesparse_l2_tables_offset;
211
+ uint64_t sesparse_clusters_offset;
212
+ int32_t entry_size;
213
int version;
214
int64_t sectors;
215
int64_t end_sector;
216
int64_t flat_start_offset;
217
int64_t l1_table_offset;
218
int64_t l1_backup_table_offset;
219
- uint32_t *l1_table;
220
+ void *l1_table;
221
uint32_t *l1_backup_table;
222
unsigned int l1_size;
223
uint32_t l1_entry_sectors;
224
225
unsigned int l2_size;
226
- uint32_t *l2_cache;
227
+ void *l2_cache;
228
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
229
uint32_t l2_cache_counts[L2_CACHE_SIZE];
230
231
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
232
* minimal L2 table size: 512 entries
233
* 8 TB is still more than the maximal value supported for
234
* VMDK3 & VMDK4 which is 2TB.
235
+ * 64TB - for "ESXi seSparse Extent"
236
+ * minimal cluster size: 512B (default is 4KB)
237
+ * L2 table size: 4096 entries (const).
238
+ * 64TB is more than the maximal value supported for
239
+ * seSparse VMDKs (which is slightly less than 64TB)
240
*/
241
error_setg(errp, "L1 size too big");
242
return -EFBIG;
243
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
244
extent->l2_size = l2_size;
245
extent->cluster_sectors = flat ? sectors : cluster_sectors;
246
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
247
+ extent->entry_size = sizeof(uint32_t);
248
249
if (s->num_extents > 1) {
250
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
251
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
252
int i;
253
254
/* read the L1 table */
255
- l1_size = extent->l1_size * sizeof(uint32_t);
256
+ l1_size = extent->l1_size * extent->entry_size;
257
extent->l1_table = g_try_malloc(l1_size);
258
if (l1_size && extent->l1_table == NULL) {
259
return -ENOMEM;
260
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
261
goto fail_l1;
262
}
263
for (i = 0; i < extent->l1_size; i++) {
264
- le32_to_cpus(&extent->l1_table[i]);
265
+ if (extent->entry_size == sizeof(uint64_t)) {
266
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
267
+ } else {
268
+ assert(extent->entry_size == sizeof(uint32_t));
269
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
270
+ }
271
}
272
273
if (extent->l1_backup_table_offset) {
274
+ assert(!extent->sesparse);
275
extent->l1_backup_table = g_try_malloc(l1_size);
276
if (l1_size && extent->l1_backup_table == NULL) {
277
ret = -ENOMEM;
278
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
279
}
280
281
extent->l2_cache =
282
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
283
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
284
return 0;
285
fail_l1b:
286
g_free(extent->l1_backup_table);
287
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
24
return ret;
288
return ret;
25
}
289
}
26
290
27
-static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
291
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
28
+/* Returns true on success, false on failure. */
292
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
29
+static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
293
+
294
+/* Strict checks - format not officially documented */
295
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
296
+ Error **errp)
297
+{
298
+ header->magic = le64_to_cpu(header->magic);
299
+ header->version = le64_to_cpu(header->version);
300
+ header->grain_size = le64_to_cpu(header->grain_size);
301
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
302
+ header->flags = le64_to_cpu(header->flags);
303
+ header->reserved1 = le64_to_cpu(header->reserved1);
304
+ header->reserved2 = le64_to_cpu(header->reserved2);
305
+ header->reserved3 = le64_to_cpu(header->reserved3);
306
+ header->reserved4 = le64_to_cpu(header->reserved4);
307
+
308
+ header->volatile_header_offset =
309
+ le64_to_cpu(header->volatile_header_offset);
310
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
311
+
312
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
313
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
314
+
315
+ header->journal_offset = le64_to_cpu(header->journal_offset);
316
+ header->journal_size = le64_to_cpu(header->journal_size);
317
+
318
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
319
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
320
+
321
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
322
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
323
+
324
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
325
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
326
+
327
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
328
+ header->backmap_size = le64_to_cpu(header->backmap_size);
329
+
330
+ header->grains_offset = le64_to_cpu(header->grains_offset);
331
+ header->grains_size = le64_to_cpu(header->grains_size);
332
+
333
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
334
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
335
+ header->magic);
336
+ return -EINVAL;
337
+ }
338
+
339
+ if (header->version != 0x0000000200000001) {
340
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
341
+ header->version);
342
+ return -ENOTSUP;
343
+ }
344
+
345
+ if (header->grain_size != 8) {
346
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
347
+ header->grain_size);
348
+ return -ENOTSUP;
349
+ }
350
+
351
+ if (header->grain_table_size != 64) {
352
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
353
+ header->grain_table_size);
354
+ return -ENOTSUP;
355
+ }
356
+
357
+ if (header->flags != 0) {
358
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
359
+ header->flags);
360
+ return -ENOTSUP;
361
+ }
362
+
363
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
364
+ header->reserved3 != 0 || header->reserved4 != 0) {
365
+ error_setg(errp, "Unsupported reserved bits:"
366
+ " 0x%016" PRIx64 " 0x%016" PRIx64
367
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
368
+ header->reserved1, header->reserved2,
369
+ header->reserved3, header->reserved4);
370
+ return -ENOTSUP;
371
+ }
372
+
373
+ /* check that padding is 0 */
374
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
375
+ error_setg(errp, "Unsupported non-zero const header padding");
376
+ return -ENOTSUP;
377
+ }
378
+
379
+ return 0;
380
+}
381
+
382
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
383
+ Error **errp)
384
+{
385
+ header->magic = le64_to_cpu(header->magic);
386
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
387
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
388
+ header->replay_journal = le64_to_cpu(header->replay_journal);
389
+
390
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
391
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
392
+ header->magic);
393
+ return -EINVAL;
394
+ }
395
+
396
+ if (header->replay_journal) {
397
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
398
+ return -ENOTSUP;
399
+ }
400
+
401
+ /* check that padding is 0 */
402
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
403
+ error_setg(errp, "Unsupported non-zero volatile header padding");
404
+ return -ENOTSUP;
405
+ }
406
+
407
+ return 0;
408
+}
409
+
410
+static int vmdk_open_se_sparse(BlockDriverState *bs,
411
+ BdrvChild *file,
412
+ int flags, Error **errp)
413
+{
414
+ int ret;
415
+ VMDKSESparseConstHeader const_header;
416
+ VMDKSESparseVolatileHeader volatile_header;
417
+ VmdkExtent *extent;
418
+
419
+ ret = bdrv_apply_auto_read_only(bs,
420
+ "No write support for seSparse images available", errp);
421
+ if (ret < 0) {
422
+ return ret;
423
+ }
424
+
425
+ assert(sizeof(const_header) == SECTOR_SIZE);
426
+
427
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
428
+ if (ret < 0) {
429
+ bdrv_refresh_filename(file->bs);
430
+ error_setg_errno(errp, -ret,
431
+ "Could not read const header from file '%s'",
432
+ file->bs->filename);
433
+ return ret;
434
+ }
435
+
436
+ /* check const header */
437
+ ret = check_se_sparse_const_header(&const_header, errp);
438
+ if (ret < 0) {
439
+ return ret;
440
+ }
441
+
442
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
443
+
444
+ ret = bdrv_pread(file,
445
+ const_header.volatile_header_offset * SECTOR_SIZE,
446
+ &volatile_header, sizeof(volatile_header));
447
+ if (ret < 0) {
448
+ bdrv_refresh_filename(file->bs);
449
+ error_setg_errno(errp, -ret,
450
+ "Could not read volatile header from file '%s'",
451
+ file->bs->filename);
452
+ return ret;
453
+ }
454
+
455
+ /* check volatile header */
456
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
457
+ if (ret < 0) {
458
+ return ret;
459
+ }
460
+
461
+ ret = vmdk_add_extent(bs, file, false,
462
+ const_header.capacity,
463
+ const_header.grain_dir_offset * SECTOR_SIZE,
464
+ 0,
465
+ const_header.grain_dir_size *
466
+ SECTOR_SIZE / sizeof(uint64_t),
467
+ const_header.grain_table_size *
468
+ SECTOR_SIZE / sizeof(uint64_t),
469
+ const_header.grain_size,
470
+ &extent,
471
+ errp);
472
+ if (ret < 0) {
473
+ return ret;
474
+ }
475
+
476
+ extent->sesparse = true;
477
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
478
+ extent->sesparse_clusters_offset = const_header.grains_offset;
479
+ extent->entry_size = sizeof(uint64_t);
480
+
481
+ ret = vmdk_init_tables(bs, extent, errp);
482
+ if (ret) {
483
+ /* free extent allocated by vmdk_add_extent */
484
+ vmdk_free_last_extent(bs);
485
+ }
486
+
487
+ return ret;
488
+}
489
+
490
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
491
QDict *options, Error **errp);
492
493
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
494
* RW [size in sectors] SPARSE "file-name.vmdk"
495
* RW [size in sectors] VMFS "file-name.vmdk"
496
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
497
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
498
*/
499
flat_offset = -1;
500
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
501
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
502
503
if (sectors <= 0 ||
504
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
505
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
506
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
507
+ strcmp(type, "SESPARSE")) ||
508
(strcmp(access, "RW"))) {
509
continue;
510
}
511
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
512
return ret;
513
}
514
extent = &s->extents[s->num_extents - 1];
515
+ } else if (!strcmp(type, "SESPARSE")) {
516
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
517
+ if (ret) {
518
+ bdrv_unref_child(bs, extent_file);
519
+ return ret;
520
+ }
521
+ extent = &s->extents[s->num_extents - 1];
522
} else {
523
error_setg(errp, "Unsupported extent type '%s'", type);
524
bdrv_unref_child(bs, extent_file);
525
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
526
if (strcmp(ct, "monolithicFlat") &&
527
strcmp(ct, "vmfs") &&
528
strcmp(ct, "vmfsSparse") &&
529
+ strcmp(ct, "seSparse") &&
530
strcmp(ct, "twoGbMaxExtentSparse") &&
531
strcmp(ct, "twoGbMaxExtentFlat")) {
532
error_setg(errp, "Unsupported image type '%s'", ct);
533
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
30
{
534
{
31
BDRVNVMeState *s = bs->opaque;
535
unsigned int l1_index, l2_offset, l2_index;
32
+ bool ret = false;
536
int min_index, i, j;
33
union {
537
- uint32_t min_count, *l2_table;
34
NvmeIdCtrl ctrl;
538
+ uint32_t min_count;
35
NvmeIdNs ns;
539
+ void *l2_table;
36
@@ -XXX,XX +XXX,XX @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
540
bool zeroed = false;
37
goto out;
541
int64_t ret;
38
}
542
int64_t cluster_sector;
39
543
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
40
+ ret = true;
544
41
s->blkshift = lbaf->ds;
545
if (m_data) {
42
out:
546
m_data->valid = 0;
43
qemu_vfio_dma_unmap(s->vfio, id);
547
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
44
qemu_vfree(id);
548
if (l1_index >= extent->l1_size) {
45
+
549
return VMDK_ERROR;
46
+ return ret;
550
}
47
}
551
- l2_offset = extent->l1_table[l1_index];
48
552
+ if (extent->sesparse) {
49
static bool nvme_poll_queue(NVMeQueuePair *q)
553
+ uint64_t l2_offset_u64;
50
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
554
+
51
uint64_t cap;
555
+ assert(extent->entry_size == sizeof(uint64_t));
52
uint64_t timeout_ms;
556
+
53
uint64_t deadline, now;
557
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
54
- Error *local_err = NULL;
558
+ if (l2_offset_u64 == 0) {
55
volatile NvmeBar *regs = NULL;
559
+ l2_offset = 0;
56
560
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
57
qemu_co_mutex_init(&s->dma_map_lock);
561
+ /*
58
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
562
+ * Top most nibble is 0x1 if grain table is allocated.
59
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
563
+ * strict check - top most 4 bytes must be 0x10000000 since max
60
false, nvme_handle_event, nvme_poll_cb);
564
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
61
565
+ * grain directories which is smaller than uint32,
62
- nvme_identify(bs, namespace, &local_err);
566
+ * where 16MB is the only supported default grain table coverage.
63
- if (local_err) {
567
+ */
64
- error_propagate(errp, local_err);
568
+ return VMDK_ERROR;
65
+ if (!nvme_identify(bs, namespace, errp)) {
569
+ } else {
66
ret = -EIO;
570
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
67
goto out;
571
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
68
}
572
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
573
+ if (l2_offset_u64 > 0x00000000ffffffff) {
574
+ return VMDK_ERROR;
575
+ }
576
+ l2_offset = (unsigned int)(l2_offset_u64);
577
+ }
578
+ } else {
579
+ assert(extent->entry_size == sizeof(uint32_t));
580
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
581
+ }
582
if (!l2_offset) {
583
return VMDK_UNALLOC;
584
}
585
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
586
extent->l2_cache_counts[j] >>= 1;
587
}
588
}
589
- l2_table = extent->l2_cache + (i * extent->l2_size);
590
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
591
goto found;
592
}
593
}
594
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
595
min_index = i;
596
}
597
}
598
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
599
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
600
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
601
if (bdrv_pread(extent->file,
602
(int64_t)l2_offset * 512,
603
l2_table,
604
- extent->l2_size * sizeof(uint32_t)
605
- ) != extent->l2_size * sizeof(uint32_t)) {
606
+ l2_size_bytes
607
+ ) != l2_size_bytes) {
608
return VMDK_ERROR;
609
}
610
611
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
612
extent->l2_cache_counts[min_index] = 1;
613
found:
614
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
615
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
616
617
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
618
- zeroed = true;
619
+ if (extent->sesparse) {
620
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
621
+ switch (cluster_sector & 0xf000000000000000) {
622
+ case 0x0000000000000000:
623
+ /* unallocated grain */
624
+ if (cluster_sector != 0) {
625
+ return VMDK_ERROR;
626
+ }
627
+ break;
628
+ case 0x1000000000000000:
629
+ /* scsi-unmapped grain - fallthrough */
630
+ case 0x2000000000000000:
631
+ /* zero grain */
632
+ zeroed = true;
633
+ break;
634
+ case 0x3000000000000000:
635
+ /* allocated grain */
636
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
637
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
638
+ cluster_sector = extent->sesparse_clusters_offset +
639
+ cluster_sector * extent->cluster_sectors;
640
+ break;
641
+ default:
642
+ return VMDK_ERROR;
643
+ }
644
+ } else {
645
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
646
+
647
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
648
+ zeroed = true;
649
+ }
650
}
651
652
if (!cluster_sector || zeroed) {
653
if (!allocate) {
654
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
655
}
656
+ assert(!extent->sesparse);
657
658
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
659
return VMDK_ERROR;
660
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
661
m_data->l1_index = l1_index;
662
m_data->l2_index = l2_index;
663
m_data->l2_offset = l2_offset;
664
- m_data->l2_cache_entry = &l2_table[l2_index];
665
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
666
}
667
}
668
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
669
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
670
if (!extent) {
671
return -EIO;
672
}
673
+ if (extent->sesparse) {
674
+ return -ENOTSUP;
675
+ }
676
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
677
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
678
- offset_in_cluster);
69
--
679
--
70
2.28.0
680
2.21.0
71
681
682
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Pino Toscano <ptoscano@redhat.com>
2
2
3
As we want to enable multiple queues, report the event
3
Rewrite the implementation of the ssh block driver to use libssh instead
4
in each nvme_poll_queue() call, rather than once in
4
of libssh2. The libssh library has various advantages over libssh2:
5
the callback calling nvme_poll_queues().
5
- easier API for authentication (for example for using ssh-agent)
6
- easier API for known_hosts handling
7
- supports newer types of keys in known_hosts
6
8
7
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Use APIs/features available in libssh 0.8 conditionally, to support
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
older versions (which are not recommended though).
9
Tested-by: Eric Auger <eric.auger@redhat.com>
11
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Adjust the iotest 207 according to the different error message, and to
11
Message-id: 20201029093306.1063879-6-philmd@redhat.com
13
find the default key type for localhost (to properly compare the
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
fingerprint with).
13
Tested-by: Eric Auger <eric.auger@redhat.com>
15
Contributed-by: Max Reitz <mreitz@redhat.com>
16
17
Adjust the various Docker/Travis scripts to use libssh when available
18
instead of libssh2. The mingw/mxe testing is dropped for now, as there
19
are no packages for it.
20
21
Signed-off-by: Pino Toscano <ptoscano@redhat.com>
22
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
Acked-by: Alex Bennée <alex.bennee@linaro.org>
24
Message-id: 20190620200840.17655-1-ptoscano@redhat.com
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Message-id: 5873173.t2JhDm7DL7@lindworm.usersys.redhat.com
27
Signed-off-by: Max Reitz <mreitz@redhat.com>
14
---
28
---
15
block/nvme.c | 2 +-
29
configure | 65 +-
16
block/trace-events | 2 +-
30
block/Makefile.objs | 6 +-
17
2 files changed, 2 insertions(+), 2 deletions(-)
31
block/ssh.c | 652 ++++++++++--------
32
.travis.yml | 4 +-
33
block/trace-events | 14 +-
34
docs/qemu-block-drivers.texi | 2 +-
35
.../dockerfiles/debian-win32-cross.docker | 1 -
36
.../dockerfiles/debian-win64-cross.docker | 1 -
37
tests/docker/dockerfiles/fedora.docker | 4 +-
38
tests/docker/dockerfiles/ubuntu.docker | 2 +-
39
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
40
tests/qemu-iotests/207 | 54 +-
41
tests/qemu-iotests/207.out | 2 +-
42
13 files changed, 449 insertions(+), 360 deletions(-)
18
43
19
diff --git a/block/nvme.c b/block/nvme.c
44
diff --git a/configure b/configure
45
index XXXXXXX..XXXXXXX 100755
46
--- a/configure
47
+++ b/configure
48
@@ -XXX,XX +XXX,XX @@ auth_pam=""
49
vte=""
50
virglrenderer=""
51
tpm=""
52
-libssh2=""
53
+libssh=""
54
live_block_migration="yes"
55
numa=""
56
tcmalloc="no"
57
@@ -XXX,XX +XXX,XX @@ for opt do
58
;;
59
--enable-tpm) tpm="yes"
60
;;
61
- --disable-libssh2) libssh2="no"
62
+ --disable-libssh) libssh="no"
63
;;
64
- --enable-libssh2) libssh2="yes"
65
+ --enable-libssh) libssh="yes"
66
;;
67
--disable-live-block-migration) live_block_migration="no"
68
;;
69
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
70
coroutine-pool coroutine freelist (better performance)
71
glusterfs GlusterFS backend
72
tpm TPM support
73
- libssh2 ssh block device support
74
+ libssh ssh block device support
75
numa libnuma support
76
libxml2 for Parallels image format
77
tcmalloc tcmalloc support
78
@@ -XXX,XX +XXX,XX @@ EOF
79
fi
80
81
##########################################
82
-# libssh2 probe
83
-min_libssh2_version=1.2.8
84
-if test "$libssh2" != "no" ; then
85
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
86
- libssh2_cflags=$($pkg_config libssh2 --cflags)
87
- libssh2_libs=$($pkg_config libssh2 --libs)
88
- libssh2=yes
89
+# libssh probe
90
+if test "$libssh" != "no" ; then
91
+ if $pkg_config --exists libssh; then
92
+ libssh_cflags=$($pkg_config libssh --cflags)
93
+ libssh_libs=$($pkg_config libssh --libs)
94
+ libssh=yes
95
else
96
- if test "$libssh2" = "yes" ; then
97
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
98
+ if test "$libssh" = "yes" ; then
99
+ error_exit "libssh required for --enable-libssh"
100
fi
101
- libssh2=no
102
+ libssh=no
103
fi
104
fi
105
106
##########################################
107
-# libssh2_sftp_fsync probe
108
+# Check for libssh 0.8
109
+# This is done like this instead of using the LIBSSH_VERSION_* and
110
+# SSH_VERSION_* macros because some distributions in the past shipped
111
+# snapshots of the future 0.8 from Git, and those snapshots did not
112
+# have updated version numbers (still referring to 0.7.0).
113
114
-if test "$libssh2" = "yes"; then
115
+if test "$libssh" = "yes"; then
116
cat > $TMPC <<EOF
117
-#include <stdio.h>
118
-#include <libssh2.h>
119
-#include <libssh2_sftp.h>
120
-int main(void) {
121
- LIBSSH2_SESSION *session;
122
- LIBSSH2_SFTP *sftp;
123
- LIBSSH2_SFTP_HANDLE *sftp_handle;
124
- session = libssh2_session_init ();
125
- sftp = libssh2_sftp_init (session);
126
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
127
- libssh2_sftp_fsync (sftp_handle);
128
- return 0;
129
-}
130
+#include <libssh/libssh.h>
131
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
132
EOF
133
- # libssh2_cflags/libssh2_libs defined in previous test.
134
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
135
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
136
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
137
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
138
fi
139
fi
140
141
@@ -XXX,XX +XXX,XX @@ echo "GlusterFS support $glusterfs"
142
echo "gcov $gcov_tool"
143
echo "gcov enabled $gcov"
144
echo "TPM support $tpm"
145
-echo "libssh2 support $libssh2"
146
+echo "libssh support $libssh"
147
echo "QOM debugging $qom_cast_debug"
148
echo "Live block migration $live_block_migration"
149
echo "lzo support $lzo"
150
@@ -XXX,XX +XXX,XX @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
151
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
152
fi
153
154
-if test "$libssh2" = "yes" ; then
155
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
156
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
157
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
158
+if test "$libssh" = "yes" ; then
159
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
160
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
161
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
162
fi
163
164
if test "$live_block_migration" = "yes" ; then
165
diff --git a/block/Makefile.objs b/block/Makefile.objs
20
index XXXXXXX..XXXXXXX 100644
166
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
167
--- a/block/Makefile.objs
22
+++ b/block/nvme.c
168
+++ b/block/Makefile.objs
23
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queue(NVMeQueuePair *q)
169
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_CURL) += curl.o
24
const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
170
block-obj-$(CONFIG_RBD) += rbd.o
25
NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
171
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
26
172
block-obj-$(CONFIG_VXHS) += vxhs.o
27
+ trace_nvme_poll_queue(q->s, q->index);
173
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
28
/*
174
+block-obj-$(CONFIG_LIBSSH) += ssh.o
29
* Do an early check for completions. q->lock isn't needed because
175
block-obj-y += accounting.o dirty-bitmap.o
30
* nvme_process_completion() only runs in the event loop thread and
176
block-obj-y += write-threshold.o
31
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_cb(void *opaque)
177
block-obj-y += backup.o
32
BDRVNVMeState *s = container_of(e, BDRVNVMeState,
178
@@ -XXX,XX +XXX,XX @@ rbd.o-libs := $(RBD_LIBS)
33
irq_notifier[MSIX_SHARED_IRQ_IDX]);
179
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
34
180
gluster.o-libs := $(GLUSTERFS_LIBS)
35
- trace_nvme_poll_cb(s);
181
vxhs.o-libs := $(VXHS_LIBS)
36
return nvme_poll_queues(s);
182
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
183
-ssh.o-libs := $(LIBSSH2_LIBS)
184
+ssh.o-cflags := $(LIBSSH_CFLAGS)
185
+ssh.o-libs := $(LIBSSH_LIBS)
186
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
187
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
188
dmg-bz2.o-libs := $(BZIP2_LIBS)
189
diff --git a/block/ssh.c b/block/ssh.c
190
index XXXXXXX..XXXXXXX 100644
191
--- a/block/ssh.c
192
+++ b/block/ssh.c
193
@@ -XXX,XX +XXX,XX @@
194
195
#include "qemu/osdep.h"
196
197
-#include <libssh2.h>
198
-#include <libssh2_sftp.h>
199
+#include <libssh/libssh.h>
200
+#include <libssh/sftp.h>
201
202
#include "block/block_int.h"
203
#include "block/qdict.h"
204
@@ -XXX,XX +XXX,XX @@
205
#include "trace.h"
206
207
/*
208
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
209
- * that this requires that libssh2 was specially compiled with the
210
- * `./configure --enable-debug' option, so most likely you will have
211
- * to compile it yourself. The meaning of <bitmask> is described
212
- * here: http://www.libssh2.org/libssh2_trace.html
213
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
214
+ * The meaning of <level> is described here:
215
+ * http://api.libssh.org/master/group__libssh__log.html
216
*/
217
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
218
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
219
220
typedef struct BDRVSSHState {
221
/* Coroutine. */
222
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSSHState {
223
224
/* SSH connection. */
225
int sock; /* socket */
226
- LIBSSH2_SESSION *session; /* ssh session */
227
- LIBSSH2_SFTP *sftp; /* sftp session */
228
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
229
+ ssh_session session; /* ssh session */
230
+ sftp_session sftp; /* sftp session */
231
+ sftp_file sftp_handle; /* sftp remote file handle */
232
233
- /* See ssh_seek() function below. */
234
- int64_t offset;
235
- bool offset_op_read;
236
-
237
- /* File attributes at open. We try to keep the .filesize field
238
+ /*
239
+ * File attributes at open. We try to keep the .size field
240
* updated if it changes (eg by writing at the end of the file).
241
*/
242
- LIBSSH2_SFTP_ATTRIBUTES attrs;
243
+ sftp_attributes attrs;
244
245
InetSocketAddress *inet;
246
247
@@ -XXX,XX +XXX,XX @@ static void ssh_state_init(BDRVSSHState *s)
248
{
249
memset(s, 0, sizeof *s);
250
s->sock = -1;
251
- s->offset = -1;
252
qemu_co_mutex_init(&s->lock);
37
}
253
}
38
254
255
@@ -XXX,XX +XXX,XX @@ static void ssh_state_free(BDRVSSHState *s)
256
{
257
g_free(s->user);
258
259
+ if (s->attrs) {
260
+ sftp_attributes_free(s->attrs);
261
+ }
262
if (s->sftp_handle) {
263
- libssh2_sftp_close(s->sftp_handle);
264
+ sftp_close(s->sftp_handle);
265
}
266
if (s->sftp) {
267
- libssh2_sftp_shutdown(s->sftp);
268
+ sftp_free(s->sftp);
269
}
270
if (s->session) {
271
- libssh2_session_disconnect(s->session,
272
- "from qemu ssh client: "
273
- "user closed the connection");
274
- libssh2_session_free(s->session);
275
- }
276
- if (s->sock >= 0) {
277
- close(s->sock);
278
+ ssh_disconnect(s->session);
279
+ ssh_free(s->session); /* This frees s->sock */
280
}
281
}
282
283
@@ -XXX,XX +XXX,XX @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
284
va_end(args);
285
286
if (s->session) {
287
- char *ssh_err;
288
+ const char *ssh_err;
289
int ssh_err_code;
290
291
- /* This is not an errno. See <libssh2.h>. */
292
- ssh_err_code = libssh2_session_last_error(s->session,
293
- &ssh_err, NULL, 0);
294
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
295
+ /* This is not an errno. See <libssh/libssh.h>. */
296
+ ssh_err = ssh_get_error(s->session);
297
+ ssh_err_code = ssh_get_error_code(s->session);
298
+ error_setg(errp, "%s: %s (libssh error code: %d)",
299
msg, ssh_err, ssh_err_code);
300
} else {
301
error_setg(errp, "%s", msg);
302
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
303
va_end(args);
304
305
if (s->sftp) {
306
- char *ssh_err;
307
+ const char *ssh_err;
308
int ssh_err_code;
309
- unsigned long sftp_err_code;
310
+ int sftp_err_code;
311
312
- /* This is not an errno. See <libssh2.h>. */
313
- ssh_err_code = libssh2_session_last_error(s->session,
314
- &ssh_err, NULL, 0);
315
- /* See <libssh2_sftp.h>. */
316
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
317
+ /* This is not an errno. See <libssh/libssh.h>. */
318
+ ssh_err = ssh_get_error(s->session);
319
+ ssh_err_code = ssh_get_error_code(s->session);
320
+ /* See <libssh/sftp.h>. */
321
+ sftp_err_code = sftp_get_error(s->sftp);
322
323
error_setg(errp,
324
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
325
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
326
msg, ssh_err, ssh_err_code, sftp_err_code);
327
} else {
328
error_setg(errp, "%s", msg);
329
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
330
331
static void sftp_error_trace(BDRVSSHState *s, const char *op)
332
{
333
- char *ssh_err;
334
+ const char *ssh_err;
335
int ssh_err_code;
336
- unsigned long sftp_err_code;
337
+ int sftp_err_code;
338
339
- /* This is not an errno. See <libssh2.h>. */
340
- ssh_err_code = libssh2_session_last_error(s->session,
341
- &ssh_err, NULL, 0);
342
- /* See <libssh2_sftp.h>. */
343
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
344
+ /* This is not an errno. See <libssh/libssh.h>. */
345
+ ssh_err = ssh_get_error(s->session);
346
+ ssh_err_code = ssh_get_error_code(s->session);
347
+ /* See <libssh/sftp.h>. */
348
+ sftp_err_code = sftp_get_error(s->sftp);
349
350
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
351
}
352
@@ -XXX,XX +XXX,XX @@ static void ssh_parse_filename(const char *filename, QDict *options,
353
parse_uri(filename, options, errp);
354
}
355
356
-static int check_host_key_knownhosts(BDRVSSHState *s,
357
- const char *host, int port, Error **errp)
358
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
359
{
360
- const char *home;
361
- char *knh_file = NULL;
362
- LIBSSH2_KNOWNHOSTS *knh = NULL;
363
- struct libssh2_knownhost *found;
364
- int ret, r;
365
- const char *hostkey;
366
- size_t len;
367
- int type;
368
-
369
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
370
- if (!hostkey) {
371
+ int ret;
372
+#ifdef HAVE_LIBSSH_0_8
373
+ enum ssh_known_hosts_e state;
374
+ int r;
375
+ ssh_key pubkey;
376
+ enum ssh_keytypes_e pubkey_type;
377
+ unsigned char *server_hash = NULL;
378
+ size_t server_hash_len;
379
+ char *fingerprint = NULL;
380
+
381
+ state = ssh_session_is_known_server(s->session);
382
+ trace_ssh_server_status(state);
383
+
384
+ switch (state) {
385
+ case SSH_KNOWN_HOSTS_OK:
386
+ /* OK */
387
+ trace_ssh_check_host_key_knownhosts();
388
+ break;
389
+ case SSH_KNOWN_HOSTS_CHANGED:
390
ret = -EINVAL;
391
- session_error_setg(errp, s, "failed to read remote host key");
392
+ r = ssh_get_server_publickey(s->session, &pubkey);
393
+ if (r == 0) {
394
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
395
+ &server_hash, &server_hash_len);
396
+ pubkey_type = ssh_key_type(pubkey);
397
+ ssh_key_free(pubkey);
398
+ }
399
+ if (r == 0) {
400
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
401
+ server_hash,
402
+ server_hash_len);
403
+ ssh_clean_pubkey_hash(&server_hash);
404
+ }
405
+ if (fingerprint) {
406
+ error_setg(errp,
407
+ "host key (%s key with fingerprint %s) does not match "
408
+ "the one in known_hosts; this may be a possible attack",
409
+ ssh_key_type_to_char(pubkey_type), fingerprint);
410
+ ssh_string_free_char(fingerprint);
411
+ } else {
412
+ error_setg(errp,
413
+ "host key does not match the one in known_hosts; this "
414
+ "may be a possible attack");
415
+ }
416
goto out;
417
- }
418
-
419
- knh = libssh2_knownhost_init(s->session);
420
- if (!knh) {
421
+ case SSH_KNOWN_HOSTS_OTHER:
422
ret = -EINVAL;
423
- session_error_setg(errp, s,
424
- "failed to initialize known hosts support");
425
+ error_setg(errp,
426
+ "host key for this server not found, another type exists");
427
+ goto out;
428
+ case SSH_KNOWN_HOSTS_UNKNOWN:
429
+ ret = -EINVAL;
430
+ error_setg(errp, "no host key was found in known_hosts");
431
+ goto out;
432
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
433
+ ret = -ENOENT;
434
+ error_setg(errp, "known_hosts file not found");
435
+ goto out;
436
+ case SSH_KNOWN_HOSTS_ERROR:
437
+ ret = -EINVAL;
438
+ error_setg(errp, "error while checking the host");
439
+ goto out;
440
+ default:
441
+ ret = -EINVAL;
442
+ error_setg(errp, "error while checking for known server (%d)", state);
443
goto out;
444
}
445
+#else /* !HAVE_LIBSSH_0_8 */
446
+ int state;
447
448
- home = getenv("HOME");
449
- if (home) {
450
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
451
- } else {
452
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
453
- }
454
-
455
- /* Read all known hosts from OpenSSH-style known_hosts file. */
456
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
457
+ state = ssh_is_server_known(s->session);
458
+ trace_ssh_server_status(state);
459
460
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
461
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
462
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
463
- &found);
464
- switch (r) {
465
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
466
+ switch (state) {
467
+ case SSH_SERVER_KNOWN_OK:
468
/* OK */
469
- trace_ssh_check_host_key_knownhosts(found->key);
470
+ trace_ssh_check_host_key_knownhosts();
471
break;
472
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
473
+ case SSH_SERVER_KNOWN_CHANGED:
474
ret = -EINVAL;
475
- session_error_setg(errp, s,
476
- "host key does not match the one in known_hosts"
477
- " (found key %s)", found->key);
478
+ error_setg(errp,
479
+ "host key does not match the one in known_hosts; this "
480
+ "may be a possible attack");
481
goto out;
482
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
483
+ case SSH_SERVER_FOUND_OTHER:
484
ret = -EINVAL;
485
- session_error_setg(errp, s, "no host key was found in known_hosts");
486
+ error_setg(errp,
487
+ "host key for this server not found, another type exists");
488
+ goto out;
489
+ case SSH_SERVER_FILE_NOT_FOUND:
490
+ ret = -ENOENT;
491
+ error_setg(errp, "known_hosts file not found");
492
goto out;
493
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
494
+ case SSH_SERVER_NOT_KNOWN:
495
ret = -EINVAL;
496
- session_error_setg(errp, s,
497
- "failure matching the host key with known_hosts");
498
+ error_setg(errp, "no host key was found in known_hosts");
499
+ goto out;
500
+ case SSH_SERVER_ERROR:
501
+ ret = -EINVAL;
502
+ error_setg(errp, "server error");
503
goto out;
504
default:
505
ret = -EINVAL;
506
- session_error_setg(errp, s, "unknown error matching the host key"
507
- " with known_hosts (%d)", r);
508
+ error_setg(errp, "error while checking for known server (%d)", state);
509
goto out;
510
}
511
+#endif /* !HAVE_LIBSSH_0_8 */
512
513
/* known_hosts checking successful. */
514
ret = 0;
515
516
out:
517
- if (knh != NULL) {
518
- libssh2_knownhost_free(knh);
519
- }
520
- g_free(knh_file);
521
return ret;
522
}
523
524
@@ -XXX,XX +XXX,XX @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
525
526
static int
527
check_host_key_hash(BDRVSSHState *s, const char *hash,
528
- int hash_type, size_t fingerprint_len, Error **errp)
529
+ enum ssh_publickey_hash_type type, Error **errp)
530
{
531
- const char *fingerprint;
532
-
533
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
534
- if (!fingerprint) {
535
+ int r;
536
+ ssh_key pubkey;
537
+ unsigned char *server_hash;
538
+ size_t server_hash_len;
539
+
540
+#ifdef HAVE_LIBSSH_0_8
541
+ r = ssh_get_server_publickey(s->session, &pubkey);
542
+#else
543
+ r = ssh_get_publickey(s->session, &pubkey);
544
+#endif
545
+ if (r != SSH_OK) {
546
session_error_setg(errp, s, "failed to read remote host key");
547
return -EINVAL;
548
}
549
550
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
551
- hash) != 0) {
552
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
553
+ ssh_key_free(pubkey);
554
+ if (r != 0) {
555
+ session_error_setg(errp, s,
556
+ "failed reading the hash of the server SSH key");
557
+ return -EINVAL;
558
+ }
559
+
560
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
561
+ ssh_clean_pubkey_hash(&server_hash);
562
+ if (r != 0) {
563
error_setg(errp, "remote host key does not match host_key_check '%s'",
564
hash);
565
return -EPERM;
566
@@ -XXX,XX +XXX,XX @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
567
return 0;
568
}
569
570
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
571
- SshHostKeyCheck *hkc, Error **errp)
572
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
573
{
574
SshHostKeyCheckMode mode;
575
576
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
577
case SSH_HOST_KEY_CHECK_MODE_HASH:
578
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
579
return check_host_key_hash(s, hkc->u.hash.hash,
580
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
581
+ SSH_PUBLICKEY_HASH_MD5, errp);
582
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
583
return check_host_key_hash(s, hkc->u.hash.hash,
584
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
585
+ SSH_PUBLICKEY_HASH_SHA1, errp);
586
}
587
g_assert_not_reached();
588
break;
589
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
590
- return check_host_key_knownhosts(s, host, port, errp);
591
+ return check_host_key_knownhosts(s, errp);
592
default:
593
g_assert_not_reached();
594
}
595
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
596
return -EINVAL;
597
}
598
599
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
600
+static int authenticate(BDRVSSHState *s, Error **errp)
601
{
602
int r, ret;
603
- const char *userauthlist;
604
- LIBSSH2_AGENT *agent = NULL;
605
- struct libssh2_agent_publickey *identity;
606
- struct libssh2_agent_publickey *prev_identity = NULL;
607
+ int method;
608
609
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
610
- if (strstr(userauthlist, "publickey") == NULL) {
611
+ /* Try to authenticate with the "none" method. */
612
+ r = ssh_userauth_none(s->session, NULL);
613
+ if (r == SSH_AUTH_ERROR) {
614
ret = -EPERM;
615
- error_setg(errp,
616
- "remote server does not support \"publickey\" authentication");
617
+ session_error_setg(errp, s, "failed to authenticate using none "
618
+ "authentication");
619
goto out;
620
- }
621
-
622
- /* Connect to ssh-agent and try each identity in turn. */
623
- agent = libssh2_agent_init(s->session);
624
- if (!agent) {
625
- ret = -EINVAL;
626
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
627
- goto out;
628
- }
629
- if (libssh2_agent_connect(agent)) {
630
- ret = -ECONNREFUSED;
631
- session_error_setg(errp, s, "failed to connect to ssh-agent");
632
- goto out;
633
- }
634
- if (libssh2_agent_list_identities(agent)) {
635
- ret = -EINVAL;
636
- session_error_setg(errp, s,
637
- "failed requesting identities from ssh-agent");
638
+ } else if (r == SSH_AUTH_SUCCESS) {
639
+ /* Authenticated! */
640
+ ret = 0;
641
goto out;
642
}
643
644
- for(;;) {
645
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
646
- if (r == 1) { /* end of list */
647
- break;
648
- }
649
- if (r < 0) {
650
+ method = ssh_userauth_list(s->session, NULL);
651
+ trace_ssh_auth_methods(method);
652
+
653
+ /*
654
+ * Try to authenticate with publickey, using the ssh-agent
655
+ * if available.
656
+ */
657
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
658
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
659
+ if (r == SSH_AUTH_ERROR) {
660
ret = -EINVAL;
661
- session_error_setg(errp, s,
662
- "failed to obtain identity from ssh-agent");
663
+ session_error_setg(errp, s, "failed to authenticate using "
664
+ "publickey authentication");
665
goto out;
666
- }
667
- r = libssh2_agent_userauth(agent, user, identity);
668
- if (r == 0) {
669
+ } else if (r == SSH_AUTH_SUCCESS) {
670
/* Authenticated! */
671
ret = 0;
672
goto out;
673
}
674
- /* Failed to authenticate with this identity, try the next one. */
675
- prev_identity = identity;
676
}
677
678
ret = -EPERM;
679
@@ -XXX,XX +XXX,XX @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
680
"and the identities held by your ssh-agent");
681
682
out:
683
- if (agent != NULL) {
684
- /* Note: libssh2 implementation implicitly calls
685
- * libssh2_agent_disconnect if necessary.
686
- */
687
- libssh2_agent_free(agent);
688
- }
689
-
690
return ret;
691
}
692
693
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
694
int ssh_flags, int creat_mode, Error **errp)
695
{
696
int r, ret;
697
- long port = 0;
698
+ unsigned int port = 0;
699
+ int new_sock = -1;
700
701
if (opts->has_user) {
702
s->user = g_strdup(opts->user);
703
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
704
s->inet = opts->server;
705
opts->server = NULL;
706
707
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
708
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
709
error_setg(errp, "Use only numeric port value");
710
ret = -EINVAL;
711
goto err;
712
}
713
714
/* Open the socket and connect. */
715
- s->sock = inet_connect_saddr(s->inet, errp);
716
- if (s->sock < 0) {
717
+ new_sock = inet_connect_saddr(s->inet, errp);
718
+ if (new_sock < 0) {
719
ret = -EIO;
720
goto err;
721
}
722
723
+ /*
724
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
725
+ * but do not fail if it cannot be disabled.
726
+ */
727
+ r = socket_set_nodelay(new_sock);
728
+ if (r < 0) {
729
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
730
+ s->inet->host, strerror(errno));
731
+ }
732
+
733
/* Create SSH session. */
734
- s->session = libssh2_session_init();
735
+ s->session = ssh_new();
736
if (!s->session) {
737
ret = -EINVAL;
738
- session_error_setg(errp, s, "failed to initialize libssh2 session");
739
+ session_error_setg(errp, s, "failed to initialize libssh session");
740
goto err;
741
}
742
743
-#if TRACE_LIBSSH2 != 0
744
- libssh2_trace(s->session, TRACE_LIBSSH2);
745
-#endif
746
+ /*
747
+ * Make sure we are in blocking mode during the connection and
748
+ * authentication phases.
749
+ */
750
+ ssh_set_blocking(s->session, 1);
751
752
- r = libssh2_session_handshake(s->session, s->sock);
753
- if (r != 0) {
754
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
755
+ if (r < 0) {
756
+ ret = -EINVAL;
757
+ session_error_setg(errp, s,
758
+ "failed to set the user in the libssh session");
759
+ goto err;
760
+ }
761
+
762
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
763
+ if (r < 0) {
764
+ ret = -EINVAL;
765
+ session_error_setg(errp, s,
766
+ "failed to set the host in the libssh session");
767
+ goto err;
768
+ }
769
+
770
+ if (port > 0) {
771
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
772
+ if (r < 0) {
773
+ ret = -EINVAL;
774
+ session_error_setg(errp, s,
775
+ "failed to set the port in the libssh session");
776
+ goto err;
777
+ }
778
+ }
779
+
780
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
781
+ if (r < 0) {
782
+ ret = -EINVAL;
783
+ session_error_setg(errp, s,
784
+ "failed to disable the compression in the libssh "
785
+ "session");
786
+ goto err;
787
+ }
788
+
789
+ /* Read ~/.ssh/config. */
790
+ r = ssh_options_parse_config(s->session, NULL);
791
+ if (r < 0) {
792
+ ret = -EINVAL;
793
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
794
+ goto err;
795
+ }
796
+
797
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
798
+ if (r < 0) {
799
+ ret = -EINVAL;
800
+ session_error_setg(errp, s,
801
+ "failed to set the socket in the libssh session");
802
+ goto err;
803
+ }
804
+ /* libssh took ownership of the socket. */
805
+ s->sock = new_sock;
806
+ new_sock = -1;
807
+
808
+ /* Connect. */
809
+ r = ssh_connect(s->session);
810
+ if (r != SSH_OK) {
811
ret = -EINVAL;
812
session_error_setg(errp, s, "failed to establish SSH session");
813
goto err;
814
}
815
816
/* Check the remote host's key against known_hosts. */
817
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
818
+ ret = check_host_key(s, opts->host_key_check, errp);
819
if (ret < 0) {
820
goto err;
821
}
822
823
/* Authenticate. */
824
- ret = authenticate(s, s->user, errp);
825
+ ret = authenticate(s, errp);
826
if (ret < 0) {
827
goto err;
828
}
829
830
/* Start SFTP. */
831
- s->sftp = libssh2_sftp_init(s->session);
832
+ s->sftp = sftp_new(s->session);
833
if (!s->sftp) {
834
- session_error_setg(errp, s, "failed to initialize sftp handle");
835
+ session_error_setg(errp, s, "failed to create sftp handle");
836
+ ret = -EINVAL;
837
+ goto err;
838
+ }
839
+
840
+ r = sftp_init(s->sftp);
841
+ if (r < 0) {
842
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
843
ret = -EINVAL;
844
goto err;
845
}
846
847
/* Open the remote file. */
848
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
849
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
850
- creat_mode);
851
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
852
if (!s->sftp_handle) {
853
- session_error_setg(errp, s, "failed to open remote file '%s'",
854
- opts->path);
855
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
856
+ opts->path);
857
ret = -EINVAL;
858
goto err;
859
}
860
861
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
862
- if (r < 0) {
863
+ /* Make sure the SFTP file is handled in blocking mode. */
864
+ sftp_file_set_blocking(s->sftp_handle);
865
+
866
+ s->attrs = sftp_fstat(s->sftp_handle);
867
+ if (!s->attrs) {
868
sftp_error_setg(errp, s, "failed to read file attributes");
869
return -EINVAL;
870
}
871
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
872
return 0;
873
874
err:
875
+ if (s->attrs) {
876
+ sftp_attributes_free(s->attrs);
877
+ }
878
+ s->attrs = NULL;
879
if (s->sftp_handle) {
880
- libssh2_sftp_close(s->sftp_handle);
881
+ sftp_close(s->sftp_handle);
882
}
883
s->sftp_handle = NULL;
884
if (s->sftp) {
885
- libssh2_sftp_shutdown(s->sftp);
886
+ sftp_free(s->sftp);
887
}
888
s->sftp = NULL;
889
if (s->session) {
890
- libssh2_session_disconnect(s->session,
891
- "from qemu ssh client: "
892
- "error opening connection");
893
- libssh2_session_free(s->session);
894
+ ssh_disconnect(s->session);
895
+ ssh_free(s->session);
896
}
897
s->session = NULL;
898
+ s->sock = -1;
899
+ if (new_sock >= 0) {
900
+ close(new_sock);
901
+ }
902
903
return ret;
904
}
905
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
906
907
ssh_state_init(s);
908
909
- ssh_flags = LIBSSH2_FXF_READ;
910
+ ssh_flags = 0;
911
if (bdrv_flags & BDRV_O_RDWR) {
912
- ssh_flags |= LIBSSH2_FXF_WRITE;
913
+ ssh_flags |= O_RDWR;
914
+ } else {
915
+ ssh_flags |= O_RDONLY;
916
}
917
918
opts = ssh_parse_options(options, errp);
919
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
920
}
921
922
/* Go non-blocking. */
923
- libssh2_session_set_blocking(s->session, 0);
924
+ ssh_set_blocking(s->session, 0);
925
926
qapi_free_BlockdevOptionsSsh(opts);
927
928
return 0;
929
930
err:
931
- if (s->sock >= 0) {
932
- close(s->sock);
933
- }
934
- s->sock = -1;
935
-
936
qapi_free_BlockdevOptionsSsh(opts);
937
938
return ret;
939
@@ -XXX,XX +XXX,XX @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
940
{
941
ssize_t ret;
942
char c[1] = { '\0' };
943
- int was_blocking = libssh2_session_get_blocking(s->session);
944
+ int was_blocking = ssh_is_blocking(s->session);
945
946
/* offset must be strictly greater than the current size so we do
947
* not overwrite anything */
948
- assert(offset > 0 && offset > s->attrs.filesize);
949
+ assert(offset > 0 && offset > s->attrs->size);
950
951
- libssh2_session_set_blocking(s->session, 1);
952
+ ssh_set_blocking(s->session, 1);
953
954
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
955
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
956
+ sftp_seek64(s->sftp_handle, offset - 1);
957
+ ret = sftp_write(s->sftp_handle, c, 1);
958
959
- libssh2_session_set_blocking(s->session, was_blocking);
960
+ ssh_set_blocking(s->session, was_blocking);
961
962
if (ret < 0) {
963
sftp_error_setg(errp, s, "Failed to grow file");
964
return -EIO;
965
}
966
967
- s->attrs.filesize = offset;
968
+ s->attrs->size = offset;
969
return 0;
970
}
971
972
@@ -XXX,XX +XXX,XX @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
973
ssh_state_init(&s);
974
975
ret = connect_to_ssh(&s, opts->location,
976
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
977
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
978
+ O_RDWR | O_CREAT | O_TRUNC,
979
0644, errp);
980
if (ret < 0) {
981
goto fail;
982
@@ -XXX,XX +XXX,XX @@ static int ssh_has_zero_init(BlockDriverState *bs)
983
/* Assume false, unless we can positively prove it's true. */
984
int has_zero_init = 0;
985
986
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
987
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
988
- has_zero_init = 1;
989
- }
990
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
991
+ has_zero_init = 1;
992
}
993
994
return has_zero_init;
995
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
996
.co = qemu_coroutine_self()
997
};
998
999
- r = libssh2_session_block_directions(s->session);
1000
+ r = ssh_get_poll_flags(s->session);
1001
1002
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
1003
+ if (r & SSH_READ_PENDING) {
1004
rd_handler = restart_coroutine;
1005
}
1006
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
1007
+ if (r & SSH_WRITE_PENDING) {
1008
wr_handler = restart_coroutine;
1009
}
1010
1011
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1012
trace_ssh_co_yield_back(s->sock);
1013
}
1014
1015
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
1016
- * in the remote file. Notice that it just updates a field in the
1017
- * sftp_handle structure, so there is no network traffic and it cannot
1018
- * fail.
1019
- *
1020
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
1021
- * performance since it causes the handle to throw away all in-flight
1022
- * reads and buffered readahead data. Therefore this function tries
1023
- * to be intelligent about when to call the underlying libssh2 function.
1024
- */
1025
-#define SSH_SEEK_WRITE 0
1026
-#define SSH_SEEK_READ 1
1027
-#define SSH_SEEK_FORCE 2
1028
-
1029
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
1030
-{
1031
- bool op_read = (flags & SSH_SEEK_READ) != 0;
1032
- bool force = (flags & SSH_SEEK_FORCE) != 0;
1033
-
1034
- if (force || op_read != s->offset_op_read || offset != s->offset) {
1035
- trace_ssh_seek(offset);
1036
- libssh2_sftp_seek64(s->sftp_handle, offset);
1037
- s->offset = offset;
1038
- s->offset_op_read = op_read;
1039
- }
1040
-}
1041
-
1042
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1043
int64_t offset, size_t size,
1044
QEMUIOVector *qiov)
1045
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1046
1047
trace_ssh_read(offset, size);
1048
1049
- ssh_seek(s, offset, SSH_SEEK_READ);
1050
+ trace_ssh_seek(offset);
1051
+ sftp_seek64(s->sftp_handle, offset);
1052
1053
/* This keeps track of the current iovec element ('i'), where we
1054
* will write to next ('buf'), and the end of the current iovec
1055
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1056
buf = i->iov_base;
1057
end_of_vec = i->iov_base + i->iov_len;
1058
1059
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
1060
- * although it will also do readahead behind our backs. Therefore
1061
- * we may have to do repeated reads here until we have read 'size'
1062
- * bytes.
1063
- */
1064
for (got = 0; got < size; ) {
1065
+ size_t request_read_size;
1066
again:
1067
- trace_ssh_read_buf(buf, end_of_vec - buf);
1068
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
1069
- trace_ssh_read_return(r);
1070
+ /*
1071
+ * The size of SFTP packets is limited to 32K bytes, so limit
1072
+ * the amount of data requested to 16K, as libssh currently
1073
+ * does not handle multiple requests on its own.
1074
+ */
1075
+ request_read_size = MIN(end_of_vec - buf, 16384);
1076
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
1077
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
1078
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
1079
1080
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1081
+ if (r == SSH_AGAIN) {
1082
co_yield(s, bs);
1083
goto again;
1084
}
1085
- if (r < 0) {
1086
- sftp_error_trace(s, "read");
1087
- s->offset = -1;
1088
- return -EIO;
1089
- }
1090
- if (r == 0) {
1091
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
1092
/* EOF: Short read so pad the buffer with zeroes and return it. */
1093
qemu_iovec_memset(qiov, got, 0, size - got);
1094
return 0;
1095
}
1096
+ if (r <= 0) {
1097
+ sftp_error_trace(s, "read");
1098
+ return -EIO;
1099
+ }
1100
1101
got += r;
1102
buf += r;
1103
- s->offset += r;
1104
if (buf >= end_of_vec && got < size) {
1105
i++;
1106
buf = i->iov_base;
1107
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1108
1109
trace_ssh_write(offset, size);
1110
1111
- ssh_seek(s, offset, SSH_SEEK_WRITE);
1112
+ trace_ssh_seek(offset);
1113
+ sftp_seek64(s->sftp_handle, offset);
1114
1115
/* This keeps track of the current iovec element ('i'), where we
1116
* will read from next ('buf'), and the end of the current iovec
1117
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1118
end_of_vec = i->iov_base + i->iov_len;
1119
1120
for (written = 0; written < size; ) {
1121
+ size_t request_write_size;
1122
again:
1123
- trace_ssh_write_buf(buf, end_of_vec - buf);
1124
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
1125
- trace_ssh_write_return(r);
1126
+ /*
1127
+ * Avoid too large data packets, as libssh currently does not
1128
+ * handle multiple requests on its own.
1129
+ */
1130
+ request_write_size = MIN(end_of_vec - buf, 131072);
1131
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
1132
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
1133
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
1134
1135
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1136
+ if (r == SSH_AGAIN) {
1137
co_yield(s, bs);
1138
goto again;
1139
}
1140
if (r < 0) {
1141
sftp_error_trace(s, "write");
1142
- s->offset = -1;
1143
return -EIO;
1144
}
1145
- /* The libssh2 API is very unclear about this. A comment in
1146
- * the code says "nothing was acked, and no EAGAIN was
1147
- * received!" which apparently means that no data got sent
1148
- * out, and the underlying channel didn't return any EAGAIN
1149
- * indication. I think this is a bug in either libssh2 or
1150
- * OpenSSH (server-side). In any case, forcing a seek (to
1151
- * discard libssh2 internal buffers), and then trying again
1152
- * works for me.
1153
- */
1154
- if (r == 0) {
1155
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
1156
- co_yield(s, bs);
1157
- goto again;
1158
- }
1159
1160
written += r;
1161
buf += r;
1162
- s->offset += r;
1163
if (buf >= end_of_vec && written < size) {
1164
i++;
1165
buf = i->iov_base;
1166
end_of_vec = i->iov_base + i->iov_len;
1167
}
1168
1169
- if (offset + written > s->attrs.filesize)
1170
- s->attrs.filesize = offset + written;
1171
+ if (offset + written > s->attrs->size) {
1172
+ s->attrs->size = offset + written;
1173
+ }
1174
}
1175
1176
return 0;
1177
@@ -XXX,XX +XXX,XX @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
1178
}
1179
}
1180
1181
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
1182
+#ifdef HAVE_LIBSSH_0_8
1183
1184
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
1185
{
1186
int r;
1187
1188
trace_ssh_flush();
1189
+
1190
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
1191
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
1192
+ return 0;
1193
+ }
1194
again:
1195
- r = libssh2_sftp_fsync(s->sftp_handle);
1196
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1197
+ r = sftp_fsync(s->sftp_handle);
1198
+ if (r == SSH_AGAIN) {
1199
co_yield(s, bs);
1200
goto again;
1201
}
1202
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
1203
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
1204
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
1205
- return 0;
1206
- }
1207
if (r < 0) {
1208
sftp_error_trace(s, "fsync");
1209
return -EIO;
1210
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1211
return ret;
1212
}
1213
1214
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
1215
+#else /* !HAVE_LIBSSH_0_8 */
1216
1217
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1218
{
1219
BDRVSSHState *s = bs->opaque;
1220
1221
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
1222
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
1223
return 0;
1224
}
1225
1226
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
1227
+#endif /* !HAVE_LIBSSH_0_8 */
1228
1229
static int64_t ssh_getlength(BlockDriverState *bs)
1230
{
1231
BDRVSSHState *s = bs->opaque;
1232
int64_t length;
1233
1234
- /* Note we cannot make a libssh2 call here. */
1235
- length = (int64_t) s->attrs.filesize;
1236
+ /* Note we cannot make a libssh call here. */
1237
+ length = (int64_t) s->attrs->size;
1238
trace_ssh_getlength(length);
1239
1240
return length;
1241
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
1242
return -ENOTSUP;
1243
}
1244
1245
- if (offset < s->attrs.filesize) {
1246
+ if (offset < s->attrs->size) {
1247
error_setg(errp, "ssh driver does not support shrinking files");
1248
return -ENOTSUP;
1249
}
1250
1251
- if (offset == s->attrs.filesize) {
1252
+ if (offset == s->attrs->size) {
1253
return 0;
1254
}
1255
1256
@@ -XXX,XX +XXX,XX @@ static void bdrv_ssh_init(void)
1257
{
1258
int r;
1259
1260
- r = libssh2_init(0);
1261
+ r = ssh_init();
1262
if (r != 0) {
1263
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
1264
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
1265
exit(EXIT_FAILURE);
1266
}
1267
1268
+#if TRACE_LIBSSH != 0
1269
+ ssh_set_log_level(TRACE_LIBSSH);
1270
+#endif
1271
+
1272
bdrv_register(&bdrv_ssh);
1273
}
1274
1275
diff --git a/.travis.yml b/.travis.yml
1276
index XXXXXXX..XXXXXXX 100644
1277
--- a/.travis.yml
1278
+++ b/.travis.yml
1279
@@ -XXX,XX +XXX,XX @@ addons:
1280
- libseccomp-dev
1281
- libspice-protocol-dev
1282
- libspice-server-dev
1283
- - libssh2-1-dev
1284
+ - libssh-dev
1285
- liburcu-dev
1286
- libusb-1.0-0-dev
1287
- libvte-2.91-dev
1288
@@ -XXX,XX +XXX,XX @@ matrix:
1289
- libseccomp-dev
1290
- libspice-protocol-dev
1291
- libspice-server-dev
1292
- - libssh2-1-dev
1293
+ - libssh-dev
1294
- liburcu-dev
1295
- libusb-1.0-0-dev
1296
- libvte-2.91-dev
39
diff --git a/block/trace-events b/block/trace-events
1297
diff --git a/block/trace-events b/block/trace-events
40
index XXXXXXX..XXXXXXX 100644
1298
index XXXXXXX..XXXXXXX 100644
41
--- a/block/trace-events
1299
--- a/block/trace-events
42
+++ b/block/trace-events
1300
+++ b/block/trace-events
43
@@ -XXX,XX +XXX,XX @@ nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
1301
@@ -XXX,XX +XXX,XX @@ nbd_client_connect_success(const char *export_name) "export '%s'"
44
nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
1302
# ssh.c
45
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
1303
ssh_restart_coroutine(void *co) "co=%p"
46
nvme_handle_event(void *s) "s %p"
1304
ssh_flush(void) "fsync"
47
-nvme_poll_cb(void *s) "s %p"
1305
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
48
+nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u"
1306
+ssh_check_host_key_knownhosts(void) "host key OK"
49
nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" flags %d niov %d"
1307
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
50
nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset 0x%"PRIx64" bytes %"PRId64" flags %d"
1308
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
51
nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
1309
ssh_co_yield_back(int sock) "s->sock=%d - back"
1310
ssh_getlength(int64_t length) "length=%" PRIi64
1311
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
1312
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1313
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
1314
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
1315
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
1316
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
1317
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1318
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
1319
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
1320
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
1321
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
1322
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
1323
+ssh_auth_methods(int methods) "auth methods=0x%x"
1324
+ssh_server_status(int status) "server status=%d"
1325
1326
# curl.c
1327
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
1328
@@ -XXX,XX +XXX,XX @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
1329
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
1330
1331
# ssh.c
1332
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
1333
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
1334
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
1335
index XXXXXXX..XXXXXXX 100644
1336
--- a/docs/qemu-block-drivers.texi
1337
+++ b/docs/qemu-block-drivers.texi
1338
@@ -XXX,XX +XXX,XX @@ print a warning when @code{fsync} is not supported:
1339
1340
warning: ssh server @code{ssh.example.com:22} does not support fsync
1341
1342
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
1343
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
1344
supported.
1345
1346
@node disk_images_nvme
1347
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
1348
index XXXXXXX..XXXXXXX 100644
1349
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
1350
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
1351
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1352
mxe-$TARGET-w64-mingw32.shared-curl \
1353
mxe-$TARGET-w64-mingw32.shared-glib \
1354
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1355
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1356
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1357
mxe-$TARGET-w64-mingw32.shared-lzo \
1358
mxe-$TARGET-w64-mingw32.shared-nettle \
1359
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
1360
index XXXXXXX..XXXXXXX 100644
1361
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
1362
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
1363
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1364
mxe-$TARGET-w64-mingw32.shared-curl \
1365
mxe-$TARGET-w64-mingw32.shared-glib \
1366
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1367
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1368
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1369
mxe-$TARGET-w64-mingw32.shared-lzo \
1370
mxe-$TARGET-w64-mingw32.shared-nettle \
1371
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
1372
index XXXXXXX..XXXXXXX 100644
1373
--- a/tests/docker/dockerfiles/fedora.docker
1374
+++ b/tests/docker/dockerfiles/fedora.docker
1375
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1376
libpng-devel \
1377
librbd-devel \
1378
libseccomp-devel \
1379
- libssh2-devel \
1380
+ libssh-devel \
1381
libubsan \
1382
libusbx-devel \
1383
libxml2-devel \
1384
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1385
mingw32-gtk3 \
1386
mingw32-libjpeg-turbo \
1387
mingw32-libpng \
1388
- mingw32-libssh2 \
1389
mingw32-libtasn1 \
1390
mingw32-nettle \
1391
mingw32-pixman \
1392
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1393
mingw64-gtk3 \
1394
mingw64-libjpeg-turbo \
1395
mingw64-libpng \
1396
- mingw64-libssh2 \
1397
mingw64-libtasn1 \
1398
mingw64-nettle \
1399
mingw64-pixman \
1400
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
1401
index XXXXXXX..XXXXXXX 100644
1402
--- a/tests/docker/dockerfiles/ubuntu.docker
1403
+++ b/tests/docker/dockerfiles/ubuntu.docker
1404
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1405
libsnappy-dev \
1406
libspice-protocol-dev \
1407
libspice-server-dev \
1408
- libssh2-1-dev \
1409
+ libssh-dev \
1410
libusb-1.0-0-dev \
1411
libusbredirhost-dev \
1412
libvdeplug-dev \
1413
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
1414
index XXXXXXX..XXXXXXX 100644
1415
--- a/tests/docker/dockerfiles/ubuntu1804.docker
1416
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
1417
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1418
libsnappy-dev \
1419
libspice-protocol-dev \
1420
libspice-server-dev \
1421
- libssh2-1-dev \
1422
+ libssh-dev \
1423
libusb-1.0-0-dev \
1424
libusbredirhost-dev \
1425
libvdeplug-dev \
1426
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
1427
index XXXXXXX..XXXXXXX 100755
1428
--- a/tests/qemu-iotests/207
1429
+++ b/tests/qemu-iotests/207
1430
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1431
1432
iotests.img_info_log(remote_path)
1433
1434
- md5_key = subprocess.check_output(
1435
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1436
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
1437
- shell=True).rstrip().decode('ascii')
1438
+ keys = subprocess.check_output(
1439
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1440
+ 'cut -d" " -f3',
1441
+ shell=True).rstrip().decode('ascii').split('\n')
1442
+
1443
+ # Mappings of base64 representations to digests
1444
+ md5_keys = {}
1445
+ sha1_keys = {}
1446
+
1447
+ for key in keys:
1448
+ md5_keys[key] = subprocess.check_output(
1449
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
1450
+ shell=True).rstrip().decode('ascii')
1451
+
1452
+ sha1_keys[key] = subprocess.check_output(
1453
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
1454
+ shell=True).rstrip().decode('ascii')
1455
1456
vm.launch()
1457
+
1458
+ # Find correct key first
1459
+ matching_key = None
1460
+ for key in keys:
1461
+ result = vm.qmp('blockdev-add',
1462
+ driver='ssh', node_name='node0', path=disk_path,
1463
+ server={
1464
+ 'host': '127.0.0.1',
1465
+ 'port': '22',
1466
+ }, host_key_check={
1467
+ 'mode': 'hash',
1468
+ 'type': 'md5',
1469
+ 'hash': md5_keys[key],
1470
+ })
1471
+
1472
+ if 'error' not in result:
1473
+ vm.qmp('blockdev-del', node_name='node0')
1474
+ matching_key = key
1475
+ break
1476
+
1477
+ if matching_key is None:
1478
+ vm.shutdown()
1479
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
1480
+
1481
blockdev_create(vm, { 'driver': 'ssh',
1482
'location': {
1483
'path': disk_path,
1484
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1485
'host-key-check': {
1486
'mode': 'hash',
1487
'type': 'md5',
1488
- 'hash': md5_key,
1489
+ 'hash': md5_keys[matching_key],
1490
}
1491
},
1492
'size': 8388608 })
1493
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1494
1495
iotests.img_info_log(remote_path)
1496
1497
- sha1_key = subprocess.check_output(
1498
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1499
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
1500
- shell=True).rstrip().decode('ascii')
1501
-
1502
vm.launch()
1503
blockdev_create(vm, { 'driver': 'ssh',
1504
'location': {
1505
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1506
'host-key-check': {
1507
'mode': 'hash',
1508
'type': 'sha1',
1509
- 'hash': sha1_key,
1510
+ 'hash': sha1_keys[matching_key],
1511
}
1512
},
1513
'size': 4194304 })
1514
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
1515
index XXXXXXX..XXXXXXX 100644
1516
--- a/tests/qemu-iotests/207.out
1517
+++ b/tests/qemu-iotests/207.out
1518
@@ -XXX,XX +XXX,XX @@ virtual size: 4 MiB (4194304 bytes)
1519
1520
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
1521
{"return": {}}
1522
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
1523
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
1524
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
1525
{"return": {}}
1526
52
--
1527
--
53
2.28.0
1528
2.21.0
54
1529
1530
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
What we want to trace is the block driver state and the queue index.
4
5
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-7-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 2 +-
15
block/trace-events | 2 +-
16
2 files changed, 2 insertions(+), 2 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
23
24
while (q->free_req_head == -1) {
25
if (qemu_in_coroutine()) {
26
- trace_nvme_free_req_queue_wait(q);
27
+ trace_nvme_free_req_queue_wait(q->s, q->index);
28
qemu_co_queue_wait(&q->free_req_queue, &q->lock);
29
} else {
30
qemu_mutex_unlock(&q->lock);
31
diff --git a/block/trace-events b/block/trace-events
32
index XXXXXXX..XXXXXXX 100644
33
--- a/block/trace-events
34
+++ b/block/trace-events
35
@@ -XXX,XX +XXX,XX @@ nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s
36
nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
37
nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
38
nvme_dma_map_flush(void *s) "s %p"
39
-nvme_free_req_queue_wait(void *q) "q %p"
40
+nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
41
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
42
nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
43
nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
44
--
45
2.28.0
46
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Reviewed-by: Eric Auger <eric.auger@redhat.com>
4
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Tested-by: Eric Auger <eric.auger@redhat.com>
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Message-id: 20201029093306.1063879-8-philmd@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
---
11
block/nvme.c | 3 +++
12
block/trace-events | 2 ++
13
2 files changed, 5 insertions(+)
14
15
diff --git a/block/nvme.c b/block/nvme.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/nvme.c
18
+++ b/block/nvme.c
19
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
20
21
static void nvme_free_queue_pair(NVMeQueuePair *q)
22
{
23
+ trace_nvme_free_queue_pair(q->index, q);
24
if (q->completion_bh) {
25
qemu_bh_delete(q->completion_bh);
26
}
27
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
28
if (!q) {
29
return NULL;
30
}
31
+ trace_nvme_create_queue_pair(idx, q, size, aio_context,
32
+ event_notifier_get_fd(s->irq_notifier));
33
q->prp_list_pages = qemu_try_memalign(s->page_size,
34
s->page_size * NVME_NUM_REQS);
35
if (!q->prp_list_pages) {
36
diff --git a/block/trace-events b/block/trace-events
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/trace-events
39
+++ b/block/trace-events
40
@@ -XXX,XX +XXX,XX @@ nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" byte
41
nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
42
nvme_dma_map_flush(void *s) "s %p"
43
nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
44
+nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d"
45
+nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
46
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
47
nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
48
nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
49
--
50
2.28.0
51
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
To be able to use some definitions in structure declarations,
4
move them earlier. No logical change.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-9-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 19 ++++++++++---------
15
1 file changed, 10 insertions(+), 9 deletions(-)
16
17
diff --git a/block/nvme.c b/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/nvme.c
20
+++ b/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@
22
23
typedef struct BDRVNVMeState BDRVNVMeState;
24
25
+/* Same index is used for queues and IRQs */
26
+#define INDEX_ADMIN 0
27
+#define INDEX_IO(n) (1 + n)
28
+
29
+/* This driver shares a single MSIX IRQ for the admin and I/O queues */
30
+enum {
31
+ MSIX_SHARED_IRQ_IDX = 0,
32
+ MSIX_IRQ_COUNT = 1
33
+};
34
+
35
typedef struct {
36
int32_t head, tail;
37
uint8_t *queue;
38
@@ -XXX,XX +XXX,XX @@ typedef struct {
39
QEMUBH *completion_bh;
40
} NVMeQueuePair;
41
42
-#define INDEX_ADMIN 0
43
-#define INDEX_IO(n) (1 + n)
44
-
45
-/* This driver shares a single MSIX IRQ for the admin and I/O queues */
46
-enum {
47
- MSIX_SHARED_IRQ_IDX = 0,
48
- MSIX_IRQ_COUNT = 1
49
-};
50
-
51
struct BDRVNVMeState {
52
AioContext *aio_context;
53
QEMUVFIOState *vfio;
54
--
55
2.28.0
56
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
We cannot have a negative queue count/size/index, so use an unsigned type.
4
Rename 'nr_queues' as 'queue_count' to match the spec naming.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-10-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 38 ++++++++++++++++++--------------------
15
block/trace-events | 10 +++++-----
16
2 files changed, 23 insertions(+), 25 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
23
* [1..]: io queues.
24
*/
25
NVMeQueuePair **queues;
26
- int nr_queues;
27
+ unsigned queue_count;
28
size_t page_size;
29
/* How many uint32_t elements does each doorbell entry take. */
30
size_t doorbell_scale;
31
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
32
};
33
34
static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
35
- int nentries, int entry_bytes, Error **errp)
36
+ unsigned nentries, size_t entry_bytes, Error **errp)
37
{
38
size_t bytes;
39
int r;
40
@@ -XXX,XX +XXX,XX @@ static void nvme_free_req_queue_cb(void *opaque)
41
42
static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
43
AioContext *aio_context,
44
- int idx, int size,
45
+ unsigned idx, size_t size,
46
Error **errp)
47
{
48
int i, r;
49
@@ -XXX,XX +XXX,XX @@ static bool nvme_poll_queues(BDRVNVMeState *s)
50
bool progress = false;
51
int i;
52
53
- for (i = 0; i < s->nr_queues; i++) {
54
+ for (i = 0; i < s->queue_count; i++) {
55
if (nvme_poll_queue(s->queues[i])) {
56
progress = true;
57
}
58
@@ -XXX,XX +XXX,XX @@ static void nvme_handle_event(EventNotifier *n)
59
static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
60
{
61
BDRVNVMeState *s = bs->opaque;
62
- int n = s->nr_queues;
63
+ unsigned n = s->queue_count;
64
NVMeQueuePair *q;
65
NvmeCmd cmd;
66
- int queue_size = NVME_QUEUE_SIZE;
67
+ unsigned queue_size = NVME_QUEUE_SIZE;
68
69
q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
70
n, queue_size, errp);
71
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
72
.cdw11 = cpu_to_le32(0x3),
73
};
74
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
75
- error_setg(errp, "Failed to create CQ io queue [%d]", n);
76
+ error_setg(errp, "Failed to create CQ io queue [%u]", n);
77
goto out_error;
78
}
79
cmd = (NvmeCmd) {
80
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
81
.cdw11 = cpu_to_le32(0x1 | (n << 16)),
82
};
83
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
84
- error_setg(errp, "Failed to create SQ io queue [%d]", n);
85
+ error_setg(errp, "Failed to create SQ io queue [%u]", n);
86
goto out_error;
87
}
88
s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
89
s->queues[n] = q;
90
- s->nr_queues++;
91
+ s->queue_count++;
92
return true;
93
out_error:
94
nvme_free_queue_pair(q);
95
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
96
ret = -EINVAL;
97
goto out;
98
}
99
- s->nr_queues = 1;
100
+ s->queue_count = 1;
101
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
102
regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
103
(NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
104
@@ -XXX,XX +XXX,XX @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable,
105
106
static void nvme_close(BlockDriverState *bs)
107
{
108
- int i;
109
BDRVNVMeState *s = bs->opaque;
110
111
- for (i = 0; i < s->nr_queues; ++i) {
112
+ for (unsigned i = 0; i < s->queue_count; ++i) {
113
nvme_free_queue_pair(s->queues[i]);
114
}
115
g_free(s->queues);
116
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
117
};
118
119
trace_nvme_prw_aligned(s, is_write, offset, bytes, flags, qiov->niov);
120
- assert(s->nr_queues > 1);
121
+ assert(s->queue_count > 1);
122
req = nvme_get_free_req(ioq);
123
assert(req);
124
125
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
126
.ret = -EINPROGRESS,
127
};
128
129
- assert(s->nr_queues > 1);
130
+ assert(s->queue_count > 1);
131
req = nvme_get_free_req(ioq);
132
assert(req);
133
nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data);
134
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
135
cmd.cdw12 = cpu_to_le32(cdw12);
136
137
trace_nvme_write_zeroes(s, offset, bytes, flags);
138
- assert(s->nr_queues > 1);
139
+ assert(s->queue_count > 1);
140
req = nvme_get_free_req(ioq);
141
assert(req);
142
143
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
144
return -ENOTSUP;
145
}
146
147
- assert(s->nr_queues > 1);
148
+ assert(s->queue_count > 1);
149
150
buf = qemu_try_memalign(s->page_size, s->page_size);
151
if (!buf) {
152
@@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs)
153
{
154
BDRVNVMeState *s = bs->opaque;
155
156
- for (int i = 0; i < s->nr_queues; i++) {
157
+ for (unsigned i = 0; i < s->queue_count; i++) {
158
NVMeQueuePair *q = s->queues[i];
159
160
qemu_bh_delete(q->completion_bh);
161
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,
162
aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
163
false, nvme_handle_event, nvme_poll_cb);
164
165
- for (int i = 0; i < s->nr_queues; i++) {
166
+ for (unsigned i = 0; i < s->queue_count; i++) {
167
NVMeQueuePair *q = s->queues[i];
168
169
q->completion_bh =
170
@@ -XXX,XX +XXX,XX @@ static void nvme_aio_plug(BlockDriverState *bs)
171
172
static void nvme_aio_unplug(BlockDriverState *bs)
173
{
174
- int i;
175
BDRVNVMeState *s = bs->opaque;
176
assert(s->plugged);
177
s->plugged = false;
178
- for (i = INDEX_IO(0); i < s->nr_queues; i++) {
179
+ for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) {
180
NVMeQueuePair *q = s->queues[i];
181
qemu_mutex_lock(&q->lock);
182
nvme_kick(q);
183
diff --git a/block/trace-events b/block/trace-events
184
index XXXXXXX..XXXXXXX 100644
185
--- a/block/trace-events
186
+++ b/block/trace-events
187
@@ -XXX,XX +XXX,XX @@ qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
188
# nvme.c
189
nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
190
nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
191
-nvme_kick(void *s, int queue) "s %p queue %d"
192
+nvme_kick(void *s, unsigned q_index) "s %p q #%u"
193
nvme_dma_flush_queue_wait(void *s) "s %p"
194
nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
195
-nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d"
196
-nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d"
197
-nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d"
198
-nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d"
199
+nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d"
200
+nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u"
201
+nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
202
+nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
203
nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
204
nvme_handle_event(void *s) "s %p"
205
nvme_poll_queue(void *s, unsigned q_index) "s %p q #%u"
206
--
207
2.28.0
208
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Just for consistency, following the example documented since
4
commit e3fe3988d7 ("error: Document Error API usage rules"),
5
return a boolean value indicating an error is set or not.
6
Pass errp directly, as local_err is not needed in our
7
case. This simplifies nvme_create_queue_pair() a bit.
8
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-12-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 16 +++++++---------
17
1 file changed, 7 insertions(+), 9 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
24
},
25
};
26
27
-static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
28
+/* Returns true on success, false on failure. */
29
+static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
30
unsigned nentries, size_t entry_bytes, Error **errp)
31
{
32
size_t bytes;
33
@@ -XXX,XX +XXX,XX @@ static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
34
q->queue = qemu_try_memalign(s->page_size, bytes);
35
if (!q->queue) {
36
error_setg(errp, "Cannot allocate queue");
37
- return;
38
+ return false;
39
}
40
memset(q->queue, 0, bytes);
41
r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
42
if (r) {
43
error_setg(errp, "Cannot map queue");
44
+ return false;
45
}
46
+ return true;
47
}
48
49
static void nvme_free_queue_pair(NVMeQueuePair *q)
50
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
51
Error **errp)
52
{
53
int i, r;
54
- Error *local_err = NULL;
55
NVMeQueuePair *q;
56
uint64_t prp_list_iova;
57
58
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
59
req->prp_list_iova = prp_list_iova + i * s->page_size;
60
}
61
62
- nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
63
- if (local_err) {
64
- error_propagate(errp, local_err);
65
+ if (!nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, errp)) {
66
goto fail;
67
}
68
q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail;
69
70
- nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
71
- if (local_err) {
72
- error_propagate(errp, local_err);
73
+ if (!nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, errp)) {
74
goto fail;
75
}
76
q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head;
77
--
78
2.28.0
79
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Rename Submission Queue flags with 'Sq' to differentiate
4
submission queue flags from completion queue flags, and introduce
5
Completion Queue flag definitions.
6
7
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-id: 20201029093306.1063879-13-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
include/block/nvme.h | 18 ++++++++++++------
16
1 file changed, 12 insertions(+), 6 deletions(-)
17
18
diff --git a/include/block/nvme.h b/include/block/nvme.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/block/nvme.h
21
+++ b/include/block/nvme.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED NvmeCreateCq {
23
#define NVME_CQ_FLAGS_PC(cq_flags) (cq_flags & 0x1)
24
#define NVME_CQ_FLAGS_IEN(cq_flags) ((cq_flags >> 1) & 0x1)
25
26
+enum NvmeFlagsCq {
27
+ NVME_CQ_PC = 1,
28
+ NVME_CQ_IEN = 2,
29
+};
30
+
31
typedef struct QEMU_PACKED NvmeCreateSq {
32
uint8_t opcode;
33
uint8_t flags;
34
@@ -XXX,XX +XXX,XX @@ typedef struct QEMU_PACKED NvmeCreateSq {
35
#define NVME_SQ_FLAGS_PC(sq_flags) (sq_flags & 0x1)
36
#define NVME_SQ_FLAGS_QPRIO(sq_flags) ((sq_flags >> 1) & 0x3)
37
38
-enum NvmeQueueFlags {
39
- NVME_Q_PC = 1,
40
- NVME_Q_PRIO_URGENT = 0,
41
- NVME_Q_PRIO_HIGH = 1,
42
- NVME_Q_PRIO_NORMAL = 2,
43
- NVME_Q_PRIO_LOW = 3,
44
+enum NvmeFlagsSq {
45
+ NVME_SQ_PC = 1,
46
+
47
+ NVME_SQ_PRIO_URGENT = 0,
48
+ NVME_SQ_PRIO_HIGH = 1,
49
+ NVME_SQ_PRIO_NORMAL = 2,
50
+ NVME_SQ_PRIO_LOW = 3,
51
};
52
53
typedef struct QEMU_PACKED NvmeIdentify {
54
--
55
2.28.0
56
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Replace magic values with definitions, and simplify since the
4
number of queues will never reach 64K.
5
6
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Tested-by: Eric Auger <eric.auger@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Message-id: 20201029093306.1063879-14-philmd@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Tested-by: Eric Auger <eric.auger@redhat.com>
13
---
14
block/nvme.c | 9 +++++----
15
1 file changed, 5 insertions(+), 4 deletions(-)
16
17
diff --git a/block/nvme.c b/block/nvme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/nvme.c
20
+++ b/block/nvme.c
21
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
22
NvmeCmd cmd;
23
unsigned queue_size = NVME_QUEUE_SIZE;
24
25
+ assert(n <= UINT16_MAX);
26
q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
27
n, queue_size, errp);
28
if (!q) {
29
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
30
cmd = (NvmeCmd) {
31
.opcode = NVME_ADM_CMD_CREATE_CQ,
32
.dptr.prp1 = cpu_to_le64(q->cq.iova),
33
- .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
34
- .cdw11 = cpu_to_le32(0x3),
35
+ .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
36
+ .cdw11 = cpu_to_le32(NVME_CQ_IEN | NVME_CQ_PC),
37
};
38
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
39
error_setg(errp, "Failed to create CQ io queue [%u]", n);
40
@@ -XXX,XX +XXX,XX @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
41
cmd = (NvmeCmd) {
42
.opcode = NVME_ADM_CMD_CREATE_SQ,
43
.dptr.prp1 = cpu_to_le64(q->sq.iova),
44
- .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
45
- .cdw11 = cpu_to_le32(0x1 | (n << 16)),
46
+ .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | n),
47
+ .cdw11 = cpu_to_le32(NVME_SQ_PC | (n << 16)),
48
};
49
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
50
error_setg(errp, "Failed to create SQ io queue [%u]", n);
51
--
52
2.28.0
53
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
From the specification chapter 3.1.8 "AQA - Admin Queue Attributes"
4
the Admin Submission Queue Size field is a 0’s based value:
5
6
Admin Submission Queue Size (ASQS):
7
8
Defines the size of the Admin Submission Queue in entries.
9
Enabling a controller while this field is cleared to 00h
10
produces undefined results. The minimum size of the Admin
11
Submission Queue is two entries. The maximum size of the
12
Admin Submission Queue is 4096 entries.
13
This is a 0’s based value.
14
15
This bug has never been hit because the device initialization
16
uses a single command synchronously :)
17
18
Reviewed-by: Eric Auger <eric.auger@redhat.com>
19
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
20
Tested-by: Eric Auger <eric.auger@redhat.com>
21
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
22
Message-id: 20201029093306.1063879-15-philmd@redhat.com
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
Tested-by: Eric Auger <eric.auger@redhat.com>
25
---
26
block/nvme.c | 6 +++---
27
1 file changed, 3 insertions(+), 3 deletions(-)
28
29
diff --git a/block/nvme.c b/block/nvme.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/block/nvme.c
32
+++ b/block/nvme.c
33
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
34
goto out;
35
}
36
s->queue_count = 1;
37
- QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
38
- regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
39
- (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
40
+ QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
41
+ regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
42
+ ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
43
regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
44
regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
45
46
--
47
2.28.0
48
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
We don't need to dereference from BDRVNVMeState each time.
4
Use a NVMeQueuePair pointer on the admin queue.
5
The nvme_init() becomes easier to review, matching the style
6
of nvme_add_io_queue().
7
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-16-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 12 ++++++------
17
1 file changed, 6 insertions(+), 6 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
Error **errp)
25
{
26
BDRVNVMeState *s = bs->opaque;
27
+ NVMeQueuePair *q;
28
AioContext *aio_context = bdrv_get_aio_context(bs);
29
int ret;
30
uint64_t cap;
31
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
32
33
/* Set up admin queue. */
34
s->queues = g_new(NVMeQueuePair *, 1);
35
- s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
36
- NVME_QUEUE_SIZE,
37
- errp);
38
- if (!s->queues[INDEX_ADMIN]) {
39
+ q = nvme_create_queue_pair(s, aio_context, 0, NVME_QUEUE_SIZE, errp);
40
+ if (!q) {
41
ret = -EINVAL;
42
goto out;
43
}
44
+ s->queues[INDEX_ADMIN] = q;
45
s->queue_count = 1;
46
QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
47
regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
48
((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
49
- regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
50
- regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
51
+ regs->asq = cpu_to_le64(q->sq.iova);
52
+ regs->acq = cpu_to_le64(q->cq.iova);
53
54
/* After setting up all control registers we can enable device now. */
55
regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
56
--
57
2.28.0
58
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
Commit bdd6a90a9e5 ("block: Add VFIO based NVMe driver")
4
sets the request_alignment in nvme_refresh_limits().
5
For consistency, also set it during initialization.
6
7
Reported-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reviewed-by: Eric Auger <eric.auger@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-18-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 1 +
17
1 file changed, 1 insertion(+)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
25
s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
26
bs->bl.opt_mem_alignment = s->page_size;
27
+ bs->bl.request_alignment = s->page_size;
28
timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
29
30
/* Reset device to get a clean state. */
31
--
32
2.28.0
33
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
While trying to simplify the code using a macro, we forgot
4
the 12-bit shift... Correct that.
5
6
Fixes: fad1eb68862 ("block/nvme: Use register definitions from 'block/nvme.h'")
7
Reported-by: Eric Auger <eric.auger@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Eric Auger <eric.auger@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-19-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 2 +-
17
1 file changed, 1 insertion(+), 1 deletion(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
24
goto out;
25
}
26
27
- s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
28
+ s->page_size = 1u << (12 + NVME_CAP_MPSMIN(cap));
29
s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
30
bs->bl.opt_mem_alignment = s->page_size;
31
bs->bl.request_alignment = s->page_size;
32
--
33
2.28.0
34
diff view generated by jsdifflib
Deleted patch
1
From: Eric Auger <eric.auger@redhat.com>
2
1
3
In preparation for 64kB host page support, let's change the size
4
and alignment of the IDENTIFY command response buffer so that
5
the VFIO DMA MAP succeeds. We align on the host page size.
6
7
Signed-off-by: Eric Auger <eric.auger@redhat.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-20-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 9 +++++----
17
1 file changed, 5 insertions(+), 4 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
24
.opcode = NVME_ADM_CMD_IDENTIFY,
25
.cdw10 = cpu_to_le32(0x1),
26
};
27
+ size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size);
28
29
- id = qemu_try_memalign(s->page_size, sizeof(*id));
30
+ id = qemu_try_memalign(qemu_real_host_page_size, id_size);
31
if (!id) {
32
error_setg(errp, "Cannot allocate buffer for identify response");
33
goto out;
34
}
35
- r = qemu_vfio_dma_map(s->vfio, id, sizeof(*id), true, &iova);
36
+ r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova);
37
if (r) {
38
error_setg(errp, "Cannot map buffer for DMA");
39
goto out;
40
}
41
42
- memset(id, 0, sizeof(*id));
43
+ memset(id, 0, id_size);
44
cmd.dptr.prp1 = cpu_to_le64(iova);
45
if (nvme_admin_cmd_sync(bs, &cmd)) {
46
error_setg(errp, "Failed to identify controller");
47
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
48
s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES);
49
s->supports_discard = !!(oncs & NVME_ONCS_DSM);
50
51
- memset(id, 0, sizeof(*id));
52
+ memset(id, 0, id_size);
53
cmd.cdw10 = 0;
54
cmd.nsid = cpu_to_le32(namespace);
55
if (nvme_admin_cmd_sync(bs, &cmd)) {
56
--
57
2.28.0
58
diff view generated by jsdifflib
Deleted patch
1
From: Eric Auger <eric.auger@redhat.com>
2
1
3
In preparation for 64kB host page support, let's change the size
4
and alignment of the queue so that the VFIO DMA MAP succeeds.
5
We align on the host page size.
6
7
Signed-off-by: Eric Auger <eric.auger@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-21-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 4 ++--
16
1 file changed, 2 insertions(+), 2 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
23
size_t bytes;
24
int r;
25
26
- bytes = ROUND_UP(nentries * entry_bytes, s->page_size);
27
+ bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size);
28
q->head = q->tail = 0;
29
- q->queue = qemu_try_memalign(s->page_size, bytes);
30
+ q->queue = qemu_try_memalign(qemu_real_host_page_size, bytes);
31
if (!q->queue) {
32
error_setg(errp, "Cannot allocate queue");
33
return false;
34
--
35
2.28.0
36
diff view generated by jsdifflib
Deleted patch
1
From: Eric Auger <eric.auger@redhat.com>
2
1
3
In preparation for 64kB host page support, let's change the size
4
and alignment of the prp_list_pages so that the VFIO DMA MAP succeeds
5
with 64kB host page size. We align on the host page size.
6
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Signed-off-by: Eric Auger <eric.auger@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201029093306.1063879-22-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 11 ++++++-----
17
1 file changed, 6 insertions(+), 5 deletions(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
24
int i, r;
25
NVMeQueuePair *q;
26
uint64_t prp_list_iova;
27
+ size_t bytes;
28
29
q = g_try_new0(NVMeQueuePair, 1);
30
if (!q) {
31
@@ -XXX,XX +XXX,XX @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
32
}
33
trace_nvme_create_queue_pair(idx, q, size, aio_context,
34
event_notifier_get_fd(s->irq_notifier));
35
- q->prp_list_pages = qemu_try_memalign(s->page_size,
36
- s->page_size * NVME_NUM_REQS);
37
+ bytes = QEMU_ALIGN_UP(s->page_size * NVME_NUM_REQS,
38
+ qemu_real_host_page_size);
39
+ q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes);
40
if (!q->prp_list_pages) {
41
goto fail;
42
}
43
- memset(q->prp_list_pages, 0, s->page_size * NVME_NUM_REQS);
44
+ memset(q->prp_list_pages, 0, bytes);
45
qemu_mutex_init(&q->lock);
46
q->s = s;
47
q->index = idx;
48
qemu_co_queue_init(&q->free_req_queue);
49
q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
50
- r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
51
- s->page_size * NVME_NUM_REQS,
52
+ r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
53
false, &prp_list_iova);
54
if (r) {
55
goto fail;
56
--
57
2.28.0
58
diff view generated by jsdifflib
Deleted patch
1
From: Eric Auger <eric.auger@redhat.com>
2
1
3
Make sure iov's va and size are properly aligned on the
4
host page size.
5
6
Signed-off-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Tested-by: Eric Auger <eric.auger@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Message-id: 20201029093306.1063879-23-philmd@redhat.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Tested-by: Eric Auger <eric.auger@redhat.com>
14
---
15
block/nvme.c | 14 ++++++++------
16
1 file changed, 8 insertions(+), 6 deletions(-)
17
18
diff --git a/block/nvme.c b/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block/nvme.c
21
+++ b/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
23
for (i = 0; i < qiov->niov; ++i) {
24
bool retry = true;
25
uint64_t iova;
26
+ size_t len = QEMU_ALIGN_UP(qiov->iov[i].iov_len,
27
+ qemu_real_host_page_size);
28
try_map:
29
r = qemu_vfio_dma_map(s->vfio,
30
qiov->iov[i].iov_base,
31
- qiov->iov[i].iov_len,
32
- true, &iova);
33
+ len, true, &iova);
34
if (r == -ENOMEM && retry) {
35
retry = false;
36
trace_nvme_dma_flush_queue_wait(s);
37
@@ -XXX,XX +XXX,XX @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs,
38
BDRVNVMeState *s = bs->opaque;
39
40
for (i = 0; i < qiov->niov; ++i) {
41
- if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base, s->page_size) ||
42
- !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, s->page_size)) {
43
+ if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base,
44
+ qemu_real_host_page_size) ||
45
+ !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size)) {
46
trace_nvme_qiov_unaligned(qiov, i, qiov->iov[i].iov_base,
47
qiov->iov[i].iov_len, s->page_size);
48
return false;
49
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
50
int r;
51
uint8_t *buf = NULL;
52
QEMUIOVector local_qiov;
53
-
54
+ size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size);
55
assert(QEMU_IS_ALIGNED(offset, s->page_size));
56
assert(QEMU_IS_ALIGNED(bytes, s->page_size));
57
assert(bytes <= s->max_transfer);
58
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
59
}
60
s->stats.unaligned_accesses++;
61
trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
62
- buf = qemu_try_memalign(s->page_size, bytes);
63
+ buf = qemu_try_memalign(qemu_real_host_page_size, len);
64
65
if (!buf) {
66
return -ENOMEM;
67
--
68
2.28.0
69
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
qemu_vfio_pci_map_bar() calls mmap(), and mmap(2) states:
4
5
'offset' must be a multiple of the page size as returned
6
by sysconf(_SC_PAGE_SIZE).
7
8
In commit f68453237b9 we started to use an offset of 4K which
9
broke this contract on Aarch64 arch.
10
11
Fix by mapping at offset 0, and accessing doorbells at offset=4K.
12
13
Fixes: f68453237b9 ("block/nvme: Map doorbells pages write-only")
14
Reported-by: Eric Auger <eric.auger@redhat.com>
15
Reviewed-by: Eric Auger <eric.auger@redhat.com>
16
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Tested-by: Eric Auger <eric.auger@redhat.com>
18
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
19
Message-id: 20201029093306.1063879-24-philmd@redhat.com
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
Tested-by: Eric Auger <eric.auger@redhat.com>
22
---
23
block/nvme.c | 11 +++++++----
24
1 file changed, 7 insertions(+), 4 deletions(-)
25
26
diff --git a/block/nvme.c b/block/nvme.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/block/nvme.c
29
+++ b/block/nvme.c
30
@@ -XXX,XX +XXX,XX @@ typedef struct {
31
struct BDRVNVMeState {
32
AioContext *aio_context;
33
QEMUVFIOState *vfio;
34
+ void *bar0_wo_map;
35
/* Memory mapped registers */
36
volatile struct {
37
uint32_t sq_tail;
38
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
39
}
40
}
41
42
- s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
43
- NVME_DOORBELL_SIZE, PROT_WRITE, errp);
44
+ s->bar0_wo_map = qemu_vfio_pci_map_bar(s->vfio, 0, 0,
45
+ sizeof(NvmeBar) + NVME_DOORBELL_SIZE,
46
+ PROT_WRITE, errp);
47
+ s->doorbells = (void *)((uintptr_t)s->bar0_wo_map + sizeof(NvmeBar));
48
if (!s->doorbells) {
49
ret = -EINVAL;
50
goto out;
51
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
52
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
53
false, NULL, NULL);
54
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
55
- qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
56
- sizeof(NvmeBar), NVME_DOORBELL_SIZE);
57
+ qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map,
58
+ 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE);
59
qemu_vfio_close(s->vfio);
60
61
g_free(s->device);
62
--
63
2.28.0
64
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
The Completion Queue Command Identifier is a 16-bit value,
4
so nvme_submit_command() is unlikely to work on big-endian
5
hosts, as the relevant bits are truncated.
6
Fix by using the correct byte-swap function.
7
8
Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver")
9
Reported-by: Keith Busch <kbusch@kernel.org>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-id: 20201029093306.1063879-25-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
block/nvme.c | 2 +-
17
1 file changed, 1 insertion(+), 1 deletion(-)
18
19
diff --git a/block/nvme.c b/block/nvme.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/nvme.c
22
+++ b/block/nvme.c
23
@@ -XXX,XX +XXX,XX @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
24
assert(!req->cb);
25
req->cb = cb;
26
req->opaque = opaque;
27
- cmd->cid = cpu_to_le32(req->cid);
28
+ cmd->cid = cpu_to_le16(req->cid);
29
30
trace_nvme_submit_command(q->s, q->index, req->cid);
31
nvme_trace_command(cmd);
32
--
33
2.28.0
34
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
We sometimes get kernel panics with some devices on Aarch64
4
hosts. Alex Williamson suggests it might be broken PCIe
5
root complex. Add trace event to record the latest I/O
6
access before crashing. Just in case, assert our accesses are
7
aligned.
8
9
Reviewed-by: Fam Zheng <fam@euphon.net>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Message-id: 20201103020733.2303148-3-philmd@redhat.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Tested-by: Eric Auger <eric.auger@redhat.com>
15
---
16
util/vfio-helpers.c | 8 ++++++++
17
util/trace-events | 2 ++
18
2 files changed, 10 insertions(+)
19
20
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/util/vfio-helpers.c
23
+++ b/util/vfio-helpers.c
24
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
25
{
26
int ret;
27
28
+ trace_qemu_vfio_pci_read_config(buf, ofs, size,
29
+ s->config_region_info.offset,
30
+ s->config_region_info.size);
31
+ assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
32
do {
33
ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
34
} while (ret == -1 && errno == EINTR);
35
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
36
{
37
int ret;
38
39
+ trace_qemu_vfio_pci_write_config(buf, ofs, size,
40
+ s->config_region_info.offset,
41
+ s->config_region_info.size);
42
+ assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
43
do {
44
ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
45
} while (ret == -1 && errno == EINTR);
46
diff --git a/util/trace-events b/util/trace-events
47
index XXXXXXX..XXXXXXX 100644
48
--- a/util/trace-events
49
+++ b/util/trace-events
50
@@ -XXX,XX +XXX,XX @@ qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t iova
51
qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p host %p size 0x%zx iova 0x%"PRIx64
52
qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size 0x%zx temporary %d iova %p"
53
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
54
+qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
55
+qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
56
--
57
2.28.0
58
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
1
3
For debugging purposes, trace BAR region info.
4
5
Reviewed-by: Fam Zheng <fam@euphon.net>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Message-id: 20201103020733.2303148-4-philmd@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Tested-by: Eric Auger <eric.auger@redhat.com>
11
---
12
util/vfio-helpers.c | 8 ++++++++
13
util/trace-events | 1 +
14
2 files changed, 9 insertions(+)
15
16
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vfio-helpers.c
19
+++ b/util/vfio-helpers.c
20
@@ -XXX,XX +XXX,XX @@ static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
21
22
static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
23
{
24
+ g_autofree char *barname = NULL;
25
assert_bar_index_valid(s, index);
26
s->bar_region_info[index] = (struct vfio_region_info) {
27
.index = VFIO_PCI_BAR0_REGION_INDEX + index,
28
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
29
error_setg_errno(errp, errno, "Failed to get BAR region info");
30
return -errno;
31
}
32
+ barname = g_strdup_printf("bar[%d]", index);
33
+ trace_qemu_vfio_region_info(barname, s->bar_region_info[index].offset,
34
+ s->bar_region_info[index].size,
35
+ s->bar_region_info[index].cap_offset);
36
37
return 0;
38
}
39
@@ -XXX,XX +XXX,XX @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
40
ret = -errno;
41
goto fail;
42
}
43
+ trace_qemu_vfio_region_info("config", s->config_region_info.offset,
44
+ s->config_region_info.size,
45
+ s->config_region_info.cap_offset);
46
47
for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
48
ret = qemu_vfio_pci_init_bar(s, i, errp);
49
diff --git a/util/trace-events b/util/trace-events
50
index XXXXXXX..XXXXXXX 100644
51
--- a/util/trace-events
52
+++ b/util/trace-events
53
@@ -XXX,XX +XXX,XX @@ qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t *io
54
qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
55
qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
56
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
57
+qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
58
--
59
2.28.0
60
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
Tests should place their files into the test directory. This includes
2
Unix sockets. 205 currently fails to do so, which prevents it from
3
being run concurrently.
2
4
3
For debugging purposes, trace where a BAR is mapped.
5
Signed-off-by: Max Reitz <mreitz@redhat.com>
6
Message-id: 20190618210238.9524-1-mreitz@redhat.com
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
---
10
tests/qemu-iotests/205 | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
4
12
5
Reviewed-by: Fam Zheng <fam@euphon.net>
13
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
index XXXXXXX..XXXXXXX 100755
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
15
--- a/tests/qemu-iotests/205
8
Message-id: 20201103020733.2303148-5-philmd@redhat.com
16
+++ b/tests/qemu-iotests/205
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
@@ -XXX,XX +XXX,XX @@ import iotests
10
Tested-by: Eric Auger <eric.auger@redhat.com>
18
import time
11
---
19
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
12
util/vfio-helpers.c | 2 ++
20
13
util/trace-events | 1 +
21
-nbd_sock = 'nbd_sock'
14
2 files changed, 3 insertions(+)
22
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
23
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
24
disk = os.path.join(iotests.test_dir, 'disk')
25
26
--
27
2.21.0
15
28
16
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vfio-helpers.c
19
+++ b/util/vfio-helpers.c
20
@@ -XXX,XX +XXX,XX @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
21
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
22
prot, MAP_SHARED,
23
s->device, s->bar_region_info[index].offset + offset);
24
+ trace_qemu_vfio_pci_map_bar(index, s->bar_region_info[index].offset ,
25
+ size, offset, p);
26
if (p == MAP_FAILED) {
27
error_setg_errno(errp, errno, "Failed to map BAR region");
28
p = NULL;
29
diff --git a/util/trace-events b/util/trace-events
30
index XXXXXXX..XXXXXXX 100644
31
--- a/util/trace-events
32
+++ b/util/trace-events
33
@@ -XXX,XX +XXX,XX @@ qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
34
qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "read cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
35
qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
36
qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
37
+qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
38
--
39
2.28.0
40
29
diff view generated by jsdifflib