1
The following changes since commit 8e6c70b9d4a1b1f3011805947925cfdb31642f7f:
1
The following changes since commit 474f3938d79ab36b9231c9ad3b5a9314c2aeacde:
2
2
3
Merge tag 'kraxel-20220614-pull-request' of git://git.kraxel.org/qemu into staging (2022-06-14 06:21:46 -0700)
3
Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-jun-21-2019' into staging (2019-06-21 15:40:50 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
7
https://github.com/XanClic/qemu.git tags/pull-block-2019-06-24
8
8
9
for you to fetch changes up to 99b969fbe105117f5af6060d3afef40ca39cc9c1:
9
for you to fetch changes up to ab5d4a30f7f3803ca5106b370969c1b7b54136f8:
10
10
11
linux-aio: explain why max batch is checked in laio_io_unplug() (2022-06-15 16:43:42 +0100)
11
iotests: Fix 205 for concurrent runs (2019-06-24 16:01:40 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Block patches:
15
- The SSH block driver now uses libssh instead of libssh2
16
- The VMDK block driver gets read-only support for the seSparse
17
subformat
18
- Various fixes
15
19
16
This pull request includes an important aio=native I/O stall fix, the
20
---
17
experimental vfio-user server, the io_uring_register_ring_fd() optimization for
21
18
aio=io_uring, and an update to Vladimir Sementsov-Ogievskiy's maintainership
22
v2:
19
details.
23
- Squashed Pino's fix for pre-0.8 libssh into the libssh patch
20
24
21
----------------------------------------------------------------
25
----------------------------------------------------------------
26
Anton Nefedov (1):
27
iotest 134: test cluster-misaligned encrypted write
22
28
23
Jagannathan Raman (14):
29
Klaus Birkelund Jensen (1):
24
qdev: unplug blocker for devices
30
nvme: do not advertise support for unsupported arbitration mechanism
25
remote/machine: add HotplugHandler for remote machine
26
remote/machine: add vfio-user property
27
vfio-user: build library
28
vfio-user: define vfio-user-server object
29
vfio-user: instantiate vfio-user context
30
vfio-user: find and init PCI device
31
vfio-user: run vfio-user context
32
vfio-user: handle PCI config space accesses
33
vfio-user: IOMMU support for remote device
34
vfio-user: handle DMA mappings
35
vfio-user: handle PCI BAR accesses
36
vfio-user: handle device interrupts
37
vfio-user: handle reset of remote device
38
31
39
Sam Li (1):
32
Max Reitz (1):
40
Use io_uring_register_ring_fd() to skip fd operations
33
iotests: Fix 205 for concurrent runs
41
34
42
Stefan Hajnoczi (2):
35
Pino Toscano (1):
43
linux-aio: fix unbalanced plugged counter in laio_io_unplug()
36
ssh: switch from libssh2 to libssh
44
linux-aio: explain why max batch is checked in laio_io_unplug()
37
38
Sam Eiderman (3):
39
vmdk: Fix comment regarding max l1_size coverage
40
vmdk: Reduce the max bound for L1 table size
41
vmdk: Add read-only support for seSparse snapshots
45
42
46
Vladimir Sementsov-Ogievskiy (1):
43
Vladimir Sementsov-Ogievskiy (1):
47
MAINTAINERS: update Vladimir's address and repositories
44
blockdev: enable non-root nodes for transaction drive-backup source
48
45
49
MAINTAINERS | 27 +-
46
configure | 65 +-
50
meson_options.txt | 2 +
47
block/Makefile.objs | 6 +-
51
qapi/misc.json | 31 +
48
block/ssh.c | 652 ++++++++++--------
52
qapi/qom.json | 20 +-
49
block/vmdk.c | 372 +++++++++-
53
configure | 17 +
50
blockdev.c | 2 +-
54
meson.build | 24 +-
51
hw/block/nvme.c | 1 -
55
include/exec/memory.h | 3 +
52
.travis.yml | 4 +-
56
include/hw/pci/msi.h | 1 +
53
block/trace-events | 14 +-
57
include/hw/pci/msix.h | 1 +
54
docs/qemu-block-drivers.texi | 2 +-
58
include/hw/pci/pci.h | 13 +
55
.../dockerfiles/debian-win32-cross.docker | 1 -
59
include/hw/qdev-core.h | 29 +
56
.../dockerfiles/debian-win64-cross.docker | 1 -
60
include/hw/remote/iommu.h | 40 +
57
tests/docker/dockerfiles/fedora.docker | 4 +-
61
include/hw/remote/machine.h | 4 +
58
tests/docker/dockerfiles/ubuntu.docker | 2 +-
62
include/hw/remote/vfio-user-obj.h | 6 +
59
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
63
block/io_uring.c | 12 +-
60
tests/qemu-iotests/059.out | 2 +-
64
block/linux-aio.c | 10 +-
61
tests/qemu-iotests/134 | 9 +
65
hw/core/qdev.c | 24 +
62
tests/qemu-iotests/134.out | 10 +
66
hw/pci/msi.c | 49 +-
63
tests/qemu-iotests/205 | 2 +-
67
hw/pci/msix.c | 35 +-
64
tests/qemu-iotests/207 | 54 +-
68
hw/pci/pci.c | 13 +
65
tests/qemu-iotests/207.out | 2 +-
69
hw/remote/iommu.c | 131 ++++
66
20 files changed, 823 insertions(+), 384 deletions(-)
70
hw/remote/machine.c | 88 ++-
71
hw/remote/vfio-user-obj.c | 958 ++++++++++++++++++++++++
72
softmmu/physmem.c | 4 +-
73
softmmu/qdev-monitor.c | 4 +
74
stubs/vfio-user-obj.c | 6 +
75
tests/qtest/fuzz/generic_fuzz.c | 9 +-
76
.gitlab-ci.d/buildtest.yml | 1 +
77
.gitmodules | 3 +
78
Kconfig.host | 4 +
79
hw/remote/Kconfig | 4 +
80
hw/remote/meson.build | 4 +
81
hw/remote/trace-events | 11 +
82
scripts/meson-buildoptions.sh | 4 +
83
stubs/meson.build | 1 +
84
subprojects/libvfio-user | 1 +
85
tests/docker/dockerfiles/centos8.docker | 2 +
86
37 files changed, 1565 insertions(+), 31 deletions(-)
87
create mode 100644 include/hw/remote/iommu.h
88
create mode 100644 include/hw/remote/vfio-user-obj.h
89
create mode 100644 hw/remote/iommu.c
90
create mode 100644 hw/remote/vfio-user-obj.c
91
create mode 100644 stubs/vfio-user-obj.c
92
create mode 160000 subprojects/libvfio-user
93
67
94
--
68
--
95
2.36.1
69
2.21.0
70
71
diff view generated by jsdifflib
1
It may not be obvious why laio_io_unplug() checks max batch. I discussed
1
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
2
this with Stefano and have added a comment summarizing the reason.
3
2
4
Cc: Stefano Garzarella <sgarzare@redhat.com>
3
The device mistakenly reports that the Weighted Round Robin with Urgent
5
Cc: Kevin Wolf <kwolf@redhat.com>
4
Priority Class arbitration mechanism is supported.
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
7
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
6
It is not.
8
Message-id: 20220609164712.1539045-3-stefanha@redhat.com
7
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
9
Message-id: 20190606092530.14206-1-klaus@birkelund.eu
10
Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
12
---
11
block/linux-aio.c | 6 ++++++
13
hw/block/nvme.c | 1 -
12
1 file changed, 6 insertions(+)
14
1 file changed, 1 deletion(-)
13
15
14
diff --git a/block/linux-aio.c b/block/linux-aio.c
16
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/block/linux-aio.c
18
--- a/hw/block/nvme.c
17
+++ b/block/linux-aio.c
19
+++ b/hw/block/nvme.c
18
@@ -XXX,XX +XXX,XX @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
20
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
19
assert(s->io_q.plugged);
21
n->bar.cap = 0;
20
s->io_q.plugged--;
22
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
21
23
NVME_CAP_SET_CQR(n->bar.cap, 1);
22
+ /*
24
- NVME_CAP_SET_AMS(n->bar.cap, 1);
23
+ * Why max batch checking is performed here:
25
NVME_CAP_SET_TO(n->bar.cap, 0xf);
24
+ * Another BDS may have queued requests with a higher dev_max_batch and
26
NVME_CAP_SET_CSS(n->bar.cap, 1);
25
+ * therefore in_queue could now exceed our dev_max_batch. Re-check the max
27
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
26
+ * batch so we can honor our device's dev_max_batch.
27
+ */
28
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
29
(!s->io_q.plugged &&
30
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
31
--
28
--
32
2.36.1
29
2.21.0
30
31
diff view generated by jsdifflib
1
Every laio_io_plug() call has a matching laio_io_unplug() call. There is
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
a plugged counter that tracks the number of levels of plugging and
3
allows for nesting.
4
2
5
The plugged counter must reflect the balance between laio_io_plug() and
3
We forget to enable it for transaction .prepare, while it is already
6
laio_io_unplug() calls accurately. Otherwise I/O stalls occur since
4
enabled in do_drive_backup since commit a2d665c1bc362
7
io_submit(2) calls are skipped while plugged.
5
"blockdev: loosen restrictions on drive-backup source node"
8
6
9
Reported-by: Nikolay Tenev <nt@storpool.com>
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20190618140804.59214-1-vsementsov@virtuozzo.com
11
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
9
Reviewed-by: John Snow <jsnow@redhat.com>
12
Message-id: 20220609164712.1539045-2-stefanha@redhat.com
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
Cc: Stefano Garzarella <sgarzare@redhat.com>
14
Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()")
15
[Stefano Garzarella suggested adding a Fixes tag.
16
--Stefan]
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
11
---
19
block/linux-aio.c | 4 +++-
12
blockdev.c | 2 +-
20
1 file changed, 3 insertions(+), 1 deletion(-)
13
1 file changed, 1 insertion(+), 1 deletion(-)
21
14
22
diff --git a/block/linux-aio.c b/block/linux-aio.c
15
diff --git a/blockdev.c b/blockdev.c
23
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
24
--- a/block/linux-aio.c
17
--- a/blockdev.c
25
+++ b/block/linux-aio.c
18
+++ b/blockdev.c
26
@@ -XXX,XX +XXX,XX @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
19
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
27
uint64_t dev_max_batch)
20
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
28
{
21
backup = common->action->u.drive_backup.data;
29
assert(s->io_q.plugged);
22
30
+ s->io_q.plugged--;
23
- bs = qmp_get_root_bs(backup->device, errp);
31
+
24
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
32
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
25
if (!bs) {
33
- (--s->io_q.plugged == 0 &&
26
return;
34
+ (!s->io_q.plugged &&
35
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
36
ioq_submit(s);
37
}
27
}
38
--
28
--
39
2.36.1
29
2.21.0
30
31
diff view generated by jsdifflib
1
From: Jagannathan Raman <jag.raman@oracle.com>
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
2
2
3
Assign separate address space for each device in the remote processes.
3
COW (even empty/zero) areas require encryption too
4
4
5
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
5
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
6
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Message-id: afe0b0a97582cdad42b5b25636a29c523265a10a.1655151679.git.jag.raman@oracle.com
9
Message-id: 20190516143028.81155-1-anton.nefedov@virtuozzo.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
11
---
12
MAINTAINERS | 2 +
12
tests/qemu-iotests/134 | 9 +++++++++
13
include/hw/remote/iommu.h | 40 ++++++++++++
13
tests/qemu-iotests/134.out | 10 ++++++++++
14
hw/remote/iommu.c | 131 ++++++++++++++++++++++++++++++++++++++
14
2 files changed, 19 insertions(+)
15
hw/remote/machine.c | 13 +++-
16
hw/remote/meson.build | 1 +
17
5 files changed, 186 insertions(+), 1 deletion(-)
18
create mode 100644 include/hw/remote/iommu.h
19
create mode 100644 hw/remote/iommu.c
20
15
21
diff --git a/MAINTAINERS b/MAINTAINERS
16
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/134
19
+++ b/tests/qemu-iotests/134
20
@@ -XXX,XX +XXX,XX @@ echo
21
echo "== reading whole image =="
22
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
23
24
+echo
25
+echo "== rewriting cluster part =="
26
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
27
+
28
+echo
29
+echo "== verify pattern =="
30
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
31
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
32
+
33
echo
34
echo "== rewriting whole image =="
35
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
36
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
22
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
23
--- a/MAINTAINERS
38
--- a/tests/qemu-iotests/134.out
24
+++ b/MAINTAINERS
39
+++ b/tests/qemu-iotests/134.out
25
@@ -XXX,XX +XXX,XX @@ F: hw/remote/iohub.c
40
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
26
F: include/hw/remote/iohub.h
41
read 134217728/134217728 bytes at offset 0
27
F: subprojects/libvfio-user
42
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
28
F: hw/remote/vfio-user-obj.c
43
29
+F: hw/remote/iommu.c
44
+== rewriting cluster part ==
30
+F: include/hw/remote/iommu.h
45
+wrote 512/512 bytes at offset 512
31
46
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
32
EBPF:
33
M: Jason Wang <jasowang@redhat.com>
34
diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
35
new file mode 100644
36
index XXXXXXX..XXXXXXX
37
--- /dev/null
38
+++ b/include/hw/remote/iommu.h
39
@@ -XXX,XX +XXX,XX @@
40
+/**
41
+ * Copyright © 2022 Oracle and/or its affiliates.
42
+ *
43
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
44
+ * See the COPYING file in the top-level directory.
45
+ *
46
+ */
47
+
47
+
48
+#ifndef REMOTE_IOMMU_H
48
+== verify pattern ==
49
+#define REMOTE_IOMMU_H
49
+read 512/512 bytes at offset 0
50
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
51
+read 512/512 bytes at offset 512
52
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
50
+
53
+
51
+#include "hw/pci/pci_bus.h"
54
== rewriting whole image ==
52
+#include "hw/pci/pci.h"
55
wrote 134217728/134217728 bytes at offset 0
53
+
56
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
54
+#ifndef INT2VOIDP
55
+#define INT2VOIDP(i) (void *)(uintptr_t)(i)
56
+#endif
57
+
58
+typedef struct RemoteIommuElem {
59
+ MemoryRegion *mr;
60
+
61
+ AddressSpace as;
62
+} RemoteIommuElem;
63
+
64
+#define TYPE_REMOTE_IOMMU "x-remote-iommu"
65
+OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU)
66
+
67
+struct RemoteIommu {
68
+ Object parent;
69
+
70
+ GHashTable *elem_by_devfn;
71
+
72
+ QemuMutex lock;
73
+};
74
+
75
+void remote_iommu_setup(PCIBus *pci_bus);
76
+
77
+void remote_iommu_unplug_dev(PCIDevice *pci_dev);
78
+
79
+#endif
80
diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
81
new file mode 100644
82
index XXXXXXX..XXXXXXX
83
--- /dev/null
84
+++ b/hw/remote/iommu.c
85
@@ -XXX,XX +XXX,XX @@
86
+/**
87
+ * IOMMU for remote device
88
+ *
89
+ * Copyright © 2022 Oracle and/or its affiliates.
90
+ *
91
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
92
+ * See the COPYING file in the top-level directory.
93
+ *
94
+ */
95
+
96
+#include "qemu/osdep.h"
97
+
98
+#include "hw/remote/iommu.h"
99
+#include "hw/pci/pci_bus.h"
100
+#include "hw/pci/pci.h"
101
+#include "exec/memory.h"
102
+#include "exec/address-spaces.h"
103
+#include "trace.h"
104
+
105
+/**
106
+ * IOMMU for TYPE_REMOTE_MACHINE - manages DMA address space isolation
107
+ * for remote machine. It is used by TYPE_VFIO_USER_SERVER.
108
+ *
109
+ * - Each TYPE_VFIO_USER_SERVER instance handles one PCIDevice on a PCIBus.
110
+ * There is one RemoteIommu per PCIBus, so the RemoteIommu tracks multiple
111
+ * PCIDevices by maintaining a ->elem_by_devfn mapping.
112
+ *
113
+ * - memory_region_init_iommu() is not used because vfio-user MemoryRegions
114
+ * will be added to the elem->mr container instead. This is more natural
115
+ * than implementing the IOMMUMemoryRegionClass APIs since vfio-user
116
+ * provides something that is close to a full-fledged MemoryRegion and
117
+ * not like an IOMMU mapping.
118
+ *
119
+ * - When a device is hot unplugged, the elem->mr reference is dropped so
120
+ * all vfio-user MemoryRegions associated with this vfio-user server are
121
+ * destroyed.
122
+ */
123
+
124
+static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus,
125
+ void *opaque, int devfn)
126
+{
127
+ RemoteIommu *iommu = opaque;
128
+ RemoteIommuElem *elem = NULL;
129
+
130
+ qemu_mutex_lock(&iommu->lock);
131
+
132
+ elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn));
133
+
134
+ if (!elem) {
135
+ elem = g_malloc0(sizeof(RemoteIommuElem));
136
+ g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem);
137
+ }
138
+
139
+ if (!elem->mr) {
140
+ elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION));
141
+ memory_region_set_size(elem->mr, UINT64_MAX);
142
+ address_space_init(&elem->as, elem->mr, NULL);
143
+ }
144
+
145
+ qemu_mutex_unlock(&iommu->lock);
146
+
147
+ return &elem->as;
148
+}
149
+
150
+void remote_iommu_unplug_dev(PCIDevice *pci_dev)
151
+{
152
+ AddressSpace *as = pci_device_iommu_address_space(pci_dev);
153
+ RemoteIommuElem *elem = NULL;
154
+
155
+ if (as == &address_space_memory) {
156
+ return;
157
+ }
158
+
159
+ elem = container_of(as, RemoteIommuElem, as);
160
+
161
+ address_space_destroy(&elem->as);
162
+
163
+ object_unref(elem->mr);
164
+
165
+ elem->mr = NULL;
166
+}
167
+
168
+static void remote_iommu_init(Object *obj)
169
+{
170
+ RemoteIommu *iommu = REMOTE_IOMMU(obj);
171
+
172
+ iommu->elem_by_devfn = g_hash_table_new_full(NULL, NULL, NULL, g_free);
173
+
174
+ qemu_mutex_init(&iommu->lock);
175
+}
176
+
177
+static void remote_iommu_finalize(Object *obj)
178
+{
179
+ RemoteIommu *iommu = REMOTE_IOMMU(obj);
180
+
181
+ qemu_mutex_destroy(&iommu->lock);
182
+
183
+ g_hash_table_destroy(iommu->elem_by_devfn);
184
+
185
+ iommu->elem_by_devfn = NULL;
186
+}
187
+
188
+void remote_iommu_setup(PCIBus *pci_bus)
189
+{
190
+ RemoteIommu *iommu = NULL;
191
+
192
+ g_assert(pci_bus);
193
+
194
+ iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU));
195
+
196
+ pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu);
197
+
198
+ object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu));
199
+
200
+ object_unref(OBJECT(iommu));
201
+}
202
+
203
+static const TypeInfo remote_iommu_info = {
204
+ .name = TYPE_REMOTE_IOMMU,
205
+ .parent = TYPE_OBJECT,
206
+ .instance_size = sizeof(RemoteIommu),
207
+ .instance_init = remote_iommu_init,
208
+ .instance_finalize = remote_iommu_finalize,
209
+};
210
+
211
+static void remote_iommu_register_types(void)
212
+{
213
+ type_register_static(&remote_iommu_info);
214
+}
215
+
216
+type_init(remote_iommu_register_types)
217
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
218
index XXXXXXX..XXXXXXX 100644
219
--- a/hw/remote/machine.c
220
+++ b/hw/remote/machine.c
221
@@ -XXX,XX +XXX,XX @@
222
#include "qapi/error.h"
223
#include "hw/pci/pci_host.h"
224
#include "hw/remote/iohub.h"
225
+#include "hw/remote/iommu.h"
226
#include "hw/qdev-core.h"
227
228
static void remote_machine_init(MachineState *machine)
229
@@ -XXX,XX +XXX,XX @@ static void remote_machine_instance_init(Object *obj)
230
s->auto_shutdown = true;
231
}
232
233
+static void remote_machine_dev_unplug_cb(HotplugHandler *hotplug_dev,
234
+ DeviceState *dev, Error **errp)
235
+{
236
+ qdev_unrealize(dev);
237
+
238
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
239
+ remote_iommu_unplug_dev(PCI_DEVICE(dev));
240
+ }
241
+}
242
+
243
static void remote_machine_class_init(ObjectClass *oc, void *data)
244
{
245
MachineClass *mc = MACHINE_CLASS(oc);
246
@@ -XXX,XX +XXX,XX @@ static void remote_machine_class_init(ObjectClass *oc, void *data)
247
mc->init = remote_machine_init;
248
mc->desc = "Experimental remote machine";
249
250
- hc->unplug = qdev_simple_device_unplug_cb;
251
+ hc->unplug = remote_machine_dev_unplug_cb;
252
253
object_class_property_add_bool(oc, "vfio-user",
254
remote_machine_get_vfio_user,
255
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
256
index XXXXXXX..XXXXXXX 100644
257
--- a/hw/remote/meson.build
258
+++ b/hw/remote/meson.build
259
@@ -XXX,XX +XXX,XX @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
260
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
261
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
262
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
263
+remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c'))
264
remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
265
266
remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep)
267
--
57
--
268
2.36.1
58
2.21.0
269
59
270
60
diff view generated by jsdifflib
1
From: Jagannathan Raman <jag.raman@oracle.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Find the PCI device with specified id. Initialize the device context
3
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
4
with the QEMU PCI device
4
extended the l1_size check from VMDK4 to VMDK3 but did not update the
5
default coverage in the moved comment.
5
6
6
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
7
The previous vmdk4 calculation:
7
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
8
8
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
9
(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
10
Message-id: 7798dbd730099b33fdd00c4c202cfe79e5c5c151.1655151679.git.jag.raman@oracle.com
11
The added vmdk3 calculation:
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
13
(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB
14
15
Adding the calculation of vmdk3 to the comment.
16
17
In any case, VMware does not offer virtual disks more than 2TB for
18
vmdk4/vmdk3 or 64TB for the new undocumented seSparse format which is
19
not implemented yet in qemu.
20
21
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
22
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
23
Reviewed-by: Liran Alon <liran.alon@oracle.com>
24
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
25
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
26
Message-id: 20190620091057.47441-2-shmuel.eiderman@oracle.com
27
Reviewed-by: yuchenlin <yuchenlin@synology.com>
28
Reviewed-by: Max Reitz <mreitz@redhat.com>
29
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
---
30
---
13
hw/remote/vfio-user-obj.c | 67 +++++++++++++++++++++++++++++++++++++++
31
block/vmdk.c | 11 ++++++++---
14
1 file changed, 67 insertions(+)
32
1 file changed, 8 insertions(+), 3 deletions(-)
15
33
16
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
34
diff --git a/block/vmdk.c b/block/vmdk.c
17
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/remote/vfio-user-obj.c
36
--- a/block/vmdk.c
19
+++ b/hw/remote/vfio-user-obj.c
37
+++ b/block/vmdk.c
20
@@ -XXX,XX +XXX,XX @@
38
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
21
#include "qemu/notify.h"
39
return -EFBIG;
22
#include "sysemu/sysemu.h"
23
#include "libvfio-user.h"
24
+#include "hw/qdev-core.h"
25
+#include "hw/pci/pci.h"
26
27
#define TYPE_VFU_OBJECT "x-vfio-user-server"
28
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
29
@@ -XXX,XX +XXX,XX @@ struct VfuObject {
30
Notifier machine_done;
31
32
vfu_ctx_t *vfu_ctx;
33
+
34
+ PCIDevice *pci_dev;
35
+
36
+ Error *unplug_blocker;
37
};
38
39
static void vfu_object_init_ctx(VfuObject *o, Error **errp);
40
@@ -XXX,XX +XXX,XX @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
41
static void vfu_object_init_ctx(VfuObject *o, Error **errp)
42
{
43
ERRP_GUARD();
44
+ DeviceState *dev = NULL;
45
+ vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
46
+ int ret;
47
48
if (o->vfu_ctx || !o->socket || !o->device ||
49
!phase_check(PHASE_MACHINE_READY)) {
50
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
51
error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
52
return;
53
}
40
}
54
+
41
if (l1_size > 512 * 1024 * 1024) {
55
+ dev = qdev_find_recursive(sysbus_get_default(), o->device);
42
- /* Although with big capacity and small l1_entry_sectors, we can get a
56
+ if (dev == NULL) {
43
+ /*
57
+ error_setg(errp, "vfu: Device %s not found", o->device);
44
+ * Although with big capacity and small l1_entry_sectors, we can get a
58
+ goto fail;
45
* big l1_size, we don't want unbounded value to allocate the table.
59
+ }
46
- * Limit it to 512M, which is 16PB for default cluster and L2 table
60
+
47
- * size */
61
+ if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
48
+ * Limit it to 512M, which is:
62
+ error_setg(errp, "vfu: %s not a PCI device", o->device);
49
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
63
+ goto fail;
50
+ * cluster size: 64KB, L2 table size: 512 entries
64
+ }
51
+ * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
65
+
52
+ * cluster size: 512B, L2 table size: 4096 entries
66
+ o->pci_dev = PCI_DEVICE(dev);
53
+ */
67
+
54
error_setg(errp, "L1 size too big");
68
+ object_ref(OBJECT(o->pci_dev));
55
return -EFBIG;
69
+
70
+ if (pci_is_express(o->pci_dev)) {
71
+ pci_type = VFU_PCI_TYPE_EXPRESS;
72
+ }
73
+
74
+ ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
75
+ if (ret < 0) {
76
+ error_setg(errp,
77
+ "vfu: Failed to attach PCI device %s to context - %s",
78
+ o->device, strerror(errno));
79
+ goto fail;
80
+ }
81
+
82
+ error_setg(&o->unplug_blocker,
83
+ "vfu: %s for %s must be deleted before unplugging",
84
+ TYPE_VFU_OBJECT, o->device);
85
+ qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
86
+
87
+ return;
88
+
89
+fail:
90
+ vfu_destroy_ctx(o->vfu_ctx);
91
+ if (o->unplug_blocker && o->pci_dev) {
92
+ qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
93
+ error_free(o->unplug_blocker);
94
+ o->unplug_blocker = NULL;
95
+ }
96
+ if (o->pci_dev) {
97
+ object_unref(OBJECT(o->pci_dev));
98
+ o->pci_dev = NULL;
99
+ }
100
+ o->vfu_ctx = NULL;
101
}
102
103
static void vfu_object_init(Object *obj)
104
@@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj)
105
106
o->device = NULL;
107
108
+ if (o->unplug_blocker && o->pci_dev) {
109
+ qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
110
+ error_free(o->unplug_blocker);
111
+ o->unplug_blocker = NULL;
112
+ }
113
+
114
+ if (o->pci_dev) {
115
+ object_unref(OBJECT(o->pci_dev));
116
+ o->pci_dev = NULL;
117
+ }
118
+
119
if (!k->nr_devs && vfu_object_auto_shutdown()) {
120
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
121
}
56
}
122
--
57
--
123
2.36.1
58
2.21.0
59
60
diff view generated by jsdifflib
1
From: Jagannathan Raman <jag.raman@oracle.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Define vfio-user object which is remote process server for QEMU. Setup
3
512M of L1 entries is a very loose bound, only 32M are required to store
4
object initialization functions and properties necessary to instantiate
4
the maximal supported VMDK file size of 2TB.
5
the object
6
5
7
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
6
Fixed qemu-iotest 059 - the failure now occurs earlier, on the impossible L1
8
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
7
table size.
9
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
8
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
11
Message-id: e45a17001e9b38f451543a664ababdf860e5f2f2.1655151679.git.jag.raman@oracle.com
10
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Liran Alon <liran.alon@oracle.com>
12
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
13
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
14
Message-id: 20190620091057.47441-3-shmuel.eiderman@oracle.com
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
---
17
---
14
MAINTAINERS | 1 +
18
block/vmdk.c | 13 +++++++------
15
qapi/qom.json | 20 +++-
19
tests/qemu-iotests/059.out | 2 +-
16
include/hw/remote/machine.h | 2 +
20
2 files changed, 8 insertions(+), 7 deletions(-)
17
hw/remote/machine.c | 27 +++++
18
hw/remote/vfio-user-obj.c | 210 ++++++++++++++++++++++++++++++++++++
19
hw/remote/meson.build | 1 +
20
hw/remote/trace-events | 3 +
21
7 files changed, 262 insertions(+), 2 deletions(-)
22
create mode 100644 hw/remote/vfio-user-obj.c
23
21
24
diff --git a/MAINTAINERS b/MAINTAINERS
22
diff --git a/block/vmdk.c b/block/vmdk.c
25
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
26
--- a/MAINTAINERS
24
--- a/block/vmdk.c
27
+++ b/MAINTAINERS
25
+++ b/block/vmdk.c
28
@@ -XXX,XX +XXX,XX @@ F: include/hw/remote/proxy-memory-listener.h
26
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
29
F: hw/remote/iohub.c
27
error_setg(errp, "Invalid granularity, image may be corrupt");
30
F: include/hw/remote/iohub.h
28
return -EFBIG;
31
F: subprojects/libvfio-user
29
}
32
+F: hw/remote/vfio-user-obj.c
30
- if (l1_size > 512 * 1024 * 1024) {
33
31
+ if (l1_size > 32 * 1024 * 1024) {
34
EBPF:
32
/*
35
M: Jason Wang <jasowang@redhat.com>
33
* Although with big capacity and small l1_entry_sectors, we can get a
36
diff --git a/qapi/qom.json b/qapi/qom.json
34
* big l1_size, we don't want unbounded value to allocate the table.
35
- * Limit it to 512M, which is:
36
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
37
- * cluster size: 64KB, L2 table size: 512 entries
38
- * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
39
- * cluster size: 512B, L2 table size: 4096 entries
40
+ * Limit it to 32M, which is enough to store:
41
+ * 8TB - for both VMDK3 & VMDK4 with
42
+ * minimal cluster size: 512B
43
+ * minimal L2 table size: 512 entries
44
+ * 8 TB is still more than the maximal value supported for
45
+ * VMDK3 & VMDK4 which is 2TB.
46
*/
47
error_setg(errp, "L1 size too big");
48
return -EFBIG;
49
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
37
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
38
--- a/qapi/qom.json
51
--- a/tests/qemu-iotests/059.out
39
+++ b/qapi/qom.json
52
+++ b/tests/qemu-iotests/059.out
40
@@ -XXX,XX +XXX,XX @@
53
@@ -XXX,XX +XXX,XX @@ Offset Length Mapped to File
41
{ 'struct': 'RemoteObjectProperties',
54
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
42
'data': { 'fd': 'str', 'devid': 'str' } }
55
43
56
=== Testing afl image with a very large capacity ===
44
+##
57
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
45
+# @VfioUserServerProperties:
58
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
46
+#
59
*** done
47
+# Properties for x-vfio-user-server objects.
48
+#
49
+# @socket: socket to be used by the libvfio-user library
50
+#
51
+# @device: the ID of the device to be emulated at the server
52
+#
53
+# Since: 7.1
54
+##
55
+{ 'struct': 'VfioUserServerProperties',
56
+ 'data': { 'socket': 'SocketAddress', 'device': 'str' } }
57
+
58
##
59
# @RngProperties:
60
#
61
@@ -XXX,XX +XXX,XX @@
62
'tls-creds-psk',
63
'tls-creds-x509',
64
'tls-cipher-suites',
65
- { 'name': 'x-remote-object', 'features': [ 'unstable' ] }
66
+ { 'name': 'x-remote-object', 'features': [ 'unstable' ] },
67
+ { 'name': 'x-vfio-user-server', 'features': [ 'unstable' ] }
68
] }
69
70
##
71
@@ -XXX,XX +XXX,XX @@
72
'tls-creds-psk': 'TlsCredsPskProperties',
73
'tls-creds-x509': 'TlsCredsX509Properties',
74
'tls-cipher-suites': 'TlsCredsProperties',
75
- 'x-remote-object': 'RemoteObjectProperties'
76
+ 'x-remote-object': 'RemoteObjectProperties',
77
+ 'x-vfio-user-server': 'VfioUserServerProperties'
78
} }
79
80
##
81
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/include/hw/remote/machine.h
84
+++ b/include/hw/remote/machine.h
85
@@ -XXX,XX +XXX,XX @@ struct RemoteMachineState {
86
RemoteIOHubState iohub;
87
88
bool vfio_user;
89
+
90
+ bool auto_shutdown;
91
};
92
93
/* Used to pass to co-routine device and ioc. */
94
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/hw/remote/machine.c
97
+++ b/hw/remote/machine.c
98
@@ -XXX,XX +XXX,XX @@ static void remote_machine_set_vfio_user(Object *obj, bool value, Error **errp)
99
s->vfio_user = value;
100
}
101
102
+static bool remote_machine_get_auto_shutdown(Object *obj, Error **errp)
103
+{
104
+ RemoteMachineState *s = REMOTE_MACHINE(obj);
105
+
106
+ return s->auto_shutdown;
107
+}
108
+
109
+static void remote_machine_set_auto_shutdown(Object *obj, bool value,
110
+ Error **errp)
111
+{
112
+ RemoteMachineState *s = REMOTE_MACHINE(obj);
113
+
114
+ s->auto_shutdown = value;
115
+}
116
+
117
+static void remote_machine_instance_init(Object *obj)
118
+{
119
+ RemoteMachineState *s = REMOTE_MACHINE(obj);
120
+
121
+ s->auto_shutdown = true;
122
+}
123
+
124
static void remote_machine_class_init(ObjectClass *oc, void *data)
125
{
126
MachineClass *mc = MACHINE_CLASS(oc);
127
@@ -XXX,XX +XXX,XX @@ static void remote_machine_class_init(ObjectClass *oc, void *data)
128
object_class_property_add_bool(oc, "vfio-user",
129
remote_machine_get_vfio_user,
130
remote_machine_set_vfio_user);
131
+
132
+ object_class_property_add_bool(oc, "auto-shutdown",
133
+ remote_machine_get_auto_shutdown,
134
+ remote_machine_set_auto_shutdown);
135
}
136
137
static const TypeInfo remote_machine = {
138
.name = TYPE_REMOTE_MACHINE,
139
.parent = TYPE_MACHINE,
140
.instance_size = sizeof(RemoteMachineState),
141
+ .instance_init = remote_machine_instance_init,
142
.class_init = remote_machine_class_init,
143
.interfaces = (InterfaceInfo[]) {
144
{ TYPE_HOTPLUG_HANDLER },
145
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
146
new file mode 100644
147
index XXXXXXX..XXXXXXX
148
--- /dev/null
149
+++ b/hw/remote/vfio-user-obj.c
150
@@ -XXX,XX +XXX,XX @@
151
+/**
152
+ * QEMU vfio-user-server server object
153
+ *
154
+ * Copyright © 2022 Oracle and/or its affiliates.
155
+ *
156
+ * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
157
+ *
158
+ * See the COPYING file in the top-level directory.
159
+ *
160
+ */
161
+
162
+/**
163
+ * Usage: add options:
164
+ * -machine x-remote,vfio-user=on,auto-shutdown=on
165
+ * -device <PCI-device>,id=<pci-dev-id>
166
+ * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
167
+ * device=<pci-dev-id>
168
+ *
169
+ * Note that x-vfio-user-server object must be used with x-remote machine only.
170
+ * This server could only support PCI devices for now.
171
+ *
172
+ * type - SocketAddress type - presently "unix" alone is supported. Required
173
+ * option
174
+ *
175
+ * path - named unix socket, it will be created by the server. It is
176
+ * a required option
177
+ *
178
+ * device - id of a device on the server, a required option. PCI devices
179
+ * alone are supported presently.
180
+ */
181
+
182
+#include "qemu/osdep.h"
183
+
184
+#include "qom/object.h"
185
+#include "qom/object_interfaces.h"
186
+#include "qemu/error-report.h"
187
+#include "trace.h"
188
+#include "sysemu/runstate.h"
189
+#include "hw/boards.h"
190
+#include "hw/remote/machine.h"
191
+#include "qapi/error.h"
192
+#include "qapi/qapi-visit-sockets.h"
193
+
194
+#define TYPE_VFU_OBJECT "x-vfio-user-server"
195
+OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
196
+
197
+/**
198
+ * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown
199
+ * is set, it aborts the machine on error. Otherwise, it logs an
200
+ * error message without aborting.
201
+ */
202
+#define VFU_OBJECT_ERROR(o, fmt, ...) \
203
+ { \
204
+ if (vfu_object_auto_shutdown()) { \
205
+ error_setg(&error_abort, (fmt), ## __VA_ARGS__); \
206
+ } else { \
207
+ error_report((fmt), ## __VA_ARGS__); \
208
+ } \
209
+ } \
210
+
211
+struct VfuObjectClass {
212
+ ObjectClass parent_class;
213
+
214
+ unsigned int nr_devs;
215
+};
216
+
217
+struct VfuObject {
218
+ /* private */
219
+ Object parent;
220
+
221
+ SocketAddress *socket;
222
+
223
+ char *device;
224
+
225
+ Error *err;
226
+};
227
+
228
+static bool vfu_object_auto_shutdown(void)
229
+{
230
+ bool auto_shutdown = true;
231
+ Error *local_err = NULL;
232
+
233
+ if (!current_machine) {
234
+ return auto_shutdown;
235
+ }
236
+
237
+ auto_shutdown = object_property_get_bool(OBJECT(current_machine),
238
+ "auto-shutdown",
239
+ &local_err);
240
+
241
+ /*
242
+ * local_err would be set if no such property exists - safe to ignore.
243
+ * Unlikely scenario as auto-shutdown is always defined for
244
+ * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
245
+ * TYPE_REMOTE_MACHINE
246
+ */
247
+ if (local_err) {
248
+ auto_shutdown = true;
249
+ error_free(local_err);
250
+ }
251
+
252
+ return auto_shutdown;
253
+}
254
+
255
+static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
256
+ void *opaque, Error **errp)
257
+{
258
+ VfuObject *o = VFU_OBJECT(obj);
259
+
260
+ qapi_free_SocketAddress(o->socket);
261
+
262
+ o->socket = NULL;
263
+
264
+ visit_type_SocketAddress(v, name, &o->socket, errp);
265
+
266
+ if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
267
+ error_setg(errp, "vfu: Unsupported socket type - %s",
268
+ SocketAddressType_str(o->socket->type));
269
+ qapi_free_SocketAddress(o->socket);
270
+ o->socket = NULL;
271
+ return;
272
+ }
273
+
274
+ trace_vfu_prop("socket", o->socket->u.q_unix.path);
275
+}
276
+
277
+static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
278
+{
279
+ VfuObject *o = VFU_OBJECT(obj);
280
+
281
+ g_free(o->device);
282
+
283
+ o->device = g_strdup(str);
284
+
285
+ trace_vfu_prop("device", str);
286
+}
287
+
288
+static void vfu_object_init(Object *obj)
289
+{
290
+ VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
291
+ VfuObject *o = VFU_OBJECT(obj);
292
+
293
+ k->nr_devs++;
294
+
295
+ if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
296
+ error_setg(&o->err, "vfu: %s only compatible with %s machine",
297
+ TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
298
+ return;
299
+ }
300
+}
301
+
302
+static void vfu_object_finalize(Object *obj)
303
+{
304
+ VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
305
+ VfuObject *o = VFU_OBJECT(obj);
306
+
307
+ k->nr_devs--;
308
+
309
+ qapi_free_SocketAddress(o->socket);
310
+
311
+ o->socket = NULL;
312
+
313
+ g_free(o->device);
314
+
315
+ o->device = NULL;
316
+
317
+ if (!k->nr_devs && vfu_object_auto_shutdown()) {
318
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
319
+ }
320
+}
321
+
322
+static void vfu_object_class_init(ObjectClass *klass, void *data)
323
+{
324
+ VfuObjectClass *k = VFU_OBJECT_CLASS(klass);
325
+
326
+ k->nr_devs = 0;
327
+
328
+ object_class_property_add(klass, "socket", "SocketAddress", NULL,
329
+ vfu_object_set_socket, NULL, NULL);
330
+ object_class_property_set_description(klass, "socket",
331
+ "SocketAddress "
332
+ "(ex: type=unix,path=/tmp/sock). "
333
+ "Only UNIX is presently supported");
334
+ object_class_property_add_str(klass, "device", NULL,
335
+ vfu_object_set_device);
336
+ object_class_property_set_description(klass, "device",
337
+ "device ID - only PCI devices "
338
+ "are presently supported");
339
+}
340
+
341
+static const TypeInfo vfu_object_info = {
342
+ .name = TYPE_VFU_OBJECT,
343
+ .parent = TYPE_OBJECT,
344
+ .instance_size = sizeof(VfuObject),
345
+ .instance_init = vfu_object_init,
346
+ .instance_finalize = vfu_object_finalize,
347
+ .class_size = sizeof(VfuObjectClass),
348
+ .class_init = vfu_object_class_init,
349
+ .interfaces = (InterfaceInfo[]) {
350
+ { TYPE_USER_CREATABLE },
351
+ { }
352
+ }
353
+};
354
+
355
+static void vfu_register_types(void)
356
+{
357
+ type_register_static(&vfu_object_info);
358
+}
359
+
360
+type_init(vfu_register_types);
361
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
362
index XXXXXXX..XXXXXXX 100644
363
--- a/hw/remote/meson.build
364
+++ b/hw/remote/meson.build
365
@@ -XXX,XX +XXX,XX @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
366
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
367
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
368
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
369
+remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'))
370
371
remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep)
372
373
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
374
index XXXXXXX..XXXXXXX 100644
375
--- a/hw/remote/trace-events
376
+++ b/hw/remote/trace-events
377
@@ -XXX,XX +XXX,XX @@
378
379
mpqemu_send_io_error(int cmd, int size, int nfds) "send command %d size %d, %d file descriptors to remote process"
380
mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d, %d file descriptors to remote process"
381
+
382
+# vfio-user-obj.c
383
+vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s"
384
--
60
--
385
2.36.1
61
2.21.0
386
62
387
63
diff view generated by jsdifflib
1
From: Jagannathan Raman <jag.raman@oracle.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Determine the BARs used by the PCI device and register handlers to
3
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
4
manage the access to the same.
4
QEMU).
5
5
6
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
6
This format was lacking in the following:
7
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
7
8
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
8
* Grain directory (L1) and grain table (L2) entries were 32-bit,
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
allowing access to only 2TB (slightly less) of data.
10
Message-id: 3373e10b5be5f42846f0632d4382466e1698c505.1655151679.git.jag.raman@oracle.com
10
* The grain size (default) was 512 bytes - leading to data
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
fragmentation and many grain tables.
12
* For space reclamation purposes, it was necessary to find all the
13
grains which are not pointed to by any grain table - so a reverse
14
mapping of "offset of grain in vmdk" to "grain table" must be
15
constructed - which takes large amounts of CPU/RAM.
16
17
The format specification can be found in VMware's documentation:
18
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf
19
20
In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
21
introduced: SESparse (Space Efficient).
22
23
This format fixes the above issues:
24
25
* All entries are now 64-bit.
26
* The grain size (default) is 4KB.
27
* Grain directory and grain tables are now located at the beginning
28
of the file.
29
+ seSparse format reserves space for all grain tables.
30
+ Grain tables can be addressed using an index.
31
+ Grains are located in the end of the file and can also be
32
addressed with an index.
33
- seSparse vmdks of large disks (64TB) have huge preallocated
34
headers - mainly due to L2 tables, even for empty snapshots.
35
* The header contains a reverse mapping ("backmap") of "offset of
36
grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
37
specifies for each grain - whether it is allocated or not.
38
Using these data structures we can implement space reclamation
39
efficiently.
40
* Due to the fact that the header now maintains two mappings:
41
* The regular one (grain directory & grain tables)
42
* A reverse one (backmap and free bitmap)
43
These data structures can lose consistency upon crash and result
44
in a corrupted VMDK.
45
Therefore, a journal is also added to the VMDK and is replayed
46
when the VMware reopens the file after a crash.
47
48
Since ESXi 6.7 - SESparse is the only snapshot format available.
49
50
Unfortunately, VMware does not provide documentation regarding the new
51
seSparse format.
52
53
This commit is based on black-box research of the seSparse format.
54
Various in-guest block operations and their effect on the snapshot file
55
were tested.
56
57
The only VMware provided source of information (regarding the underlying
58
implementation) was a log file on the ESXi:
59
60
/var/log/hostd.log
61
62
Whenever an seSparse snapshot is created - the log is being populated
63
with seSparse records.
64
65
Relevant log records are of the form:
66
67
[...] Const Header:
68
[...] constMagic = 0xcafebabe
69
[...] version = 2.1
70
[...] capacity = 204800
71
[...] grainSize = 8
72
[...] grainTableSize = 64
73
[...] flags = 0
74
[...] Extents:
75
[...] Header : <1 : 1>
76
[...] JournalHdr : <2 : 2>
77
[...] Journal : <2048 : 2048>
78
[...] GrainDirectory : <4096 : 2048>
79
[...] GrainTables : <6144 : 2048>
80
[...] FreeBitmap : <8192 : 2048>
81
[...] BackMap : <10240 : 2048>
82
[...] Grain : <12288 : 204800>
83
[...] Volatile Header:
84
[...] volatileMagic = 0xcafecafe
85
[...] FreeGTNumber = 0
86
[...] nextTxnSeqNumber = 0
87
[...] replayJournal = 0
88
89
The sizes that are seen in the log file are in sectors.
90
Extents are of the following format: <offset : size>
91
92
This commit is a strict implementation which enforces:
93
* magics
94
* version number 2.1
95
* grain size of 8 sectors (4KB)
96
* grain table size of 64 sectors
97
* zero flags
98
* extent locations
99
100
Additionally, this commit provides only a subset of the functionality
101
offered by seSparse's format:
102
* Read-only
103
* No journal replay
104
* No space reclamation
105
* No unmap support
106
107
Hence, journal header, journal, free bitmap and backmap extents are
108
unused, only the "classic" (L1 -> L2 -> data) grain access is
109
implemented.
110
111
However there are several differences in the grain access itself.
112
Grain directory (L1):
113
* Grain directory entries are indexes (not offsets) to grain
114
tables.
115
* Valid grain directory entries have their highest nibble set to
116
0x1.
117
* Since grain tables are always located at the beginning of the
118
file - the index can fit into 32 bits - so we can use its low
119
part if it's valid.
120
Grain table (L2):
121
* Grain table entries are indexes (not offsets) to grains.
122
* If the highest nibble of the entry is:
123
0x0:
124
The grain is not allocated.
125
The rest of the bytes are 0.
126
0x1:
127
The grain is unmapped - guest sees a zero grain.
128
The rest of the bits point to the previously mapped grain,
129
see 0x3 case.
130
0x2:
131
The grain is zero.
132
0x3:
133
The grain is allocated - to get the index calculate:
134
((entry & 0x0fff000000000000) >> 48) |
135
((entry & 0x0000ffffffffffff) << 12)
136
* The difference between 0x1 and 0x2 is that 0x1 is an unallocated
137
grain which results from the guest using sg_unmap to unmap the
138
grain - but the grain itself still exists in the grain extent - a
139
space reclamation procedure should delete it.
140
Unmapping a zero grain has no effect (0x2 will not change to 0x1)
141
but unmapping an unallocated grain will (0x0 to 0x1) - naturally.
142
143
In order to implement seSparse some fields had to be changed to support
144
both 32-bit and 64-bit entry sizes.
145
146
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
147
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
148
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
149
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
150
Message-id: 20190620091057.47441-4-shmuel.eiderman@oracle.com
151
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
---
152
---
13
include/exec/memory.h | 3 +
153
block/vmdk.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++---
14
hw/remote/vfio-user-obj.c | 190 ++++++++++++++++++++++++++++++++
154
1 file changed, 342 insertions(+), 16 deletions(-)
15
softmmu/physmem.c | 4 +-
155
16
tests/qtest/fuzz/generic_fuzz.c | 9 +-
156
diff --git a/block/vmdk.c b/block/vmdk.c
17
hw/remote/trace-events | 3 +
18
5 files changed, 203 insertions(+), 6 deletions(-)
19
20
diff --git a/include/exec/memory.h b/include/exec/memory.h
21
index XXXXXXX..XXXXXXX 100644
157
index XXXXXXX..XXXXXXX 100644
22
--- a/include/exec/memory.h
158
--- a/block/vmdk.c
23
+++ b/include/exec/memory.h
159
+++ b/block/vmdk.c
24
@@ -XXX,XX +XXX,XX @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
160
@@ -XXX,XX +XXX,XX @@ typedef struct {
25
hwaddr addr, const void *buf,
161
uint16_t compressAlgorithm;
26
hwaddr len);
162
} QEMU_PACKED VMDK4Header;
27
163
28
+int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
164
+typedef struct VMDKSESparseConstHeader {
29
+bool prepare_mmio_access(MemoryRegion *mr);
165
+ uint64_t magic;
30
+
166
+ uint64_t version;
31
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
167
+ uint64_t capacity;
32
{
168
+ uint64_t grain_size;
33
if (is_write) {
169
+ uint64_t grain_table_size;
34
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
170
+ uint64_t flags;
35
index XXXXXXX..XXXXXXX 100644
171
+ uint64_t reserved1;
36
--- a/hw/remote/vfio-user-obj.c
172
+ uint64_t reserved2;
37
+++ b/hw/remote/vfio-user-obj.c
173
+ uint64_t reserved3;
38
@@ -XXX,XX +XXX,XX @@
174
+ uint64_t reserved4;
39
#include "hw/qdev-core.h"
175
+ uint64_t volatile_header_offset;
40
#include "hw/pci/pci.h"
176
+ uint64_t volatile_header_size;
41
#include "qemu/timer.h"
177
+ uint64_t journal_header_offset;
42
+#include "exec/memory.h"
178
+ uint64_t journal_header_size;
43
179
+ uint64_t journal_offset;
44
#define TYPE_VFU_OBJECT "x-vfio-user-server"
180
+ uint64_t journal_size;
45
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
181
+ uint64_t grain_dir_offset;
46
@@ -XXX,XX +XXX,XX @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
182
+ uint64_t grain_dir_size;
47
trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
183
+ uint64_t grain_tables_offset;
184
+ uint64_t grain_tables_size;
185
+ uint64_t free_bitmap_offset;
186
+ uint64_t free_bitmap_size;
187
+ uint64_t backmap_offset;
188
+ uint64_t backmap_size;
189
+ uint64_t grains_offset;
190
+ uint64_t grains_size;
191
+ uint8_t pad[304];
192
+} QEMU_PACKED VMDKSESparseConstHeader;
193
+
194
+typedef struct VMDKSESparseVolatileHeader {
195
+ uint64_t magic;
196
+ uint64_t free_gt_number;
197
+ uint64_t next_txn_seq_number;
198
+ uint64_t replay_journal;
199
+ uint8_t pad[480];
200
+} QEMU_PACKED VMDKSESparseVolatileHeader;
201
+
202
#define L2_CACHE_SIZE 16
203
204
typedef struct VmdkExtent {
205
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkExtent {
206
bool compressed;
207
bool has_marker;
208
bool has_zero_grain;
209
+ bool sesparse;
210
+ uint64_t sesparse_l2_tables_offset;
211
+ uint64_t sesparse_clusters_offset;
212
+ int32_t entry_size;
213
int version;
214
int64_t sectors;
215
int64_t end_sector;
216
int64_t flat_start_offset;
217
int64_t l1_table_offset;
218
int64_t l1_backup_table_offset;
219
- uint32_t *l1_table;
220
+ void *l1_table;
221
uint32_t *l1_backup_table;
222
unsigned int l1_size;
223
uint32_t l1_entry_sectors;
224
225
unsigned int l2_size;
226
- uint32_t *l2_cache;
227
+ void *l2_cache;
228
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
229
uint32_t l2_cache_counts[L2_CACHE_SIZE];
230
231
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
232
* minimal L2 table size: 512 entries
233
* 8 TB is still more than the maximal value supported for
234
* VMDK3 & VMDK4 which is 2TB.
235
+ * 64TB - for "ESXi seSparse Extent"
236
+ * minimal cluster size: 512B (default is 4KB)
237
+ * L2 table size: 4096 entries (const).
238
+ * 64TB is more than the maximal value supported for
239
+ * seSparse VMDKs (which is slightly less than 64TB)
240
*/
241
error_setg(errp, "L1 size too big");
242
return -EFBIG;
243
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
244
extent->l2_size = l2_size;
245
extent->cluster_sectors = flat ? sectors : cluster_sectors;
246
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
247
+ extent->entry_size = sizeof(uint32_t);
248
249
if (s->num_extents > 1) {
250
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
251
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
252
int i;
253
254
/* read the L1 table */
255
- l1_size = extent->l1_size * sizeof(uint32_t);
256
+ l1_size = extent->l1_size * extent->entry_size;
257
extent->l1_table = g_try_malloc(l1_size);
258
if (l1_size && extent->l1_table == NULL) {
259
return -ENOMEM;
260
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
261
goto fail_l1;
262
}
263
for (i = 0; i < extent->l1_size; i++) {
264
- le32_to_cpus(&extent->l1_table[i]);
265
+ if (extent->entry_size == sizeof(uint64_t)) {
266
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
267
+ } else {
268
+ assert(extent->entry_size == sizeof(uint32_t));
269
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
270
+ }
271
}
272
273
if (extent->l1_backup_table_offset) {
274
+ assert(!extent->sesparse);
275
extent->l1_backup_table = g_try_malloc(l1_size);
276
if (l1_size && extent->l1_backup_table == NULL) {
277
ret = -ENOMEM;
278
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
279
}
280
281
extent->l2_cache =
282
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
283
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
284
return 0;
285
fail_l1b:
286
g_free(extent->l1_backup_table);
287
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
288
return ret;
48
}
289
}
49
290
50
+static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
291
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
51
+ hwaddr size, const bool is_write)
292
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
293
+
294
+/* Strict checks - format not officially documented */
295
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
296
+ Error **errp)
52
+{
297
+{
53
+ uint8_t *ptr = buf;
298
+ header->magic = le64_to_cpu(header->magic);
54
+ bool release_lock = false;
299
+ header->version = le64_to_cpu(header->version);
55
+ uint8_t *ram_ptr = NULL;
300
+ header->grain_size = le64_to_cpu(header->grain_size);
56
+ MemTxResult result;
301
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
57
+ int access_size;
302
+ header->flags = le64_to_cpu(header->flags);
58
+ uint64_t val;
303
+ header->reserved1 = le64_to_cpu(header->reserved1);
59
+
304
+ header->reserved2 = le64_to_cpu(header->reserved2);
60
+ if (memory_access_is_direct(mr, is_write)) {
305
+ header->reserved3 = le64_to_cpu(header->reserved3);
61
+ /**
306
+ header->reserved4 = le64_to_cpu(header->reserved4);
62
+ * Some devices expose a PCI expansion ROM, which could be buffer
307
+
63
+ * based as compared to other regions which are primarily based on
308
+ header->volatile_header_offset =
64
+ * MemoryRegionOps. memory_region_find() would already check
309
+ le64_to_cpu(header->volatile_header_offset);
65
+ * for buffer overflow, we don't need to repeat it here.
310
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
66
+ */
311
+
67
+ ram_ptr = memory_region_get_ram_ptr(mr);
312
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
68
+
313
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
69
+ if (is_write) {
314
+
70
+ memcpy((ram_ptr + offset), buf, size);
315
+ header->journal_offset = le64_to_cpu(header->journal_offset);
71
+ } else {
316
+ header->journal_size = le64_to_cpu(header->journal_size);
72
+ memcpy(buf, (ram_ptr + offset), size);
317
+
73
+ }
318
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
74
+
319
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
75
+ return 0;
320
+
76
+ }
321
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
77
+
322
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
78
+ while (size) {
323
+
79
+ /**
324
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
80
+ * The read/write logic used below is similar to the ones in
325
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
81
+ * flatview_read/write_continue()
326
+
82
+ */
327
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
83
+ release_lock = prepare_mmio_access(mr);
328
+ header->backmap_size = le64_to_cpu(header->backmap_size);
84
+
329
+
85
+ access_size = memory_access_size(mr, size, offset);
330
+ header->grains_offset = le64_to_cpu(header->grains_offset);
86
+
331
+ header->grains_size = le64_to_cpu(header->grains_size);
87
+ if (is_write) {
332
+
88
+ val = ldn_he_p(ptr, access_size);
333
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
89
+
334
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
90
+ result = memory_region_dispatch_write(mr, offset, val,
335
+ header->magic);
91
+ size_memop(access_size),
336
+ return -EINVAL;
92
+ MEMTXATTRS_UNSPECIFIED);
337
+ }
93
+ } else {
338
+
94
+ result = memory_region_dispatch_read(mr, offset, &val,
339
+ if (header->version != 0x0000000200000001) {
95
+ size_memop(access_size),
340
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
96
+ MEMTXATTRS_UNSPECIFIED);
341
+ header->version);
97
+
342
+ return -ENOTSUP;
98
+ stn_he_p(ptr, access_size, val);
343
+ }
99
+ }
344
+
100
+
345
+ if (header->grain_size != 8) {
101
+ if (release_lock) {
346
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
102
+ qemu_mutex_unlock_iothread();
347
+ header->grain_size);
103
+ release_lock = false;
348
+ return -ENOTSUP;
104
+ }
349
+ }
105
+
350
+
106
+ if (result != MEMTX_OK) {
351
+ if (header->grain_table_size != 64) {
107
+ return -1;
352
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
108
+ }
353
+ header->grain_table_size);
109
+
354
+ return -ENOTSUP;
110
+ size -= access_size;
355
+ }
111
+ ptr += access_size;
356
+
112
+ offset += access_size;
357
+ if (header->flags != 0) {
358
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
359
+ header->flags);
360
+ return -ENOTSUP;
361
+ }
362
+
363
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
364
+ header->reserved3 != 0 || header->reserved4 != 0) {
365
+ error_setg(errp, "Unsupported reserved bits:"
366
+ " 0x%016" PRIx64 " 0x%016" PRIx64
367
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
368
+ header->reserved1, header->reserved2,
369
+ header->reserved3, header->reserved4);
370
+ return -ENOTSUP;
371
+ }
372
+
373
+ /* check that padding is 0 */
374
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
375
+ error_setg(errp, "Unsupported non-zero const header padding");
376
+ return -ENOTSUP;
113
+ }
377
+ }
114
+
378
+
115
+ return 0;
379
+ return 0;
116
+}
380
+}
117
+
381
+
118
+static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
382
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
119
+ hwaddr bar_offset, char * const buf,
383
+ Error **errp)
120
+ hwaddr len, const bool is_write)
121
+{
384
+{
122
+ MemoryRegionSection section = { 0 };
385
+ header->magic = le64_to_cpu(header->magic);
123
+ uint8_t *ptr = (uint8_t *)buf;
386
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
124
+ MemoryRegion *section_mr = NULL;
387
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
125
+ uint64_t section_size;
388
+ header->replay_journal = le64_to_cpu(header->replay_journal);
126
+ hwaddr section_offset;
389
+
127
+ hwaddr size = 0;
390
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
128
+
391
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
129
+ while (len) {
392
+ header->magic);
130
+ section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
393
+ return -EINVAL;
131
+ bar_offset, len);
394
+ }
132
+
395
+
133
+ if (!section.mr) {
396
+ if (header->replay_journal) {
134
+ warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
397
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
135
+ return size;
398
+ return -ENOTSUP;
399
+ }
400
+
401
+ /* check that padding is 0 */
402
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
403
+ error_setg(errp, "Unsupported non-zero volatile header padding");
404
+ return -ENOTSUP;
405
+ }
406
+
407
+ return 0;
408
+}
409
+
410
+static int vmdk_open_se_sparse(BlockDriverState *bs,
411
+ BdrvChild *file,
412
+ int flags, Error **errp)
413
+{
414
+ int ret;
415
+ VMDKSESparseConstHeader const_header;
416
+ VMDKSESparseVolatileHeader volatile_header;
417
+ VmdkExtent *extent;
418
+
419
+ ret = bdrv_apply_auto_read_only(bs,
420
+ "No write support for seSparse images available", errp);
421
+ if (ret < 0) {
422
+ return ret;
423
+ }
424
+
425
+ assert(sizeof(const_header) == SECTOR_SIZE);
426
+
427
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
428
+ if (ret < 0) {
429
+ bdrv_refresh_filename(file->bs);
430
+ error_setg_errno(errp, -ret,
431
+ "Could not read const header from file '%s'",
432
+ file->bs->filename);
433
+ return ret;
434
+ }
435
+
436
+ /* check const header */
437
+ ret = check_se_sparse_const_header(&const_header, errp);
438
+ if (ret < 0) {
439
+ return ret;
440
+ }
441
+
442
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
443
+
444
+ ret = bdrv_pread(file,
445
+ const_header.volatile_header_offset * SECTOR_SIZE,
446
+ &volatile_header, sizeof(volatile_header));
447
+ if (ret < 0) {
448
+ bdrv_refresh_filename(file->bs);
449
+ error_setg_errno(errp, -ret,
450
+ "Could not read volatile header from file '%s'",
451
+ file->bs->filename);
452
+ return ret;
453
+ }
454
+
455
+ /* check volatile header */
456
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
457
+ if (ret < 0) {
458
+ return ret;
459
+ }
460
+
461
+ ret = vmdk_add_extent(bs, file, false,
462
+ const_header.capacity,
463
+ const_header.grain_dir_offset * SECTOR_SIZE,
464
+ 0,
465
+ const_header.grain_dir_size *
466
+ SECTOR_SIZE / sizeof(uint64_t),
467
+ const_header.grain_table_size *
468
+ SECTOR_SIZE / sizeof(uint64_t),
469
+ const_header.grain_size,
470
+ &extent,
471
+ errp);
472
+ if (ret < 0) {
473
+ return ret;
474
+ }
475
+
476
+ extent->sesparse = true;
477
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
478
+ extent->sesparse_clusters_offset = const_header.grains_offset;
479
+ extent->entry_size = sizeof(uint64_t);
480
+
481
+ ret = vmdk_init_tables(bs, extent, errp);
482
+ if (ret) {
483
+ /* free extent allocated by vmdk_add_extent */
484
+ vmdk_free_last_extent(bs);
485
+ }
486
+
487
+ return ret;
488
+}
489
+
490
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
491
QDict *options, Error **errp);
492
493
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
494
* RW [size in sectors] SPARSE "file-name.vmdk"
495
* RW [size in sectors] VMFS "file-name.vmdk"
496
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
497
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
498
*/
499
flat_offset = -1;
500
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
501
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
502
503
if (sectors <= 0 ||
504
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
505
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
506
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
507
+ strcmp(type, "SESPARSE")) ||
508
(strcmp(access, "RW"))) {
509
continue;
510
}
511
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
512
return ret;
513
}
514
extent = &s->extents[s->num_extents - 1];
515
+ } else if (!strcmp(type, "SESPARSE")) {
516
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
517
+ if (ret) {
518
+ bdrv_unref_child(bs, extent_file);
519
+ return ret;
520
+ }
521
+ extent = &s->extents[s->num_extents - 1];
522
} else {
523
error_setg(errp, "Unsupported extent type '%s'", type);
524
bdrv_unref_child(bs, extent_file);
525
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
526
if (strcmp(ct, "monolithicFlat") &&
527
strcmp(ct, "vmfs") &&
528
strcmp(ct, "vmfsSparse") &&
529
+ strcmp(ct, "seSparse") &&
530
strcmp(ct, "twoGbMaxExtentSparse") &&
531
strcmp(ct, "twoGbMaxExtentFlat")) {
532
error_setg(errp, "Unsupported image type '%s'", ct);
533
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
534
{
535
unsigned int l1_index, l2_offset, l2_index;
536
int min_index, i, j;
537
- uint32_t min_count, *l2_table;
538
+ uint32_t min_count;
539
+ void *l2_table;
540
bool zeroed = false;
541
int64_t ret;
542
int64_t cluster_sector;
543
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
544
545
if (m_data) {
546
m_data->valid = 0;
547
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
548
if (l1_index >= extent->l1_size) {
549
return VMDK_ERROR;
550
}
551
- l2_offset = extent->l1_table[l1_index];
552
+ if (extent->sesparse) {
553
+ uint64_t l2_offset_u64;
554
+
555
+ assert(extent->entry_size == sizeof(uint64_t));
556
+
557
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
558
+ if (l2_offset_u64 == 0) {
559
+ l2_offset = 0;
560
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
561
+ /*
562
+ * Top most nibble is 0x1 if grain table is allocated.
563
+ * strict check - top most 4 bytes must be 0x10000000 since max
564
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
565
+ * grain directories which is smaller than uint32,
566
+ * where 16MB is the only supported default grain table coverage.
567
+ */
568
+ return VMDK_ERROR;
569
+ } else {
570
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
571
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
572
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
573
+ if (l2_offset_u64 > 0x00000000ffffffff) {
574
+ return VMDK_ERROR;
575
+ }
576
+ l2_offset = (unsigned int)(l2_offset_u64);
136
+ }
577
+ }
137
+
578
+ } else {
138
+ section_mr = section.mr;
579
+ assert(extent->entry_size == sizeof(uint32_t));
139
+ section_offset = section.offset_within_region;
580
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
140
+ section_size = int128_get64(section.size);
581
+ }
141
+
582
if (!l2_offset) {
142
+ if (is_write && section_mr->readonly) {
583
return VMDK_UNALLOC;
143
+ warn_report("vfu: attempting to write to readonly region in "
584
}
144
+ "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
585
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
145
+ pci_bar, bar_offset,
586
extent->l2_cache_counts[j] >>= 1;
146
+ (bar_offset + section_size));
587
}
147
+ memory_region_unref(section_mr);
588
}
148
+ return size;
589
- l2_table = extent->l2_cache + (i * extent->l2_size);
590
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
591
goto found;
592
}
593
}
594
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
595
min_index = i;
596
}
597
}
598
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
599
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
600
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
601
if (bdrv_pread(extent->file,
602
(int64_t)l2_offset * 512,
603
l2_table,
604
- extent->l2_size * sizeof(uint32_t)
605
- ) != extent->l2_size * sizeof(uint32_t)) {
606
+ l2_size_bytes
607
+ ) != l2_size_bytes) {
608
return VMDK_ERROR;
609
}
610
611
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
612
extent->l2_cache_counts[min_index] = 1;
613
found:
614
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
615
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
616
617
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
618
- zeroed = true;
619
+ if (extent->sesparse) {
620
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
621
+ switch (cluster_sector & 0xf000000000000000) {
622
+ case 0x0000000000000000:
623
+ /* unallocated grain */
624
+ if (cluster_sector != 0) {
625
+ return VMDK_ERROR;
626
+ }
627
+ break;
628
+ case 0x1000000000000000:
629
+ /* scsi-unmapped grain - fallthrough */
630
+ case 0x2000000000000000:
631
+ /* zero grain */
632
+ zeroed = true;
633
+ break;
634
+ case 0x3000000000000000:
635
+ /* allocated grain */
636
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
637
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
638
+ cluster_sector = extent->sesparse_clusters_offset +
639
+ cluster_sector * extent->cluster_sectors;
640
+ break;
641
+ default:
642
+ return VMDK_ERROR;
149
+ }
643
+ }
150
+
644
+ } else {
151
+ if (vfu_object_mr_rw(section_mr, ptr, section_offset,
645
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
152
+ section_size, is_write)) {
646
+
153
+ warn_report("vfu: failed to %s "
647
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
154
+ "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
648
+ zeroed = true;
155
+ is_write ? "write to" : "read from", bar_offset,
156
+ (bar_offset + section_size), pci_bar);
157
+ memory_region_unref(section_mr);
158
+ return size;
159
+ }
649
+ }
160
+
650
}
161
+ size += section_size;
651
162
+ bar_offset += section_size;
652
if (!cluster_sector || zeroed) {
163
+ ptr += section_size;
653
if (!allocate) {
164
+ len -= section_size;
654
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
165
+
655
}
166
+ memory_region_unref(section_mr);
656
+ assert(!extent->sesparse);
167
+ }
657
168
+
658
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
169
+ return size;
659
return VMDK_ERROR;
170
+}
660
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
171
+
661
m_data->l1_index = l1_index;
172
+/**
662
m_data->l2_index = l2_index;
173
+ * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
663
m_data->l2_offset = l2_offset;
174
+ *
664
- m_data->l2_cache_entry = &l2_table[l2_index];
175
+ * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
665
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
176
+ * define vfu_object_bar2_handler
666
}
177
+ */
667
}
178
+#define VFU_OBJECT_BAR_HANDLER(BAR_NO) \
668
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
179
+ static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \
669
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
180
+ char * const buf, size_t count, \
670
if (!extent) {
181
+ loff_t offset, const bool is_write) \
671
return -EIO;
182
+ { \
672
}
183
+ VfuObject *o = vfu_get_private(vfu_ctx); \
673
+ if (extent->sesparse) {
184
+ PCIDevice *pci_dev = o->pci_dev; \
674
+ return -ENOTSUP;
185
+ \
186
+ return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \
187
+ buf, count, is_write); \
188
+ } \
189
+
190
+VFU_OBJECT_BAR_HANDLER(0)
191
+VFU_OBJECT_BAR_HANDLER(1)
192
+VFU_OBJECT_BAR_HANDLER(2)
193
+VFU_OBJECT_BAR_HANDLER(3)
194
+VFU_OBJECT_BAR_HANDLER(4)
195
+VFU_OBJECT_BAR_HANDLER(5)
196
+VFU_OBJECT_BAR_HANDLER(6)
197
+
198
+static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
199
+ &vfu_object_bar0_handler,
200
+ &vfu_object_bar1_handler,
201
+ &vfu_object_bar2_handler,
202
+ &vfu_object_bar3_handler,
203
+ &vfu_object_bar4_handler,
204
+ &vfu_object_bar5_handler,
205
+ &vfu_object_bar6_handler,
206
+};
207
+
208
+/**
209
+ * vfu_object_register_bars - Identify active BAR regions of pdev and setup
210
+ * callbacks to handle read/write accesses
211
+ */
212
+static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
213
+{
214
+ int flags = VFU_REGION_FLAG_RW;
215
+ int i;
216
+
217
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
218
+ if (!pdev->io_regions[i].size) {
219
+ continue;
220
+ }
675
+ }
221
+
676
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
222
+ if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
677
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
223
+ pdev->io_regions[i].memory->readonly) {
678
- offset_in_cluster);
224
+ flags &= ~VFU_REGION_FLAG_WRITE;
225
+ }
226
+
227
+ vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
228
+ (size_t)pdev->io_regions[i].size,
229
+ vfu_object_bar_handlers[i],
230
+ flags, NULL, 0, -1, 0);
231
+
232
+ trace_vfu_bar_register(i, pdev->io_regions[i].addr,
233
+ pdev->io_regions[i].size);
234
+ }
235
+}
236
+
237
/*
238
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
239
* properties. It also depends on devices instantiated in QEMU. These
240
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
241
goto fail;
242
}
243
244
+ vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
245
+
246
ret = vfu_realize_ctx(o->vfu_ctx);
247
if (ret < 0) {
248
error_setg(errp, "vfu: Failed to realize device %s- %s",
249
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
250
index XXXXXXX..XXXXXXX 100644
251
--- a/softmmu/physmem.c
252
+++ b/softmmu/physmem.c
253
@@ -XXX,XX +XXX,XX @@ void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
254
invalidate_and_set_dirty(mr, addr, size);
255
}
256
257
-static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
258
+int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
259
{
260
unsigned access_size_max = mr->ops->valid.max_access_size;
261
262
@@ -XXX,XX +XXX,XX @@ static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
263
return l;
264
}
265
266
-static bool prepare_mmio_access(MemoryRegion *mr)
267
+bool prepare_mmio_access(MemoryRegion *mr)
268
{
269
bool release_lock = false;
270
271
diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c
272
index XXXXXXX..XXXXXXX 100644
273
--- a/tests/qtest/fuzz/generic_fuzz.c
274
+++ b/tests/qtest/fuzz/generic_fuzz.c
275
@@ -XXX,XX +XXX,XX @@ static void *pattern_alloc(pattern p, size_t len)
276
return buf;
277
}
278
279
-static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
280
+static int fuzz_memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
281
{
282
unsigned access_size_max = mr->ops->valid.max_access_size;
283
284
@@ -XXX,XX +XXX,XX @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr)
285
286
/*
287
* If mr1 isn't RAM, address_space_translate doesn't update l. Use
288
- * memory_access_size to identify the number of bytes that it is safe
289
- * to write without accidentally writing to another MemoryRegion.
290
+ * fuzz_memory_access_size to identify the number of bytes that it
291
+ * is safe to write without accidentally writing to another
292
+ * MemoryRegion.
293
*/
294
if (!memory_region_is_ram(mr1)) {
295
- l = memory_access_size(mr1, l, addr1);
296
+ l = fuzz_memory_access_size(mr1, l, addr1);
297
}
298
if (memory_region_is_ram(mr1) ||
299
memory_region_is_romd(mr1) ||
300
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
301
index XXXXXXX..XXXXXXX 100644
302
--- a/hw/remote/trace-events
303
+++ b/hw/remote/trace-events
304
@@ -XXX,XX +XXX,XX @@ vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x"
305
vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x"
306
vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes"
307
vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
308
+vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64""
309
+vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64""
310
+vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""
311
--
679
--
312
2.36.1
680
2.21.0
681
682
diff view generated by jsdifflib
1
From: Jagannathan Raman <jag.raman@oracle.com>
1
From: Pino Toscano <ptoscano@redhat.com>
2
2
3
Forward remote device's interrupts to the guest
3
Rewrite the implementation of the ssh block driver to use libssh instead
4
of libssh2. The libssh library has various advantages over libssh2:
5
- easier API for authentication (for example for using ssh-agent)
6
- easier API for known_hosts handling
7
- supports newer types of keys in known_hosts
4
8
5
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
9
Use APIs/features available in libssh 0.8 conditionally, to support
6
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
10
older versions (which are not recommended though).
7
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
11
8
Message-id: 9523479eaafe050677f4de2af5dd0df18c27cfd9.1655151679.git.jag.raman@oracle.com
12
Adjust the iotest 207 according to the different error message, and to
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
find the default key type for localhost (to properly compare the
14
fingerprint with).
15
Contributed-by: Max Reitz <mreitz@redhat.com>
16
17
Adjust the various Docker/Travis scripts to use libssh when available
18
instead of libssh2. The mingw/mxe testing is dropped for now, as there
19
are no packages for it.
20
21
Signed-off-by: Pino Toscano <ptoscano@redhat.com>
22
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
Acked-by: Alex Bennée <alex.bennee@linaro.org>
24
Message-id: 20190620200840.17655-1-ptoscano@redhat.com
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Message-id: 5873173.t2JhDm7DL7@lindworm.usersys.redhat.com
27
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
28
---
11
MAINTAINERS | 1 +
29
configure | 65 +-
12
include/hw/pci/msi.h | 1 +
30
block/Makefile.objs | 6 +-
13
include/hw/pci/msix.h | 1 +
31
block/ssh.c | 652 ++++++++++--------
14
include/hw/pci/pci.h | 13 +++
32
.travis.yml | 4 +-
15
include/hw/remote/vfio-user-obj.h | 6 ++
33
block/trace-events | 14 +-
16
hw/pci/msi.c | 49 +++++++--
34
docs/qemu-block-drivers.texi | 2 +-
17
hw/pci/msix.c | 35 ++++++-
35
.../dockerfiles/debian-win32-cross.docker | 1 -
18
hw/pci/pci.c | 13 +++
36
.../dockerfiles/debian-win64-cross.docker | 1 -
19
hw/remote/machine.c | 16 ++-
37
tests/docker/dockerfiles/fedora.docker | 4 +-
20
hw/remote/vfio-user-obj.c | 167 ++++++++++++++++++++++++++++++
38
tests/docker/dockerfiles/ubuntu.docker | 2 +-
21
stubs/vfio-user-obj.c | 6 ++
39
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
22
hw/remote/trace-events | 1 +
40
tests/qemu-iotests/207 | 54 +-
23
stubs/meson.build | 1 +
41
tests/qemu-iotests/207.out | 2 +-
24
13 files changed, 298 insertions(+), 12 deletions(-)
42
13 files changed, 449 insertions(+), 360 deletions(-)
25
create mode 100644 include/hw/remote/vfio-user-obj.h
26
create mode 100644 stubs/vfio-user-obj.c
27
43
28
diff --git a/MAINTAINERS b/MAINTAINERS
44
diff --git a/configure b/configure
45
index XXXXXXX..XXXXXXX 100755
46
--- a/configure
47
+++ b/configure
48
@@ -XXX,XX +XXX,XX @@ auth_pam=""
49
vte=""
50
virglrenderer=""
51
tpm=""
52
-libssh2=""
53
+libssh=""
54
live_block_migration="yes"
55
numa=""
56
tcmalloc="no"
57
@@ -XXX,XX +XXX,XX @@ for opt do
58
;;
59
--enable-tpm) tpm="yes"
60
;;
61
- --disable-libssh2) libssh2="no"
62
+ --disable-libssh) libssh="no"
63
;;
64
- --enable-libssh2) libssh2="yes"
65
+ --enable-libssh) libssh="yes"
66
;;
67
--disable-live-block-migration) live_block_migration="no"
68
;;
69
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
70
coroutine-pool coroutine freelist (better performance)
71
glusterfs GlusterFS backend
72
tpm TPM support
73
- libssh2 ssh block device support
74
+ libssh ssh block device support
75
numa libnuma support
76
libxml2 for Parallels image format
77
tcmalloc tcmalloc support
78
@@ -XXX,XX +XXX,XX @@ EOF
79
fi
80
81
##########################################
82
-# libssh2 probe
83
-min_libssh2_version=1.2.8
84
-if test "$libssh2" != "no" ; then
85
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
86
- libssh2_cflags=$($pkg_config libssh2 --cflags)
87
- libssh2_libs=$($pkg_config libssh2 --libs)
88
- libssh2=yes
89
+# libssh probe
90
+if test "$libssh" != "no" ; then
91
+ if $pkg_config --exists libssh; then
92
+ libssh_cflags=$($pkg_config libssh --cflags)
93
+ libssh_libs=$($pkg_config libssh --libs)
94
+ libssh=yes
95
else
96
- if test "$libssh2" = "yes" ; then
97
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
98
+ if test "$libssh" = "yes" ; then
99
+ error_exit "libssh required for --enable-libssh"
100
fi
101
- libssh2=no
102
+ libssh=no
103
fi
104
fi
105
106
##########################################
107
-# libssh2_sftp_fsync probe
108
+# Check for libssh 0.8
109
+# This is done like this instead of using the LIBSSH_VERSION_* and
110
+# SSH_VERSION_* macros because some distributions in the past shipped
111
+# snapshots of the future 0.8 from Git, and those snapshots did not
112
+# have updated version numbers (still referring to 0.7.0).
113
114
-if test "$libssh2" = "yes"; then
115
+if test "$libssh" = "yes"; then
116
cat > $TMPC <<EOF
117
-#include <stdio.h>
118
-#include <libssh2.h>
119
-#include <libssh2_sftp.h>
120
-int main(void) {
121
- LIBSSH2_SESSION *session;
122
- LIBSSH2_SFTP *sftp;
123
- LIBSSH2_SFTP_HANDLE *sftp_handle;
124
- session = libssh2_session_init ();
125
- sftp = libssh2_sftp_init (session);
126
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
127
- libssh2_sftp_fsync (sftp_handle);
128
- return 0;
129
-}
130
+#include <libssh/libssh.h>
131
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
132
EOF
133
- # libssh2_cflags/libssh2_libs defined in previous test.
134
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
135
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
136
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
137
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
138
fi
139
fi
140
141
@@ -XXX,XX +XXX,XX @@ echo "GlusterFS support $glusterfs"
142
echo "gcov $gcov_tool"
143
echo "gcov enabled $gcov"
144
echo "TPM support $tpm"
145
-echo "libssh2 support $libssh2"
146
+echo "libssh support $libssh"
147
echo "QOM debugging $qom_cast_debug"
148
echo "Live block migration $live_block_migration"
149
echo "lzo support $lzo"
150
@@ -XXX,XX +XXX,XX @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
151
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
152
fi
153
154
-if test "$libssh2" = "yes" ; then
155
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
156
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
157
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
158
+if test "$libssh" = "yes" ; then
159
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
160
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
161
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
162
fi
163
164
if test "$live_block_migration" = "yes" ; then
165
diff --git a/block/Makefile.objs b/block/Makefile.objs
29
index XXXXXXX..XXXXXXX 100644
166
index XXXXXXX..XXXXXXX 100644
30
--- a/MAINTAINERS
167
--- a/block/Makefile.objs
31
+++ b/MAINTAINERS
168
+++ b/block/Makefile.objs
32
@@ -XXX,XX +XXX,XX @@ F: hw/remote/iohub.c
169
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_CURL) += curl.o
33
F: include/hw/remote/iohub.h
170
block-obj-$(CONFIG_RBD) += rbd.o
34
F: subprojects/libvfio-user
171
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
35
F: hw/remote/vfio-user-obj.c
172
block-obj-$(CONFIG_VXHS) += vxhs.o
36
+F: include/hw/remote/vfio-user-obj.h
173
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
37
F: hw/remote/iommu.c
174
+block-obj-$(CONFIG_LIBSSH) += ssh.o
38
F: include/hw/remote/iommu.h
175
block-obj-y += accounting.o dirty-bitmap.o
39
176
block-obj-y += write-threshold.o
40
diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h
177
block-obj-y += backup.o
178
@@ -XXX,XX +XXX,XX @@ rbd.o-libs := $(RBD_LIBS)
179
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
180
gluster.o-libs := $(GLUSTERFS_LIBS)
181
vxhs.o-libs := $(VXHS_LIBS)
182
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
183
-ssh.o-libs := $(LIBSSH2_LIBS)
184
+ssh.o-cflags := $(LIBSSH_CFLAGS)
185
+ssh.o-libs := $(LIBSSH_LIBS)
186
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
187
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
188
dmg-bz2.o-libs := $(BZIP2_LIBS)
189
diff --git a/block/ssh.c b/block/ssh.c
41
index XXXXXXX..XXXXXXX 100644
190
index XXXXXXX..XXXXXXX 100644
42
--- a/include/hw/pci/msi.h
191
--- a/block/ssh.c
43
+++ b/include/hw/pci/msi.h
192
+++ b/block/ssh.c
44
@@ -XXX,XX +XXX,XX @@ void msi_notify(PCIDevice *dev, unsigned int vector);
193
@@ -XXX,XX +XXX,XX @@
45
void msi_send_message(PCIDevice *dev, MSIMessage msg);
194
46
void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len);
195
#include "qemu/osdep.h"
47
unsigned int msi_nr_vectors_allocated(const PCIDevice *dev);
196
48
+void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
197
-#include <libssh2.h>
49
198
-#include <libssh2_sftp.h>
50
static inline bool msi_present(const PCIDevice *dev)
199
+#include <libssh/libssh.h>
200
+#include <libssh/sftp.h>
201
202
#include "block/block_int.h"
203
#include "block/qdict.h"
204
@@ -XXX,XX +XXX,XX @@
205
#include "trace.h"
206
207
/*
208
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
209
- * that this requires that libssh2 was specially compiled with the
210
- * `./configure --enable-debug' option, so most likely you will have
211
- * to compile it yourself. The meaning of <bitmask> is described
212
- * here: http://www.libssh2.org/libssh2_trace.html
213
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
214
+ * The meaning of <level> is described here:
215
+ * http://api.libssh.org/master/group__libssh__log.html
216
*/
217
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
218
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
219
220
typedef struct BDRVSSHState {
221
/* Coroutine. */
222
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSSHState {
223
224
/* SSH connection. */
225
int sock; /* socket */
226
- LIBSSH2_SESSION *session; /* ssh session */
227
- LIBSSH2_SFTP *sftp; /* sftp session */
228
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
229
+ ssh_session session; /* ssh session */
230
+ sftp_session sftp; /* sftp session */
231
+ sftp_file sftp_handle; /* sftp remote file handle */
232
233
- /* See ssh_seek() function below. */
234
- int64_t offset;
235
- bool offset_op_read;
236
-
237
- /* File attributes at open. We try to keep the .filesize field
238
+ /*
239
+ * File attributes at open. We try to keep the .size field
240
* updated if it changes (eg by writing at the end of the file).
241
*/
242
- LIBSSH2_SFTP_ATTRIBUTES attrs;
243
+ sftp_attributes attrs;
244
245
InetSocketAddress *inet;
246
247
@@ -XXX,XX +XXX,XX @@ static void ssh_state_init(BDRVSSHState *s)
51
{
248
{
52
diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h
249
memset(s, 0, sizeof *s);
53
index XXXXXXX..XXXXXXX 100644
250
s->sock = -1;
54
--- a/include/hw/pci/msix.h
251
- s->offset = -1;
55
+++ b/include/hw/pci/msix.h
252
qemu_co_mutex_init(&s->lock);
56
@@ -XXX,XX +XXX,XX @@ void msix_clr_pending(PCIDevice *dev, int vector);
253
}
57
int msix_vector_use(PCIDevice *dev, unsigned vector);
254
58
void msix_vector_unuse(PCIDevice *dev, unsigned vector);
255
@@ -XXX,XX +XXX,XX @@ static void ssh_state_free(BDRVSSHState *s)
59
void msix_unuse_all_vectors(PCIDevice *dev);
256
{
60
+void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
257
g_free(s->user);
61
258
62
void msix_notify(PCIDevice *dev, unsigned vector);
259
+ if (s->attrs) {
63
260
+ sftp_attributes_free(s->attrs);
64
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
261
+ }
65
index XXXXXXX..XXXXXXX 100644
262
if (s->sftp_handle) {
66
--- a/include/hw/pci/pci.h
263
- libssh2_sftp_close(s->sftp_handle);
67
+++ b/include/hw/pci/pci.h
264
+ sftp_close(s->sftp_handle);
68
@@ -XXX,XX +XXX,XX @@ extern bool pci_available;
265
}
69
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
266
if (s->sftp) {
70
#define PCI_FUNC(devfn) ((devfn) & 0x07)
267
- libssh2_sftp_shutdown(s->sftp);
71
#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn))
268
+ sftp_free(s->sftp);
72
+#define PCI_BDF_TO_DEVFN(x) ((x) & 0xff)
269
}
73
#define PCI_BUS_MAX 256
270
if (s->session) {
74
#define PCI_DEVFN_MAX 256
271
- libssh2_session_disconnect(s->session,
75
#define PCI_SLOT_MAX 32
272
- "from qemu ssh client: "
76
@@ -XXX,XX +XXX,XX @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
273
- "user closed the connection");
77
pcibus_t addr, pcibus_t size, int type);
274
- libssh2_session_free(s->session);
78
typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
275
- }
79
276
- if (s->sock >= 0) {
80
+typedef void MSITriggerFunc(PCIDevice *dev, MSIMessage msg);
277
- close(s->sock);
81
+typedef MSIMessage MSIPrepareMessageFunc(PCIDevice *dev, unsigned vector);
278
+ ssh_disconnect(s->session);
82
+typedef MSIMessage MSIxPrepareMessageFunc(PCIDevice *dev, unsigned vector);
279
+ ssh_free(s->session); /* This frees s->sock */
83
+
280
}
84
typedef struct PCIIORegion {
281
}
85
pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
282
86
#define PCI_BAR_UNMAPPED (~(pcibus_t)0)
283
@@ -XXX,XX +XXX,XX @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
87
@@ -XXX,XX +XXX,XX @@ struct PCIDevice {
284
va_end(args);
88
/* Space to store MSIX table & pending bit array */
285
89
uint8_t *msix_table;
286
if (s->session) {
90
uint8_t *msix_pba;
287
- char *ssh_err;
91
+
288
+ const char *ssh_err;
92
+ /* May be used by INTx or MSI during interrupt notification */
289
int ssh_err_code;
93
+ void *irq_opaque;
290
94
+
291
- /* This is not an errno. See <libssh2.h>. */
95
+ MSITriggerFunc *msi_trigger;
292
- ssh_err_code = libssh2_session_last_error(s->session,
96
+ MSIPrepareMessageFunc *msi_prepare_message;
293
- &ssh_err, NULL, 0);
97
+ MSIxPrepareMessageFunc *msix_prepare_message;
294
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
98
+
295
+ /* This is not an errno. See <libssh/libssh.h>. */
99
/* MemoryRegion container for msix exclusive BAR setup */
296
+ ssh_err = ssh_get_error(s->session);
100
MemoryRegion msix_exclusive_bar;
297
+ ssh_err_code = ssh_get_error_code(s->session);
101
/* Memory Regions for MSIX table and pending bit entries. */
298
+ error_setg(errp, "%s: %s (libssh error code: %d)",
102
diff --git a/include/hw/remote/vfio-user-obj.h b/include/hw/remote/vfio-user-obj.h
299
msg, ssh_err, ssh_err_code);
103
new file mode 100644
300
} else {
104
index XXXXXXX..XXXXXXX
301
error_setg(errp, "%s", msg);
105
--- /dev/null
302
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
106
+++ b/include/hw/remote/vfio-user-obj.h
303
va_end(args);
107
@@ -XXX,XX +XXX,XX @@
304
108
+#ifndef VFIO_USER_OBJ_H
305
if (s->sftp) {
109
+#define VFIO_USER_OBJ_H
306
- char *ssh_err;
110
+
307
+ const char *ssh_err;
111
+void vfu_object_set_bus_irq(PCIBus *pci_bus);
308
int ssh_err_code;
112
+
309
- unsigned long sftp_err_code;
310
+ int sftp_err_code;
311
312
- /* This is not an errno. See <libssh2.h>. */
313
- ssh_err_code = libssh2_session_last_error(s->session,
314
- &ssh_err, NULL, 0);
315
- /* See <libssh2_sftp.h>. */
316
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
317
+ /* This is not an errno. See <libssh/libssh.h>. */
318
+ ssh_err = ssh_get_error(s->session);
319
+ ssh_err_code = ssh_get_error_code(s->session);
320
+ /* See <libssh/sftp.h>. */
321
+ sftp_err_code = sftp_get_error(s->sftp);
322
323
error_setg(errp,
324
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
325
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
326
msg, ssh_err, ssh_err_code, sftp_err_code);
327
} else {
328
error_setg(errp, "%s", msg);
329
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
330
331
static void sftp_error_trace(BDRVSSHState *s, const char *op)
332
{
333
- char *ssh_err;
334
+ const char *ssh_err;
335
int ssh_err_code;
336
- unsigned long sftp_err_code;
337
+ int sftp_err_code;
338
339
- /* This is not an errno. See <libssh2.h>. */
340
- ssh_err_code = libssh2_session_last_error(s->session,
341
- &ssh_err, NULL, 0);
342
- /* See <libssh2_sftp.h>. */
343
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
344
+ /* This is not an errno. See <libssh/libssh.h>. */
345
+ ssh_err = ssh_get_error(s->session);
346
+ ssh_err_code = ssh_get_error_code(s->session);
347
+ /* See <libssh/sftp.h>. */
348
+ sftp_err_code = sftp_get_error(s->sftp);
349
350
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
351
}
352
@@ -XXX,XX +XXX,XX @@ static void ssh_parse_filename(const char *filename, QDict *options,
353
parse_uri(filename, options, errp);
354
}
355
356
-static int check_host_key_knownhosts(BDRVSSHState *s,
357
- const char *host, int port, Error **errp)
358
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
359
{
360
- const char *home;
361
- char *knh_file = NULL;
362
- LIBSSH2_KNOWNHOSTS *knh = NULL;
363
- struct libssh2_knownhost *found;
364
- int ret, r;
365
- const char *hostkey;
366
- size_t len;
367
- int type;
368
-
369
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
370
- if (!hostkey) {
371
+ int ret;
372
+#ifdef HAVE_LIBSSH_0_8
373
+ enum ssh_known_hosts_e state;
374
+ int r;
375
+ ssh_key pubkey;
376
+ enum ssh_keytypes_e pubkey_type;
377
+ unsigned char *server_hash = NULL;
378
+ size_t server_hash_len;
379
+ char *fingerprint = NULL;
380
+
381
+ state = ssh_session_is_known_server(s->session);
382
+ trace_ssh_server_status(state);
383
+
384
+ switch (state) {
385
+ case SSH_KNOWN_HOSTS_OK:
386
+ /* OK */
387
+ trace_ssh_check_host_key_knownhosts();
388
+ break;
389
+ case SSH_KNOWN_HOSTS_CHANGED:
390
ret = -EINVAL;
391
- session_error_setg(errp, s, "failed to read remote host key");
392
+ r = ssh_get_server_publickey(s->session, &pubkey);
393
+ if (r == 0) {
394
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
395
+ &server_hash, &server_hash_len);
396
+ pubkey_type = ssh_key_type(pubkey);
397
+ ssh_key_free(pubkey);
398
+ }
399
+ if (r == 0) {
400
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
401
+ server_hash,
402
+ server_hash_len);
403
+ ssh_clean_pubkey_hash(&server_hash);
404
+ }
405
+ if (fingerprint) {
406
+ error_setg(errp,
407
+ "host key (%s key with fingerprint %s) does not match "
408
+ "the one in known_hosts; this may be a possible attack",
409
+ ssh_key_type_to_char(pubkey_type), fingerprint);
410
+ ssh_string_free_char(fingerprint);
411
+ } else {
412
+ error_setg(errp,
413
+ "host key does not match the one in known_hosts; this "
414
+ "may be a possible attack");
415
+ }
416
goto out;
417
- }
418
-
419
- knh = libssh2_knownhost_init(s->session);
420
- if (!knh) {
421
+ case SSH_KNOWN_HOSTS_OTHER:
422
ret = -EINVAL;
423
- session_error_setg(errp, s,
424
- "failed to initialize known hosts support");
425
+ error_setg(errp,
426
+ "host key for this server not found, another type exists");
427
+ goto out;
428
+ case SSH_KNOWN_HOSTS_UNKNOWN:
429
+ ret = -EINVAL;
430
+ error_setg(errp, "no host key was found in known_hosts");
431
+ goto out;
432
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
433
+ ret = -ENOENT;
434
+ error_setg(errp, "known_hosts file not found");
435
+ goto out;
436
+ case SSH_KNOWN_HOSTS_ERROR:
437
+ ret = -EINVAL;
438
+ error_setg(errp, "error while checking the host");
439
+ goto out;
440
+ default:
441
+ ret = -EINVAL;
442
+ error_setg(errp, "error while checking for known server (%d)", state);
443
goto out;
444
}
445
+#else /* !HAVE_LIBSSH_0_8 */
446
+ int state;
447
448
- home = getenv("HOME");
449
- if (home) {
450
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
451
- } else {
452
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
453
- }
454
-
455
- /* Read all known hosts from OpenSSH-style known_hosts file. */
456
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
457
+ state = ssh_is_server_known(s->session);
458
+ trace_ssh_server_status(state);
459
460
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
461
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
462
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
463
- &found);
464
- switch (r) {
465
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
466
+ switch (state) {
467
+ case SSH_SERVER_KNOWN_OK:
468
/* OK */
469
- trace_ssh_check_host_key_knownhosts(found->key);
470
+ trace_ssh_check_host_key_knownhosts();
471
break;
472
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
473
+ case SSH_SERVER_KNOWN_CHANGED:
474
ret = -EINVAL;
475
- session_error_setg(errp, s,
476
- "host key does not match the one in known_hosts"
477
- " (found key %s)", found->key);
478
+ error_setg(errp,
479
+ "host key does not match the one in known_hosts; this "
480
+ "may be a possible attack");
481
goto out;
482
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
483
+ case SSH_SERVER_FOUND_OTHER:
484
ret = -EINVAL;
485
- session_error_setg(errp, s, "no host key was found in known_hosts");
486
+ error_setg(errp,
487
+ "host key for this server not found, another type exists");
488
+ goto out;
489
+ case SSH_SERVER_FILE_NOT_FOUND:
490
+ ret = -ENOENT;
491
+ error_setg(errp, "known_hosts file not found");
492
goto out;
493
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
494
+ case SSH_SERVER_NOT_KNOWN:
495
ret = -EINVAL;
496
- session_error_setg(errp, s,
497
- "failure matching the host key with known_hosts");
498
+ error_setg(errp, "no host key was found in known_hosts");
499
+ goto out;
500
+ case SSH_SERVER_ERROR:
501
+ ret = -EINVAL;
502
+ error_setg(errp, "server error");
503
goto out;
504
default:
505
ret = -EINVAL;
506
- session_error_setg(errp, s, "unknown error matching the host key"
507
- " with known_hosts (%d)", r);
508
+ error_setg(errp, "error while checking for known server (%d)", state);
509
goto out;
510
}
511
+#endif /* !HAVE_LIBSSH_0_8 */
512
513
/* known_hosts checking successful. */
514
ret = 0;
515
516
out:
517
- if (knh != NULL) {
518
- libssh2_knownhost_free(knh);
519
- }
520
- g_free(knh_file);
521
return ret;
522
}
523
524
@@ -XXX,XX +XXX,XX @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
525
526
static int
527
check_host_key_hash(BDRVSSHState *s, const char *hash,
528
- int hash_type, size_t fingerprint_len, Error **errp)
529
+ enum ssh_publickey_hash_type type, Error **errp)
530
{
531
- const char *fingerprint;
532
-
533
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
534
- if (!fingerprint) {
535
+ int r;
536
+ ssh_key pubkey;
537
+ unsigned char *server_hash;
538
+ size_t server_hash_len;
539
+
540
+#ifdef HAVE_LIBSSH_0_8
541
+ r = ssh_get_server_publickey(s->session, &pubkey);
542
+#else
543
+ r = ssh_get_publickey(s->session, &pubkey);
113
+#endif
544
+#endif
114
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
545
+ if (r != SSH_OK) {
115
index XXXXXXX..XXXXXXX 100644
546
session_error_setg(errp, s, "failed to read remote host key");
116
--- a/hw/pci/msi.c
547
return -EINVAL;
117
+++ b/hw/pci/msi.c
548
}
118
@@ -XXX,XX +XXX,XX @@ void msi_set_message(PCIDevice *dev, MSIMessage msg)
549
119
pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
550
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
120
}
551
- hash) != 0) {
121
552
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
122
-MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
553
+ ssh_key_free(pubkey);
123
+static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
554
+ if (r != 0) {
124
{
555
+ session_error_setg(errp, s,
125
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
556
+ "failed reading the hash of the server SSH key");
126
bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
557
+ return -EINVAL;
127
@@ -XXX,XX +XXX,XX @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
558
+ }
128
return msg;
559
+
129
}
560
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
130
561
+ ssh_clean_pubkey_hash(&server_hash);
131
+MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
562
+ if (r != 0) {
132
+{
563
error_setg(errp, "remote host key does not match host_key_check '%s'",
133
+ return dev->msi_prepare_message(dev, vector);
564
hash);
134
+}
565
return -EPERM;
135
+
566
@@ -XXX,XX +XXX,XX @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
136
bool msi_enabled(const PCIDevice *dev)
137
{
138
return msi_present(dev) &&
139
@@ -XXX,XX +XXX,XX @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
140
0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
141
}
142
143
+ dev->msi_prepare_message = msi_prepare_message;
144
+
145
return 0;
567
return 0;
146
}
568
}
147
569
148
@@ -XXX,XX +XXX,XX @@ void msi_uninit(struct PCIDevice *dev)
570
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
149
cap_size = msi_cap_sizeof(flags);
571
- SshHostKeyCheck *hkc, Error **errp)
150
pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
572
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
151
dev->cap_present &= ~QEMU_PCI_CAP_MSI;
573
{
152
+ dev->msi_prepare_message = NULL;
574
SshHostKeyCheckMode mode;
153
575
154
MSI_DEV_PRINTF(dev, "uninit\n");
576
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
577
case SSH_HOST_KEY_CHECK_MODE_HASH:
578
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
579
return check_host_key_hash(s, hkc->u.hash.hash,
580
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
581
+ SSH_PUBLICKEY_HASH_MD5, errp);
582
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
583
return check_host_key_hash(s, hkc->u.hash.hash,
584
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
585
+ SSH_PUBLICKEY_HASH_SHA1, errp);
586
}
587
g_assert_not_reached();
588
break;
589
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
590
- return check_host_key_knownhosts(s, host, port, errp);
591
+ return check_host_key_knownhosts(s, errp);
592
default:
593
g_assert_not_reached();
594
}
595
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
596
return -EINVAL;
155
}
597
}
156
@@ -XXX,XX +XXX,XX @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
598
157
return mask & (1U << vector);
599
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
600
+static int authenticate(BDRVSSHState *s, Error **errp)
601
{
602
int r, ret;
603
- const char *userauthlist;
604
- LIBSSH2_AGENT *agent = NULL;
605
- struct libssh2_agent_publickey *identity;
606
- struct libssh2_agent_publickey *prev_identity = NULL;
607
+ int method;
608
609
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
610
- if (strstr(userauthlist, "publickey") == NULL) {
611
+ /* Try to authenticate with the "none" method. */
612
+ r = ssh_userauth_none(s->session, NULL);
613
+ if (r == SSH_AUTH_ERROR) {
614
ret = -EPERM;
615
- error_setg(errp,
616
- "remote server does not support \"publickey\" authentication");
617
+ session_error_setg(errp, s, "failed to authenticate using none "
618
+ "authentication");
619
goto out;
620
- }
621
-
622
- /* Connect to ssh-agent and try each identity in turn. */
623
- agent = libssh2_agent_init(s->session);
624
- if (!agent) {
625
- ret = -EINVAL;
626
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
627
- goto out;
628
- }
629
- if (libssh2_agent_connect(agent)) {
630
- ret = -ECONNREFUSED;
631
- session_error_setg(errp, s, "failed to connect to ssh-agent");
632
- goto out;
633
- }
634
- if (libssh2_agent_list_identities(agent)) {
635
- ret = -EINVAL;
636
- session_error_setg(errp, s,
637
- "failed requesting identities from ssh-agent");
638
+ } else if (r == SSH_AUTH_SUCCESS) {
639
+ /* Authenticated! */
640
+ ret = 0;
641
goto out;
642
}
643
644
- for(;;) {
645
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
646
- if (r == 1) { /* end of list */
647
- break;
648
- }
649
- if (r < 0) {
650
+ method = ssh_userauth_list(s->session, NULL);
651
+ trace_ssh_auth_methods(method);
652
+
653
+ /*
654
+ * Try to authenticate with publickey, using the ssh-agent
655
+ * if available.
656
+ */
657
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
658
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
659
+ if (r == SSH_AUTH_ERROR) {
660
ret = -EINVAL;
661
- session_error_setg(errp, s,
662
- "failed to obtain identity from ssh-agent");
663
+ session_error_setg(errp, s, "failed to authenticate using "
664
+ "publickey authentication");
665
goto out;
666
- }
667
- r = libssh2_agent_userauth(agent, user, identity);
668
- if (r == 0) {
669
+ } else if (r == SSH_AUTH_SUCCESS) {
670
/* Authenticated! */
671
ret = 0;
672
goto out;
673
}
674
- /* Failed to authenticate with this identity, try the next one. */
675
- prev_identity = identity;
676
}
677
678
ret = -EPERM;
679
@@ -XXX,XX +XXX,XX @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
680
"and the identities held by your ssh-agent");
681
682
out:
683
- if (agent != NULL) {
684
- /* Note: libssh2 implementation implicitly calls
685
- * libssh2_agent_disconnect if necessary.
686
- */
687
- libssh2_agent_free(agent);
688
- }
689
-
690
return ret;
158
}
691
}
159
692
160
+void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
693
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
161
+{
694
int ssh_flags, int creat_mode, Error **errp)
162
+ ERRP_GUARD();
695
{
163
+ uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
696
int r, ret;
164
+ bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
697
- long port = 0;
165
+ uint32_t irq_state, vector_mask, pending;
698
+ unsigned int port = 0;
166
+
699
+ int new_sock = -1;
167
+ if (vector > PCI_MSI_VECTORS_MAX) {
700
168
+ error_setg(errp, "msi: vector %d not allocated. max vector is %d",
701
if (opts->has_user) {
169
+ vector, PCI_MSI_VECTORS_MAX);
702
s->user = g_strdup(opts->user);
170
+ return;
703
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
704
s->inet = opts->server;
705
opts->server = NULL;
706
707
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
708
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
709
error_setg(errp, "Use only numeric port value");
710
ret = -EINVAL;
711
goto err;
712
}
713
714
/* Open the socket and connect. */
715
- s->sock = inet_connect_saddr(s->inet, errp);
716
- if (s->sock < 0) {
717
+ new_sock = inet_connect_saddr(s->inet, errp);
718
+ if (new_sock < 0) {
719
ret = -EIO;
720
goto err;
721
}
722
723
+ /*
724
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
725
+ * but do not fail if it cannot be disabled.
726
+ */
727
+ r = socket_set_nodelay(new_sock);
728
+ if (r < 0) {
729
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
730
+ s->inet->host, strerror(errno));
171
+ }
731
+ }
172
+
732
+
173
+ vector_mask = (1U << vector);
733
/* Create SSH session. */
174
+
734
- s->session = libssh2_session_init();
175
+ irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));
735
+ s->session = ssh_new();
176
+
736
if (!s->session) {
177
+ if (mask) {
737
ret = -EINVAL;
178
+ irq_state |= vector_mask;
738
- session_error_setg(errp, s, "failed to initialize libssh2 session");
739
+ session_error_setg(errp, s, "failed to initialize libssh session");
740
goto err;
741
}
742
743
-#if TRACE_LIBSSH2 != 0
744
- libssh2_trace(s->session, TRACE_LIBSSH2);
745
-#endif
746
+ /*
747
+ * Make sure we are in blocking mode during the connection and
748
+ * authentication phases.
749
+ */
750
+ ssh_set_blocking(s->session, 1);
751
752
- r = libssh2_session_handshake(s->session, s->sock);
753
- if (r != 0) {
754
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
755
+ if (r < 0) {
756
+ ret = -EINVAL;
757
+ session_error_setg(errp, s,
758
+ "failed to set the user in the libssh session");
759
+ goto err;
760
+ }
761
+
762
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
763
+ if (r < 0) {
764
+ ret = -EINVAL;
765
+ session_error_setg(errp, s,
766
+ "failed to set the host in the libssh session");
767
+ goto err;
768
+ }
769
+
770
+ if (port > 0) {
771
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
772
+ if (r < 0) {
773
+ ret = -EINVAL;
774
+ session_error_setg(errp, s,
775
+ "failed to set the port in the libssh session");
776
+ goto err;
777
+ }
778
+ }
779
+
780
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
781
+ if (r < 0) {
782
+ ret = -EINVAL;
783
+ session_error_setg(errp, s,
784
+ "failed to disable the compression in the libssh "
785
+ "session");
786
+ goto err;
787
+ }
788
+
789
+ /* Read ~/.ssh/config. */
790
+ r = ssh_options_parse_config(s->session, NULL);
791
+ if (r < 0) {
792
+ ret = -EINVAL;
793
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
794
+ goto err;
795
+ }
796
+
797
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
798
+ if (r < 0) {
799
+ ret = -EINVAL;
800
+ session_error_setg(errp, s,
801
+ "failed to set the socket in the libssh session");
802
+ goto err;
803
+ }
804
+ /* libssh took ownership of the socket. */
805
+ s->sock = new_sock;
806
+ new_sock = -1;
807
+
808
+ /* Connect. */
809
+ r = ssh_connect(s->session);
810
+ if (r != SSH_OK) {
811
ret = -EINVAL;
812
session_error_setg(errp, s, "failed to establish SSH session");
813
goto err;
814
}
815
816
/* Check the remote host's key against known_hosts. */
817
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
818
+ ret = check_host_key(s, opts->host_key_check, errp);
819
if (ret < 0) {
820
goto err;
821
}
822
823
/* Authenticate. */
824
- ret = authenticate(s, s->user, errp);
825
+ ret = authenticate(s, errp);
826
if (ret < 0) {
827
goto err;
828
}
829
830
/* Start SFTP. */
831
- s->sftp = libssh2_sftp_init(s->session);
832
+ s->sftp = sftp_new(s->session);
833
if (!s->sftp) {
834
- session_error_setg(errp, s, "failed to initialize sftp handle");
835
+ session_error_setg(errp, s, "failed to create sftp handle");
836
+ ret = -EINVAL;
837
+ goto err;
838
+ }
839
+
840
+ r = sftp_init(s->sftp);
841
+ if (r < 0) {
842
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
843
ret = -EINVAL;
844
goto err;
845
}
846
847
/* Open the remote file. */
848
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
849
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
850
- creat_mode);
851
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
852
if (!s->sftp_handle) {
853
- session_error_setg(errp, s, "failed to open remote file '%s'",
854
- opts->path);
855
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
856
+ opts->path);
857
ret = -EINVAL;
858
goto err;
859
}
860
861
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
862
- if (r < 0) {
863
+ /* Make sure the SFTP file is handled in blocking mode. */
864
+ sftp_file_set_blocking(s->sftp_handle);
865
+
866
+ s->attrs = sftp_fstat(s->sftp_handle);
867
+ if (!s->attrs) {
868
sftp_error_setg(errp, s, "failed to read file attributes");
869
return -EINVAL;
870
}
871
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
872
return 0;
873
874
err:
875
+ if (s->attrs) {
876
+ sftp_attributes_free(s->attrs);
877
+ }
878
+ s->attrs = NULL;
879
if (s->sftp_handle) {
880
- libssh2_sftp_close(s->sftp_handle);
881
+ sftp_close(s->sftp_handle);
882
}
883
s->sftp_handle = NULL;
884
if (s->sftp) {
885
- libssh2_sftp_shutdown(s->sftp);
886
+ sftp_free(s->sftp);
887
}
888
s->sftp = NULL;
889
if (s->session) {
890
- libssh2_session_disconnect(s->session,
891
- "from qemu ssh client: "
892
- "error opening connection");
893
- libssh2_session_free(s->session);
894
+ ssh_disconnect(s->session);
895
+ ssh_free(s->session);
896
}
897
s->session = NULL;
898
+ s->sock = -1;
899
+ if (new_sock >= 0) {
900
+ close(new_sock);
901
+ }
902
903
return ret;
904
}
905
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
906
907
ssh_state_init(s);
908
909
- ssh_flags = LIBSSH2_FXF_READ;
910
+ ssh_flags = 0;
911
if (bdrv_flags & BDRV_O_RDWR) {
912
- ssh_flags |= LIBSSH2_FXF_WRITE;
913
+ ssh_flags |= O_RDWR;
179
+ } else {
914
+ } else {
180
+ irq_state &= ~vector_mask;
915
+ ssh_flags |= O_RDONLY;
181
+ }
916
}
182
+
917
183
+ pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);
918
opts = ssh_parse_options(options, errp);
184
+
919
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
185
+ pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
920
}
186
+ if (!mask && (pending & vector_mask)) {
921
187
+ pending &= ~vector_mask;
922
/* Go non-blocking. */
188
+ pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
923
- libssh2_session_set_blocking(s->session, 0);
189
+ msi_notify(dev, vector);
924
+ ssh_set_blocking(s->session, 0);
190
+ }
925
191
+}
926
qapi_free_BlockdevOptionsSsh(opts);
192
+
927
193
void msi_notify(PCIDevice *dev, unsigned int vector)
928
return 0;
929
930
err:
931
- if (s->sock >= 0) {
932
- close(s->sock);
933
- }
934
- s->sock = -1;
935
-
936
qapi_free_BlockdevOptionsSsh(opts);
937
938
return ret;
939
@@ -XXX,XX +XXX,XX @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
194
{
940
{
195
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
941
ssize_t ret;
196
@@ -XXX,XX +XXX,XX @@ void msi_notify(PCIDevice *dev, unsigned int vector)
942
char c[1] = { '\0' };
197
943
- int was_blocking = libssh2_session_get_blocking(s->session);
198
void msi_send_message(PCIDevice *dev, MSIMessage msg)
944
+ int was_blocking = ssh_is_blocking(s->session);
199
{
945
200
- MemTxAttrs attrs = {};
946
/* offset must be strictly greater than the current size so we do
201
-
947
* not overwrite anything */
202
- attrs.requester_id = pci_requester_id(dev);
948
- assert(offset > 0 && offset > s->attrs.filesize);
203
- address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
949
+ assert(offset > 0 && offset > s->attrs->size);
204
- attrs, NULL);
950
205
+ dev->msi_trigger(dev, msg);
951
- libssh2_session_set_blocking(s->session, 1);
206
}
952
+ ssh_set_blocking(s->session, 1);
207
953
208
/* Normally called by pci_default_write_config(). */
954
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
209
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
955
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
210
index XXXXXXX..XXXXXXX 100644
956
+ sftp_seek64(s->sftp_handle, offset - 1);
211
--- a/hw/pci/msix.c
957
+ ret = sftp_write(s->sftp_handle, c, 1);
212
+++ b/hw/pci/msix.c
958
213
@@ -XXX,XX +XXX,XX @@
959
- libssh2_session_set_blocking(s->session, was_blocking);
214
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
960
+ ssh_set_blocking(s->session, was_blocking);
215
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
961
216
962
if (ret < 0) {
217
-MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
963
sftp_error_setg(errp, s, "Failed to grow file");
218
+static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
964
return -EIO;
219
{
965
}
220
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
966
221
MSIMessage msg;
967
- s->attrs.filesize = offset;
222
@@ -XXX,XX +XXX,XX @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
968
+ s->attrs->size = offset;
223
return msg;
224
}
225
226
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
227
+{
228
+ return dev->msix_prepare_message(dev, vector);
229
+}
230
+
231
/*
232
* Special API for POWER to configure the vectors through
233
* a side channel. Should never be used by devices.
234
@@ -XXX,XX +XXX,XX @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
235
}
236
}
237
238
+void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
239
+{
240
+ ERRP_GUARD();
241
+ unsigned offset;
242
+ bool was_masked;
243
+
244
+ if (vector > dev->msix_entries_nr) {
245
+ error_setg(errp, "msix: vector %d not allocated. max vector is %d",
246
+ vector, dev->msix_entries_nr);
247
+ return;
248
+ }
249
+
250
+ offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
251
+
252
+ was_masked = msix_is_masked(dev, vector);
253
+
254
+ if (mask) {
255
+ dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
256
+ } else {
257
+ dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
258
+ }
259
+
260
+ msix_handle_mask_update(dev, vector, was_masked);
261
+}
262
+
263
static bool msix_masked(PCIDevice *dev)
264
{
265
return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
266
@@ -XXX,XX +XXX,XX @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
267
"msix-pba", pba_size);
268
memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
269
270
+ dev->msix_prepare_message = msix_prepare_message;
271
+
272
return 0;
969
return 0;
273
}
970
}
274
971
275
@@ -XXX,XX +XXX,XX @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
972
@@ -XXX,XX +XXX,XX @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
276
g_free(dev->msix_entry_used);
973
ssh_state_init(&s);
277
dev->msix_entry_used = NULL;
974
278
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
975
ret = connect_to_ssh(&s, opts->location,
279
+ dev->msix_prepare_message = NULL;
976
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
977
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
978
+ O_RDWR | O_CREAT | O_TRUNC,
979
0644, errp);
980
if (ret < 0) {
981
goto fail;
982
@@ -XXX,XX +XXX,XX @@ static int ssh_has_zero_init(BlockDriverState *bs)
983
/* Assume false, unless we can positively prove it's true. */
984
int has_zero_init = 0;
985
986
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
987
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
988
- has_zero_init = 1;
989
- }
990
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
991
+ has_zero_init = 1;
992
}
993
994
return has_zero_init;
995
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
996
.co = qemu_coroutine_self()
997
};
998
999
- r = libssh2_session_block_directions(s->session);
1000
+ r = ssh_get_poll_flags(s->session);
1001
1002
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
1003
+ if (r & SSH_READ_PENDING) {
1004
rd_handler = restart_coroutine;
1005
}
1006
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
1007
+ if (r & SSH_WRITE_PENDING) {
1008
wr_handler = restart_coroutine;
1009
}
1010
1011
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1012
trace_ssh_co_yield_back(s->sock);
280
}
1013
}
281
1014
282
void msix_uninit_exclusive_bar(PCIDevice *dev)
1015
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
283
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
1016
- * in the remote file. Notice that it just updates a field in the
284
index XXXXXXX..XXXXXXX 100644
1017
- * sftp_handle structure, so there is no network traffic and it cannot
285
--- a/hw/pci/pci.c
1018
- * fail.
286
+++ b/hw/pci/pci.c
1019
- *
287
@@ -XXX,XX +XXX,XX @@ void pci_device_deassert_intx(PCIDevice *dev)
1020
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
1021
- * performance since it causes the handle to throw away all in-flight
1022
- * reads and buffered readahead data. Therefore this function tries
1023
- * to be intelligent about when to call the underlying libssh2 function.
1024
- */
1025
-#define SSH_SEEK_WRITE 0
1026
-#define SSH_SEEK_READ 1
1027
-#define SSH_SEEK_FORCE 2
1028
-
1029
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
1030
-{
1031
- bool op_read = (flags & SSH_SEEK_READ) != 0;
1032
- bool force = (flags & SSH_SEEK_FORCE) != 0;
1033
-
1034
- if (force || op_read != s->offset_op_read || offset != s->offset) {
1035
- trace_ssh_seek(offset);
1036
- libssh2_sftp_seek64(s->sftp_handle, offset);
1037
- s->offset = offset;
1038
- s->offset_op_read = op_read;
1039
- }
1040
-}
1041
-
1042
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1043
int64_t offset, size_t size,
1044
QEMUIOVector *qiov)
1045
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1046
1047
trace_ssh_read(offset, size);
1048
1049
- ssh_seek(s, offset, SSH_SEEK_READ);
1050
+ trace_ssh_seek(offset);
1051
+ sftp_seek64(s->sftp_handle, offset);
1052
1053
/* This keeps track of the current iovec element ('i'), where we
1054
* will write to next ('buf'), and the end of the current iovec
1055
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1056
buf = i->iov_base;
1057
end_of_vec = i->iov_base + i->iov_len;
1058
1059
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
1060
- * although it will also do readahead behind our backs. Therefore
1061
- * we may have to do repeated reads here until we have read 'size'
1062
- * bytes.
1063
- */
1064
for (got = 0; got < size; ) {
1065
+ size_t request_read_size;
1066
again:
1067
- trace_ssh_read_buf(buf, end_of_vec - buf);
1068
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
1069
- trace_ssh_read_return(r);
1070
+ /*
1071
+ * The size of SFTP packets is limited to 32K bytes, so limit
1072
+ * the amount of data requested to 16K, as libssh currently
1073
+ * does not handle multiple requests on its own.
1074
+ */
1075
+ request_read_size = MIN(end_of_vec - buf, 16384);
1076
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
1077
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
1078
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
1079
1080
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1081
+ if (r == SSH_AGAIN) {
1082
co_yield(s, bs);
1083
goto again;
1084
}
1085
- if (r < 0) {
1086
- sftp_error_trace(s, "read");
1087
- s->offset = -1;
1088
- return -EIO;
1089
- }
1090
- if (r == 0) {
1091
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
1092
/* EOF: Short read so pad the buffer with zeroes and return it. */
1093
qemu_iovec_memset(qiov, got, 0, size - got);
1094
return 0;
1095
}
1096
+ if (r <= 0) {
1097
+ sftp_error_trace(s, "read");
1098
+ return -EIO;
1099
+ }
1100
1101
got += r;
1102
buf += r;
1103
- s->offset += r;
1104
if (buf >= end_of_vec && got < size) {
1105
i++;
1106
buf = i->iov_base;
1107
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1108
1109
trace_ssh_write(offset, size);
1110
1111
- ssh_seek(s, offset, SSH_SEEK_WRITE);
1112
+ trace_ssh_seek(offset);
1113
+ sftp_seek64(s->sftp_handle, offset);
1114
1115
/* This keeps track of the current iovec element ('i'), where we
1116
* will read from next ('buf'), and the end of the current iovec
1117
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1118
end_of_vec = i->iov_base + i->iov_len;
1119
1120
for (written = 0; written < size; ) {
1121
+ size_t request_write_size;
1122
again:
1123
- trace_ssh_write_buf(buf, end_of_vec - buf);
1124
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
1125
- trace_ssh_write_return(r);
1126
+ /*
1127
+ * Avoid too large data packets, as libssh currently does not
1128
+ * handle multiple requests on its own.
1129
+ */
1130
+ request_write_size = MIN(end_of_vec - buf, 131072);
1131
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
1132
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
1133
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
1134
1135
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1136
+ if (r == SSH_AGAIN) {
1137
co_yield(s, bs);
1138
goto again;
1139
}
1140
if (r < 0) {
1141
sftp_error_trace(s, "write");
1142
- s->offset = -1;
1143
return -EIO;
1144
}
1145
- /* The libssh2 API is very unclear about this. A comment in
1146
- * the code says "nothing was acked, and no EAGAIN was
1147
- * received!" which apparently means that no data got sent
1148
- * out, and the underlying channel didn't return any EAGAIN
1149
- * indication. I think this is a bug in either libssh2 or
1150
- * OpenSSH (server-side). In any case, forcing a seek (to
1151
- * discard libssh2 internal buffers), and then trying again
1152
- * works for me.
1153
- */
1154
- if (r == 0) {
1155
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
1156
- co_yield(s, bs);
1157
- goto again;
1158
- }
1159
1160
written += r;
1161
buf += r;
1162
- s->offset += r;
1163
if (buf >= end_of_vec && written < size) {
1164
i++;
1165
buf = i->iov_base;
1166
end_of_vec = i->iov_base + i->iov_len;
1167
}
1168
1169
- if (offset + written > s->attrs.filesize)
1170
- s->attrs.filesize = offset + written;
1171
+ if (offset + written > s->attrs->size) {
1172
+ s->attrs->size = offset + written;
1173
+ }
1174
}
1175
1176
return 0;
1177
@@ -XXX,XX +XXX,XX @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
288
}
1178
}
289
}
1179
}
290
1180
291
+static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
1181
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
292
+{
1182
+#ifdef HAVE_LIBSSH_0_8
293
+ MemTxAttrs attrs = {};
1183
294
+
1184
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
295
+ attrs.requester_id = pci_requester_id(dev);
296
+ address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
297
+ attrs, NULL);
298
+}
299
+
300
static void pci_reset_regions(PCIDevice *dev)
301
{
1185
{
302
int r;
1186
int r;
303
@@ -XXX,XX +XXX,XX @@ static void pci_qdev_unrealize(DeviceState *dev)
1187
304
1188
trace_ssh_flush();
305
pci_device_deassert_intx(pci_dev);
1189
+
306
do_pci_unregister_device(pci_dev);
1190
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
307
+
1191
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
308
+ pci_dev->msi_trigger = NULL;
1192
+ return 0;
1193
+ }
1194
again:
1195
- r = libssh2_sftp_fsync(s->sftp_handle);
1196
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1197
+ r = sftp_fsync(s->sftp_handle);
1198
+ if (r == SSH_AGAIN) {
1199
co_yield(s, bs);
1200
goto again;
1201
}
1202
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
1203
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
1204
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
1205
- return 0;
1206
- }
1207
if (r < 0) {
1208
sftp_error_trace(s, "fsync");
1209
return -EIO;
1210
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1211
return ret;
309
}
1212
}
310
1213
311
void pci_register_bar(PCIDevice *pci_dev, int region_num,
1214
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
312
@@ -XXX,XX +XXX,XX @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
1215
+#else /* !HAVE_LIBSSH_0_8 */
313
}
1216
314
1217
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
315
pci_set_power(pci_dev, true);
1218
{
316
+
1219
BDRVSSHState *s = bs->opaque;
317
+ pci_dev->msi_trigger = pci_msi_trigger;
1220
1221
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
1222
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
1223
return 0;
318
}
1224
}
319
1225
320
PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
1226
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
321
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
1227
+#endif /* !HAVE_LIBSSH_0_8 */
1228
1229
static int64_t ssh_getlength(BlockDriverState *bs)
1230
{
1231
BDRVSSHState *s = bs->opaque;
1232
int64_t length;
1233
1234
- /* Note we cannot make a libssh2 call here. */
1235
- length = (int64_t) s->attrs.filesize;
1236
+ /* Note we cannot make a libssh call here. */
1237
+ length = (int64_t) s->attrs->size;
1238
trace_ssh_getlength(length);
1239
1240
return length;
1241
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
1242
return -ENOTSUP;
1243
}
1244
1245
- if (offset < s->attrs.filesize) {
1246
+ if (offset < s->attrs->size) {
1247
error_setg(errp, "ssh driver does not support shrinking files");
1248
return -ENOTSUP;
1249
}
1250
1251
- if (offset == s->attrs.filesize) {
1252
+ if (offset == s->attrs->size) {
1253
return 0;
1254
}
1255
1256
@@ -XXX,XX +XXX,XX @@ static void bdrv_ssh_init(void)
1257
{
1258
int r;
1259
1260
- r = libssh2_init(0);
1261
+ r = ssh_init();
1262
if (r != 0) {
1263
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
1264
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
1265
exit(EXIT_FAILURE);
1266
}
1267
1268
+#if TRACE_LIBSSH != 0
1269
+ ssh_set_log_level(TRACE_LIBSSH);
1270
+#endif
1271
+
1272
bdrv_register(&bdrv_ssh);
1273
}
1274
1275
diff --git a/.travis.yml b/.travis.yml
322
index XXXXXXX..XXXXXXX 100644
1276
index XXXXXXX..XXXXXXX 100644
323
--- a/hw/remote/machine.c
1277
--- a/.travis.yml
324
+++ b/hw/remote/machine.c
1278
+++ b/.travis.yml
325
@@ -XXX,XX +XXX,XX @@
1279
@@ -XXX,XX +XXX,XX @@ addons:
326
#include "hw/remote/iommu.h"
1280
- libseccomp-dev
327
#include "hw/qdev-core.h"
1281
- libspice-protocol-dev
328
#include "hw/remote/iommu.h"
1282
- libspice-server-dev
329
+#include "hw/remote/vfio-user-obj.h"
1283
- - libssh2-1-dev
330
+#include "hw/pci/msi.h"
1284
+ - libssh-dev
331
1285
- liburcu-dev
332
static void remote_machine_init(MachineState *machine)
1286
- libusb-1.0-0-dev
333
{
1287
- libvte-2.91-dev
334
@@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine)
1288
@@ -XXX,XX +XXX,XX @@ matrix:
335
1289
- libseccomp-dev
336
if (s->vfio_user) {
1290
- libspice-protocol-dev
337
remote_iommu_setup(pci_host->bus);
1291
- libspice-server-dev
338
+
1292
- - libssh2-1-dev
339
+ msi_nonbroken = true;
1293
+ - libssh-dev
340
+
1294
- liburcu-dev
341
+ vfu_object_set_bus_irq(pci_host->bus);
1295
- libusb-1.0-0-dev
342
+ } else {
1296
- libvte-2.91-dev
343
+ remote_iohub_init(&s->iohub);
1297
diff --git a/block/trace-events b/block/trace-events
344
+
1298
index XXXXXXX..XXXXXXX 100644
345
+ pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
1299
--- a/block/trace-events
346
+ &s->iohub, REMOTE_IOHUB_NB_PIRQS);
1300
+++ b/block/trace-events
347
}
1301
@@ -XXX,XX +XXX,XX @@ nbd_client_connect_success(const char *export_name) "export '%s'"
348
1302
# ssh.c
349
- remote_iohub_init(&s->iohub);
1303
ssh_restart_coroutine(void *co) "co=%p"
1304
ssh_flush(void) "fsync"
1305
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
1306
+ssh_check_host_key_knownhosts(void) "host key OK"
1307
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
1308
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
1309
ssh_co_yield_back(int sock) "s->sock=%d - back"
1310
ssh_getlength(int64_t length) "length=%" PRIi64
1311
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
1312
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1313
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
1314
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
1315
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
1316
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
1317
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1318
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
1319
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
1320
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
1321
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
1322
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
1323
+ssh_auth_methods(int methods) "auth methods=0x%x"
1324
+ssh_server_status(int status) "server status=%d"
1325
1326
# curl.c
1327
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
1328
@@ -XXX,XX +XXX,XX @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
1329
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
1330
1331
# ssh.c
1332
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
1333
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
1334
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
1335
index XXXXXXX..XXXXXXX 100644
1336
--- a/docs/qemu-block-drivers.texi
1337
+++ b/docs/qemu-block-drivers.texi
1338
@@ -XXX,XX +XXX,XX @@ print a warning when @code{fsync} is not supported:
1339
1340
warning: ssh server @code{ssh.example.com:22} does not support fsync
1341
1342
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
1343
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
1344
supported.
1345
1346
@node disk_images_nvme
1347
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
1348
index XXXXXXX..XXXXXXX 100644
1349
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
1350
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
1351
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1352
mxe-$TARGET-w64-mingw32.shared-curl \
1353
mxe-$TARGET-w64-mingw32.shared-glib \
1354
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1355
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1356
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1357
mxe-$TARGET-w64-mingw32.shared-lzo \
1358
mxe-$TARGET-w64-mingw32.shared-nettle \
1359
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
1360
index XXXXXXX..XXXXXXX 100644
1361
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
1362
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
1363
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1364
mxe-$TARGET-w64-mingw32.shared-curl \
1365
mxe-$TARGET-w64-mingw32.shared-glib \
1366
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1367
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1368
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1369
mxe-$TARGET-w64-mingw32.shared-lzo \
1370
mxe-$TARGET-w64-mingw32.shared-nettle \
1371
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
1372
index XXXXXXX..XXXXXXX 100644
1373
--- a/tests/docker/dockerfiles/fedora.docker
1374
+++ b/tests/docker/dockerfiles/fedora.docker
1375
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1376
libpng-devel \
1377
librbd-devel \
1378
libseccomp-devel \
1379
- libssh2-devel \
1380
+ libssh-devel \
1381
libubsan \
1382
libusbx-devel \
1383
libxml2-devel \
1384
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1385
mingw32-gtk3 \
1386
mingw32-libjpeg-turbo \
1387
mingw32-libpng \
1388
- mingw32-libssh2 \
1389
mingw32-libtasn1 \
1390
mingw32-nettle \
1391
mingw32-pixman \
1392
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1393
mingw64-gtk3 \
1394
mingw64-libjpeg-turbo \
1395
mingw64-libpng \
1396
- mingw64-libssh2 \
1397
mingw64-libtasn1 \
1398
mingw64-nettle \
1399
mingw64-pixman \
1400
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
1401
index XXXXXXX..XXXXXXX 100644
1402
--- a/tests/docker/dockerfiles/ubuntu.docker
1403
+++ b/tests/docker/dockerfiles/ubuntu.docker
1404
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1405
libsnappy-dev \
1406
libspice-protocol-dev \
1407
libspice-server-dev \
1408
- libssh2-1-dev \
1409
+ libssh-dev \
1410
libusb-1.0-0-dev \
1411
libusbredirhost-dev \
1412
libvdeplug-dev \
1413
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
1414
index XXXXXXX..XXXXXXX 100644
1415
--- a/tests/docker/dockerfiles/ubuntu1804.docker
1416
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
1417
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1418
libsnappy-dev \
1419
libspice-protocol-dev \
1420
libspice-server-dev \
1421
- libssh2-1-dev \
1422
+ libssh-dev \
1423
libusb-1.0-0-dev \
1424
libusbredirhost-dev \
1425
libvdeplug-dev \
1426
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
1427
index XXXXXXX..XXXXXXX 100755
1428
--- a/tests/qemu-iotests/207
1429
+++ b/tests/qemu-iotests/207
1430
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1431
1432
iotests.img_info_log(remote_path)
1433
1434
- md5_key = subprocess.check_output(
1435
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1436
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
1437
- shell=True).rstrip().decode('ascii')
1438
+ keys = subprocess.check_output(
1439
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1440
+ 'cut -d" " -f3',
1441
+ shell=True).rstrip().decode('ascii').split('\n')
1442
+
1443
+ # Mappings of base64 representations to digests
1444
+ md5_keys = {}
1445
+ sha1_keys = {}
1446
+
1447
+ for key in keys:
1448
+ md5_keys[key] = subprocess.check_output(
1449
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
1450
+ shell=True).rstrip().decode('ascii')
1451
+
1452
+ sha1_keys[key] = subprocess.check_output(
1453
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
1454
+ shell=True).rstrip().decode('ascii')
1455
1456
vm.launch()
1457
+
1458
+ # Find correct key first
1459
+ matching_key = None
1460
+ for key in keys:
1461
+ result = vm.qmp('blockdev-add',
1462
+ driver='ssh', node_name='node0', path=disk_path,
1463
+ server={
1464
+ 'host': '127.0.0.1',
1465
+ 'port': '22',
1466
+ }, host_key_check={
1467
+ 'mode': 'hash',
1468
+ 'type': 'md5',
1469
+ 'hash': md5_keys[key],
1470
+ })
1471
+
1472
+ if 'error' not in result:
1473
+ vm.qmp('blockdev-del', node_name='node0')
1474
+ matching_key = key
1475
+ break
1476
+
1477
+ if matching_key is None:
1478
+ vm.shutdown()
1479
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
1480
+
1481
blockdev_create(vm, { 'driver': 'ssh',
1482
'location': {
1483
'path': disk_path,
1484
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1485
'host-key-check': {
1486
'mode': 'hash',
1487
'type': 'md5',
1488
- 'hash': md5_key,
1489
+ 'hash': md5_keys[matching_key],
1490
}
1491
},
1492
'size': 8388608 })
1493
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1494
1495
iotests.img_info_log(remote_path)
1496
1497
- sha1_key = subprocess.check_output(
1498
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1499
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
1500
- shell=True).rstrip().decode('ascii')
350
-
1501
-
351
- pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
1502
vm.launch()
352
- &s->iohub, REMOTE_IOHUB_NB_PIRQS);
1503
blockdev_create(vm, { 'driver': 'ssh',
353
-
1504
'location': {
354
qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
1505
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
355
}
1506
'host-key-check': {
356
1507
'mode': 'hash',
357
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
1508
'type': 'sha1',
1509
- 'hash': sha1_key,
1510
+ 'hash': sha1_keys[matching_key],
1511
}
1512
},
1513
'size': 4194304 })
1514
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
358
index XXXXXXX..XXXXXXX 100644
1515
index XXXXXXX..XXXXXXX 100644
359
--- a/hw/remote/vfio-user-obj.c
1516
--- a/tests/qemu-iotests/207.out
360
+++ b/hw/remote/vfio-user-obj.c
1517
+++ b/tests/qemu-iotests/207.out
361
@@ -XXX,XX +XXX,XX @@
1518
@@ -XXX,XX +XXX,XX @@ virtual size: 4 MiB (4194304 bytes)
362
#include "hw/pci/pci.h"
1519
363
#include "qemu/timer.h"
1520
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
364
#include "exec/memory.h"
1521
{"return": {}}
365
+#include "hw/pci/msi.h"
1522
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
366
+#include "hw/pci/msix.h"
1523
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
367
+#include "hw/remote/vfio-user-obj.h"
1524
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
368
1525
{"return": {}}
369
#define TYPE_VFU_OBJECT "x-vfio-user-server"
1526
370
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
371
@@ -XXX,XX +XXX,XX @@ struct VfuObject {
372
Error *unplug_blocker;
373
374
int vfu_poll_fd;
375
+
376
+ MSITriggerFunc *default_msi_trigger;
377
+ MSIPrepareMessageFunc *default_msi_prepare_message;
378
+ MSIxPrepareMessageFunc *default_msix_prepare_message;
379
};
380
381
static void vfu_object_init_ctx(VfuObject *o, Error **errp);
382
@@ -XXX,XX +XXX,XX @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
383
}
384
}
385
386
+static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
387
+{
388
+ int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
389
+ pci_dev->devfn);
390
+
391
+ return pci_bdf;
392
+}
393
+
394
+static void vfu_object_set_irq(void *opaque, int pirq, int level)
395
+{
396
+ PCIBus *pci_bus = opaque;
397
+ PCIDevice *pci_dev = NULL;
398
+ vfu_ctx_t *vfu_ctx = NULL;
399
+ int pci_bus_num, devfn;
400
+
401
+ if (level) {
402
+ pci_bus_num = PCI_BUS_NUM(pirq);
403
+ devfn = PCI_BDF_TO_DEVFN(pirq);
404
+
405
+ /*
406
+ * pci_find_device() performs at O(1) if the device is attached
407
+ * to the root PCI bus. Whereas, if the device is attached to a
408
+ * secondary PCI bus (such as when a root port is involved),
409
+ * finding the parent PCI bus could take O(n)
410
+ */
411
+ pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
412
+
413
+ vfu_ctx = pci_dev->irq_opaque;
414
+
415
+ g_assert(vfu_ctx);
416
+
417
+ vfu_irq_trigger(vfu_ctx, 0);
418
+ }
419
+}
420
+
421
+static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
422
+ unsigned int vector)
423
+{
424
+ MSIMessage msg;
425
+
426
+ msg.address = 0;
427
+ msg.data = vector;
428
+
429
+ return msg;
430
+}
431
+
432
+static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
433
+{
434
+ vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;
435
+
436
+ vfu_irq_trigger(vfu_ctx, msg.data);
437
+}
438
+
439
+static void vfu_object_setup_msi_cbs(VfuObject *o)
440
+{
441
+ o->default_msi_trigger = o->pci_dev->msi_trigger;
442
+ o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
443
+ o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;
444
+
445
+ o->pci_dev->msi_trigger = vfu_object_msi_trigger;
446
+ o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
447
+ o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
448
+}
449
+
450
+static void vfu_object_restore_msi_cbs(VfuObject *o)
451
+{
452
+ o->pci_dev->msi_trigger = o->default_msi_trigger;
453
+ o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
454
+ o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
455
+}
456
+
457
+static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
458
+ uint32_t count, bool mask)
459
+{
460
+ VfuObject *o = vfu_get_private(vfu_ctx);
461
+ Error *err = NULL;
462
+ uint32_t vector;
463
+
464
+ for (vector = start; vector < count; vector++) {
465
+ msix_set_mask(o->pci_dev, vector, mask, &err);
466
+ if (err) {
467
+ VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
468
+ error_get_pretty(err));
469
+ error_free(err);
470
+ err = NULL;
471
+ }
472
+ }
473
+}
474
+
475
+static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
476
+ uint32_t count, bool mask)
477
+{
478
+ VfuObject *o = vfu_get_private(vfu_ctx);
479
+ Error *err = NULL;
480
+ uint32_t vector;
481
+
482
+ for (vector = start; vector < count; vector++) {
483
+ msi_set_mask(o->pci_dev, vector, mask, &err);
484
+ if (err) {
485
+ VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
486
+ error_get_pretty(err));
487
+ error_free(err);
488
+ err = NULL;
489
+ }
490
+ }
491
+}
492
+
493
+static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
494
+{
495
+ vfu_ctx_t *vfu_ctx = o->vfu_ctx;
496
+ int ret;
497
+
498
+ ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
499
+ if (ret < 0) {
500
+ return ret;
501
+ }
502
+
503
+ if (msix_nr_vectors_allocated(pci_dev)) {
504
+ ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
505
+ msix_nr_vectors_allocated(pci_dev));
506
+ vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
507
+ &vfu_msix_irq_state);
508
+ } else if (msi_nr_vectors_allocated(pci_dev)) {
509
+ ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
510
+ msi_nr_vectors_allocated(pci_dev));
511
+ vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
512
+ &vfu_msi_irq_state);
513
+ }
514
+
515
+ if (ret < 0) {
516
+ return ret;
517
+ }
518
+
519
+ vfu_object_setup_msi_cbs(o);
520
+
521
+ pci_dev->irq_opaque = vfu_ctx;
522
+
523
+ return 0;
524
+}
525
+
526
+void vfu_object_set_bus_irq(PCIBus *pci_bus)
527
+{
528
+ int bus_num = pci_bus_num(pci_bus);
529
+ int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);
530
+
531
+ pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus,
532
+ max_bdf);
533
+}
534
+
535
/*
536
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
537
* properties. It also depends on devices instantiated in QEMU. These
538
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
539
540
vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
541
542
+ ret = vfu_object_setup_irqs(o, o->pci_dev);
543
+ if (ret < 0) {
544
+ error_setg(errp, "vfu: Failed to setup interrupts for %s",
545
+ o->device);
546
+ goto fail;
547
+ }
548
+
549
ret = vfu_realize_ctx(o->vfu_ctx);
550
if (ret < 0) {
551
error_setg(errp, "vfu: Failed to realize device %s- %s",
552
@@ -XXX,XX +XXX,XX @@ fail:
553
o->unplug_blocker = NULL;
554
}
555
if (o->pci_dev) {
556
+ vfu_object_restore_msi_cbs(o);
557
+ o->pci_dev->irq_opaque = NULL;
558
object_unref(OBJECT(o->pci_dev));
559
o->pci_dev = NULL;
560
}
561
@@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj)
562
}
563
564
if (o->pci_dev) {
565
+ vfu_object_restore_msi_cbs(o);
566
+ o->pci_dev->irq_opaque = NULL;
567
object_unref(OBJECT(o->pci_dev));
568
o->pci_dev = NULL;
569
}
570
diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c
571
new file mode 100644
572
index XXXXXXX..XXXXXXX
573
--- /dev/null
574
+++ b/stubs/vfio-user-obj.c
575
@@ -XXX,XX +XXX,XX @@
576
+#include "qemu/osdep.h"
577
+#include "hw/remote/vfio-user-obj.h"
578
+
579
+void vfu_object_set_bus_irq(PCIBus *pci_bus)
580
+{
581
+}
582
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
583
index XXXXXXX..XXXXXXX 100644
584
--- a/hw/remote/trace-events
585
+++ b/hw/remote/trace-events
586
@@ -XXX,XX +XXX,XX @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
587
vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64""
588
vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64""
589
vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""
590
+vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"
591
diff --git a/stubs/meson.build b/stubs/meson.build
592
index XXXXXXX..XXXXXXX 100644
593
--- a/stubs/meson.build
594
+++ b/stubs/meson.build
595
@@ -XXX,XX +XXX,XX @@ if have_system
596
else
597
stub_ss.add(files('qdev.c'))
598
endif
599
+stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c'))
600
--
1527
--
601
2.36.1
1528
2.21.0
1529
1530
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
1
Tests should place their files into the test directory. This includes
2
Unix sockets. 205 currently fails to do so, which prevents it from
3
being run concurrently.
2
4
3
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
5
Signed-off-by: Max Reitz <mreitz@redhat.com>
4
Message-id: 20220526115432.138384-1-vsementsov@yandex-team.ru
6
Message-id: 20190618210238.9524-1-mreitz@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
6
---
9
---
7
MAINTAINERS | 22 ++++++++++++----------
10
tests/qemu-iotests/205 | 2 +-
8
1 file changed, 12 insertions(+), 10 deletions(-)
11
1 file changed, 1 insertion(+), 1 deletion(-)
9
12
10
diff --git a/MAINTAINERS b/MAINTAINERS
13
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
11
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100755
12
--- a/MAINTAINERS
15
--- a/tests/qemu-iotests/205
13
+++ b/MAINTAINERS
16
+++ b/tests/qemu-iotests/205
14
@@ -XXX,XX +XXX,XX @@ F: scsi/*
17
@@ -XXX,XX +XXX,XX @@ import iotests
15
18
import time
16
Block Jobs
19
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
17
M: John Snow <jsnow@redhat.com>
20
18
-M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
21
-nbd_sock = 'nbd_sock'
19
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
22
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
20
L: qemu-block@nongnu.org
23
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
21
S: Supported
24
disk = os.path.join(iotests.test_dir, 'disk')
22
F: blockjob.c
25
23
@@ -XXX,XX +XXX,XX @@ F: block/aio_task.c
24
F: util/qemu-co-shared-resource.c
25
F: include/qemu/co-shared-resource.h
26
T: git https://gitlab.com/jsnow/qemu.git jobs
27
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git jobs
28
+T: git https://gitlab.com/vsementsov/qemu.git block
29
30
Block QAPI, monitor, command line
31
M: Markus Armbruster <armbru@redhat.com>
32
@@ -XXX,XX +XXX,XX @@ F: include/hw/cxl/
33
34
Dirty Bitmaps
35
M: Eric Blake <eblake@redhat.com>
36
-M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
37
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
38
R: John Snow <jsnow@redhat.com>
39
L: qemu-block@nongnu.org
40
S: Supported
41
@@ -XXX,XX +XXX,XX @@ F: util/hbitmap.c
42
F: tests/unit/test-hbitmap.c
43
F: docs/interop/bitmaps.rst
44
T: git https://repo.or.cz/qemu/ericb.git bitmaps
45
+T: git https://gitlab.com/vsementsov/qemu.git block
46
47
Character device backends
48
M: Marc-André Lureau <marcandre.lureau@redhat.com>
49
@@ -XXX,XX +XXX,XX @@ F: scripts/*.py
50
F: tests/*.py
51
52
Benchmark util
53
-M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
54
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
55
S: Maintained
56
F: scripts/simplebench/
57
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git simplebench
58
+T: git https://gitlab.com/vsementsov/qemu.git simplebench
59
60
Transactions helper
61
-M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
62
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
63
S: Maintained
64
F: include/qemu/transactions.h
65
F: util/transactions.c
66
+T: git https://gitlab.com/vsementsov/qemu.git block
67
68
QAPI
69
M: Markus Armbruster <armbru@redhat.com>
70
@@ -XXX,XX +XXX,XX @@ F: block/iscsi-opts.c
71
72
Network Block Device (NBD)
73
M: Eric Blake <eblake@redhat.com>
74
-M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
75
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
76
L: qemu-block@nongnu.org
77
S: Maintained
78
F: block/nbd*
79
@@ -XXX,XX +XXX,XX @@ F: docs/interop/nbd.txt
80
F: docs/tools/qemu-nbd.rst
81
F: tests/qemu-iotests/tests/*nbd*
82
T: git https://repo.or.cz/qemu/ericb.git nbd
83
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd
84
+T: git https://gitlab.com/vsementsov/qemu.git block
85
86
NFS
87
M: Peter Lieven <pl@kamp.de>
88
@@ -XXX,XX +XXX,XX @@ F: block/dmg.c
89
parallels
90
M: Stefan Hajnoczi <stefanha@redhat.com>
91
M: Denis V. Lunev <den@openvz.org>
92
-M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
93
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
94
L: qemu-block@nongnu.org
95
S: Supported
96
F: block/parallels.c
97
F: block/parallels-ext.c
98
F: docs/interop/parallels.txt
99
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git parallels
100
+T: git https://gitlab.com/vsementsov/qemu.git block
101
102
qed
103
M: Stefan Hajnoczi <stefanha@redhat.com>
104
--
26
--
105
2.36.1
27
2.21.0
106
28
107
29
diff view generated by jsdifflib
Deleted patch
1
From: Sam Li <faithilikerun@gmail.com>
2
1
3
Linux recently added a new io_uring(7) optimization API that QEMU
4
doesn't take advantage of yet. The liburing library that QEMU uses
5
has added a corresponding new API called io_uring_register_ring_fd().
6
When this API is called after creating the ring, the io_uring_submit()
7
library function passes a flag to the io_uring_enter(2) syscall
8
allowing it to skip the ring file descriptor fdget()/fdput()
9
operations. This saves some CPU cycles.
10
11
Signed-off-by: Sam Li <faithilikerun@gmail.com>
12
Message-id: 20220531105011.111082-1-faithilikerun@gmail.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
---
15
meson.build | 1 +
16
block/io_uring.c | 12 +++++++++++-
17
2 files changed, 12 insertions(+), 1 deletion(-)
18
19
diff --git a/meson.build b/meson.build
20
index XXXXXXX..XXXXXXX 100644
21
--- a/meson.build
22
+++ b/meson.build
23
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_LIBNFS', libnfs.found())
24
config_host_data.set('CONFIG_LIBSSH', libssh.found())
25
config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
26
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
27
+config_host_data.set('CONFIG_LIBURING_REGISTER_RING_FD', cc.has_function('io_uring_register_ring_fd', prefix: '#include <liburing.h>', dependencies:linux_io_uring))
28
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
29
config_host_data.set('CONFIG_NUMA', numa.found())
30
config_host_data.set('CONFIG_OPENGL', opengl.found())
31
diff --git a/block/io_uring.c b/block/io_uring.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/block/io_uring.c
34
+++ b/block/io_uring.c
35
@@ -XXX,XX +XXX,XX @@
36
#include "qapi/error.h"
37
#include "trace.h"
38
39
+
40
/* io_uring ring size */
41
#define MAX_ENTRIES 128
42
43
@@ -XXX,XX +XXX,XX @@ LuringState *luring_init(Error **errp)
44
}
45
46
ioq_init(&s->io_q);
47
+#ifdef CONFIG_LIBURING_REGISTER_RING_FD
48
+ if (io_uring_register_ring_fd(&s->ring) < 0) {
49
+ /*
50
+ * Only warn about this error: we will fallback to the non-optimized
51
+ * io_uring operations.
52
+ */
53
+ warn_report("failed to register linux io_uring ring file descriptor");
54
+ }
55
+#endif
56
+
57
return s;
58
-
59
}
60
61
void luring_cleanup(LuringState *s)
62
--
63
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Add blocker to prevent hot-unplug of devices
4
5
TYPE_VFIO_USER_SERVER, which is introduced shortly, attaches itself to a
6
PCIDevice on which it depends. If the attached PCIDevice gets removed
7
while the server is in use, it could cause it to crash. To prevent this,
8
TYPE_VFIO_USER_SERVER adds an unplug blocker for the PCIDevice.
9
10
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
11
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
12
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Message-id: c41ef80b7cc063314d629737bed2159e5713f2e0.1655151679.git.jag.raman@oracle.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
include/hw/qdev-core.h | 29 +++++++++++++++++++++++++++++
18
hw/core/qdev.c | 24 ++++++++++++++++++++++++
19
softmmu/qdev-monitor.c | 4 ++++
20
3 files changed, 57 insertions(+)
21
22
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/hw/qdev-core.h
25
+++ b/include/hw/qdev-core.h
26
@@ -XXX,XX +XXX,XX @@ struct DeviceState {
27
int instance_id_alias;
28
int alias_required_for_version;
29
ResettableState reset;
30
+ GSList *unplug_blockers;
31
};
32
33
struct DeviceListener {
34
@@ -XXX,XX +XXX,XX @@ void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
35
void qdev_machine_creation_done(void);
36
bool qdev_machine_modified(void);
37
38
+/**
39
+ * qdev_add_unplug_blocker: Add an unplug blocker to a device
40
+ *
41
+ * @dev: Device to be blocked from unplug
42
+ * @reason: Reason for blocking
43
+ */
44
+void qdev_add_unplug_blocker(DeviceState *dev, Error *reason);
45
+
46
+/**
47
+ * qdev_del_unplug_blocker: Remove an unplug blocker from a device
48
+ *
49
+ * @dev: Device to be unblocked
50
+ * @reason: Pointer to the Error used with qdev_add_unplug_blocker.
51
+ * Used as a handle to lookup the blocker for deletion.
52
+ */
53
+void qdev_del_unplug_blocker(DeviceState *dev, Error *reason);
54
+
55
+/**
56
+ * qdev_unplug_blocked: Confirm if a device is blocked from unplug
57
+ *
58
+ * @dev: Device to be tested
59
+ * @errp: Returns one of the reasons why the device is blocked,
60
+ * if any
61
+ *
62
+ * Returns: true if device is blocked from unplug, false otherwise
63
+ */
64
+bool qdev_unplug_blocked(DeviceState *dev, Error **errp);
65
+
66
/**
67
* GpioPolarity: Polarity of a GPIO line
68
*
69
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/hw/core/qdev.c
72
+++ b/hw/core/qdev.c
73
@@ -XXX,XX +XXX,XX @@ char *qdev_get_dev_path(DeviceState *dev)
74
return NULL;
75
}
76
77
+void qdev_add_unplug_blocker(DeviceState *dev, Error *reason)
78
+{
79
+ dev->unplug_blockers = g_slist_prepend(dev->unplug_blockers, reason);
80
+}
81
+
82
+void qdev_del_unplug_blocker(DeviceState *dev, Error *reason)
83
+{
84
+ dev->unplug_blockers = g_slist_remove(dev->unplug_blockers, reason);
85
+}
86
+
87
+bool qdev_unplug_blocked(DeviceState *dev, Error **errp)
88
+{
89
+ ERRP_GUARD();
90
+
91
+ if (dev->unplug_blockers) {
92
+ error_propagate(errp, error_copy(dev->unplug_blockers->data));
93
+ return true;
94
+ }
95
+
96
+ return false;
97
+}
98
+
99
static bool device_get_realized(Object *obj, Error **errp)
100
{
101
DeviceState *dev = DEVICE(obj);
102
@@ -XXX,XX +XXX,XX @@ static void device_finalize(Object *obj)
103
104
DeviceState *dev = DEVICE(obj);
105
106
+ g_assert(!dev->unplug_blockers);
107
+
108
QLIST_FOREACH_SAFE(ngl, &dev->gpios, node, next) {
109
QLIST_REMOVE(ngl, node);
110
qemu_free_irqs(ngl->in, ngl->num_in);
111
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/softmmu/qdev-monitor.c
114
+++ b/softmmu/qdev-monitor.c
115
@@ -XXX,XX +XXX,XX @@ void qdev_unplug(DeviceState *dev, Error **errp)
116
HotplugHandlerClass *hdc;
117
Error *local_err = NULL;
118
119
+ if (qdev_unplug_blocked(dev, errp)) {
120
+ return;
121
+ }
122
+
123
if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) {
124
error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name);
125
return;
126
--
127
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Allow hotplugging of PCI(e) devices to remote machine
4
5
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
6
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
7
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: d1e6cfa0afb528ad343758f9b1d918be0175c5e5.1655151679.git.jag.raman@oracle.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
hw/remote/machine.c | 10 ++++++++++
13
1 file changed, 10 insertions(+)
14
15
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/remote/machine.c
18
+++ b/hw/remote/machine.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "qapi/error.h"
21
#include "hw/pci/pci_host.h"
22
#include "hw/remote/iohub.h"
23
+#include "hw/qdev-core.h"
24
25
static void remote_machine_init(MachineState *machine)
26
{
27
@@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine)
28
29
pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
30
&s->iohub, REMOTE_IOHUB_NB_PIRQS);
31
+
32
+ qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
33
}
34
35
static void remote_machine_class_init(ObjectClass *oc, void *data)
36
{
37
MachineClass *mc = MACHINE_CLASS(oc);
38
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
39
40
mc->init = remote_machine_init;
41
mc->desc = "Experimental remote machine";
42
+
43
+ hc->unplug = qdev_simple_device_unplug_cb;
44
}
45
46
static const TypeInfo remote_machine = {
47
@@ -XXX,XX +XXX,XX @@ static const TypeInfo remote_machine = {
48
.parent = TYPE_MACHINE,
49
.instance_size = sizeof(RemoteMachineState),
50
.class_init = remote_machine_class_init,
51
+ .interfaces = (InterfaceInfo[]) {
52
+ { TYPE_HOTPLUG_HANDLER },
53
+ { }
54
+ }
55
};
56
57
static void remote_machine_register_types(void)
58
--
59
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Add vfio-user to x-remote machine. It is a boolean, which indicates if
4
the machine supports vfio-user protocol. The machine configures the bus
5
differently for vfio-user and multiprocess protocols, so this property
6
informs it on how to configure the bus.
7
8
This property should be short lived. Once vfio-user fully replaces
9
multiprocess, this property could be removed.
10
11
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
12
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
13
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
14
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-id: 5d51a152a419cbda35d070b8e49b772b60a7230a.1655151679.git.jag.raman@oracle.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
18
include/hw/remote/machine.h | 2 ++
19
hw/remote/machine.c | 23 +++++++++++++++++++++++
20
2 files changed, 25 insertions(+)
21
22
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/hw/remote/machine.h
25
+++ b/include/hw/remote/machine.h
26
@@ -XXX,XX +XXX,XX @@ struct RemoteMachineState {
27
28
RemotePCIHost *host;
29
RemoteIOHubState iohub;
30
+
31
+ bool vfio_user;
32
};
33
34
/* Used to pass to co-routine device and ioc. */
35
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/hw/remote/machine.c
38
+++ b/hw/remote/machine.c
39
@@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine)
40
qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
41
}
42
43
+static bool remote_machine_get_vfio_user(Object *obj, Error **errp)
44
+{
45
+ RemoteMachineState *s = REMOTE_MACHINE(obj);
46
+
47
+ return s->vfio_user;
48
+}
49
+
50
+static void remote_machine_set_vfio_user(Object *obj, bool value, Error **errp)
51
+{
52
+ RemoteMachineState *s = REMOTE_MACHINE(obj);
53
+
54
+ if (phase_check(PHASE_MACHINE_CREATED)) {
55
+ error_setg(errp, "Error enabling vfio-user - machine already created");
56
+ return;
57
+ }
58
+
59
+ s->vfio_user = value;
60
+}
61
+
62
static void remote_machine_class_init(ObjectClass *oc, void *data)
63
{
64
MachineClass *mc = MACHINE_CLASS(oc);
65
@@ -XXX,XX +XXX,XX @@ static void remote_machine_class_init(ObjectClass *oc, void *data)
66
mc->desc = "Experimental remote machine";
67
68
hc->unplug = qdev_simple_device_unplug_cb;
69
+
70
+ object_class_property_add_bool(oc, "vfio-user",
71
+ remote_machine_get_vfio_user,
72
+ remote_machine_set_vfio_user);
73
}
74
75
static const TypeInfo remote_machine = {
76
--
77
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Add the libvfio-user library as a submodule. Build it as a meson
4
subproject.
5
6
libvfio-user is distributed with BSD 3-Clause license and
7
json-c with MIT (Expat) license
8
9
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
10
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
11
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
12
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Message-id: c2adec87958b081d1dc8775d4aa05c897912f025.1655151679.git.jag.raman@oracle.com
14
15
[Changed submodule URL to QEMU's libvfio-user mirror on GitLab. The QEMU
16
project mirrors its dependencies so that it can provide full source code
17
even in the event that its dependencies become unavailable. Note that
18
the mirror repo is manually updated, so please contact me to make newer
19
libvfio-user commits available. If I become a bottleneck we can set up a
20
cronjob.
21
22
Updated scripts/meson-buildoptions.sh to match the meson_options.txt
23
change. Failure to do so can result in scripts/meson-buildoptions.sh
24
being modified by the build system later on and you end up with a dirty
25
working tree.
26
--Stefan]
27
28
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
29
---
30
MAINTAINERS | 1 +
31
meson_options.txt | 2 ++
32
configure | 17 +++++++++++++++++
33
meson.build | 23 ++++++++++++++++++++++-
34
.gitlab-ci.d/buildtest.yml | 1 +
35
.gitmodules | 3 +++
36
Kconfig.host | 4 ++++
37
hw/remote/Kconfig | 4 ++++
38
hw/remote/meson.build | 2 ++
39
scripts/meson-buildoptions.sh | 4 ++++
40
subprojects/libvfio-user | 1 +
41
tests/docker/dockerfiles/centos8.docker | 2 ++
42
12 files changed, 63 insertions(+), 1 deletion(-)
43
create mode 160000 subprojects/libvfio-user
44
45
diff --git a/MAINTAINERS b/MAINTAINERS
46
index XXXXXXX..XXXXXXX 100644
47
--- a/MAINTAINERS
48
+++ b/MAINTAINERS
49
@@ -XXX,XX +XXX,XX @@ F: hw/remote/proxy-memory-listener.c
50
F: include/hw/remote/proxy-memory-listener.h
51
F: hw/remote/iohub.c
52
F: include/hw/remote/iohub.h
53
+F: subprojects/libvfio-user
54
55
EBPF:
56
M: Jason Wang <jasowang@redhat.com>
57
diff --git a/meson_options.txt b/meson_options.txt
58
index XXXXXXX..XXXXXXX 100644
59
--- a/meson_options.txt
60
+++ b/meson_options.txt
61
@@ -XXX,XX +XXX,XX @@ option('cfi_debug', type: 'boolean', value: 'false',
62
description: 'Verbose errors in case of CFI violation')
63
option('multiprocess', type: 'feature', value: 'auto',
64
description: 'Out of process device emulation support')
65
+option('vfio_user_server', type: 'feature', value: 'disabled',
66
+ description: 'vfio-user server support')
67
option('dbus_display', type: 'feature', value: 'auto',
68
description: '-display dbus support')
69
option('tpm', type : 'feature', value : 'auto',
70
diff --git a/configure b/configure
71
index XXXXXXX..XXXXXXX 100755
72
--- a/configure
73
+++ b/configure
74
@@ -XXX,XX +XXX,XX @@ meson_args=""
75
ninja=""
76
bindir="bin"
77
skip_meson=no
78
+vfio_user_server="disabled"
79
80
# The following Meson options are handled manually (still they
81
# are included in the automatically generated help message)
82
@@ -XXX,XX +XXX,XX @@ for opt do
83
;;
84
--disable-blobs) meson_option_parse --disable-install-blobs ""
85
;;
86
+ --enable-vfio-user-server) vfio_user_server="enabled"
87
+ ;;
88
+ --disable-vfio-user-server) vfio_user_server="disabled"
89
+ ;;
90
--enable-tcmalloc) meson_option_parse --enable-malloc=tcmalloc tcmalloc
91
;;
92
--enable-jemalloc) meson_option_parse --enable-malloc=jemalloc jemalloc
93
@@ -XXX,XX +XXX,XX @@ write_container_target_makefile() {
94
95
96
97
+##########################################
98
+# check for vfio_user_server
99
+
100
+case "$vfio_user_server" in
101
+ enabled )
102
+ if test "$git_submodules_action" != "ignore"; then
103
+ git_submodules="${git_submodules} subprojects/libvfio-user"
104
+ fi
105
+ ;;
106
+esac
107
+
108
##########################################
109
# End of CC checks
110
# After here, no more $cc or $ld runs
111
@@ -XXX,XX +XXX,XX @@ if test "$skip_meson" = no; then
112
test "$slirp" != auto && meson_option_add "-Dslirp=$slirp"
113
test "$smbd" != '' && meson_option_add "-Dsmbd=$smbd"
114
test "$tcg" != enabled && meson_option_add "-Dtcg=$tcg"
115
+ test "$vfio_user_server" != auto && meson_option_add "-Dvfio_user_server=$vfio_user_server"
116
run_meson() {
117
NINJA=$ninja $meson setup --prefix "$prefix" "$@" $cross_arg "$PWD" "$source_path"
118
}
119
diff --git a/meson.build b/meson.build
120
index XXXXXXX..XXXXXXX 100644
121
--- a/meson.build
122
+++ b/meson.build
123
@@ -XXX,XX +XXX,XX @@ multiprocess_allowed = get_option('multiprocess') \
124
.require(targetos == 'linux', error_message: 'Multiprocess QEMU is supported only on Linux') \
125
.allowed()
126
127
+vfio_user_server_allowed = get_option('vfio_user_server') \
128
+ .require(targetos == 'linux', error_message: 'vfio-user server is supported only on Linux') \
129
+ .allowed()
130
+
131
have_tpm = get_option('tpm') \
132
.require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \
133
.allowed()
134
@@ -XXX,XX +XXX,XX @@ host_kconfig = \
135
(have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
136
('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \
137
(have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \
138
- (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : [])
139
+ (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \
140
+ (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : [])
141
142
ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ]
143
144
@@ -XXX,XX +XXX,XX @@ if have_system
145
endif
146
endif
147
148
+libvfio_user_dep = not_found
149
+if have_system and vfio_user_server_allowed
150
+ have_internal = fs.exists(meson.current_source_dir() / 'subprojects/libvfio-user/meson.build')
151
+
152
+ if not have_internal
153
+ error('libvfio-user source not found - please pull git submodule')
154
+ endif
155
+
156
+ libvfio_user_proj = subproject('libvfio-user')
157
+
158
+ libvfio_user_lib = libvfio_user_proj.get_variable('libvfio_user_dep')
159
+
160
+ libvfio_user_dep = declare_dependency(dependencies: [libvfio_user_lib])
161
+endif
162
+
163
fdt = not_found
164
if have_system
165
fdt_opt = get_option('fdt')
166
@@ -XXX,XX +XXX,XX @@ summary_info += {'target list': ' '.join(target_dirs)}
167
if have_system
168
summary_info += {'default devices': get_option('default_devices')}
169
summary_info += {'out of process emulation': multiprocess_allowed}
170
+ summary_info += {'vfio-user server': vfio_user_server_allowed}
171
endif
172
summary(summary_info, bool_yn: true, section: 'Targets and accelerators')
173
174
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
175
index XXXXXXX..XXXXXXX 100644
176
--- a/.gitlab-ci.d/buildtest.yml
177
+++ b/.gitlab-ci.d/buildtest.yml
178
@@ -XXX,XX +XXX,XX @@ build-system-centos:
179
IMAGE: centos8
180
CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system
181
--enable-modules --enable-trace-backends=dtrace --enable-docs
182
+ --enable-vfio-user-server
183
TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu
184
x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu
185
MAKE_CHECK_ARGS: check-build
186
diff --git a/.gitmodules b/.gitmodules
187
index XXXXXXX..XXXXXXX 100644
188
--- a/.gitmodules
189
+++ b/.gitmodules
190
@@ -XXX,XX +XXX,XX @@
191
[submodule "tests/lcitool/libvirt-ci"]
192
    path = tests/lcitool/libvirt-ci
193
    url = https://gitlab.com/libvirt/libvirt-ci.git
194
+[submodule "subprojects/libvfio-user"]
195
+    path = subprojects/libvfio-user
196
+    url = https://gitlab.com/qemu-project/libvfio-user.git
197
diff --git a/Kconfig.host b/Kconfig.host
198
index XXXXXXX..XXXXXXX 100644
199
--- a/Kconfig.host
200
+++ b/Kconfig.host
201
@@ -XXX,XX +XXX,XX @@ config MULTIPROCESS_ALLOWED
202
config FUZZ
203
bool
204
select SPARSE_MEM
205
+
206
+config VFIO_USER_SERVER_ALLOWED
207
+ bool
208
+ imply VFIO_USER_SERVER
209
diff --git a/hw/remote/Kconfig b/hw/remote/Kconfig
210
index XXXXXXX..XXXXXXX 100644
211
--- a/hw/remote/Kconfig
212
+++ b/hw/remote/Kconfig
213
@@ -XXX,XX +XXX,XX @@ config MULTIPROCESS
214
bool
215
depends on PCI && PCI_EXPRESS && KVM
216
select REMOTE_PCIHOST
217
+
218
+config VFIO_USER_SERVER
219
+ bool
220
+ depends on MULTIPROCESS
221
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
222
index XXXXXXX..XXXXXXX 100644
223
--- a/hw/remote/meson.build
224
+++ b/hw/remote/meson.build
225
@@ -XXX,XX +XXX,XX @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
226
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
227
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c'))
228
229
+remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep)
230
+
231
specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
232
specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy-memory-listener.c'))
233
234
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
235
index XXXXXXX..XXXXXXX 100644
236
--- a/scripts/meson-buildoptions.sh
237
+++ b/scripts/meson-buildoptions.sh
238
@@ -XXX,XX +XXX,XX @@ meson_options_help() {
239
printf "%s\n" ' usb-redir libusbredir support'
240
printf "%s\n" ' vde vde network backend support'
241
printf "%s\n" ' vdi vdi image format support'
242
+ printf "%s\n" ' vfio-user-server'
243
+ printf "%s\n" ' vfio-user server support'
244
printf "%s\n" ' vhost-crypto vhost-user crypto backend support'
245
printf "%s\n" ' vhost-kernel vhost kernel backend support'
246
printf "%s\n" ' vhost-net vhost-net kernel acceleration support'
247
@@ -XXX,XX +XXX,XX @@ _meson_option_parse() {
248
--disable-vde) printf "%s" -Dvde=disabled ;;
249
--enable-vdi) printf "%s" -Dvdi=enabled ;;
250
--disable-vdi) printf "%s" -Dvdi=disabled ;;
251
+ --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
252
+ --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
253
--enable-vhost-crypto) printf "%s" -Dvhost_crypto=enabled ;;
254
--disable-vhost-crypto) printf "%s" -Dvhost_crypto=disabled ;;
255
--enable-vhost-kernel) printf "%s" -Dvhost_kernel=enabled ;;
256
diff --git a/subprojects/libvfio-user b/subprojects/libvfio-user
257
new file mode 160000
258
index XXXXXXX..XXXXXXX
259
--- /dev/null
260
+++ b/subprojects/libvfio-user
261
@@ -0,0 +1 @@
262
+Subproject commit 0b28d205572c80b568a1003db2c8f37ca333e4d7
263
diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker
264
index XXXXXXX..XXXXXXX 100644
265
--- a/tests/docker/dockerfiles/centos8.docker
266
+++ b/tests/docker/dockerfiles/centos8.docker
267
@@ -XXX,XX +XXX,XX @@ RUN dnf update -y && \
268
libbpf-devel \
269
libcacard-devel \
270
libcap-ng-devel \
271
+ libcmocka-devel \
272
libcurl-devel \
273
libdrm-devel \
274
libepoxy-devel \
275
@@ -XXX,XX +XXX,XX @@ RUN dnf update -y && \
276
libgcrypt-devel \
277
libiscsi-devel \
278
libjpeg-devel \
279
+ json-c-devel \
280
libnfs-devel \
281
libpmem-devel \
282
libpng-devel \
283
--
284
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Create a context with the vfio-user library to run a PCI device.
4
5
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
6
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
7
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: a452871ac8c812ff96fc4f0ce6037f4769953fab.1655151679.git.jag.raman@oracle.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
hw/remote/vfio-user-obj.c | 82 +++++++++++++++++++++++++++++++++++++++
13
1 file changed, 82 insertions(+)
14
15
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/remote/vfio-user-obj.c
18
+++ b/hw/remote/vfio-user-obj.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "hw/remote/machine.h"
21
#include "qapi/error.h"
22
#include "qapi/qapi-visit-sockets.h"
23
+#include "qemu/notify.h"
24
+#include "sysemu/sysemu.h"
25
+#include "libvfio-user.h"
26
27
#define TYPE_VFU_OBJECT "x-vfio-user-server"
28
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
29
@@ -XXX,XX +XXX,XX @@ struct VfuObject {
30
char *device;
31
32
Error *err;
33
+
34
+ Notifier machine_done;
35
+
36
+ vfu_ctx_t *vfu_ctx;
37
};
38
39
+static void vfu_object_init_ctx(VfuObject *o, Error **errp);
40
+
41
static bool vfu_object_auto_shutdown(void)
42
{
43
bool auto_shutdown = true;
44
@@ -XXX,XX +XXX,XX @@ static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
45
{
46
VfuObject *o = VFU_OBJECT(obj);
47
48
+ if (o->vfu_ctx) {
49
+ error_setg(errp, "vfu: Unable to set socket property - server busy");
50
+ return;
51
+ }
52
+
53
qapi_free_SocketAddress(o->socket);
54
55
o->socket = NULL;
56
@@ -XXX,XX +XXX,XX @@ static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
57
}
58
59
trace_vfu_prop("socket", o->socket->u.q_unix.path);
60
+
61
+ vfu_object_init_ctx(o, errp);
62
}
63
64
static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
65
{
66
VfuObject *o = VFU_OBJECT(obj);
67
68
+ if (o->vfu_ctx) {
69
+ error_setg(errp, "vfu: Unable to set device property - server busy");
70
+ return;
71
+ }
72
+
73
g_free(o->device);
74
75
o->device = g_strdup(str);
76
77
trace_vfu_prop("device", str);
78
+
79
+ vfu_object_init_ctx(o, errp);
80
+}
81
+
82
+/*
83
+ * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
84
+ * properties. It also depends on devices instantiated in QEMU. These
85
+ * dependencies are not available during the instance_init phase of this
86
+ * object's life-cycle. As such, the server is initialized after the
87
+ * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
88
+ * when the machine is setup, and the dependencies are available.
89
+ */
90
+static void vfu_object_machine_done(Notifier *notifier, void *data)
91
+{
92
+ VfuObject *o = container_of(notifier, VfuObject, machine_done);
93
+ Error *err = NULL;
94
+
95
+ vfu_object_init_ctx(o, &err);
96
+
97
+ if (err) {
98
+ error_propagate(&error_abort, err);
99
+ }
100
+}
101
+
102
+static void vfu_object_init_ctx(VfuObject *o, Error **errp)
103
+{
104
+ ERRP_GUARD();
105
+
106
+ if (o->vfu_ctx || !o->socket || !o->device ||
107
+ !phase_check(PHASE_MACHINE_READY)) {
108
+ return;
109
+ }
110
+
111
+ if (o->err) {
112
+ error_propagate(errp, o->err);
113
+ o->err = NULL;
114
+ return;
115
+ }
116
+
117
+ o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0,
118
+ o, VFU_DEV_TYPE_PCI);
119
+ if (o->vfu_ctx == NULL) {
120
+ error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
121
+ return;
122
+ }
123
}
124
125
static void vfu_object_init(Object *obj)
126
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init(Object *obj)
127
TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
128
return;
129
}
130
+
131
+ if (!phase_check(PHASE_MACHINE_READY)) {
132
+ o->machine_done.notify = vfu_object_machine_done;
133
+ qemu_add_machine_init_done_notifier(&o->machine_done);
134
+ }
135
+
136
}
137
138
static void vfu_object_finalize(Object *obj)
139
@@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj)
140
141
o->socket = NULL;
142
143
+ if (o->vfu_ctx) {
144
+ vfu_destroy_ctx(o->vfu_ctx);
145
+ o->vfu_ctx = NULL;
146
+ }
147
+
148
g_free(o->device);
149
150
o->device = NULL;
151
@@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj)
152
if (!k->nr_devs && vfu_object_auto_shutdown()) {
153
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
154
}
155
+
156
+ if (o->machine_done.notify) {
157
+ qemu_remove_machine_init_done_notifier(&o->machine_done);
158
+ o->machine_done.notify = NULL;
159
+ }
160
}
161
162
static void vfu_object_class_init(ObjectClass *klass, void *data)
163
--
164
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Setup a handler to run vfio-user context. The context is driven by
4
messages to the file descriptor associated with it - get the fd for
5
the context and hook up the handler with it.
6
7
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
8
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
9
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-id: e934b0090529d448b6a7972b21dfc3d7421ce494.1655151679.git.jag.raman@oracle.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
qapi/misc.json | 31 ++++++++++
15
hw/remote/vfio-user-obj.c | 118 +++++++++++++++++++++++++++++++++++++-
16
2 files changed, 148 insertions(+), 1 deletion(-)
17
18
diff --git a/qapi/misc.json b/qapi/misc.json
19
index XXXXXXX..XXXXXXX 100644
20
--- a/qapi/misc.json
21
+++ b/qapi/misc.json
22
@@ -XXX,XX +XXX,XX @@
23
##
24
{ 'event': 'RTC_CHANGE',
25
'data': { 'offset': 'int', 'qom-path': 'str' } }
26
+
27
+##
28
+# @VFU_CLIENT_HANGUP:
29
+#
30
+# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
31
+# communication channel
32
+#
33
+# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object. It is the last component
34
+# of @vfu-qom-path referenced below
35
+#
36
+# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree
37
+#
38
+# @dev-id: ID of attached PCI device
39
+#
40
+# @dev-qom-path: path to attached PCI device in the QOM tree
41
+#
42
+# Since: 7.1
43
+#
44
+# Example:
45
+#
46
+# <- { "event": "VFU_CLIENT_HANGUP",
47
+# "data": { "vfu-id": "vfu1",
48
+# "vfu-qom-path": "/objects/vfu1",
49
+# "dev-id": "sas1",
50
+# "dev-qom-path": "/machine/peripheral/sas1" },
51
+# "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
52
+#
53
+##
54
+{ 'event': 'VFU_CLIENT_HANGUP',
55
+ 'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
56
+ 'dev-id': 'str', 'dev-qom-path': 'str' } }
57
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/hw/remote/vfio-user-obj.c
60
+++ b/hw/remote/vfio-user-obj.c
61
@@ -XXX,XX +XXX,XX @@
62
*
63
* device - id of a device on the server, a required option. PCI devices
64
* alone are supported presently.
65
+ *
66
+ * notes - x-vfio-user-server could block IO and monitor during the
67
+ * initialization phase.
68
*/
69
70
#include "qemu/osdep.h"
71
@@ -XXX,XX +XXX,XX @@
72
#include "hw/remote/machine.h"
73
#include "qapi/error.h"
74
#include "qapi/qapi-visit-sockets.h"
75
+#include "qapi/qapi-events-misc.h"
76
#include "qemu/notify.h"
77
+#include "qemu/thread.h"
78
#include "sysemu/sysemu.h"
79
#include "libvfio-user.h"
80
#include "hw/qdev-core.h"
81
#include "hw/pci/pci.h"
82
+#include "qemu/timer.h"
83
84
#define TYPE_VFU_OBJECT "x-vfio-user-server"
85
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
86
@@ -XXX,XX +XXX,XX @@ struct VfuObject {
87
PCIDevice *pci_dev;
88
89
Error *unplug_blocker;
90
+
91
+ int vfu_poll_fd;
92
};
93
94
static void vfu_object_init_ctx(VfuObject *o, Error **errp);
95
@@ -XXX,XX +XXX,XX @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
96
vfu_object_init_ctx(o, errp);
97
}
98
99
+static void vfu_object_ctx_run(void *opaque)
100
+{
101
+ VfuObject *o = opaque;
102
+ const char *vfu_id;
103
+ char *vfu_path, *pci_dev_path;
104
+ int ret = -1;
105
+
106
+ while (ret != 0) {
107
+ ret = vfu_run_ctx(o->vfu_ctx);
108
+ if (ret < 0) {
109
+ if (errno == EINTR) {
110
+ continue;
111
+ } else if (errno == ENOTCONN) {
112
+ vfu_id = object_get_canonical_path_component(OBJECT(o));
113
+ vfu_path = object_get_canonical_path(OBJECT(o));
114
+ g_assert(o->pci_dev);
115
+ pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
116
+ /* o->device is a required property and is non-NULL here */
117
+ g_assert(o->device);
118
+ qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
119
+ o->device, pci_dev_path);
120
+ qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
121
+ o->vfu_poll_fd = -1;
122
+ object_unparent(OBJECT(o));
123
+ g_free(vfu_path);
124
+ g_free(pci_dev_path);
125
+ break;
126
+ } else {
127
+ VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
128
+ o->device, strerror(errno));
129
+ break;
130
+ }
131
+ }
132
+ }
133
+}
134
+
135
+static void vfu_object_attach_ctx(void *opaque)
136
+{
137
+ VfuObject *o = opaque;
138
+ GPollFD pfds[1];
139
+ int ret;
140
+
141
+ qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
142
+
143
+ pfds[0].fd = o->vfu_poll_fd;
144
+ pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
145
+
146
+retry_attach:
147
+ ret = vfu_attach_ctx(o->vfu_ctx);
148
+ if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
149
+ /**
150
+ * vfu_object_attach_ctx can block QEMU's main loop
151
+ * during attach - the monitor and other IO
152
+ * could be unresponsive during this time.
153
+ */
154
+ (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
155
+ goto retry_attach;
156
+ } else if (ret < 0) {
157
+ VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
158
+ o->device, strerror(errno));
159
+ return;
160
+ }
161
+
162
+ o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
163
+ if (o->vfu_poll_fd < 0) {
164
+ VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
165
+ return;
166
+ }
167
+
168
+ qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
169
+}
170
+
171
/*
172
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
173
* properties. It also depends on devices instantiated in QEMU. These
174
@@ -XXX,XX +XXX,XX @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
175
}
176
}
177
178
+/**
179
+ * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
180
+ * an unplug blocker for the associated PCI device. Setup a FD handler
181
+ * to process incoming messages in the context's socket.
182
+ *
183
+ * The socket and device properties are mandatory, and this function
184
+ * will not create the context without them - the setters for these
185
+ * properties should call this function when the property is set. The
186
+ * machine should also be ready when this function is invoked - it is
187
+ * because QEMU objects are initialized before devices, and the
188
+ * associated PCI device wouldn't be available at the object
189
+ * initialization time. Until these conditions are satisfied, this
190
+ * function would return early without performing any task.
191
+ */
192
static void vfu_object_init_ctx(VfuObject *o, Error **errp)
193
{
194
ERRP_GUARD();
195
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
196
return;
197
}
198
199
- o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0,
200
+ o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
201
+ LIBVFIO_USER_FLAG_ATTACH_NB,
202
o, VFU_DEV_TYPE_PCI);
203
if (o->vfu_ctx == NULL) {
204
error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
205
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
206
TYPE_VFU_OBJECT, o->device);
207
qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
208
209
+ ret = vfu_realize_ctx(o->vfu_ctx);
210
+ if (ret < 0) {
211
+ error_setg(errp, "vfu: Failed to realize device %s- %s",
212
+ o->device, strerror(errno));
213
+ goto fail;
214
+ }
215
+
216
+ o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
217
+ if (o->vfu_poll_fd < 0) {
218
+ error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
219
+ goto fail;
220
+ }
221
+
222
+ qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
223
+
224
return;
225
226
fail:
227
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init(Object *obj)
228
qemu_add_machine_init_done_notifier(&o->machine_done);
229
}
230
231
+ o->vfu_poll_fd = -1;
232
}
233
234
static void vfu_object_finalize(Object *obj)
235
@@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj)
236
237
o->socket = NULL;
238
239
+ if (o->vfu_poll_fd != -1) {
240
+ qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
241
+ o->vfu_poll_fd = -1;
242
+ }
243
+
244
if (o->vfu_ctx) {
245
vfu_destroy_ctx(o->vfu_ctx);
246
o->vfu_ctx = NULL;
247
--
248
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Define and register handlers for PCI config space accesses
4
5
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
6
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
7
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: be9d2ccf9b1d24e50dcd9c23404dbf284142cec7.1655151679.git.jag.raman@oracle.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
hw/remote/vfio-user-obj.c | 51 +++++++++++++++++++++++++++++++++++++++
13
hw/remote/trace-events | 2 ++
14
2 files changed, 53 insertions(+)
15
16
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/remote/vfio-user-obj.c
19
+++ b/hw/remote/vfio-user-obj.c
20
@@ -XXX,XX +XXX,XX @@
21
#include "qapi/qapi-events-misc.h"
22
#include "qemu/notify.h"
23
#include "qemu/thread.h"
24
+#include "qemu/main-loop.h"
25
#include "sysemu/sysemu.h"
26
#include "libvfio-user.h"
27
#include "hw/qdev-core.h"
28
@@ -XXX,XX +XXX,XX @@ retry_attach:
29
qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
30
}
31
32
+static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
33
+ size_t count, loff_t offset,
34
+ const bool is_write)
35
+{
36
+ VfuObject *o = vfu_get_private(vfu_ctx);
37
+ uint32_t pci_access_width = sizeof(uint32_t);
38
+ size_t bytes = count;
39
+ uint32_t val = 0;
40
+ char *ptr = buf;
41
+ int len;
42
+
43
+ /*
44
+ * Writes to the BAR registers would trigger an update to the
45
+ * global Memory and IO AddressSpaces. But the remote device
46
+ * never uses the global AddressSpaces, therefore overlapping
47
+ * memory regions are not a problem
48
+ */
49
+ while (bytes > 0) {
50
+ len = (bytes > pci_access_width) ? pci_access_width : bytes;
51
+ if (is_write) {
52
+ memcpy(&val, ptr, len);
53
+ pci_host_config_write_common(o->pci_dev, offset,
54
+ pci_config_size(o->pci_dev),
55
+ val, len);
56
+ trace_vfu_cfg_write(offset, val);
57
+ } else {
58
+ val = pci_host_config_read_common(o->pci_dev, offset,
59
+ pci_config_size(o->pci_dev), len);
60
+ memcpy(ptr, &val, len);
61
+ trace_vfu_cfg_read(offset, val);
62
+ }
63
+ offset += len;
64
+ ptr += len;
65
+ bytes -= len;
66
+ }
67
+
68
+ return count;
69
+}
70
+
71
/*
72
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
73
* properties. It also depends on devices instantiated in QEMU. These
74
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
75
TYPE_VFU_OBJECT, o->device);
76
qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
77
78
+ ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
79
+ pci_config_size(o->pci_dev), &vfu_object_cfg_access,
80
+ VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
81
+ NULL, 0, -1, 0);
82
+ if (ret < 0) {
83
+ error_setg(errp,
84
+ "vfu: Failed to setup config space handlers for %s- %s",
85
+ o->device, strerror(errno));
86
+ goto fail;
87
+ }
88
+
89
ret = vfu_realize_ctx(o->vfu_ctx);
90
if (ret < 0) {
91
error_setg(errp, "vfu: Failed to realize device %s- %s",
92
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
93
index XXXXXXX..XXXXXXX 100644
94
--- a/hw/remote/trace-events
95
+++ b/hw/remote/trace-events
96
@@ -XXX,XX +XXX,XX @@ mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d,
97
98
# vfio-user-obj.c
99
vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s"
100
+vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x"
101
+vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x"
102
--
103
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Define and register callbacks to manage the RAM regions used for
4
device DMA.
5
6
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
7
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
8
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: faacbcd45c4d02c591f0dbfdc19041fbb3eae7eb.1655151679.git.jag.raman@oracle.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
hw/remote/machine.c | 5 ++++
14
hw/remote/vfio-user-obj.c | 55 +++++++++++++++++++++++++++++++++++++++
15
hw/remote/trace-events | 2 ++
16
3 files changed, 62 insertions(+)
17
18
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/remote/machine.c
21
+++ b/hw/remote/machine.c
22
@@ -XXX,XX +XXX,XX @@
23
#include "hw/remote/iohub.h"
24
#include "hw/remote/iommu.h"
25
#include "hw/qdev-core.h"
26
+#include "hw/remote/iommu.h"
27
28
static void remote_machine_init(MachineState *machine)
29
{
30
@@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine)
31
32
pci_host = PCI_HOST_BRIDGE(rem_host);
33
34
+ if (s->vfio_user) {
35
+ remote_iommu_setup(pci_host->bus);
36
+ }
37
+
38
remote_iohub_init(&s->iohub);
39
40
pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
41
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/remote/vfio-user-obj.c
44
+++ b/hw/remote/vfio-user-obj.c
45
@@ -XXX,XX +XXX,XX @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
46
return count;
47
}
48
49
+static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
50
+{
51
+ VfuObject *o = vfu_get_private(vfu_ctx);
52
+ AddressSpace *dma_as = NULL;
53
+ MemoryRegion *subregion = NULL;
54
+ g_autofree char *name = NULL;
55
+ struct iovec *iov = &info->iova;
56
+
57
+ if (!info->vaddr) {
58
+ return;
59
+ }
60
+
61
+ name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
62
+ (uint64_t)info->vaddr);
63
+
64
+ subregion = g_new0(MemoryRegion, 1);
65
+
66
+ memory_region_init_ram_ptr(subregion, NULL, name,
67
+ iov->iov_len, info->vaddr);
68
+
69
+ dma_as = pci_device_iommu_address_space(o->pci_dev);
70
+
71
+ memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);
72
+
73
+ trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
74
+}
75
+
76
+static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
77
+{
78
+ VfuObject *o = vfu_get_private(vfu_ctx);
79
+ AddressSpace *dma_as = NULL;
80
+ MemoryRegion *mr = NULL;
81
+ ram_addr_t offset;
82
+
83
+ mr = memory_region_from_host(info->vaddr, &offset);
84
+ if (!mr) {
85
+ return;
86
+ }
87
+
88
+ dma_as = pci_device_iommu_address_space(o->pci_dev);
89
+
90
+ memory_region_del_subregion(dma_as->root, mr);
91
+
92
+ object_unparent((OBJECT(mr)));
93
+
94
+ trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
95
+}
96
+
97
/*
98
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
99
* properties. It also depends on devices instantiated in QEMU. These
100
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
101
goto fail;
102
}
103
104
+ ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
105
+ if (ret < 0) {
106
+ error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
107
+ o->device);
108
+ goto fail;
109
+ }
110
+
111
ret = vfu_realize_ctx(o->vfu_ctx);
112
if (ret < 0) {
113
error_setg(errp, "vfu: Failed to realize device %s- %s",
114
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
115
index XXXXXXX..XXXXXXX 100644
116
--- a/hw/remote/trace-events
117
+++ b/hw/remote/trace-events
118
@@ -XXX,XX +XXX,XX @@ mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d,
119
vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s"
120
vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x"
121
vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x"
122
+vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes"
123
+vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
124
--
125
2.36.1
diff view generated by jsdifflib
Deleted patch
1
From: Jagannathan Raman <jag.raman@oracle.com>
2
1
3
Add a handler to reset a remote device.
4
5
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
6
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
7
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 112eeadf3bc4c6cdb100bc3f9a6fcfc20b467c1b.1655151679.git.jag.raman@oracle.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
hw/remote/vfio-user-obj.c | 20 ++++++++++++++++++++
13
1 file changed, 20 insertions(+)
14
15
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/remote/vfio-user-obj.c
18
+++ b/hw/remote/vfio-user-obj.c
19
@@ -XXX,XX +XXX,XX @@ void vfu_object_set_bus_irq(PCIBus *pci_bus)
20
max_bdf);
21
}
22
23
+static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
24
+{
25
+ VfuObject *o = vfu_get_private(vfu_ctx);
26
+
27
+ /* vfu_object_ctx_run() handles lost connection */
28
+ if (type == VFU_RESET_LOST_CONN) {
29
+ return 0;
30
+ }
31
+
32
+ qdev_reset_all(DEVICE(o->pci_dev));
33
+
34
+ return 0;
35
+}
36
+
37
/*
38
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
39
* properties. It also depends on devices instantiated in QEMU. These
40
@@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
41
goto fail;
42
}
43
44
+ ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
45
+ if (ret < 0) {
46
+ error_setg(errp, "vfu: Failed to setup reset callback");
47
+ goto fail;
48
+ }
49
+
50
ret = vfu_realize_ctx(o->vfu_ctx);
51
if (ret < 0) {
52
error_setg(errp, "vfu: Failed to realize device %s- %s",
53
--
54
2.36.1
diff view generated by jsdifflib