1 | The following changes since commit bfec359afba088aaacc7d316f43302f28c6e642a: | 1 | The following changes since commit 8e6c70b9d4a1b1f3011805947925cfdb31642f7f: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/armbru/tags/pull-qdev-2017-04-21' into staging (2017-04-21 11:42:03 +0100) | 3 | Merge tag 'kraxel-20220614-pull-request' of git://git.kraxel.org/qemu into staging (2022-06-14 06:21:46 -0700) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 1507631e438930bc07f776f303af127a9cdb4d41: | 9 | for you to fetch changes up to 99b969fbe105117f5af6060d3afef40ca39cc9c1: |
10 | 10 | ||
11 | qemu-iotests: _cleanup_qemu must be called on exit (2017-04-21 08:32:44 -0400) | 11 | linux-aio: explain why max batch is checked in laio_io_unplug() (2022-06-15 16:43:42 +0100) |
12 | |||
13 | ---------------------------------------------------------------- | ||
14 | Pull request | ||
15 | |||
16 | This pull request includes an important aio=native I/O stall fix, the | ||
17 | experimental vfio-user server, the io_uring_register_ring_fd() optimization for | ||
18 | aio=io_uring, and an update to Vladimir Sementsov-Ogievskiy's maintainership | ||
19 | details. | ||
12 | 20 | ||
13 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
14 | 22 | ||
15 | Block patches for 2.10 | 23 | Jagannathan Raman (14): |
24 | qdev: unplug blocker for devices | ||
25 | remote/machine: add HotplugHandler for remote machine | ||
26 | remote/machine: add vfio-user property | ||
27 | vfio-user: build library | ||
28 | vfio-user: define vfio-user-server object | ||
29 | vfio-user: instantiate vfio-user context | ||
30 | vfio-user: find and init PCI device | ||
31 | vfio-user: run vfio-user context | ||
32 | vfio-user: handle PCI config space accesses | ||
33 | vfio-user: IOMMU support for remote device | ||
34 | vfio-user: handle DMA mappings | ||
35 | vfio-user: handle PCI BAR accesses | ||
36 | vfio-user: handle device interrupts | ||
37 | vfio-user: handle reset of remote device | ||
16 | 38 | ||
17 | ---------------------------------------------------------------- | 39 | Sam Li (1): |
40 | Use io_uring_register_ring_fd() to skip fd operations | ||
18 | 41 | ||
19 | Ashish Mittal (2): | 42 | Stefan Hajnoczi (2): |
20 | block/vxhs.c: Add support for a new block device type called "vxhs" | 43 | linux-aio: fix unbalanced plugged counter in laio_io_unplug() |
21 | block/vxhs.c: Add qemu-iotests for new block device type "vxhs" | 44 | linux-aio: explain why max batch is checked in laio_io_unplug() |
22 | 45 | ||
23 | Jeff Cody (10): | 46 | Vladimir Sementsov-Ogievskiy (1): |
24 | qemu-iotests: exclude vxhs from image creation via protocol | 47 | MAINTAINERS: update Vladimir's address and repositories |
25 | block: add bdrv_set_read_only() helper function | ||
26 | block: do not set BDS read_only if copy_on_read enabled | ||
27 | block: honor BDRV_O_ALLOW_RDWR when clearing bs->read_only | ||
28 | block: code movement | ||
29 | block: introduce bdrv_can_set_read_only() | ||
30 | block: use bdrv_can_set_read_only() during reopen | ||
31 | block/rbd - update variable names to more apt names | ||
32 | block/rbd: Add support for reopen() | ||
33 | qemu-iotests: _cleanup_qemu must be called on exit | ||
34 | 48 | ||
35 | block.c | 56 +++- | 49 | MAINTAINERS | 27 +- |
36 | block/Makefile.objs | 2 + | 50 | meson_options.txt | 2 + |
37 | block/bochs.c | 5 +- | 51 | qapi/misc.json | 31 + |
38 | block/cloop.c | 5 +- | 52 | qapi/qom.json | 20 +- |
39 | block/dmg.c | 6 +- | 53 | configure | 17 + |
40 | block/rbd.c | 65 +++-- | 54 | meson.build | 24 +- |
41 | block/trace-events | 17 ++ | 55 | include/exec/memory.h | 3 + |
42 | block/vvfat.c | 19 +- | 56 | include/hw/pci/msi.h | 1 + |
43 | block/vxhs.c | 575 +++++++++++++++++++++++++++++++++++++++ | 57 | include/hw/pci/msix.h | 1 + |
44 | configure | 39 +++ | 58 | include/hw/pci/pci.h | 13 + |
45 | include/block/block.h | 2 + | 59 | include/hw/qdev-core.h | 29 + |
46 | qapi/block-core.json | 23 +- | 60 | include/hw/remote/iommu.h | 40 + |
47 | tests/qemu-iotests/017 | 1 + | 61 | include/hw/remote/machine.h | 4 + |
48 | tests/qemu-iotests/020 | 1 + | 62 | include/hw/remote/vfio-user-obj.h | 6 + |
49 | tests/qemu-iotests/028 | 1 + | 63 | block/io_uring.c | 12 +- |
50 | tests/qemu-iotests/029 | 1 + | 64 | block/linux-aio.c | 10 +- |
51 | tests/qemu-iotests/073 | 1 + | 65 | hw/core/qdev.c | 24 + |
52 | tests/qemu-iotests/094 | 11 +- | 66 | hw/pci/msi.c | 49 +- |
53 | tests/qemu-iotests/102 | 5 +- | 67 | hw/pci/msix.c | 35 +- |
54 | tests/qemu-iotests/109 | 1 + | 68 | hw/pci/pci.c | 13 + |
55 | tests/qemu-iotests/114 | 1 + | 69 | hw/remote/iommu.c | 131 ++++ |
56 | tests/qemu-iotests/117 | 1 + | 70 | hw/remote/machine.c | 88 ++- |
57 | tests/qemu-iotests/130 | 2 + | 71 | hw/remote/vfio-user-obj.c | 958 ++++++++++++++++++++++++ |
58 | tests/qemu-iotests/134 | 1 + | 72 | softmmu/physmem.c | 4 +- |
59 | tests/qemu-iotests/140 | 1 + | 73 | softmmu/qdev-monitor.c | 4 + |
60 | tests/qemu-iotests/141 | 1 + | 74 | stubs/vfio-user-obj.c | 6 + |
61 | tests/qemu-iotests/143 | 1 + | 75 | tests/qtest/fuzz/generic_fuzz.c | 9 +- |
62 | tests/qemu-iotests/156 | 2 + | 76 | .gitlab-ci.d/buildtest.yml | 1 + |
63 | tests/qemu-iotests/158 | 1 + | 77 | .gitmodules | 3 + |
64 | tests/qemu-iotests/common | 6 + | 78 | Kconfig.host | 4 + |
65 | tests/qemu-iotests/common.config | 13 + | 79 | hw/remote/Kconfig | 4 + |
66 | tests/qemu-iotests/common.filter | 1 + | 80 | hw/remote/meson.build | 4 + |
67 | tests/qemu-iotests/common.rc | 19 ++ | 81 | hw/remote/trace-events | 11 + |
68 | 33 files changed, 844 insertions(+), 42 deletions(-) | 82 | scripts/meson-buildoptions.sh | 4 + |
69 | create mode 100644 block/vxhs.c | 83 | stubs/meson.build | 1 + |
84 | subprojects/libvfio-user | 1 + | ||
85 | tests/docker/dockerfiles/centos8.docker | 2 + | ||
86 | 37 files changed, 1565 insertions(+), 31 deletions(-) | ||
87 | create mode 100644 include/hw/remote/iommu.h | ||
88 | create mode 100644 include/hw/remote/vfio-user-obj.h | ||
89 | create mode 100644 hw/remote/iommu.c | ||
90 | create mode 100644 hw/remote/vfio-user-obj.c | ||
91 | create mode 100644 stubs/vfio-user-obj.c | ||
92 | create mode 160000 subprojects/libvfio-user | ||
70 | 93 | ||
71 | -- | 94 | -- |
72 | 2.9.3 | 95 | 2.36.1 |
73 | |||
74 | diff view generated by jsdifflib |
1 | The protocol VXHS does not support image creation. Some tests expect | 1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
---|---|---|---|
2 | to be able to create images through the protocol. Exclude VXHS from | ||
3 | these tests. | ||
4 | 2 | ||
5 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 3 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
4 | Message-id: 20220526115432.138384-1-vsementsov@yandex-team.ru | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | --- | 6 | --- |
7 | tests/qemu-iotests/017 | 1 + | 7 | MAINTAINERS | 22 ++++++++++++---------- |
8 | tests/qemu-iotests/020 | 1 + | 8 | 1 file changed, 12 insertions(+), 10 deletions(-) |
9 | tests/qemu-iotests/029 | 1 + | ||
10 | tests/qemu-iotests/073 | 1 + | ||
11 | tests/qemu-iotests/114 | 1 + | ||
12 | tests/qemu-iotests/130 | 1 + | ||
13 | tests/qemu-iotests/134 | 1 + | ||
14 | tests/qemu-iotests/156 | 1 + | ||
15 | tests/qemu-iotests/158 | 1 + | ||
16 | 9 files changed, 9 insertions(+) | ||
17 | 9 | ||
18 | diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 | 10 | diff --git a/MAINTAINERS b/MAINTAINERS |
19 | index XXXXXXX..XXXXXXX 100755 | 11 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/tests/qemu-iotests/017 | 12 | --- a/MAINTAINERS |
21 | +++ b/tests/qemu-iotests/017 | 13 | +++ b/MAINTAINERS |
22 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 14 | @@ -XXX,XX +XXX,XX @@ F: scsi/* |
23 | # Any format supporting backing files | 15 | |
24 | _supported_fmt qcow qcow2 vmdk qed | 16 | Block Jobs |
25 | _supported_proto generic | 17 | M: John Snow <jsnow@redhat.com> |
26 | +_unsupported_proto vxhs | 18 | -M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> |
27 | _supported_os Linux | 19 | +M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
28 | _unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" | 20 | L: qemu-block@nongnu.org |
29 | 21 | S: Supported | |
30 | diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 | 22 | F: blockjob.c |
31 | index XXXXXXX..XXXXXXX 100755 | 23 | @@ -XXX,XX +XXX,XX @@ F: block/aio_task.c |
32 | --- a/tests/qemu-iotests/020 | 24 | F: util/qemu-co-shared-resource.c |
33 | +++ b/tests/qemu-iotests/020 | 25 | F: include/qemu/co-shared-resource.h |
34 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 26 | T: git https://gitlab.com/jsnow/qemu.git jobs |
35 | # Any format supporting backing files | 27 | -T: git https://src.openvz.org/scm/~vsementsov/qemu.git jobs |
36 | _supported_fmt qcow qcow2 vmdk qed | 28 | +T: git https://gitlab.com/vsementsov/qemu.git block |
37 | _supported_proto generic | 29 | |
38 | +_unsupported_proto vxhs | 30 | Block QAPI, monitor, command line |
39 | _supported_os Linux | 31 | M: Markus Armbruster <armbru@redhat.com> |
40 | _unsupported_imgopts "subformat=monolithicFlat" \ | 32 | @@ -XXX,XX +XXX,XX @@ F: include/hw/cxl/ |
41 | "subformat=twoGbMaxExtentFlat" \ | 33 | |
42 | diff --git a/tests/qemu-iotests/029 b/tests/qemu-iotests/029 | 34 | Dirty Bitmaps |
43 | index XXXXXXX..XXXXXXX 100755 | 35 | M: Eric Blake <eblake@redhat.com> |
44 | --- a/tests/qemu-iotests/029 | 36 | -M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> |
45 | +++ b/tests/qemu-iotests/029 | 37 | +M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
46 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 38 | R: John Snow <jsnow@redhat.com> |
47 | # Any format supporting intenal snapshots | 39 | L: qemu-block@nongnu.org |
48 | _supported_fmt qcow2 | 40 | S: Supported |
49 | _supported_proto generic | 41 | @@ -XXX,XX +XXX,XX @@ F: util/hbitmap.c |
50 | +_unsupported_proto vxhs | 42 | F: tests/unit/test-hbitmap.c |
51 | _supported_os Linux | 43 | F: docs/interop/bitmaps.rst |
52 | # Internal snapshots are (currently) impossible with refcount_bits=1 | 44 | T: git https://repo.or.cz/qemu/ericb.git bitmaps |
53 | _unsupported_imgopts 'refcount_bits=1[^0-9]' | 45 | +T: git https://gitlab.com/vsementsov/qemu.git block |
54 | diff --git a/tests/qemu-iotests/073 b/tests/qemu-iotests/073 | 46 | |
55 | index XXXXXXX..XXXXXXX 100755 | 47 | Character device backends |
56 | --- a/tests/qemu-iotests/073 | 48 | M: Marc-André Lureau <marcandre.lureau@redhat.com> |
57 | +++ b/tests/qemu-iotests/073 | 49 | @@ -XXX,XX +XXX,XX @@ F: scripts/*.py |
58 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 50 | F: tests/*.py |
59 | 51 | ||
60 | _supported_fmt qcow2 | 52 | Benchmark util |
61 | _supported_proto generic | 53 | -M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> |
62 | +_unsupported_proto vxhs | 54 | +M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
63 | _supported_os Linux | 55 | S: Maintained |
64 | 56 | F: scripts/simplebench/ | |
65 | CLUSTER_SIZE=64k | 57 | -T: git https://src.openvz.org/scm/~vsementsov/qemu.git simplebench |
66 | diff --git a/tests/qemu-iotests/114 b/tests/qemu-iotests/114 | 58 | +T: git https://gitlab.com/vsementsov/qemu.git simplebench |
67 | index XXXXXXX..XXXXXXX 100755 | 59 | |
68 | --- a/tests/qemu-iotests/114 | 60 | Transactions helper |
69 | +++ b/tests/qemu-iotests/114 | 61 | -M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> |
70 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 62 | +M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> |
71 | 63 | S: Maintained | |
72 | _supported_fmt qcow2 | 64 | F: include/qemu/transactions.h |
73 | _supported_proto generic | 65 | F: util/transactions.c |
74 | +_unsupported_proto vxhs | 66 | +T: git https://gitlab.com/vsementsov/qemu.git block |
75 | _supported_os Linux | 67 | |
76 | 68 | QAPI | |
77 | 69 | M: Markus Armbruster <armbru@redhat.com> | |
78 | diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130 | 70 | @@ -XXX,XX +XXX,XX @@ F: block/iscsi-opts.c |
79 | index XXXXXXX..XXXXXXX 100755 | 71 | |
80 | --- a/tests/qemu-iotests/130 | 72 | Network Block Device (NBD) |
81 | +++ b/tests/qemu-iotests/130 | 73 | M: Eric Blake <eblake@redhat.com> |
82 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 74 | -M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> |
83 | 75 | +M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | |
84 | _supported_fmt qcow2 | 76 | L: qemu-block@nongnu.org |
85 | _supported_proto generic | 77 | S: Maintained |
86 | +_unsupported_proto vxhs | 78 | F: block/nbd* |
87 | _supported_os Linux | 79 | @@ -XXX,XX +XXX,XX @@ F: docs/interop/nbd.txt |
88 | 80 | F: docs/tools/qemu-nbd.rst | |
89 | qemu_comm_method="monitor" | 81 | F: tests/qemu-iotests/tests/*nbd* |
90 | diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134 | 82 | T: git https://repo.or.cz/qemu/ericb.git nbd |
91 | index XXXXXXX..XXXXXXX 100755 | 83 | -T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd |
92 | --- a/tests/qemu-iotests/134 | 84 | +T: git https://gitlab.com/vsementsov/qemu.git block |
93 | +++ b/tests/qemu-iotests/134 | 85 | |
94 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 86 | NFS |
95 | 87 | M: Peter Lieven <pl@kamp.de> | |
96 | _supported_fmt qcow2 | 88 | @@ -XXX,XX +XXX,XX @@ F: block/dmg.c |
97 | _supported_proto generic | 89 | parallels |
98 | +_unsupported_proto vxhs | 90 | M: Stefan Hajnoczi <stefanha@redhat.com> |
99 | _supported_os Linux | 91 | M: Denis V. Lunev <den@openvz.org> |
100 | 92 | -M: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> | |
101 | 93 | +M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru> | |
102 | diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156 | 94 | L: qemu-block@nongnu.org |
103 | index XXXXXXX..XXXXXXX 100755 | 95 | S: Supported |
104 | --- a/tests/qemu-iotests/156 | 96 | F: block/parallels.c |
105 | +++ b/tests/qemu-iotests/156 | 97 | F: block/parallels-ext.c |
106 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | 98 | F: docs/interop/parallels.txt |
107 | 99 | -T: git https://src.openvz.org/scm/~vsementsov/qemu.git parallels | |
108 | _supported_fmt qcow2 qed | 100 | +T: git https://gitlab.com/vsementsov/qemu.git block |
109 | _supported_proto generic | 101 | |
110 | +_unsupported_proto vxhs | 102 | qed |
111 | _supported_os Linux | 103 | M: Stefan Hajnoczi <stefanha@redhat.com> |
112 | |||
113 | # Create source disk | ||
114 | diff --git a/tests/qemu-iotests/158 b/tests/qemu-iotests/158 | ||
115 | index XXXXXXX..XXXXXXX 100755 | ||
116 | --- a/tests/qemu-iotests/158 | ||
117 | +++ b/tests/qemu-iotests/158 | ||
118 | @@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
119 | |||
120 | _supported_fmt qcow2 | ||
121 | _supported_proto generic | ||
122 | +_unsupported_proto vxhs | ||
123 | _supported_os Linux | ||
124 | |||
125 | |||
126 | -- | 104 | -- |
127 | 2.9.3 | 105 | 2.36.1 |
128 | 106 | ||
129 | 107 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Sam Li <faithilikerun@gmail.com> | ||
1 | 2 | ||
3 | Linux recently added a new io_uring(7) optimization API that QEMU | ||
4 | doesn't take advantage of yet. The liburing library that QEMU uses | ||
5 | has added a corresponding new API called io_uring_register_ring_fd(). | ||
6 | When this API is called after creating the ring, the io_uring_submit() | ||
7 | library function passes a flag to the io_uring_enter(2) syscall | ||
8 | allowing it to skip the ring file descriptor fdget()/fdput() | ||
9 | operations. This saves some CPU cycles. | ||
10 | |||
11 | Signed-off-by: Sam Li <faithilikerun@gmail.com> | ||
12 | Message-id: 20220531105011.111082-1-faithilikerun@gmail.com | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | --- | ||
15 | meson.build | 1 + | ||
16 | block/io_uring.c | 12 +++++++++++- | ||
17 | 2 files changed, 12 insertions(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/meson.build b/meson.build | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/meson.build | ||
22 | +++ b/meson.build | ||
23 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_LIBNFS', libnfs.found()) | ||
24 | config_host_data.set('CONFIG_LIBSSH', libssh.found()) | ||
25 | config_host_data.set('CONFIG_LINUX_AIO', libaio.found()) | ||
26 | config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found()) | ||
27 | +config_host_data.set('CONFIG_LIBURING_REGISTER_RING_FD', cc.has_function('io_uring_register_ring_fd', prefix: '#include <liburing.h>', dependencies:linux_io_uring)) | ||
28 | config_host_data.set('CONFIG_LIBPMEM', libpmem.found()) | ||
29 | config_host_data.set('CONFIG_NUMA', numa.found()) | ||
30 | config_host_data.set('CONFIG_OPENGL', opengl.found()) | ||
31 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/block/io_uring.c | ||
34 | +++ b/block/io_uring.c | ||
35 | @@ -XXX,XX +XXX,XX @@ | ||
36 | #include "qapi/error.h" | ||
37 | #include "trace.h" | ||
38 | |||
39 | + | ||
40 | /* io_uring ring size */ | ||
41 | #define MAX_ENTRIES 128 | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ LuringState *luring_init(Error **errp) | ||
44 | } | ||
45 | |||
46 | ioq_init(&s->io_q); | ||
47 | +#ifdef CONFIG_LIBURING_REGISTER_RING_FD | ||
48 | + if (io_uring_register_ring_fd(&s->ring) < 0) { | ||
49 | + /* | ||
50 | + * Only warn about this error: we will fallback to the non-optimized | ||
51 | + * io_uring operations. | ||
52 | + */ | ||
53 | + warn_report("failed to register linux io_uring ring file descriptor"); | ||
54 | + } | ||
55 | +#endif | ||
56 | + | ||
57 | return s; | ||
58 | - | ||
59 | } | ||
60 | |||
61 | void luring_cleanup(LuringState *s) | ||
62 | -- | ||
63 | 2.36.1 | diff view generated by jsdifflib |
1 | For the tests that use the common.qemu functions for running a QEMU | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | process, _cleanup_qemu must be called in the exit function. | ||
3 | 2 | ||
4 | If it is not, if the qemu process aborts, then not all of the droppings | 3 | Add blocker to prevent hot-unplug of devices |
5 | are cleaned up (e.g. pidfile, fifos). | ||
6 | 4 | ||
7 | This updates those tests that did not have a cleanup in qemu-iotests. | 5 | TYPE_VFIO_USER_SERVER, which is introduced shortly, attaches itself to a |
6 | PCIDevice on which it depends. If the attached PCIDevice gets removed | ||
7 | while the server is in use, it could cause it to crash. To prevent this, | ||
8 | TYPE_VFIO_USER_SERVER adds an unplug blocker for the PCIDevice. | ||
8 | 9 | ||
9 | (I swapped spaces for tabs in test 102 as well) | 10 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> |
11 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
12 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Message-id: c41ef80b7cc063314d629737bed2159e5713f2e0.1655151679.git.jag.raman@oracle.com | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | --- | ||
17 | include/hw/qdev-core.h | 29 +++++++++++++++++++++++++++++ | ||
18 | hw/core/qdev.c | 24 ++++++++++++++++++++++++ | ||
19 | softmmu/qdev-monitor.c | 4 ++++ | ||
20 | 3 files changed, 57 insertions(+) | ||
10 | 21 | ||
11 | Reported-by: Eric Blake <eblake@redhat.com> | 22 | diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h |
12 | Reviewed-by: Eric Blake <eblake@redhat.com> | 23 | index XXXXXXX..XXXXXXX 100644 |
13 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 24 | --- a/include/hw/qdev-core.h |
14 | Message-id: d59c2f6ad6c1da8b9b3c7f357c94a7122ccfc55a.1492544096.git.jcody@redhat.com | 25 | +++ b/include/hw/qdev-core.h |
15 | --- | 26 | @@ -XXX,XX +XXX,XX @@ struct DeviceState { |
16 | tests/qemu-iotests/028 | 1 + | 27 | int instance_id_alias; |
17 | tests/qemu-iotests/094 | 11 ++++++++--- | 28 | int alias_required_for_version; |
18 | tests/qemu-iotests/102 | 5 +++-- | 29 | ResettableState reset; |
19 | tests/qemu-iotests/109 | 1 + | 30 | + GSList *unplug_blockers; |
20 | tests/qemu-iotests/117 | 1 + | 31 | }; |
21 | tests/qemu-iotests/130 | 1 + | 32 | |
22 | tests/qemu-iotests/140 | 1 + | 33 | struct DeviceListener { |
23 | tests/qemu-iotests/141 | 1 + | 34 | @@ -XXX,XX +XXX,XX @@ void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, |
24 | tests/qemu-iotests/143 | 1 + | 35 | void qdev_machine_creation_done(void); |
25 | tests/qemu-iotests/156 | 1 + | 36 | bool qdev_machine_modified(void); |
26 | 10 files changed, 19 insertions(+), 5 deletions(-) | 37 | |
27 | 38 | +/** | |
28 | diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028 | 39 | + * qdev_add_unplug_blocker: Add an unplug blocker to a device |
29 | index XXXXXXX..XXXXXXX 100755 | 40 | + * |
30 | --- a/tests/qemu-iotests/028 | 41 | + * @dev: Device to be blocked from unplug |
31 | +++ b/tests/qemu-iotests/028 | 42 | + * @reason: Reason for blocking |
32 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | 43 | + */ |
33 | 44 | +void qdev_add_unplug_blocker(DeviceState *dev, Error *reason); | |
34 | _cleanup() | 45 | + |
35 | { | 46 | +/** |
36 | + _cleanup_qemu | 47 | + * qdev_del_unplug_blocker: Remove an unplug blocker from a device |
37 | rm -f "${TEST_IMG}.copy" | 48 | + * |
38 | _cleanup_test_img | 49 | + * @dev: Device to be unblocked |
50 | + * @reason: Pointer to the Error used with qdev_add_unplug_blocker. | ||
51 | + * Used as a handle to lookup the blocker for deletion. | ||
52 | + */ | ||
53 | +void qdev_del_unplug_blocker(DeviceState *dev, Error *reason); | ||
54 | + | ||
55 | +/** | ||
56 | + * qdev_unplug_blocked: Confirm if a device is blocked from unplug | ||
57 | + * | ||
58 | + * @dev: Device to be tested | ||
59 | + * @errp: Returns one of the reasons why the device is blocked, | ||
60 | + * if any | ||
61 | + * | ||
62 | + * Returns: true if device is blocked from unplug, false otherwise | ||
63 | + */ | ||
64 | +bool qdev_unplug_blocked(DeviceState *dev, Error **errp); | ||
65 | + | ||
66 | /** | ||
67 | * GpioPolarity: Polarity of a GPIO line | ||
68 | * | ||
69 | diff --git a/hw/core/qdev.c b/hw/core/qdev.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/hw/core/qdev.c | ||
72 | +++ b/hw/core/qdev.c | ||
73 | @@ -XXX,XX +XXX,XX @@ char *qdev_get_dev_path(DeviceState *dev) | ||
74 | return NULL; | ||
39 | } | 75 | } |
40 | diff --git a/tests/qemu-iotests/094 b/tests/qemu-iotests/094 | 76 | |
41 | index XXXXXXX..XXXXXXX 100755 | 77 | +void qdev_add_unplug_blocker(DeviceState *dev, Error *reason) |
42 | --- a/tests/qemu-iotests/094 | ||
43 | +++ b/tests/qemu-iotests/094 | ||
44 | @@ -XXX,XX +XXX,XX @@ echo "QA output created by $seq" | ||
45 | here="$PWD" | ||
46 | status=1 # failure is the default! | ||
47 | |||
48 | -trap "exit \$status" 0 1 2 3 15 | ||
49 | +_cleanup() | ||
50 | +{ | 78 | +{ |
51 | + _cleanup_qemu | 79 | + dev->unplug_blockers = g_slist_prepend(dev->unplug_blockers, reason); |
52 | + _cleanup_test_img | ||
53 | + rm -f "$TEST_DIR/source.$IMGFMT" | ||
54 | +} | 80 | +} |
55 | + | 81 | + |
56 | +trap "_cleanup; exit \$status" 0 1 2 3 15 | 82 | +void qdev_del_unplug_blocker(DeviceState *dev, Error *reason) |
57 | 83 | +{ | |
58 | # get standard environment, filters and checks | 84 | + dev->unplug_blockers = g_slist_remove(dev->unplug_blockers, reason); |
59 | . ./common.rc | 85 | +} |
60 | @@ -XXX,XX +XXX,XX @@ _send_qemu_cmd $QEMU_HANDLE \ | 86 | + |
61 | 87 | +bool qdev_unplug_blocked(DeviceState *dev, Error **errp) | |
62 | wait=1 _cleanup_qemu | 88 | +{ |
63 | 89 | + ERRP_GUARD(); | |
64 | -_cleanup_test_img | 90 | + |
65 | -rm -f "$TEST_DIR/source.$IMGFMT" | 91 | + if (dev->unplug_blockers) { |
66 | 92 | + error_propagate(errp, error_copy(dev->unplug_blockers->data)); | |
67 | # success, all done | 93 | + return true; |
68 | echo '*** done' | 94 | + } |
69 | diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102 | 95 | + |
70 | index XXXXXXX..XXXXXXX 100755 | 96 | + return false; |
71 | --- a/tests/qemu-iotests/102 | 97 | +} |
72 | +++ b/tests/qemu-iotests/102 | 98 | + |
73 | @@ -XXX,XX +XXX,XX @@ seq=$(basename $0) | 99 | static bool device_get_realized(Object *obj, Error **errp) |
74 | echo "QA output created by $seq" | ||
75 | |||
76 | here=$PWD | ||
77 | -status=1 # failure is the default! | ||
78 | +status=1 # failure is the default! | ||
79 | |||
80 | _cleanup() | ||
81 | { | 100 | { |
82 | - _cleanup_test_img | 101 | DeviceState *dev = DEVICE(obj); |
83 | + _cleanup_qemu | 102 | @@ -XXX,XX +XXX,XX @@ static void device_finalize(Object *obj) |
84 | + _cleanup_test_img | 103 | |
85 | } | 104 | DeviceState *dev = DEVICE(obj); |
86 | trap "_cleanup; exit \$status" 0 1 2 3 15 | 105 | |
87 | 106 | + g_assert(!dev->unplug_blockers); | |
88 | diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 | 107 | + |
89 | index XXXXXXX..XXXXXXX 100755 | 108 | QLIST_FOREACH_SAFE(ngl, &dev->gpios, node, next) { |
90 | --- a/tests/qemu-iotests/109 | 109 | QLIST_REMOVE(ngl, node); |
91 | +++ b/tests/qemu-iotests/109 | 110 | qemu_free_irqs(ngl->in, ngl->num_in); |
92 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | 111 | diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c |
93 | 112 | index XXXXXXX..XXXXXXX 100644 | |
94 | _cleanup() | 113 | --- a/softmmu/qdev-monitor.c |
95 | { | 114 | +++ b/softmmu/qdev-monitor.c |
96 | + _cleanup_qemu | 115 | @@ -XXX,XX +XXX,XX @@ void qdev_unplug(DeviceState *dev, Error **errp) |
97 | rm -f $TEST_IMG.src | 116 | HotplugHandlerClass *hdc; |
98 | _cleanup_test_img | 117 | Error *local_err = NULL; |
99 | } | 118 | |
100 | diff --git a/tests/qemu-iotests/117 b/tests/qemu-iotests/117 | 119 | + if (qdev_unplug_blocked(dev, errp)) { |
101 | index XXXXXXX..XXXXXXX 100755 | 120 | + return; |
102 | --- a/tests/qemu-iotests/117 | 121 | + } |
103 | +++ b/tests/qemu-iotests/117 | 122 | + |
104 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | 123 | if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { |
105 | 124 | error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); | |
106 | _cleanup() | 125 | return; |
107 | { | ||
108 | + _cleanup_qemu | ||
109 | _cleanup_test_img | ||
110 | } | ||
111 | trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
112 | diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130 | ||
113 | index XXXXXXX..XXXXXXX 100755 | ||
114 | --- a/tests/qemu-iotests/130 | ||
115 | +++ b/tests/qemu-iotests/130 | ||
116 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | ||
117 | |||
118 | _cleanup() | ||
119 | { | ||
120 | + _cleanup_qemu | ||
121 | _cleanup_test_img | ||
122 | } | ||
123 | trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
124 | diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140 | ||
125 | index XXXXXXX..XXXXXXX 100755 | ||
126 | --- a/tests/qemu-iotests/140 | ||
127 | +++ b/tests/qemu-iotests/140 | ||
128 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | ||
129 | |||
130 | _cleanup() | ||
131 | { | ||
132 | + _cleanup_qemu | ||
133 | _cleanup_test_img | ||
134 | rm -f "$TEST_DIR/nbd" | ||
135 | } | ||
136 | diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 | ||
137 | index XXXXXXX..XXXXXXX 100755 | ||
138 | --- a/tests/qemu-iotests/141 | ||
139 | +++ b/tests/qemu-iotests/141 | ||
140 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | ||
141 | |||
142 | _cleanup() | ||
143 | { | ||
144 | + _cleanup_qemu | ||
145 | _cleanup_test_img | ||
146 | rm -f "$TEST_DIR/{b,m,o}.$IMGFMT" | ||
147 | } | ||
148 | diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 | ||
149 | index XXXXXXX..XXXXXXX 100755 | ||
150 | --- a/tests/qemu-iotests/143 | ||
151 | +++ b/tests/qemu-iotests/143 | ||
152 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | ||
153 | |||
154 | _cleanup() | ||
155 | { | ||
156 | + _cleanup_qemu | ||
157 | rm -f "$TEST_DIR/nbd" | ||
158 | } | ||
159 | trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
160 | diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156 | ||
161 | index XXXXXXX..XXXXXXX 100755 | ||
162 | --- a/tests/qemu-iotests/156 | ||
163 | +++ b/tests/qemu-iotests/156 | ||
164 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | ||
165 | |||
166 | _cleanup() | ||
167 | { | ||
168 | + _cleanup_qemu | ||
169 | rm -f "$TEST_IMG{,.target}{,.backing,.overlay}" | ||
170 | } | ||
171 | trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
172 | -- | 126 | -- |
173 | 2.9.3 | 127 | 2.36.1 |
174 | |||
175 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Jagannathan Raman <jag.raman@oracle.com> | ||
1 | 2 | ||
3 | Allow hotplugging of PCI(e) devices to remote machine | ||
4 | |||
5 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
6 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
7 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Message-id: d1e6cfa0afb528ad343758f9b1d918be0175c5e5.1655151679.git.jag.raman@oracle.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | hw/remote/machine.c | 10 ++++++++++ | ||
13 | 1 file changed, 10 insertions(+) | ||
14 | |||
15 | diff --git a/hw/remote/machine.c b/hw/remote/machine.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/remote/machine.c | ||
18 | +++ b/hw/remote/machine.c | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | #include "qapi/error.h" | ||
21 | #include "hw/pci/pci_host.h" | ||
22 | #include "hw/remote/iohub.h" | ||
23 | +#include "hw/qdev-core.h" | ||
24 | |||
25 | static void remote_machine_init(MachineState *machine) | ||
26 | { | ||
27 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine) | ||
28 | |||
29 | pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, | ||
30 | &s->iohub, REMOTE_IOHUB_NB_PIRQS); | ||
31 | + | ||
32 | + qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s)); | ||
33 | } | ||
34 | |||
35 | static void remote_machine_class_init(ObjectClass *oc, void *data) | ||
36 | { | ||
37 | MachineClass *mc = MACHINE_CLASS(oc); | ||
38 | + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); | ||
39 | |||
40 | mc->init = remote_machine_init; | ||
41 | mc->desc = "Experimental remote machine"; | ||
42 | + | ||
43 | + hc->unplug = qdev_simple_device_unplug_cb; | ||
44 | } | ||
45 | |||
46 | static const TypeInfo remote_machine = { | ||
47 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo remote_machine = { | ||
48 | .parent = TYPE_MACHINE, | ||
49 | .instance_size = sizeof(RemoteMachineState), | ||
50 | .class_init = remote_machine_class_init, | ||
51 | + .interfaces = (InterfaceInfo[]) { | ||
52 | + { TYPE_HOTPLUG_HANDLER }, | ||
53 | + { } | ||
54 | + } | ||
55 | }; | ||
56 | |||
57 | static void remote_machine_register_types(void) | ||
58 | -- | ||
59 | 2.36.1 | diff view generated by jsdifflib |
1 | Move bdrv_is_read_only() up with its friends. | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | Add vfio-user to x-remote machine. It is a boolean, which indicates if | ||
4 | the machine supports vfio-user protocol. The machine configures the bus | ||
5 | differently for vfio-user and multiprocess protocols, so this property | ||
6 | informs it on how to configure the bus. | ||
7 | |||
8 | This property should be short lived. Once vfio-user fully replaces | ||
9 | multiprocess, this property could be removed. | ||
10 | |||
11 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
12 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
13 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
3 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
4 | Reviewed-by: John Snow <jsnow@redhat.com> | 15 | Message-id: 5d51a152a419cbda35d070b8e49b772b60a7230a.1655151679.git.jag.raman@oracle.com |
5 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
6 | Message-id: 73b2399459760c32506f9407efb9dddb3a2789de.1491597120.git.jcody@redhat.com | ||
7 | --- | 17 | --- |
8 | block.c | 10 +++++----- | 18 | include/hw/remote/machine.h | 2 ++ |
9 | 1 file changed, 5 insertions(+), 5 deletions(-) | 19 | hw/remote/machine.c | 23 +++++++++++++++++++++++ |
20 | 2 files changed, 25 insertions(+) | ||
10 | 21 | ||
11 | diff --git a/block.c b/block.c | 22 | diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h |
12 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/block.c | 24 | --- a/include/hw/remote/machine.h |
14 | +++ b/block.c | 25 | +++ b/include/hw/remote/machine.h |
15 | @@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size, | 26 | @@ -XXX,XX +XXX,XX @@ struct RemoteMachineState { |
16 | } | 27 | |
28 | RemotePCIHost *host; | ||
29 | RemoteIOHubState iohub; | ||
30 | + | ||
31 | + bool vfio_user; | ||
32 | }; | ||
33 | |||
34 | /* Used to pass to co-routine device and ioc. */ | ||
35 | diff --git a/hw/remote/machine.c b/hw/remote/machine.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/hw/remote/machine.c | ||
38 | +++ b/hw/remote/machine.c | ||
39 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine) | ||
40 | qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s)); | ||
17 | } | 41 | } |
18 | 42 | ||
19 | +bool bdrv_is_read_only(BlockDriverState *bs) | 43 | +static bool remote_machine_get_vfio_user(Object *obj, Error **errp) |
20 | +{ | 44 | +{ |
21 | + return bs->read_only; | 45 | + RemoteMachineState *s = REMOTE_MACHINE(obj); |
46 | + | ||
47 | + return s->vfio_user; | ||
22 | +} | 48 | +} |
23 | + | 49 | + |
24 | int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 50 | +static void remote_machine_set_vfio_user(Object *obj, bool value, Error **errp) |
51 | +{ | ||
52 | + RemoteMachineState *s = REMOTE_MACHINE(obj); | ||
53 | + | ||
54 | + if (phase_check(PHASE_MACHINE_CREATED)) { | ||
55 | + error_setg(errp, "Error enabling vfio-user - machine already created"); | ||
56 | + return; | ||
57 | + } | ||
58 | + | ||
59 | + s->vfio_user = value; | ||
60 | +} | ||
61 | + | ||
62 | static void remote_machine_class_init(ObjectClass *oc, void *data) | ||
25 | { | 63 | { |
26 | /* Do not set read_only if copy_on_read is enabled */ | 64 | MachineClass *mc = MACHINE_CLASS(oc); |
27 | @@ -XXX,XX +XXX,XX @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) | 65 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_class_init(ObjectClass *oc, void *data) |
28 | *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; | 66 | mc->desc = "Experimental remote machine"; |
67 | |||
68 | hc->unplug = qdev_simple_device_unplug_cb; | ||
69 | + | ||
70 | + object_class_property_add_bool(oc, "vfio-user", | ||
71 | + remote_machine_get_vfio_user, | ||
72 | + remote_machine_set_vfio_user); | ||
29 | } | 73 | } |
30 | 74 | ||
31 | -bool bdrv_is_read_only(BlockDriverState *bs) | 75 | static const TypeInfo remote_machine = { |
32 | -{ | ||
33 | - return bs->read_only; | ||
34 | -} | ||
35 | - | ||
36 | bool bdrv_is_sg(BlockDriverState *bs) | ||
37 | { | ||
38 | return bs->sg; | ||
39 | -- | 76 | -- |
40 | 2.9.3 | 77 | 2.36.1 |
41 | |||
42 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Jagannathan Raman <jag.raman@oracle.com> | |
2 | |||
3 | Add the libvfio-user library as a submodule. Build it as a meson | ||
4 | subproject. | ||
5 | |||
6 | libvfio-user is distributed under the BSD 3-Clause license and | ||
7 | json-c under the MIT (Expat) license. | ||
8 | |||
9 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
10 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
11 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
12 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Message-id: c2adec87958b081d1dc8775d4aa05c897912f025.1655151679.git.jag.raman@oracle.com | ||
14 | |||
15 | [Changed submodule URL to QEMU's libvfio-user mirror on GitLab. The QEMU | ||
16 | project mirrors its dependencies so that it can provide full source code | ||
17 | even in the event that its dependencies become unavailable. Note that | ||
18 | the mirror repo is manually updated, so please contact me to make newer | ||
19 | libvfio-user commits available. If I become a bottleneck we can set up a | ||
20 | cronjob. | ||
21 | |||
22 | Updated scripts/meson-buildoptions.sh to match the meson_options.txt | ||
23 | change. Failure to do so can result in scripts/meson-buildoptions.sh | ||
24 | being modified by the build system later on and you end up with a dirty | ||
25 | working tree. | ||
26 | --Stefan] | ||
27 | |||
28 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
29 | --- | ||
30 | MAINTAINERS | 1 + | ||
31 | meson_options.txt | 2 ++ | ||
32 | configure | 17 +++++++++++++++++ | ||
33 | meson.build | 23 ++++++++++++++++++++++- | ||
34 | .gitlab-ci.d/buildtest.yml | 1 + | ||
35 | .gitmodules | 3 +++ | ||
36 | Kconfig.host | 4 ++++ | ||
37 | hw/remote/Kconfig | 4 ++++ | ||
38 | hw/remote/meson.build | 2 ++ | ||
39 | scripts/meson-buildoptions.sh | 4 ++++ | ||
40 | subprojects/libvfio-user | 1 + | ||
41 | tests/docker/dockerfiles/centos8.docker | 2 ++ | ||
42 | 12 files changed, 63 insertions(+), 1 deletion(-) | ||
43 | create mode 160000 subprojects/libvfio-user | ||
44 | |||
45 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/MAINTAINERS | ||
48 | +++ b/MAINTAINERS | ||
49 | @@ -XXX,XX +XXX,XX @@ F: hw/remote/proxy-memory-listener.c | ||
50 | F: include/hw/remote/proxy-memory-listener.h | ||
51 | F: hw/remote/iohub.c | ||
52 | F: include/hw/remote/iohub.h | ||
53 | +F: subprojects/libvfio-user | ||
54 | |||
55 | EBPF: | ||
56 | M: Jason Wang <jasowang@redhat.com> | ||
57 | diff --git a/meson_options.txt b/meson_options.txt | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/meson_options.txt | ||
60 | +++ b/meson_options.txt | ||
61 | @@ -XXX,XX +XXX,XX @@ option('cfi_debug', type: 'boolean', value: 'false', | ||
62 | description: 'Verbose errors in case of CFI violation') | ||
63 | option('multiprocess', type: 'feature', value: 'auto', | ||
64 | description: 'Out of process device emulation support') | ||
65 | +option('vfio_user_server', type: 'feature', value: 'disabled', | ||
66 | + description: 'vfio-user server support') | ||
67 | option('dbus_display', type: 'feature', value: 'auto', | ||
68 | description: '-display dbus support') | ||
69 | option('tpm', type : 'feature', value : 'auto', | ||
70 | diff --git a/configure b/configure | ||
71 | index XXXXXXX..XXXXXXX 100755 | ||
72 | --- a/configure | ||
73 | +++ b/configure | ||
74 | @@ -XXX,XX +XXX,XX @@ meson_args="" | ||
75 | ninja="" | ||
76 | bindir="bin" | ||
77 | skip_meson=no | ||
78 | +vfio_user_server="disabled" | ||
79 | |||
80 | # The following Meson options are handled manually (still they | ||
81 | # are included in the automatically generated help message) | ||
82 | @@ -XXX,XX +XXX,XX @@ for opt do | ||
83 | ;; | ||
84 | --disable-blobs) meson_option_parse --disable-install-blobs "" | ||
85 | ;; | ||
86 | + --enable-vfio-user-server) vfio_user_server="enabled" | ||
87 | + ;; | ||
88 | + --disable-vfio-user-server) vfio_user_server="disabled" | ||
89 | + ;; | ||
90 | --enable-tcmalloc) meson_option_parse --enable-malloc=tcmalloc tcmalloc | ||
91 | ;; | ||
92 | --enable-jemalloc) meson_option_parse --enable-malloc=jemalloc jemalloc | ||
93 | @@ -XXX,XX +XXX,XX @@ write_container_target_makefile() { | ||
94 | |||
95 | |||
96 | |||
97 | +########################################## | ||
98 | +# check for vfio_user_server | ||
99 | + | ||
100 | +case "$vfio_user_server" in | ||
101 | + enabled ) | ||
102 | + if test "$git_submodules_action" != "ignore"; then | ||
103 | + git_submodules="${git_submodules} subprojects/libvfio-user" | ||
104 | + fi | ||
105 | + ;; | ||
106 | +esac | ||
107 | + | ||
108 | ########################################## | ||
109 | # End of CC checks | ||
110 | # After here, no more $cc or $ld runs | ||
111 | @@ -XXX,XX +XXX,XX @@ if test "$skip_meson" = no; then | ||
112 | test "$slirp" != auto && meson_option_add "-Dslirp=$slirp" | ||
113 | test "$smbd" != '' && meson_option_add "-Dsmbd=$smbd" | ||
114 | test "$tcg" != enabled && meson_option_add "-Dtcg=$tcg" | ||
115 | + test "$vfio_user_server" != auto && meson_option_add "-Dvfio_user_server=$vfio_user_server" | ||
116 | run_meson() { | ||
117 | NINJA=$ninja $meson setup --prefix "$prefix" "$@" $cross_arg "$PWD" "$source_path" | ||
118 | } | ||
119 | diff --git a/meson.build b/meson.build | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/meson.build | ||
122 | +++ b/meson.build | ||
123 | @@ -XXX,XX +XXX,XX @@ multiprocess_allowed = get_option('multiprocess') \ | ||
124 | .require(targetos == 'linux', error_message: 'Multiprocess QEMU is supported only on Linux') \ | ||
125 | .allowed() | ||
126 | |||
127 | +vfio_user_server_allowed = get_option('vfio_user_server') \ | ||
128 | + .require(targetos == 'linux', error_message: 'vfio-user server is supported only on Linux') \ | ||
129 | + .allowed() | ||
130 | + | ||
131 | have_tpm = get_option('tpm') \ | ||
132 | .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \ | ||
133 | .allowed() | ||
134 | @@ -XXX,XX +XXX,XX @@ host_kconfig = \ | ||
135 | (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \ | ||
136 | ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \ | ||
137 | (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \ | ||
138 | - (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) | ||
139 | + (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \ | ||
140 | + (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) | ||
141 | |||
142 | ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ] | ||
143 | |||
144 | @@ -XXX,XX +XXX,XX @@ if have_system | ||
145 | endif | ||
146 | endif | ||
147 | |||
148 | +libvfio_user_dep = not_found | ||
149 | +if have_system and vfio_user_server_allowed | ||
150 | + have_internal = fs.exists(meson.current_source_dir() / 'subprojects/libvfio-user/meson.build') | ||
151 | + | ||
152 | + if not have_internal | ||
153 | + error('libvfio-user source not found - please pull git submodule') | ||
154 | + endif | ||
155 | + | ||
156 | + libvfio_user_proj = subproject('libvfio-user') | ||
157 | + | ||
158 | + libvfio_user_lib = libvfio_user_proj.get_variable('libvfio_user_dep') | ||
159 | + | ||
160 | + libvfio_user_dep = declare_dependency(dependencies: [libvfio_user_lib]) | ||
161 | +endif | ||
162 | + | ||
163 | fdt = not_found | ||
164 | if have_system | ||
165 | fdt_opt = get_option('fdt') | ||
166 | @@ -XXX,XX +XXX,XX @@ summary_info += {'target list': ' '.join(target_dirs)} | ||
167 | if have_system | ||
168 | summary_info += {'default devices': get_option('default_devices')} | ||
169 | summary_info += {'out of process emulation': multiprocess_allowed} | ||
170 | + summary_info += {'vfio-user server': vfio_user_server_allowed} | ||
171 | endif | ||
172 | summary(summary_info, bool_yn: true, section: 'Targets and accelerators') | ||
173 | |||
174 | diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml | ||
175 | index XXXXXXX..XXXXXXX 100644 | ||
176 | --- a/.gitlab-ci.d/buildtest.yml | ||
177 | +++ b/.gitlab-ci.d/buildtest.yml | ||
178 | @@ -XXX,XX +XXX,XX @@ build-system-centos: | ||
179 | IMAGE: centos8 | ||
180 | CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system | ||
181 | --enable-modules --enable-trace-backends=dtrace --enable-docs | ||
182 | + --enable-vfio-user-server | ||
183 | TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu | ||
184 | x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu | ||
185 | MAKE_CHECK_ARGS: check-build | ||
186 | diff --git a/.gitmodules b/.gitmodules | ||
187 | index XXXXXXX..XXXXXXX 100644 | ||
188 | --- a/.gitmodules | ||
189 | +++ b/.gitmodules | ||
190 | @@ -XXX,XX +XXX,XX @@ | ||
191 | [submodule "tests/lcitool/libvirt-ci"] | ||
192 | path = tests/lcitool/libvirt-ci | ||
193 | url = https://gitlab.com/libvirt/libvirt-ci.git | ||
194 | +[submodule "subprojects/libvfio-user"] | ||
195 | + path = subprojects/libvfio-user | ||
196 | + url = https://gitlab.com/qemu-project/libvfio-user.git | ||
197 | diff --git a/Kconfig.host b/Kconfig.host | ||
198 | index XXXXXXX..XXXXXXX 100644 | ||
199 | --- a/Kconfig.host | ||
200 | +++ b/Kconfig.host | ||
201 | @@ -XXX,XX +XXX,XX @@ config MULTIPROCESS_ALLOWED | ||
202 | config FUZZ | ||
203 | bool | ||
204 | select SPARSE_MEM | ||
205 | + | ||
206 | +config VFIO_USER_SERVER_ALLOWED | ||
207 | + bool | ||
208 | + imply VFIO_USER_SERVER | ||
209 | diff --git a/hw/remote/Kconfig b/hw/remote/Kconfig | ||
210 | index XXXXXXX..XXXXXXX 100644 | ||
211 | --- a/hw/remote/Kconfig | ||
212 | +++ b/hw/remote/Kconfig | ||
213 | @@ -XXX,XX +XXX,XX @@ config MULTIPROCESS | ||
214 | bool | ||
215 | depends on PCI && PCI_EXPRESS && KVM | ||
216 | select REMOTE_PCIHOST | ||
217 | + | ||
218 | +config VFIO_USER_SERVER | ||
219 | + bool | ||
220 | + depends on MULTIPROCESS | ||
221 | diff --git a/hw/remote/meson.build b/hw/remote/meson.build | ||
222 | index XXXXXXX..XXXXXXX 100644 | ||
223 | --- a/hw/remote/meson.build | ||
224 | +++ b/hw/remote/meson.build | ||
225 | @@ -XXX,XX +XXX,XX @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) | ||
226 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) | ||
227 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) | ||
228 | |||
229 | +remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep) | ||
230 | + | ||
231 | specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c')) | ||
232 | specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy-memory-listener.c')) | ||
233 | |||
234 | diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh | ||
235 | index XXXXXXX..XXXXXXX 100644 | ||
236 | --- a/scripts/meson-buildoptions.sh | ||
237 | +++ b/scripts/meson-buildoptions.sh | ||
238 | @@ -XXX,XX +XXX,XX @@ meson_options_help() { | ||
239 | printf "%s\n" ' usb-redir libusbredir support' | ||
240 | printf "%s\n" ' vde vde network backend support' | ||
241 | printf "%s\n" ' vdi vdi image format support' | ||
242 | + printf "%s\n" ' vfio-user-server' | ||
243 | + printf "%s\n" ' vfio-user server support' | ||
244 | printf "%s\n" ' vhost-crypto vhost-user crypto backend support' | ||
245 | printf "%s\n" ' vhost-kernel vhost kernel backend support' | ||
246 | printf "%s\n" ' vhost-net vhost-net kernel acceleration support' | ||
247 | @@ -XXX,XX +XXX,XX @@ _meson_option_parse() { | ||
248 | --disable-vde) printf "%s" -Dvde=disabled ;; | ||
249 | --enable-vdi) printf "%s" -Dvdi=enabled ;; | ||
250 | --disable-vdi) printf "%s" -Dvdi=disabled ;; | ||
251 | + --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;; | ||
252 | + --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;; | ||
253 | --enable-vhost-crypto) printf "%s" -Dvhost_crypto=enabled ;; | ||
254 | --disable-vhost-crypto) printf "%s" -Dvhost_crypto=disabled ;; | ||
255 | --enable-vhost-kernel) printf "%s" -Dvhost_kernel=enabled ;; | ||
256 | diff --git a/subprojects/libvfio-user b/subprojects/libvfio-user | ||
257 | new file mode 160000 | ||
258 | index XXXXXXX..XXXXXXX | ||
259 | --- /dev/null | ||
260 | +++ b/subprojects/libvfio-user | ||
261 | @@ -0,0 +1 @@ | ||
262 | +Subproject commit 0b28d205572c80b568a1003db2c8f37ca333e4d7 | ||
263 | diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker | ||
264 | index XXXXXXX..XXXXXXX 100644 | ||
265 | --- a/tests/docker/dockerfiles/centos8.docker | ||
266 | +++ b/tests/docker/dockerfiles/centos8.docker | ||
267 | @@ -XXX,XX +XXX,XX @@ RUN dnf update -y && \ | ||
268 | libbpf-devel \ | ||
269 | libcacard-devel \ | ||
270 | libcap-ng-devel \ | ||
271 | + libcmocka-devel \ | ||
272 | libcurl-devel \ | ||
273 | libdrm-devel \ | ||
274 | libepoxy-devel \ | ||
275 | @@ -XXX,XX +XXX,XX @@ RUN dnf update -y && \ | ||
276 | libgcrypt-devel \ | ||
277 | libiscsi-devel \ | ||
278 | libjpeg-devel \ | ||
279 | + json-c-devel \ | ||
280 | libnfs-devel \ | ||
281 | libpmem-devel \ | ||
282 | libpng-devel \ | ||
283 | -- | ||
284 | 2.36.1 | diff view generated by jsdifflib |
1 | From: Ashish Mittal <ashmit602@gmail.com> | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | Source code for the qnio library that this code loads can be downloaded from: | 3 | Define the vfio-user object, which is a remote process server for QEMU. Set up |
4 | https://github.com/VeritasHyperScale/libqnio.git | 4 | object initialization functions and properties necessary to instantiate |
5 | 5 | the object. | |
6 | Sample command line using JSON syntax: | 6 | |
7 | ./x86_64-softmmu/qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 | 7 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> |
8 | -k en-us -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 | 8 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> |
9 | -msg timestamp=on | 9 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> |
10 | 'json:{"driver":"vxhs","vdisk-id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410", | ||
11 | "server":{"host":"172.172.17.4","port":"9999"}}' | ||
12 | |||
13 | Sample command line using URI syntax: | ||
14 | qemu-img convert -f raw -O raw -n | ||
15 | /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad | ||
16 | vxhs://192.168.0.1:9999/c6718f6b-0401-441d-a8c3-1f0064d75ee0 | ||
17 | |||
18 | Sample command line using TLS credentials (run in secure mode): | ||
19 | ./qemu-io --object | ||
20 | tls-creds-x509,id=tls0,dir=/etc/pki/qemu/vxhs,endpoint=client -c 'read | ||
21 | -v 66000 2.5k' 'json:{"server.host": "127.0.0.1", "server.port": "9999", | ||
22 | "vdisk-id": "/test.raw", "driver": "vxhs", "tls-creds":"tls0"}' | ||
23 | |||
24 | Signed-off-by: Ashish Mittal <Ashish.Mittal@veritas.com> | ||
25 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
26 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 11 | Message-id: e45a17001e9b38f451543a664ababdf860e5f2f2.1655151679.git.jag.raman@oracle.com |
27 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
28 | Message-id: 1491277689-24949-2-git-send-email-Ashish.Mittal@veritas.com | ||
29 | --- | 13 | --- |
30 | block/Makefile.objs | 2 + | 14 | MAINTAINERS | 1 + |
31 | block/trace-events | 17 ++ | 15 | qapi/qom.json | 20 +++- |
32 | block/vxhs.c | 575 +++++++++++++++++++++++++++++++++++++++++++++++++++ | 16 | include/hw/remote/machine.h | 2 + |
33 | configure | 39 ++++ | 17 | hw/remote/machine.c | 27 +++++ |
34 | qapi/block-core.json | 23 ++- | 18 | hw/remote/vfio-user-obj.c | 210 ++++++++++++++++++++++++++++++++++++ |
35 | 5 files changed, 654 insertions(+), 2 deletions(-) | 19 | hw/remote/meson.build | 1 + |
36 | create mode 100644 block/vxhs.c | 20 | hw/remote/trace-events | 3 + |
37 | 21 | 7 files changed, 262 insertions(+), 2 deletions(-) | |
38 | diff --git a/block/Makefile.objs b/block/Makefile.objs | 22 | create mode 100644 hw/remote/vfio-user-obj.c |
39 | index XXXXXXX..XXXXXXX 100644 | 23 | |
40 | --- a/block/Makefile.objs | 24 | diff --git a/MAINTAINERS b/MAINTAINERS |
41 | +++ b/block/Makefile.objs | 25 | index XXXXXXX..XXXXXXX 100644 |
42 | @@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_LIBNFS) += nfs.o | 26 | --- a/MAINTAINERS |
43 | block-obj-$(CONFIG_CURL) += curl.o | 27 | +++ b/MAINTAINERS |
44 | block-obj-$(CONFIG_RBD) += rbd.o | 28 | @@ -XXX,XX +XXX,XX @@ F: include/hw/remote/proxy-memory-listener.h |
45 | block-obj-$(CONFIG_GLUSTERFS) += gluster.o | 29 | F: hw/remote/iohub.c |
46 | +block-obj-$(CONFIG_VXHS) += vxhs.o | 30 | F: include/hw/remote/iohub.h |
47 | block-obj-$(CONFIG_LIBSSH2) += ssh.o | 31 | F: subprojects/libvfio-user |
48 | block-obj-y += accounting.o dirty-bitmap.o | 32 | +F: hw/remote/vfio-user-obj.c |
49 | block-obj-y += write-threshold.o | 33 | |
50 | @@ -XXX,XX +XXX,XX @@ rbd.o-cflags := $(RBD_CFLAGS) | 34 | EBPF: |
51 | rbd.o-libs := $(RBD_LIBS) | 35 | M: Jason Wang <jasowang@redhat.com> |
52 | gluster.o-cflags := $(GLUSTERFS_CFLAGS) | 36 | diff --git a/qapi/qom.json b/qapi/qom.json |
53 | gluster.o-libs := $(GLUSTERFS_LIBS) | 37 | index XXXXXXX..XXXXXXX 100644 |
54 | +vxhs.o-libs := $(VXHS_LIBS) | 38 | --- a/qapi/qom.json |
55 | ssh.o-cflags := $(LIBSSH2_CFLAGS) | 39 | +++ b/qapi/qom.json |
56 | ssh.o-libs := $(LIBSSH2_LIBS) | 40 | @@ -XXX,XX +XXX,XX @@ |
57 | block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o | 41 | { 'struct': 'RemoteObjectProperties', |
58 | diff --git a/block/trace-events b/block/trace-events | 42 | 'data': { 'fd': 'str', 'devid': 'str' } } |
59 | index XXXXXXX..XXXXXXX 100644 | 43 | |
60 | --- a/block/trace-events | 44 | +## |
61 | +++ b/block/trace-events | 45 | +# @VfioUserServerProperties: |
62 | @@ -XXX,XX +XXX,XX @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s | 46 | +# |
63 | qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 | 47 | +# Properties for x-vfio-user-server objects. |
64 | qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 | 48 | +# |
65 | qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" | 49 | +# @socket: socket to be used by the libvfio-user library |
66 | + | 50 | +# |
67 | +# block/vxhs.c | 51 | +# @device: the ID of the device to be emulated at the server |
68 | +vxhs_iio_callback(int error) "ctx is NULL: error %d" | 52 | +# |
69 | +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d" | 53 | +# Since: 7.1 |
70 | +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d" | 54 | +## |
71 | +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d" | 55 | +{ 'struct': 'VfioUserServerProperties', |
72 | +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d" | 56 | + 'data': { 'socket': 'SocketAddress', 'device': 'str' } } |
73 | +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d" | 57 | + |
74 | +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu" | 58 | ## |
75 | +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld" | 59 | # @RngProperties: |
76 | +vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s" | 60 | # |
77 | +vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s" | 61 | @@ -XXX,XX +XXX,XX @@ |
78 | +vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState" | 62 | 'tls-creds-psk', |
79 | +vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s" | 63 | 'tls-creds-x509', |
80 | +vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d" | 64 | 'tls-cipher-suites', |
81 | +vxhs_close(char *vdisk_guid) "Closing vdisk %s" | 65 | - { 'name': 'x-remote-object', 'features': [ 'unstable' ] } |
82 | +vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s" | 66 | + { 'name': 'x-remote-object', 'features': [ 'unstable' ] }, |
83 | diff --git a/block/vxhs.c b/block/vxhs.c | 67 | + { 'name': 'x-vfio-user-server', 'features': [ 'unstable' ] } |
68 | ] } | ||
69 | |||
70 | ## | ||
71 | @@ -XXX,XX +XXX,XX @@ | ||
72 | 'tls-creds-psk': 'TlsCredsPskProperties', | ||
73 | 'tls-creds-x509': 'TlsCredsX509Properties', | ||
74 | 'tls-cipher-suites': 'TlsCredsProperties', | ||
75 | - 'x-remote-object': 'RemoteObjectProperties' | ||
76 | + 'x-remote-object': 'RemoteObjectProperties', | ||
77 | + 'x-vfio-user-server': 'VfioUserServerProperties' | ||
78 | } } | ||
79 | |||
80 | ## | ||
81 | diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/include/hw/remote/machine.h | ||
84 | +++ b/include/hw/remote/machine.h | ||
85 | @@ -XXX,XX +XXX,XX @@ struct RemoteMachineState { | ||
86 | RemoteIOHubState iohub; | ||
87 | |||
88 | bool vfio_user; | ||
89 | + | ||
90 | + bool auto_shutdown; | ||
91 | }; | ||
92 | |||
93 | /* Used to pass to co-routine device and ioc. */ | ||
94 | diff --git a/hw/remote/machine.c b/hw/remote/machine.c | ||
95 | index XXXXXXX..XXXXXXX 100644 | ||
96 | --- a/hw/remote/machine.c | ||
97 | +++ b/hw/remote/machine.c | ||
98 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_set_vfio_user(Object *obj, bool value, Error **errp) | ||
99 | s->vfio_user = value; | ||
100 | } | ||
101 | |||
102 | +static bool remote_machine_get_auto_shutdown(Object *obj, Error **errp) | ||
103 | +{ | ||
104 | + RemoteMachineState *s = REMOTE_MACHINE(obj); | ||
105 | + | ||
106 | + return s->auto_shutdown; | ||
107 | +} | ||
108 | + | ||
109 | +static void remote_machine_set_auto_shutdown(Object *obj, bool value, | ||
110 | + Error **errp) | ||
111 | +{ | ||
112 | + RemoteMachineState *s = REMOTE_MACHINE(obj); | ||
113 | + | ||
114 | + s->auto_shutdown = value; | ||
115 | +} | ||
116 | + | ||
117 | +static void remote_machine_instance_init(Object *obj) | ||
118 | +{ | ||
119 | + RemoteMachineState *s = REMOTE_MACHINE(obj); | ||
120 | + | ||
121 | + s->auto_shutdown = true; | ||
122 | +} | ||
123 | + | ||
124 | static void remote_machine_class_init(ObjectClass *oc, void *data) | ||
125 | { | ||
126 | MachineClass *mc = MACHINE_CLASS(oc); | ||
127 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_class_init(ObjectClass *oc, void *data) | ||
128 | object_class_property_add_bool(oc, "vfio-user", | ||
129 | remote_machine_get_vfio_user, | ||
130 | remote_machine_set_vfio_user); | ||
131 | + | ||
132 | + object_class_property_add_bool(oc, "auto-shutdown", | ||
133 | + remote_machine_get_auto_shutdown, | ||
134 | + remote_machine_set_auto_shutdown); | ||
135 | } | ||
136 | |||
137 | static const TypeInfo remote_machine = { | ||
138 | .name = TYPE_REMOTE_MACHINE, | ||
139 | .parent = TYPE_MACHINE, | ||
140 | .instance_size = sizeof(RemoteMachineState), | ||
141 | + .instance_init = remote_machine_instance_init, | ||
142 | .class_init = remote_machine_class_init, | ||
143 | .interfaces = (InterfaceInfo[]) { | ||
144 | { TYPE_HOTPLUG_HANDLER }, | ||
145 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c | ||
84 | new file mode 100644 | 146 | new file mode 100644 |
85 | index XXXXXXX..XXXXXXX | 147 | index XXXXXXX..XXXXXXX |
86 | --- /dev/null | 148 | --- /dev/null |
87 | +++ b/block/vxhs.c | 149 | +++ b/hw/remote/vfio-user-obj.c |
88 | @@ -XXX,XX +XXX,XX @@ | 150 | @@ -XXX,XX +XXX,XX @@ |
89 | +/* | 151 | +/** |
90 | + * QEMU Block driver for Veritas HyperScale (VxHS) | 152 | + * QEMU vfio-user-server server object |
91 | + * | 153 | + * |
92 | + * Copyright (c) 2017 Veritas Technologies LLC. | 154 | + * Copyright © 2022 Oracle and/or its affiliates. |
93 | + * | 155 | + * |
94 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | 156 | + * This work is licensed under the terms of the GNU GPL-v2, version 2 or later. |
157 | + * | ||
95 | + * See the COPYING file in the top-level directory. | 158 | + * See the COPYING file in the top-level directory. |
96 | + * | 159 | + * |
97 | + */ | 160 | + */ |
98 | + | 161 | + |
162 | +/** | ||
163 | + * Usage: add options: | ||
164 | + * -machine x-remote,vfio-user=on,auto-shutdown=on | ||
165 | + * -device <PCI-device>,id=<pci-dev-id> | ||
166 | + * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>, | ||
167 | + * device=<pci-dev-id> | ||
168 | + * | ||
169 | + * Note that x-vfio-user-server object must be used with x-remote machine only. | ||
170 | + * This server could only support PCI devices for now. | ||
171 | + * | ||
172 | + * type - SocketAddress type - presently "unix" alone is supported. Required | ||
173 | + * option | ||
174 | + * | ||
175 | + * path - named unix socket, it will be created by the server. It is | ||
176 | + * a required option | ||
177 | + * | ||
178 | + * device - id of a device on the server, a required option. PCI devices | ||
179 | + * alone are supported presently. | ||
180 | + */ | ||
181 | + | ||
99 | +#include "qemu/osdep.h" | 182 | +#include "qemu/osdep.h" |
100 | +#include <qnio/qnio_api.h> | 183 | + |
101 | +#include <sys/param.h> | 184 | +#include "qom/object.h" |
102 | +#include "block/block_int.h" | 185 | +#include "qom/object_interfaces.h" |
103 | +#include "qapi/qmp/qerror.h" | 186 | +#include "qemu/error-report.h" |
104 | +#include "qapi/qmp/qdict.h" | ||
105 | +#include "qapi/qmp/qstring.h" | ||
106 | +#include "trace.h" | 187 | +#include "trace.h" |
107 | +#include "qemu/uri.h" | 188 | +#include "sysemu/runstate.h" |
189 | +#include "hw/boards.h" | ||
190 | +#include "hw/remote/machine.h" | ||
108 | +#include "qapi/error.h" | 191 | +#include "qapi/error.h" |
109 | +#include "qemu/uuid.h" | 192 | +#include "qapi/qapi-visit-sockets.h" |
110 | +#include "crypto/tlscredsx509.h" | 193 | + |
111 | + | 194 | +#define TYPE_VFU_OBJECT "x-vfio-user-server" |
112 | +#define VXHS_OPT_FILENAME "filename" | 195 | +OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) |
113 | +#define VXHS_OPT_VDISK_ID "vdisk-id" | 196 | + |
114 | +#define VXHS_OPT_SERVER "server" | 197 | +/** |
115 | +#define VXHS_OPT_HOST "host" | 198 | + * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown |
116 | +#define VXHS_OPT_PORT "port" | 199 | + * is set, it aborts the machine on error. Otherwise, it logs an |
117 | + | 200 | + * error message without aborting. |
118 | +/* Only accessed under QEMU global mutex */ | ||
119 | +static uint32_t vxhs_ref; | ||
120 | + | ||
121 | +typedef enum { | ||
122 | + VDISK_AIO_READ, | ||
123 | + VDISK_AIO_WRITE, | ||
124 | +} VDISKAIOCmd; | ||
125 | + | ||
126 | +/* | ||
127 | + * HyperScale AIO callbacks structure | ||
128 | + */ | 201 | + */ |
129 | +typedef struct VXHSAIOCB { | 202 | +#define VFU_OBJECT_ERROR(o, fmt, ...) \ |
130 | + BlockAIOCB common; | 203 | + { \ |
131 | + int err; | 204 | + if (vfu_object_auto_shutdown()) { \ |
132 | +} VXHSAIOCB; | 205 | + error_setg(&error_abort, (fmt), ## __VA_ARGS__); \ |
133 | + | 206 | + } else { \ |
134 | +typedef struct VXHSvDiskHostsInfo { | 207 | + error_report((fmt), ## __VA_ARGS__); \ |
135 | + void *dev_handle; /* Device handle */ | 208 | + } \ |
136 | + char *host; /* Host name or IP */ | 209 | + } \ |
137 | + int port; /* Host's port number */ | 210 | + |
138 | +} VXHSvDiskHostsInfo; | 211 | +struct VfuObjectClass { |
139 | + | 212 | + ObjectClass parent_class; |
140 | +/* | 213 | + |
141 | + * Structure per vDisk maintained for state | 214 | + unsigned int nr_devs; |
142 | + */ | ||
143 | +typedef struct BDRVVXHSState { | ||
144 | + VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */ | ||
145 | + char *vdisk_guid; | ||
146 | + char *tlscredsid; /* tlscredsid */ | ||
147 | +} BDRVVXHSState; | ||
148 | + | ||
149 | +static void vxhs_complete_aio_bh(void *opaque) | ||
150 | +{ | ||
151 | + VXHSAIOCB *acb = opaque; | ||
152 | + BlockCompletionFunc *cb = acb->common.cb; | ||
153 | + void *cb_opaque = acb->common.opaque; | ||
154 | + int ret = 0; | ||
155 | + | ||
156 | + if (acb->err != 0) { | ||
157 | + trace_vxhs_complete_aio(acb, acb->err); | ||
158 | + ret = (-EIO); | ||
159 | + } | ||
160 | + | ||
161 | + qemu_aio_unref(acb); | ||
162 | + cb(cb_opaque, ret); | ||
163 | +} | ||
164 | + | ||
165 | +/* | ||
166 | + * Called from a libqnio thread | ||
167 | + */ | ||
168 | +static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error) | ||
169 | +{ | ||
170 | + VXHSAIOCB *acb = NULL; | ||
171 | + | ||
172 | + switch (opcode) { | ||
173 | + case IRP_READ_REQUEST: | ||
174 | + case IRP_WRITE_REQUEST: | ||
175 | + | ||
176 | + /* | ||
177 | + * ctx is VXHSAIOCB* | ||
178 | + * ctx is NULL if error is QNIOERROR_CHANNEL_HUP | ||
179 | + */ | ||
180 | + if (ctx) { | ||
181 | + acb = ctx; | ||
182 | + } else { | ||
183 | + trace_vxhs_iio_callback(error); | ||
184 | + goto out; | ||
185 | + } | ||
186 | + | ||
187 | + if (error) { | ||
188 | + if (!acb->err) { | ||
189 | + acb->err = error; | ||
190 | + } | ||
191 | + trace_vxhs_iio_callback(error); | ||
192 | + } | ||
193 | + | ||
194 | + aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), | ||
195 | + vxhs_complete_aio_bh, acb); | ||
196 | + break; | ||
197 | + | ||
198 | + default: | ||
199 | + if (error == QNIOERROR_HUP) { | ||
200 | + /* | ||
201 | + * Channel failed, spontaneous notification, | ||
202 | + * not in response to I/O | ||
203 | + */ | ||
204 | + trace_vxhs_iio_callback_chnfail(error, errno); | ||
205 | + } else { | ||
206 | + trace_vxhs_iio_callback_unknwn(opcode, error); | ||
207 | + } | ||
208 | + break; | ||
209 | + } | ||
210 | +out: | ||
211 | + return; | ||
212 | +} | ||
213 | + | ||
214 | +static QemuOptsList runtime_opts = { | ||
215 | + .name = "vxhs", | ||
216 | + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), | ||
217 | + .desc = { | ||
218 | + { | ||
219 | + .name = VXHS_OPT_FILENAME, | ||
220 | + .type = QEMU_OPT_STRING, | ||
221 | + .help = "URI to the Veritas HyperScale image", | ||
222 | + }, | ||
223 | + { | ||
224 | + .name = VXHS_OPT_VDISK_ID, | ||
225 | + .type = QEMU_OPT_STRING, | ||
226 | + .help = "UUID of the VxHS vdisk", | ||
227 | + }, | ||
228 | + { | ||
229 | + .name = "tls-creds", | ||
230 | + .type = QEMU_OPT_STRING, | ||
231 | + .help = "ID of the TLS/SSL credentials to use", | ||
232 | + }, | ||
233 | + { /* end of list */ } | ||
234 | + }, | ||
235 | +}; | 215 | +}; |
236 | + | 216 | + |
237 | +static QemuOptsList runtime_tcp_opts = { | 217 | +struct VfuObject { |
238 | + .name = "vxhs_tcp", | 218 | + /* private */ |
239 | + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head), | 219 | + Object parent; |
240 | + .desc = { | 220 | + |
241 | + { | 221 | + SocketAddress *socket; |
242 | + .name = VXHS_OPT_HOST, | 222 | + |
243 | + .type = QEMU_OPT_STRING, | 223 | + char *device; |
244 | + .help = "host address (ipv4 addresses)", | 224 | + |
245 | + }, | 225 | + Error *err; |
246 | + { | ||
247 | + .name = VXHS_OPT_PORT, | ||
248 | + .type = QEMU_OPT_NUMBER, | ||
249 | + .help = "port number on which VxHSD is listening (default 9999)", | ||
250 | + .def_value_str = "9999" | ||
251 | + }, | ||
252 | + { /* end of list */ } | ||
253 | + }, | ||
254 | +}; | 226 | +}; |
255 | + | 227 | + |
256 | +/* | 228 | +static bool vfu_object_auto_shutdown(void) |
257 | + * Parse incoming URI and populate *options with the host | 229 | +{ |
258 | + * and device information | 230 | + bool auto_shutdown = true; |
259 | + */ | 231 | + Error *local_err = NULL; |
260 | +static int vxhs_parse_uri(const char *filename, QDict *options) | 232 | + |
261 | +{ | 233 | + if (!current_machine) { |
262 | + URI *uri = NULL; | 234 | + return auto_shutdown; |
263 | + char *port; | 235 | + } |
264 | + int ret = 0; | 236 | + |
265 | + | 237 | + auto_shutdown = object_property_get_bool(OBJECT(current_machine), |
266 | + trace_vxhs_parse_uri_filename(filename); | 238 | + "auto-shutdown", |
267 | + uri = uri_parse(filename); | 239 | + &local_err); |
268 | + if (!uri || !uri->server || !uri->path) { | 240 | + |
269 | + uri_free(uri); | 241 | + /* |
270 | + return -EINVAL; | 242 | + * local_err would be set if no such property exists - safe to ignore. |
271 | + } | 243 | + * Unlikely scenario as auto-shutdown is always defined for |
272 | + | 244 | + * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with |
273 | + qdict_put(options, VXHS_OPT_SERVER".host", qstring_from_str(uri->server)); | 245 | + * TYPE_REMOTE_MACHINE |
274 | + | 246 | + */ |
275 | + if (uri->port) { | 247 | + if (local_err) { |
276 | + port = g_strdup_printf("%d", uri->port); | 248 | + auto_shutdown = true; |
277 | + qdict_put(options, VXHS_OPT_SERVER".port", qstring_from_str(port)); | 249 | + error_free(local_err); |
278 | + g_free(port); | 250 | + } |
279 | + } | 251 | + |
280 | + | 252 | + return auto_shutdown; |
281 | + qdict_put(options, "vdisk-id", qstring_from_str(uri->path)); | 253 | +} |
282 | + | 254 | + |
283 | + trace_vxhs_parse_uri_hostinfo(uri->server, uri->port); | 255 | +static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, |
284 | + uri_free(uri); | 256 | + void *opaque, Error **errp) |
285 | + | 257 | +{ |
286 | + return ret; | 258 | + VfuObject *o = VFU_OBJECT(obj); |
287 | +} | 259 | + |
288 | + | 260 | + qapi_free_SocketAddress(o->socket); |
289 | +static void vxhs_parse_filename(const char *filename, QDict *options, | 261 | + |
290 | + Error **errp) | 262 | + o->socket = NULL; |
291 | +{ | 263 | + |
292 | + if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) { | 264 | + visit_type_SocketAddress(v, name, &o->socket, errp); |
293 | + error_setg(errp, "vdisk-id/server and a file name may not be specified " | 265 | + |
294 | + "at the same time"); | 266 | + if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { |
267 | + error_setg(errp, "vfu: Unsupported socket type - %s", | ||
268 | + SocketAddressType_str(o->socket->type)); | ||
269 | + qapi_free_SocketAddress(o->socket); | ||
270 | + o->socket = NULL; | ||
295 | + return; | 271 | + return; |
296 | + } | 272 | + } |
297 | + | 273 | + |
298 | + if (strstr(filename, "://")) { | 274 | + trace_vfu_prop("socket", o->socket->u.q_unix.path); |
299 | + int ret = vxhs_parse_uri(filename, options); | 275 | +} |
300 | + if (ret < 0) { | 276 | + |
301 | + error_setg(errp, "Invalid URI. URI should be of the form " | 277 | +static void vfu_object_set_device(Object *obj, const char *str, Error **errp) |
302 | + " vxhs://<host_ip>:<port>/<vdisk-id>"); | 278 | +{ |
303 | + } | 279 | + VfuObject *o = VFU_OBJECT(obj); |
304 | + } | 280 | + |
305 | +} | 281 | + g_free(o->device); |
306 | + | 282 | + |
307 | +static int vxhs_init_and_ref(void) | 283 | + o->device = g_strdup(str); |
308 | +{ | 284 | + |
309 | + if (vxhs_ref++ == 0) { | 285 | + trace_vfu_prop("device", str); |
310 | + if (iio_init(QNIO_VERSION, vxhs_iio_callback)) { | 286 | +} |
311 | + return -ENODEV; | 287 | + |
312 | + } | 288 | +static void vfu_object_init(Object *obj) |
313 | + } | 289 | +{ |
314 | + return 0; | 290 | + VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); |
315 | +} | 291 | + VfuObject *o = VFU_OBJECT(obj); |
316 | + | 292 | + |
317 | +static void vxhs_unref(void) | 293 | + k->nr_devs++; |
318 | +{ | 294 | + |
319 | + if (--vxhs_ref == 0) { | 295 | + if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) { |
320 | + iio_fini(); | 296 | + error_setg(&o->err, "vfu: %s only compatible with %s machine", |
321 | + } | 297 | + TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); |
322 | +} | ||
323 | + | ||
324 | +static void vxhs_get_tls_creds(const char *id, char **cacert, | ||
325 | + char **key, char **cert, Error **errp) | ||
326 | +{ | ||
327 | + Object *obj; | ||
328 | + QCryptoTLSCreds *creds; | ||
329 | + QCryptoTLSCredsX509 *creds_x509; | ||
330 | + | ||
331 | + obj = object_resolve_path_component( | ||
332 | + object_get_objects_root(), id); | ||
333 | + | ||
334 | + if (!obj) { | ||
335 | + error_setg(errp, "No TLS credentials with id '%s'", | ||
336 | + id); | ||
337 | + return; | 298 | + return; |
338 | + } | 299 | + } |
339 | + | 300 | +} |
340 | + creds_x509 = (QCryptoTLSCredsX509 *) | 301 | + |
341 | + object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509); | 302 | +static void vfu_object_finalize(Object *obj) |
342 | + | 303 | +{ |
343 | + if (!creds_x509) { | 304 | + VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj); |
344 | + error_setg(errp, "Object with id '%s' is not TLS credentials", | 305 | + VfuObject *o = VFU_OBJECT(obj); |
345 | + id); | 306 | + |
346 | + return; | 307 | + k->nr_devs--; |
347 | + } | 308 | + |
348 | + | 309 | + qapi_free_SocketAddress(o->socket); |
349 | + creds = &creds_x509->parent_obj; | 310 | + |
350 | + | 311 | + o->socket = NULL; |
351 | + if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { | 312 | + |
352 | + error_setg(errp, | 313 | + g_free(o->device); |
353 | + "Expecting TLS credentials with a client endpoint"); | 314 | + |
354 | + return; | 315 | + o->device = NULL; |
355 | + } | 316 | + |
356 | + | 317 | + if (!k->nr_devs && vfu_object_auto_shutdown()) { |
357 | + /* | 318 | + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); |
358 | + * Get the cacert, client_cert and client_key file names. | 319 | + } |
359 | + */ | 320 | +} |
360 | + if (!creds->dir) { | 321 | + |
361 | + error_setg(errp, "TLS object missing 'dir' property value"); | 322 | +static void vfu_object_class_init(ObjectClass *klass, void *data) |
362 | + return; | 323 | +{ |
363 | + } | 324 | + VfuObjectClass *k = VFU_OBJECT_CLASS(klass); |
364 | + | 325 | + |
365 | + *cacert = g_strdup_printf("%s/%s", creds->dir, | 326 | + k->nr_devs = 0; |
366 | + QCRYPTO_TLS_CREDS_X509_CA_CERT); | 327 | + |
367 | + *cert = g_strdup_printf("%s/%s", creds->dir, | 328 | + object_class_property_add(klass, "socket", "SocketAddress", NULL, |
368 | + QCRYPTO_TLS_CREDS_X509_CLIENT_CERT); | 329 | + vfu_object_set_socket, NULL, NULL); |
369 | + *key = g_strdup_printf("%s/%s", creds->dir, | 330 | + object_class_property_set_description(klass, "socket", |
370 | + QCRYPTO_TLS_CREDS_X509_CLIENT_KEY); | 331 | + "SocketAddress " |
371 | +} | 332 | + "(ex: type=unix,path=/tmp/sock). " |
372 | + | 333 | + "Only UNIX is presently supported"); |
373 | +static int vxhs_open(BlockDriverState *bs, QDict *options, | 334 | + object_class_property_add_str(klass, "device", NULL, |
374 | + int bdrv_flags, Error **errp) | 335 | + vfu_object_set_device); |
375 | +{ | 336 | + object_class_property_set_description(klass, "device", |
376 | + BDRVVXHSState *s = bs->opaque; | 337 | + "device ID - only PCI devices " |
377 | + void *dev_handlep; | 338 | + "are presently supported"); |
378 | + QDict *backing_options = NULL; | 339 | +} |
379 | + QemuOpts *opts = NULL; | 340 | + |
380 | + QemuOpts *tcp_opts = NULL; | 341 | +static const TypeInfo vfu_object_info = { |
381 | + char *of_vsa_addr = NULL; | 342 | + .name = TYPE_VFU_OBJECT, |
382 | + Error *local_err = NULL; | 343 | + .parent = TYPE_OBJECT, |
383 | + const char *vdisk_id_opt; | 344 | + .instance_size = sizeof(VfuObject), |
384 | + const char *server_host_opt; | 345 | + .instance_init = vfu_object_init, |
385 | + int ret = 0; | 346 | + .instance_finalize = vfu_object_finalize, |
386 | + char *cacert = NULL; | 347 | + .class_size = sizeof(VfuObjectClass), |
387 | + char *client_key = NULL; | 348 | + .class_init = vfu_object_class_init, |
388 | + char *client_cert = NULL; | 349 | + .interfaces = (InterfaceInfo[]) { |
389 | + | 350 | + { TYPE_USER_CREATABLE }, |
390 | + ret = vxhs_init_and_ref(); | 351 | + { } |
391 | + if (ret < 0) { | 352 | + } |
392 | + ret = -EINVAL; | ||
393 | + goto out; | ||
394 | + } | ||
395 | + | ||
396 | + /* Create opts info from runtime_opts and runtime_tcp_opts list */ | ||
397 | + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); | ||
398 | + tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort); | ||
399 | + | ||
400 | + qemu_opts_absorb_qdict(opts, options, &local_err); | ||
401 | + if (local_err) { | ||
402 | + ret = -EINVAL; | ||
403 | + goto out; | ||
404 | + } | ||
405 | + | ||
406 | + /* vdisk-id is the disk UUID */ | ||
407 | + vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID); | ||
408 | + if (!vdisk_id_opt) { | ||
409 | + error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID); | ||
410 | + ret = -EINVAL; | ||
411 | + goto out; | ||
412 | + } | ||
413 | + | ||
414 | + /* vdisk-id may contain a leading '/' */ | ||
415 | + if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) { | ||
416 | + error_setg(&local_err, "vdisk-id cannot be more than %d characters", | ||
417 | + UUID_FMT_LEN); | ||
418 | + ret = -EINVAL; | ||
419 | + goto out; | ||
420 | + } | ||
421 | + | ||
422 | + s->vdisk_guid = g_strdup(vdisk_id_opt); | ||
423 | + trace_vxhs_open_vdiskid(vdisk_id_opt); | ||
424 | + | ||
425 | + /* get the 'server.' arguments */ | ||
426 | + qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER"."); | ||
427 | + | ||
428 | + qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err); | ||
429 | + if (local_err != NULL) { | ||
430 | + ret = -EINVAL; | ||
431 | + goto out; | ||
432 | + } | ||
433 | + | ||
434 | + server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST); | ||
435 | + if (!server_host_opt) { | ||
436 | + error_setg(&local_err, QERR_MISSING_PARAMETER, | ||
437 | + VXHS_OPT_SERVER"."VXHS_OPT_HOST); | ||
438 | + ret = -EINVAL; | ||
439 | + goto out; | ||
440 | + } | ||
441 | + | ||
442 | + if (strlen(server_host_opt) > MAXHOSTNAMELEN) { | ||
443 | + error_setg(&local_err, "server.host cannot be more than %d characters", | ||
444 | + MAXHOSTNAMELEN); | ||
445 | + ret = -EINVAL; | ||
446 | + goto out; | ||
447 | + } | ||
448 | + | ||
449 | + /* check if we got tls-creds via the --object argument */ | ||
450 | + s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds")); | ||
451 | + if (s->tlscredsid) { | ||
452 | + vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key, | ||
453 | + &client_cert, &local_err); | ||
454 | + if (local_err != NULL) { | ||
455 | + ret = -EINVAL; | ||
456 | + goto out; | ||
457 | + } | ||
458 | + trace_vxhs_get_creds(cacert, client_key, client_cert); | ||
459 | + } | ||
460 | + | ||
461 | + s->vdisk_hostinfo.host = g_strdup(server_host_opt); | ||
462 | + s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts, | ||
463 | + VXHS_OPT_PORT), | ||
464 | + NULL, 0); | ||
465 | + | ||
466 | + trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host, | ||
467 | + s->vdisk_hostinfo.port); | ||
468 | + | ||
469 | + of_vsa_addr = g_strdup_printf("of://%s:%d", | ||
470 | + s->vdisk_hostinfo.host, | ||
471 | + s->vdisk_hostinfo.port); | ||
472 | + | ||
473 | + /* | ||
474 | + * Open qnio channel to storage agent if not opened before | ||
475 | + */ | ||
476 | + dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, | ||
477 | + cacert, client_key, client_cert); | ||
478 | + if (dev_handlep == NULL) { | ||
479 | + trace_vxhs_open_iio_open(of_vsa_addr); | ||
480 | + ret = -ENODEV; | ||
481 | + goto out; | ||
482 | + } | ||
483 | + s->vdisk_hostinfo.dev_handle = dev_handlep; | ||
484 | + | ||
485 | +out: | ||
486 | + g_free(of_vsa_addr); | ||
487 | + QDECREF(backing_options); | ||
488 | + qemu_opts_del(tcp_opts); | ||
489 | + qemu_opts_del(opts); | ||
490 | + g_free(cacert); | ||
491 | + g_free(client_key); | ||
492 | + g_free(client_cert); | ||
493 | + | ||
494 | + if (ret < 0) { | ||
495 | + vxhs_unref(); | ||
496 | + error_propagate(errp, local_err); | ||
497 | + g_free(s->vdisk_hostinfo.host); | ||
498 | + g_free(s->vdisk_guid); | ||
499 | + g_free(s->tlscredsid); | ||
500 | + s->vdisk_guid = NULL; | ||
501 | + } | ||
502 | + | ||
503 | + return ret; | ||
504 | +} | ||
505 | + | ||
506 | +static const AIOCBInfo vxhs_aiocb_info = { | ||
507 | + .aiocb_size = sizeof(VXHSAIOCB) | ||
508 | +}; | 353 | +}; |
509 | + | 354 | + |
510 | +/* | 355 | +static void vfu_register_types(void) |
511 | + * This allocates QEMU-VXHS callback for each IO | 356 | +{ |
512 | + * and is passed to QNIO. When QNIO completes the work, | 357 | + type_register_static(&vfu_object_info); |
513 | + * it will be passed back through the callback. | 358 | +} |
514 | + */ | 359 | + |
515 | +static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num, | 360 | +type_init(vfu_register_types); |
516 | + QEMUIOVector *qiov, int nb_sectors, | 361 | diff --git a/hw/remote/meson.build b/hw/remote/meson.build |
517 | + BlockCompletionFunc *cb, void *opaque, | 362 | index XXXXXXX..XXXXXXX 100644 |
518 | + VDISKAIOCmd iodir) | 363 | --- a/hw/remote/meson.build |
519 | +{ | 364 | +++ b/hw/remote/meson.build |
520 | + VXHSAIOCB *acb = NULL; | 365 | @@ -XXX,XX +XXX,XX @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c')) |
521 | + BDRVVXHSState *s = bs->opaque; | 366 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) |
522 | + size_t size; | 367 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) |
523 | + uint64_t offset; | 368 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) |
524 | + int iio_flags = 0; | 369 | +remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c')) |
525 | + int ret = 0; | 370 | |
526 | + void *dev_handle = s->vdisk_hostinfo.dev_handle; | 371 | remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep) |
527 | + | 372 | |
528 | + offset = sector_num * BDRV_SECTOR_SIZE; | 373 | diff --git a/hw/remote/trace-events b/hw/remote/trace-events |
529 | + size = nb_sectors * BDRV_SECTOR_SIZE; | 374 | index XXXXXXX..XXXXXXX 100644 |
530 | + acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque); | 375 | --- a/hw/remote/trace-events |
531 | + | 376 | +++ b/hw/remote/trace-events |
532 | + /* | 377 | @@ -XXX,XX +XXX,XX @@ |
533 | + * Initialize VXHSAIOCB. | 378 | |
534 | + */ | 379 | mpqemu_send_io_error(int cmd, int size, int nfds) "send command %d size %d, %d file descriptors to remote process" |
535 | + acb->err = 0; | 380 | mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d, %d file descriptors to remote process" |
536 | + | 381 | + |
537 | + iio_flags = IIO_FLAG_ASYNC; | 382 | +# vfio-user-obj.c |
538 | + | 383 | +vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s" |
539 | + switch (iodir) { | ||
540 | + case VDISK_AIO_WRITE: | ||
541 | + ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, | ||
542 | + offset, (uint64_t)size, iio_flags); | ||
543 | + break; | ||
544 | + case VDISK_AIO_READ: | ||
545 | + ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, | ||
546 | + offset, (uint64_t)size, iio_flags); | ||
547 | + break; | ||
548 | + default: | ||
549 | + trace_vxhs_aio_rw_invalid(iodir); | ||
550 | + goto errout; | ||
551 | + } | ||
552 | + | ||
553 | + if (ret != 0) { | ||
554 | + trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset, | ||
555 | + acb, ret, errno); | ||
556 | + goto errout; | ||
557 | + } | ||
558 | + return &acb->common; | ||
559 | + | ||
560 | +errout: | ||
561 | + qemu_aio_unref(acb); | ||
562 | + return NULL; | ||
563 | +} | ||
564 | + | ||
565 | +static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs, | ||
566 | + int64_t sector_num, QEMUIOVector *qiov, | ||
567 | + int nb_sectors, | ||
568 | + BlockCompletionFunc *cb, void *opaque) | ||
569 | +{ | ||
570 | + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb, | ||
571 | + opaque, VDISK_AIO_READ); | ||
572 | +} | ||
573 | + | ||
574 | +static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs, | ||
575 | + int64_t sector_num, QEMUIOVector *qiov, | ||
576 | + int nb_sectors, | ||
577 | + BlockCompletionFunc *cb, void *opaque) | ||
578 | +{ | ||
579 | + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, | ||
580 | + cb, opaque, VDISK_AIO_WRITE); | ||
581 | +} | ||
582 | + | ||
583 | +static void vxhs_close(BlockDriverState *bs) | ||
584 | +{ | ||
585 | + BDRVVXHSState *s = bs->opaque; | ||
586 | + | ||
587 | + trace_vxhs_close(s->vdisk_guid); | ||
588 | + | ||
589 | + g_free(s->vdisk_guid); | ||
590 | + s->vdisk_guid = NULL; | ||
591 | + | ||
592 | + /* | ||
593 | + * Close vDisk device | ||
594 | + */ | ||
595 | + if (s->vdisk_hostinfo.dev_handle) { | ||
596 | + iio_close(s->vdisk_hostinfo.dev_handle); | ||
597 | + s->vdisk_hostinfo.dev_handle = NULL; | ||
598 | + } | ||
599 | + | ||
600 | + vxhs_unref(); | ||
601 | + | ||
602 | + /* | ||
603 | + * Free the dynamically allocated host string etc | ||
604 | + */ | ||
605 | + g_free(s->vdisk_hostinfo.host); | ||
606 | + g_free(s->tlscredsid); | ||
607 | + s->tlscredsid = NULL; | ||
608 | + s->vdisk_hostinfo.host = NULL; | ||
609 | + s->vdisk_hostinfo.port = 0; | ||
610 | +} | ||
611 | + | ||
612 | +static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) | ||
613 | +{ | ||
614 | + int64_t vdisk_size = -1; | ||
615 | + int ret = 0; | ||
616 | + void *dev_handle = s->vdisk_hostinfo.dev_handle; | ||
617 | + | ||
618 | + ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); | ||
619 | + if (ret < 0) { | ||
620 | + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); | ||
621 | + return -EIO; | ||
622 | + } | ||
623 | + | ||
624 | + trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size); | ||
625 | + return vdisk_size; | ||
626 | +} | ||
627 | + | ||
628 | +/* | ||
629 | + * Returns the size of vDisk in bytes. This is required | ||
630 | + * by QEMU block upper block layer so that it is visible | ||
631 | + * to guest. | ||
632 | + */ | ||
633 | +static int64_t vxhs_getlength(BlockDriverState *bs) | ||
634 | +{ | ||
635 | + BDRVVXHSState *s = bs->opaque; | ||
636 | + int64_t vdisk_size; | ||
637 | + | ||
638 | + vdisk_size = vxhs_get_vdisk_stat(s); | ||
639 | + if (vdisk_size < 0) { | ||
640 | + return -EIO; | ||
641 | + } | ||
642 | + | ||
643 | + return vdisk_size; | ||
644 | +} | ||
645 | + | ||
646 | +static BlockDriver bdrv_vxhs = { | ||
647 | + .format_name = "vxhs", | ||
648 | + .protocol_name = "vxhs", | ||
649 | + .instance_size = sizeof(BDRVVXHSState), | ||
650 | + .bdrv_file_open = vxhs_open, | ||
651 | + .bdrv_parse_filename = vxhs_parse_filename, | ||
652 | + .bdrv_close = vxhs_close, | ||
653 | + .bdrv_getlength = vxhs_getlength, | ||
654 | + .bdrv_aio_readv = vxhs_aio_readv, | ||
655 | + .bdrv_aio_writev = vxhs_aio_writev, | ||
656 | +}; | ||
657 | + | ||
658 | +static void bdrv_vxhs_init(void) | ||
659 | +{ | ||
660 | + bdrv_register(&bdrv_vxhs); | ||
661 | +} | ||
662 | + | ||
663 | +block_init(bdrv_vxhs_init); | ||
664 | diff --git a/configure b/configure | ||
665 | index XXXXXXX..XXXXXXX 100755 | ||
666 | --- a/configure | ||
667 | +++ b/configure | ||
668 | @@ -XXX,XX +XXX,XX @@ numa="" | ||
669 | tcmalloc="no" | ||
670 | jemalloc="no" | ||
671 | replication="yes" | ||
672 | +vxhs="" | ||
673 | |||
674 | supported_cpu="no" | ||
675 | supported_os="no" | ||
676 | @@ -XXX,XX +XXX,XX @@ for opt do | ||
677 | ;; | ||
678 | --enable-replication) replication="yes" | ||
679 | ;; | ||
680 | + --disable-vxhs) vxhs="no" | ||
681 | + ;; | ||
682 | + --enable-vxhs) vxhs="yes" | ||
683 | + ;; | ||
684 | *) | ||
685 | echo "ERROR: unknown option $opt" | ||
686 | echo "Try '$0 --help' for more information" | ||
687 | @@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available: | ||
688 | xfsctl xfsctl support | ||
689 | qom-cast-debug cast debugging support | ||
690 | tools build qemu-io, qemu-nbd and qemu-image tools | ||
691 | + vxhs Veritas HyperScale vDisk backend support | ||
692 | |||
693 | NOTE: The object files are built at the place where configure is launched | ||
694 | EOF | ||
695 | @@ -XXX,XX +XXX,XX @@ if compile_prog "" "" ; then | ||
696 | fi | ||
697 | |||
698 | ########################################## | ||
699 | +# Veritas HyperScale block driver VxHS | ||
700 | +# Check if libvxhs is installed | ||
701 | + | ||
702 | +if test "$vxhs" != "no" ; then | ||
703 | + cat > $TMPC <<EOF | ||
704 | +#include <stdint.h> | ||
705 | +#include <qnio/qnio_api.h> | ||
706 | + | ||
707 | +void *vxhs_callback; | ||
708 | + | ||
709 | +int main(void) { | ||
710 | + iio_init(QNIO_VERSION, vxhs_callback); | ||
711 | + return 0; | ||
712 | +} | ||
713 | +EOF | ||
714 | + vxhs_libs="-lvxhs -lssl" | ||
715 | + if compile_prog "" "$vxhs_libs" ; then | ||
716 | + vxhs=yes | ||
717 | + else | ||
718 | + if test "$vxhs" = "yes" ; then | ||
719 | + feature_not_found "vxhs block device" "Install libvxhs See github" | ||
720 | + fi | ||
721 | + vxhs=no | ||
722 | + fi | ||
723 | +fi | ||
724 | + | ||
725 | +########################################## | ||
726 | # End of CC checks | ||
727 | # After here, no more $cc or $ld runs | ||
728 | |||
729 | @@ -XXX,XX +XXX,XX @@ echo "tcmalloc support $tcmalloc" | ||
730 | echo "jemalloc support $jemalloc" | ||
731 | echo "avx2 optimization $avx2_opt" | ||
732 | echo "replication support $replication" | ||
733 | +echo "VxHS block device $vxhs" | ||
734 | |||
735 | if test "$sdl_too_old" = "yes"; then | ||
736 | echo "-> Your SDL version is too old - please upgrade to have SDL support" | ||
737 | @@ -XXX,XX +XXX,XX @@ if test "$pthread_setname_np" = "yes" ; then | ||
738 | echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak | ||
739 | fi | ||
740 | |||
741 | +if test "$vxhs" = "yes" ; then | ||
742 | + echo "CONFIG_VXHS=y" >> $config_host_mak | ||
743 | + echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak | ||
744 | +fi | ||
745 | + | ||
746 | if test "$tcg_interpreter" = "yes"; then | ||
747 | QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES" | ||
748 | elif test "$ARCH" = "sparc64" ; then | ||
749 | diff --git a/qapi/block-core.json b/qapi/block-core.json | ||
750 | index XXXXXXX..XXXXXXX 100644 | ||
751 | --- a/qapi/block-core.json | ||
752 | +++ b/qapi/block-core.json | ||
753 | @@ -XXX,XX +XXX,XX @@ | ||
754 | # | ||
755 | # Drivers that are supported in block device operations. | ||
756 | # | ||
757 | +# @vxhs: Since 2.10 | ||
758 | +# | ||
759 | # Since: 2.9 | ||
760 | ## | ||
761 | { 'enum': 'BlockdevDriver', | ||
762 | @@ -XXX,XX +XXX,XX @@ | ||
763 | 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', | ||
764 | 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', | ||
765 | 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh', | ||
766 | - 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } | ||
767 | + 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } | ||
768 | |||
769 | ## | ||
770 | # @BlockdevOptionsFile: | ||
771 | @@ -XXX,XX +XXX,XX @@ | ||
772 | 'data': { '*offset': 'int', '*size': 'int' } } | ||
773 | |||
774 | ## | ||
775 | +# @BlockdevOptionsVxHS: | ||
776 | +# | ||
777 | +# Driver specific block device options for VxHS | ||
778 | +# | ||
779 | +# @vdisk-id: UUID of VxHS volume | ||
780 | +# @server: vxhs server IP, port | ||
781 | +# @tls-creds: TLS credentials ID | ||
782 | +# | ||
783 | +# Since: 2.10 | ||
784 | +## | ||
785 | +{ 'struct': 'BlockdevOptionsVxHS', | ||
786 | + 'data': { 'vdisk-id': 'str', | ||
787 | + 'server': 'InetSocketAddressBase', | ||
788 | + '*tls-creds': 'str' } } | ||
789 | + | ||
790 | +## | ||
791 | # @BlockdevOptions: | ||
792 | # | ||
793 | # Options for creating a block device. Many options are available for all | ||
794 | @@ -XXX,XX +XXX,XX @@ | ||
795 | 'vhdx': 'BlockdevOptionsGenericFormat', | ||
796 | 'vmdk': 'BlockdevOptionsGenericCOWFormat', | ||
797 | 'vpc': 'BlockdevOptionsGenericFormat', | ||
798 | - 'vvfat': 'BlockdevOptionsVVFAT' | ||
799 | + 'vvfat': 'BlockdevOptionsVVFAT', | ||
800 | + 'vxhs': 'BlockdevOptionsVxHS' | ||
801 | } } | ||
802 | |||
803 | ## | ||
804 | -- | 384 | -- |
805 | 2.9.3 | 385 | 2.36.1 |
806 | 386 | ||
807 | 387 | diff view generated by jsdifflib |
1 | Update 'clientname' to be 'user', which tracks better with both | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | the QAPI and rados variable naming. | ||
3 | 2 | ||
4 | Update 'name' to be 'image_name', as it indicates the rbd image. | 3 | create a context with the vfio-user library to run a PCI device |
5 | Naming it 'image' would have been ideal, but we are using that for | ||
6 | the rados_image_t value returned by rbd_open(). | ||
7 | 4 | ||
5 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
6 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
7 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
9 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 9 | Message-id: a452871ac8c812ff96fc4f0ce6037f4769953fab.1655151679.git.jag.raman@oracle.com |
10 | Reviewed-by: John Snow <jsnow@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
11 | Message-id: b7ec1fb2e1cf36f9b6911631447a5b0422590b7d.1491597120.git.jcody@redhat.com | ||
12 | --- | 11 | --- |
13 | block/rbd.c | 33 +++++++++++++++++---------------- | 12 | hw/remote/vfio-user-obj.c | 82 +++++++++++++++++++++++++++++++++++++++ |
14 | 1 file changed, 17 insertions(+), 16 deletions(-) | 13 | 1 file changed, 82 insertions(+) |
15 | 14 | ||
16 | diff --git a/block/rbd.c b/block/rbd.c | 15 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c |
17 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/rbd.c | 17 | --- a/hw/remote/vfio-user-obj.c |
19 | +++ b/block/rbd.c | 18 | +++ b/hw/remote/vfio-user-obj.c |
20 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVRBDState { | 19 | @@ -XXX,XX +XXX,XX @@ |
21 | rados_t cluster; | 20 | #include "hw/remote/machine.h" |
22 | rados_ioctx_t io_ctx; | 21 | #include "qapi/error.h" |
23 | rbd_image_t image; | 22 | #include "qapi/qapi-visit-sockets.h" |
24 | - char *name; | 23 | +#include "qemu/notify.h" |
25 | + char *image_name; | 24 | +#include "sysemu/sysemu.h" |
26 | char *snap; | 25 | +#include "libvfio-user.h" |
27 | } BDRVRBDState; | 26 | |
28 | 27 | #define TYPE_VFU_OBJECT "x-vfio-user-server" | |
29 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) | 28 | OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) |
30 | int64_t bytes = 0; | 29 | @@ -XXX,XX +XXX,XX @@ struct VfuObject { |
31 | int64_t objsize; | 30 | char *device; |
32 | int obj_order = 0; | 31 | |
33 | - const char *pool, *name, *conf, *clientname, *keypairs; | 32 | Error *err; |
34 | + const char *pool, *image_name, *conf, *user, *keypairs; | 33 | + |
35 | const char *secretid; | 34 | + Notifier machine_done; |
36 | rados_t cluster; | 35 | + |
37 | rados_ioctx_t io_ctx; | 36 | + vfu_ctx_t *vfu_ctx; |
38 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) | 37 | }; |
39 | */ | 38 | |
40 | pool = qdict_get_try_str(options, "pool"); | 39 | +static void vfu_object_init_ctx(VfuObject *o, Error **errp); |
41 | conf = qdict_get_try_str(options, "conf"); | 40 | + |
42 | - clientname = qdict_get_try_str(options, "user"); | 41 | static bool vfu_object_auto_shutdown(void) |
43 | - name = qdict_get_try_str(options, "image"); | 42 | { |
44 | + user = qdict_get_try_str(options, "user"); | 43 | bool auto_shutdown = true; |
45 | + image_name = qdict_get_try_str(options, "image"); | 44 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, |
46 | keypairs = qdict_get_try_str(options, "=keyvalue-pairs"); | 45 | { |
47 | 46 | VfuObject *o = VFU_OBJECT(obj); | |
48 | - ret = rados_create(&cluster, clientname); | 47 | |
49 | + ret = rados_create(&cluster, user); | 48 | + if (o->vfu_ctx) { |
50 | if (ret < 0) { | 49 | + error_setg(errp, "vfu: Unable to set socket property - server busy"); |
51 | error_setg_errno(errp, -ret, "error initializing"); | 50 | + return; |
52 | goto exit; | 51 | + } |
53 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) | 52 | + |
54 | goto shutdown; | 53 | qapi_free_SocketAddress(o->socket); |
54 | |||
55 | o->socket = NULL; | ||
56 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, | ||
55 | } | 57 | } |
56 | 58 | ||
57 | - ret = rbd_create(io_ctx, name, bytes, &obj_order); | 59 | trace_vfu_prop("socket", o->socket->u.q_unix.path); |
58 | + ret = rbd_create(io_ctx, image_name, bytes, &obj_order); | 60 | + |
59 | if (ret < 0) { | 61 | + vfu_object_init_ctx(o, errp); |
60 | error_setg_errno(errp, -ret, "error rbd create"); | 62 | } |
63 | |||
64 | static void vfu_object_set_device(Object *obj, const char *str, Error **errp) | ||
65 | { | ||
66 | VfuObject *o = VFU_OBJECT(obj); | ||
67 | |||
68 | + if (o->vfu_ctx) { | ||
69 | + error_setg(errp, "vfu: Unable to set device property - server busy"); | ||
70 | + return; | ||
71 | + } | ||
72 | + | ||
73 | g_free(o->device); | ||
74 | |||
75 | o->device = g_strdup(str); | ||
76 | |||
77 | trace_vfu_prop("device", str); | ||
78 | + | ||
79 | + vfu_object_init_ctx(o, errp); | ||
80 | +} | ||
81 | + | ||
82 | +/* | ||
83 | + * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' | ||
84 | + * properties. It also depends on devices instantiated in QEMU. These | ||
85 | + * dependencies are not available during the instance_init phase of this | ||
86 | + * object's life-cycle. As such, the server is initialized after the | ||
87 | + * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT | ||
88 | + * when the machine is setup, and the dependencies are available. | ||
89 | + */ | ||
90 | +static void vfu_object_machine_done(Notifier *notifier, void *data) | ||
91 | +{ | ||
92 | + VfuObject *o = container_of(notifier, VfuObject, machine_done); | ||
93 | + Error *err = NULL; | ||
94 | + | ||
95 | + vfu_object_init_ctx(o, &err); | ||
96 | + | ||
97 | + if (err) { | ||
98 | + error_propagate(&error_abort, err); | ||
99 | + } | ||
100 | +} | ||
101 | + | ||
102 | +static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
103 | +{ | ||
104 | + ERRP_GUARD(); | ||
105 | + | ||
106 | + if (o->vfu_ctx || !o->socket || !o->device || | ||
107 | + !phase_check(PHASE_MACHINE_READY)) { | ||
108 | + return; | ||
109 | + } | ||
110 | + | ||
111 | + if (o->err) { | ||
112 | + error_propagate(errp, o->err); | ||
113 | + o->err = NULL; | ||
114 | + return; | ||
115 | + } | ||
116 | + | ||
117 | + o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0, | ||
118 | + o, VFU_DEV_TYPE_PCI); | ||
119 | + if (o->vfu_ctx == NULL) { | ||
120 | + error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); | ||
121 | + return; | ||
122 | + } | ||
123 | } | ||
124 | |||
125 | static void vfu_object_init(Object *obj) | ||
126 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init(Object *obj) | ||
127 | TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); | ||
128 | return; | ||
61 | } | 129 | } |
62 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, | 130 | + |
63 | Error **errp) | 131 | + if (!phase_check(PHASE_MACHINE_READY)) { |
64 | { | 132 | + o->machine_done.notify = vfu_object_machine_done; |
65 | BDRVRBDState *s = bs->opaque; | 133 | + qemu_add_machine_init_done_notifier(&o->machine_done); |
66 | - const char *pool, *snap, *conf, *clientname, *name, *keypairs; | 134 | + } |
67 | + const char *pool, *snap, *conf, *user, *image_name, *keypairs; | 135 | + |
68 | const char *secretid; | 136 | } |
69 | QemuOpts *opts; | 137 | |
70 | Error *local_err = NULL; | 138 | static void vfu_object_finalize(Object *obj) |
71 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, | 139 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj) |
72 | pool = qemu_opt_get(opts, "pool"); | 140 | |
73 | conf = qemu_opt_get(opts, "conf"); | 141 | o->socket = NULL; |
74 | snap = qemu_opt_get(opts, "snapshot"); | 142 | |
75 | - clientname = qemu_opt_get(opts, "user"); | 143 | + if (o->vfu_ctx) { |
76 | - name = qemu_opt_get(opts, "image"); | 144 | + vfu_destroy_ctx(o->vfu_ctx); |
77 | + user = qemu_opt_get(opts, "user"); | 145 | + o->vfu_ctx = NULL; |
78 | + image_name = qemu_opt_get(opts, "image"); | 146 | + } |
79 | keypairs = qemu_opt_get(opts, "=keyvalue-pairs"); | 147 | + |
80 | 148 | g_free(o->device); | |
81 | - if (!pool || !name) { | 149 | |
82 | + if (!pool || !image_name) { | 150 | o->device = NULL; |
83 | error_setg(errp, "Parameters 'pool' and 'image' are required"); | 151 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj) |
84 | r = -EINVAL; | 152 | if (!k->nr_devs && vfu_object_auto_shutdown()) { |
85 | goto failed_opts; | 153 | qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); |
86 | } | 154 | } |
87 | 155 | + | |
88 | - r = rados_create(&s->cluster, clientname); | 156 | + if (o->machine_done.notify) { |
89 | + r = rados_create(&s->cluster, user); | 157 | + qemu_remove_machine_init_done_notifier(&o->machine_done); |
90 | if (r < 0) { | 158 | + o->machine_done.notify = NULL; |
91 | error_setg_errno(errp, -r, "error initializing"); | 159 | + } |
92 | goto failed_opts; | ||
93 | } | ||
94 | |||
95 | s->snap = g_strdup(snap); | ||
96 | - s->name = g_strdup(name); | ||
97 | + s->image_name = g_strdup(image_name); | ||
98 | |||
99 | /* try default location when conf=NULL, but ignore failure */ | ||
100 | r = rados_conf_read_file(s->cluster, conf); | ||
101 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, | ||
102 | } | ||
103 | |||
104 | /* rbd_open is always r/w */ | ||
105 | - r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); | ||
106 | + r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap); | ||
107 | if (r < 0) { | ||
108 | - error_setg_errno(errp, -r, "error reading header from %s", s->name); | ||
109 | + error_setg_errno(errp, -r, "error reading header from %s", | ||
110 | + s->image_name); | ||
111 | goto failed_open; | ||
112 | } | ||
113 | |||
114 | @@ -XXX,XX +XXX,XX @@ failed_open: | ||
115 | failed_shutdown: | ||
116 | rados_shutdown(s->cluster); | ||
117 | g_free(s->snap); | ||
118 | - g_free(s->name); | ||
119 | + g_free(s->image_name); | ||
120 | failed_opts: | ||
121 | qemu_opts_del(opts); | ||
122 | g_free(mon_host); | ||
123 | @@ -XXX,XX +XXX,XX @@ static void qemu_rbd_close(BlockDriverState *bs) | ||
124 | rbd_close(s->image); | ||
125 | rados_ioctx_destroy(s->io_ctx); | ||
126 | g_free(s->snap); | ||
127 | - g_free(s->name); | ||
128 | + g_free(s->image_name); | ||
129 | rados_shutdown(s->cluster); | ||
130 | } | 160 | } |
131 | 161 | ||
162 | static void vfu_object_class_init(ObjectClass *klass, void *data) | ||
132 | -- | 163 | -- |
133 | 2.9.3 | 164 | 2.36.1 |
134 | |||
135 | diff view generated by jsdifflib |
1 | The BDRV_O_ALLOW_RDWR flag allows / prohibits the changing of | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | the BDS 'read_only' state, but there are a few places where it | ||
3 | is ignored. In the bdrv_set_read_only() helper, make sure to | ||
4 | honor the flag. | ||
5 | 2 | ||
6 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 3 | Find the PCI device with the specified id. Initialize the device context |
4 | with the QEMU PCI device. | ||
5 | |||
6 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
7 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
8 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
7 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
8 | Reviewed-by: John Snow <jsnow@redhat.com> | 10 | Message-id: 7798dbd730099b33fdd00c4c202cfe79e5c5c151.1655151679.git.jag.raman@oracle.com |
9 | Message-id: be2e5fb2d285cbece2b6d06bed54a6f56520d251.1491597120.git.jcody@redhat.com | 11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | --- | 12 | --- |
11 | block.c | 7 +++++++ | 13 | hw/remote/vfio-user-obj.c | 67 +++++++++++++++++++++++++++++++++++++++ |
12 | 1 file changed, 7 insertions(+) | 14 | 1 file changed, 67 insertions(+) |
13 | 15 | ||
14 | diff --git a/block.c b/block.c | 16 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c |
15 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block.c | 18 | --- a/hw/remote/vfio-user-obj.c |
17 | +++ b/block.c | 19 | +++ b/hw/remote/vfio-user-obj.c |
18 | @@ -XXX,XX +XXX,XX @@ int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 20 | @@ -XXX,XX +XXX,XX @@ |
19 | return -EINVAL; | 21 | #include "qemu/notify.h" |
22 | #include "sysemu/sysemu.h" | ||
23 | #include "libvfio-user.h" | ||
24 | +#include "hw/qdev-core.h" | ||
25 | +#include "hw/pci/pci.h" | ||
26 | |||
27 | #define TYPE_VFU_OBJECT "x-vfio-user-server" | ||
28 | OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) | ||
29 | @@ -XXX,XX +XXX,XX @@ struct VfuObject { | ||
30 | Notifier machine_done; | ||
31 | |||
32 | vfu_ctx_t *vfu_ctx; | ||
33 | + | ||
34 | + PCIDevice *pci_dev; | ||
35 | + | ||
36 | + Error *unplug_blocker; | ||
37 | }; | ||
38 | |||
39 | static void vfu_object_init_ctx(VfuObject *o, Error **errp); | ||
40 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_machine_done(Notifier *notifier, void *data) | ||
41 | static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
42 | { | ||
43 | ERRP_GUARD(); | ||
44 | + DeviceState *dev = NULL; | ||
45 | + vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL; | ||
46 | + int ret; | ||
47 | |||
48 | if (o->vfu_ctx || !o->socket || !o->device || | ||
49 | !phase_check(PHASE_MACHINE_READY)) { | ||
50 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
51 | error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); | ||
52 | return; | ||
20 | } | 53 | } |
21 | 54 | + | |
22 | + /* Do not clear read_only if it is prohibited */ | 55 | + dev = qdev_find_recursive(sysbus_get_default(), o->device); |
23 | + if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) { | 56 | + if (dev == NULL) { |
24 | + error_setg(errp, "Node '%s' is read only", | 57 | + error_setg(errp, "vfu: Device %s not found", o->device); |
25 | + bdrv_get_device_or_node_name(bs)); | 58 | + goto fail; |
26 | + return -EPERM; | ||
27 | + } | 59 | + } |
28 | + | 60 | + |
29 | bs->read_only = read_only; | 61 | + if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { |
30 | return 0; | 62 | + error_setg(errp, "vfu: %s not a PCI device", o->device); |
63 | + goto fail; | ||
64 | + } | ||
65 | + | ||
66 | + o->pci_dev = PCI_DEVICE(dev); | ||
67 | + | ||
68 | + object_ref(OBJECT(o->pci_dev)); | ||
69 | + | ||
70 | + if (pci_is_express(o->pci_dev)) { | ||
71 | + pci_type = VFU_PCI_TYPE_EXPRESS; | ||
72 | + } | ||
73 | + | ||
74 | + ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0); | ||
75 | + if (ret < 0) { | ||
76 | + error_setg(errp, | ||
77 | + "vfu: Failed to attach PCI device %s to context - %s", | ||
78 | + o->device, strerror(errno)); | ||
79 | + goto fail; | ||
80 | + } | ||
81 | + | ||
82 | + error_setg(&o->unplug_blocker, | ||
83 | + "vfu: %s for %s must be deleted before unplugging", | ||
84 | + TYPE_VFU_OBJECT, o->device); | ||
85 | + qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); | ||
86 | + | ||
87 | + return; | ||
88 | + | ||
89 | +fail: | ||
90 | + vfu_destroy_ctx(o->vfu_ctx); | ||
91 | + if (o->unplug_blocker && o->pci_dev) { | ||
92 | + qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); | ||
93 | + error_free(o->unplug_blocker); | ||
94 | + o->unplug_blocker = NULL; | ||
95 | + } | ||
96 | + if (o->pci_dev) { | ||
97 | + object_unref(OBJECT(o->pci_dev)); | ||
98 | + o->pci_dev = NULL; | ||
99 | + } | ||
100 | + o->vfu_ctx = NULL; | ||
31 | } | 101 | } |
102 | |||
103 | static void vfu_object_init(Object *obj) | ||
104 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj) | ||
105 | |||
106 | o->device = NULL; | ||
107 | |||
108 | + if (o->unplug_blocker && o->pci_dev) { | ||
109 | + qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); | ||
110 | + error_free(o->unplug_blocker); | ||
111 | + o->unplug_blocker = NULL; | ||
112 | + } | ||
113 | + | ||
114 | + if (o->pci_dev) { | ||
115 | + object_unref(OBJECT(o->pci_dev)); | ||
116 | + o->pci_dev = NULL; | ||
117 | + } | ||
118 | + | ||
119 | if (!k->nr_devs && vfu_object_auto_shutdown()) { | ||
120 | qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); | ||
121 | } | ||
32 | -- | 122 | -- |
33 | 2.9.3 | 123 | 2.36.1 |
34 | |||
35 | diff view generated by jsdifflib |
1 | We have a helper wrapper for checking for the BDS read_only flag; | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | add a helper wrapper to set the read_only flag as well. | ||
3 | 2 | ||
3 | Set up a handler to run the vfio-user context. The context is driven by | ||
4 | messages to the file descriptor associated with it - get the fd for | ||
5 | the context and hook up the handler with it. | ||
6 | |||
7 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
8 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
9 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
4 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 10 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
5 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 11 | Message-id: e934b0090529d448b6a7972b21dfc3d7421ce494.1655151679.git.jag.raman@oracle.com |
6 | Reviewed-by: John Snow <jsnow@redhat.com> | 12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
7 | Message-id: 9b18972d05f5fa2ac16c014f0af98d680553048d.1491597120.git.jcody@redhat.com | ||
8 | --- | 13 | --- |
9 | block.c | 5 +++++ | 14 | qapi/misc.json | 31 ++++++++++ |
10 | block/bochs.c | 2 +- | 15 | hw/remote/vfio-user-obj.c | 118 +++++++++++++++++++++++++++++++++++++- |
11 | block/cloop.c | 2 +- | 16 | 2 files changed, 148 insertions(+), 1 deletion(-) |
12 | block/dmg.c | 2 +- | ||
13 | block/rbd.c | 2 +- | ||
14 | block/vvfat.c | 4 ++-- | ||
15 | include/block/block.h | 1 + | ||
16 | 7 files changed, 12 insertions(+), 6 deletions(-) | ||
17 | 17 | ||
18 | diff --git a/block.c b/block.c | 18 | diff --git a/qapi/misc.json b/qapi/misc.json |
19 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block.c | 20 | --- a/qapi/misc.json |
21 | +++ b/block.c | 21 | +++ b/qapi/misc.json |
22 | @@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size, | 22 | @@ -XXX,XX +XXX,XX @@ |
23 | ## | ||
24 | { 'event': 'RTC_CHANGE', | ||
25 | 'data': { 'offset': 'int', 'qom-path': 'str' } } | ||
26 | + | ||
27 | +## | ||
28 | +# @VFU_CLIENT_HANGUP: | ||
29 | +# | ||
30 | +# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the | ||
31 | +# communication channel | ||
32 | +# | ||
33 | +# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object. It is the last component | ||
34 | +# of @vfu-qom-path referenced below | ||
35 | +# | ||
36 | +# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree | ||
37 | +# | ||
38 | +# @dev-id: ID of attached PCI device | ||
39 | +# | ||
40 | +# @dev-qom-path: path to attached PCI device in the QOM tree | ||
41 | +# | ||
42 | +# Since: 7.1 | ||
43 | +# | ||
44 | +# Example: | ||
45 | +# | ||
46 | +# <- { "event": "VFU_CLIENT_HANGUP", | ||
47 | +# "data": { "vfu-id": "vfu1", | ||
48 | +# "vfu-qom-path": "/objects/vfu1", | ||
49 | +# "dev-id": "sas1", | ||
50 | +# "dev-qom-path": "/machine/peripheral/sas1" }, | ||
51 | +# "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } | ||
52 | +# | ||
53 | +## | ||
54 | +{ 'event': 'VFU_CLIENT_HANGUP', | ||
55 | + 'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str', | ||
56 | + 'dev-id': 'str', 'dev-qom-path': 'str' } } | ||
57 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/hw/remote/vfio-user-obj.c | ||
60 | +++ b/hw/remote/vfio-user-obj.c | ||
61 | @@ -XXX,XX +XXX,XX @@ | ||
62 | * | ||
63 | * device - id of a device on the server, a required option. PCI devices | ||
64 | * alone are supported presently. | ||
65 | + * | ||
66 | + * notes - x-vfio-user-server could block IO and monitor during the | ||
67 | + * initialization phase. | ||
68 | */ | ||
69 | |||
70 | #include "qemu/osdep.h" | ||
71 | @@ -XXX,XX +XXX,XX @@ | ||
72 | #include "hw/remote/machine.h" | ||
73 | #include "qapi/error.h" | ||
74 | #include "qapi/qapi-visit-sockets.h" | ||
75 | +#include "qapi/qapi-events-misc.h" | ||
76 | #include "qemu/notify.h" | ||
77 | +#include "qemu/thread.h" | ||
78 | #include "sysemu/sysemu.h" | ||
79 | #include "libvfio-user.h" | ||
80 | #include "hw/qdev-core.h" | ||
81 | #include "hw/pci/pci.h" | ||
82 | +#include "qemu/timer.h" | ||
83 | |||
84 | #define TYPE_VFU_OBJECT "x-vfio-user-server" | ||
85 | OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) | ||
86 | @@ -XXX,XX +XXX,XX @@ struct VfuObject { | ||
87 | PCIDevice *pci_dev; | ||
88 | |||
89 | Error *unplug_blocker; | ||
90 | + | ||
91 | + int vfu_poll_fd; | ||
92 | }; | ||
93 | |||
94 | static void vfu_object_init_ctx(VfuObject *o, Error **errp); | ||
95 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp) | ||
96 | vfu_object_init_ctx(o, errp); | ||
97 | } | ||
98 | |||
99 | +static void vfu_object_ctx_run(void *opaque) | ||
100 | +{ | ||
101 | + VfuObject *o = opaque; | ||
102 | + const char *vfu_id; | ||
103 | + char *vfu_path, *pci_dev_path; | ||
104 | + int ret = -1; | ||
105 | + | ||
106 | + while (ret != 0) { | ||
107 | + ret = vfu_run_ctx(o->vfu_ctx); | ||
108 | + if (ret < 0) { | ||
109 | + if (errno == EINTR) { | ||
110 | + continue; | ||
111 | + } else if (errno == ENOTCONN) { | ||
112 | + vfu_id = object_get_canonical_path_component(OBJECT(o)); | ||
113 | + vfu_path = object_get_canonical_path(OBJECT(o)); | ||
114 | + g_assert(o->pci_dev); | ||
115 | + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev)); | ||
116 | + /* o->device is a required property and is non-NULL here */ | ||
117 | + g_assert(o->device); | ||
118 | + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path, | ||
119 | + o->device, pci_dev_path); | ||
120 | + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); | ||
121 | + o->vfu_poll_fd = -1; | ||
122 | + object_unparent(OBJECT(o)); | ||
123 | + g_free(vfu_path); | ||
124 | + g_free(pci_dev_path); | ||
125 | + break; | ||
126 | + } else { | ||
127 | + VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s", | ||
128 | + o->device, strerror(errno)); | ||
129 | + break; | ||
130 | + } | ||
131 | + } | ||
132 | + } | ||
133 | +} | ||
134 | + | ||
135 | +static void vfu_object_attach_ctx(void *opaque) | ||
136 | +{ | ||
137 | + VfuObject *o = opaque; | ||
138 | + GPollFD pfds[1]; | ||
139 | + int ret; | ||
140 | + | ||
141 | + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); | ||
142 | + | ||
143 | + pfds[0].fd = o->vfu_poll_fd; | ||
144 | + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; | ||
145 | + | ||
146 | +retry_attach: | ||
147 | + ret = vfu_attach_ctx(o->vfu_ctx); | ||
148 | + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { | ||
149 | + /** | ||
150 | + * vfu_object_attach_ctx can block QEMU's main loop | ||
151 | + * during attach - the monitor and other IO | ||
152 | + * could be unresponsive during this time. | ||
153 | + */ | ||
154 | + (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); | ||
155 | + goto retry_attach; | ||
156 | + } else if (ret < 0) { | ||
157 | + VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s", | ||
158 | + o->device, strerror(errno)); | ||
159 | + return; | ||
160 | + } | ||
161 | + | ||
162 | + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); | ||
163 | + if (o->vfu_poll_fd < 0) { | ||
164 | + VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device); | ||
165 | + return; | ||
166 | + } | ||
167 | + | ||
168 | + qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); | ||
169 | +} | ||
170 | + | ||
171 | /* | ||
172 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' | ||
173 | * properties. It also depends on devices instantiated in QEMU. These | ||
174 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_machine_done(Notifier *notifier, void *data) | ||
23 | } | 175 | } |
24 | } | 176 | } |
25 | 177 | ||
26 | +void bdrv_set_read_only(BlockDriverState *bs, bool read_only) | 178 | +/** |
27 | +{ | 179 | + * vfu_object_init_ctx: Create and initialize libvfio-user context. Add |
28 | + bs->read_only = read_only; | 180 | + * an unplug blocker for the associated PCI device. Setup a FD handler |
29 | +} | 181 | + * to process incoming messages in the context's socket. |
30 | + | 182 | + * |
31 | void bdrv_get_full_backing_filename_from_filename(const char *backed, | 183 | + * The socket and device properties are mandatory, and this function |
32 | const char *backing, | 184 | + * will not create the context without them - the setters for these |
33 | char *dest, size_t sz, | 185 | + * properties should call this function when the property is set. The |
34 | diff --git a/block/bochs.c b/block/bochs.c | 186 | + * machine should also be ready when this function is invoked - it is |
35 | index XXXXXXX..XXXXXXX 100644 | 187 | + * because QEMU objects are initialized before devices, and the |
36 | --- a/block/bochs.c | 188 | + * associated PCI device wouldn't be available at the object |
37 | +++ b/block/bochs.c | 189 | + * initialization time. Until these conditions are satisfied, this |
38 | @@ -XXX,XX +XXX,XX @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, | 190 | + * function would return early without performing any task. |
39 | return -EINVAL; | 191 | + */ |
192 | static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
193 | { | ||
194 | ERRP_GUARD(); | ||
195 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
196 | return; | ||
40 | } | 197 | } |
41 | 198 | ||
42 | - bs->read_only = true; /* no write support yet */ | 199 | - o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0, |
43 | + bdrv_set_read_only(bs, true); /* no write support yet */ | 200 | + o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, |
44 | 201 | + LIBVFIO_USER_FLAG_ATTACH_NB, | |
45 | ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); | 202 | o, VFU_DEV_TYPE_PCI); |
46 | if (ret < 0) { | 203 | if (o->vfu_ctx == NULL) { |
47 | diff --git a/block/cloop.c b/block/cloop.c | 204 | error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); |
48 | index XXXXXXX..XXXXXXX 100644 | 205 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) |
49 | --- a/block/cloop.c | 206 | TYPE_VFU_OBJECT, o->device); |
50 | +++ b/block/cloop.c | 207 | qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); |
51 | @@ -XXX,XX +XXX,XX @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, | 208 | |
52 | return -EINVAL; | 209 | + ret = vfu_realize_ctx(o->vfu_ctx); |
210 | + if (ret < 0) { | ||
211 | + error_setg(errp, "vfu: Failed to realize device %s- %s", | ||
212 | + o->device, strerror(errno)); | ||
213 | + goto fail; | ||
214 | + } | ||
215 | + | ||
216 | + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); | ||
217 | + if (o->vfu_poll_fd < 0) { | ||
218 | + error_setg(errp, "vfu: Failed to get poll fd %s", o->device); | ||
219 | + goto fail; | ||
220 | + } | ||
221 | + | ||
222 | + qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o); | ||
223 | + | ||
224 | return; | ||
225 | |||
226 | fail: | ||
227 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init(Object *obj) | ||
228 | qemu_add_machine_init_done_notifier(&o->machine_done); | ||
53 | } | 229 | } |
54 | 230 | ||
55 | - bs->read_only = true; | 231 | + o->vfu_poll_fd = -1; |
56 | + bdrv_set_read_only(bs, true); | 232 | } |
57 | 233 | ||
58 | /* read header */ | 234 | static void vfu_object_finalize(Object *obj) |
59 | ret = bdrv_pread(bs->file, 128, &s->block_size, 4); | 235 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj) |
60 | diff --git a/block/dmg.c b/block/dmg.c | 236 | |
61 | index XXXXXXX..XXXXXXX 100644 | 237 | o->socket = NULL; |
62 | --- a/block/dmg.c | 238 | |
63 | +++ b/block/dmg.c | 239 | + if (o->vfu_poll_fd != -1) { |
64 | @@ -XXX,XX +XXX,XX @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, | 240 | + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); |
65 | } | 241 | + o->vfu_poll_fd = -1; |
66 | 242 | + } | |
67 | block_module_load_one("dmg-bz2"); | 243 | + |
68 | - bs->read_only = true; | 244 | if (o->vfu_ctx) { |
69 | + bdrv_set_read_only(bs, true); | 245 | vfu_destroy_ctx(o->vfu_ctx); |
70 | 246 | o->vfu_ctx = NULL; | |
71 | s->n_chunks = 0; | ||
72 | s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; | ||
73 | diff --git a/block/rbd.c b/block/rbd.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/block/rbd.c | ||
76 | +++ b/block/rbd.c | ||
77 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, | ||
78 | goto failed_open; | ||
79 | } | ||
80 | |||
81 | - bs->read_only = (s->snap != NULL); | ||
82 | + bdrv_set_read_only(bs, (s->snap != NULL)); | ||
83 | |||
84 | qemu_opts_del(opts); | ||
85 | return 0; | ||
86 | diff --git a/block/vvfat.c b/block/vvfat.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/block/vvfat.c | ||
89 | +++ b/block/vvfat.c | ||
90 | @@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, | ||
91 | s->current_cluster=0xffffffff; | ||
92 | |||
93 | /* read only is the default for safety */ | ||
94 | - bs->read_only = true; | ||
95 | + bdrv_set_read_only(bs, true); | ||
96 | s->qcow = NULL; | ||
97 | s->qcow_filename = NULL; | ||
98 | s->fat2 = NULL; | ||
99 | @@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, | ||
100 | if (ret < 0) { | ||
101 | goto fail; | ||
102 | } | ||
103 | - bs->read_only = false; | ||
104 | + bdrv_set_read_only(bs, false); | ||
105 | } | ||
106 | |||
107 | bs->total_sectors = cyls * heads * secs; | ||
108 | diff --git a/include/block/block.h b/include/block/block.h | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/include/block/block.h | ||
111 | +++ b/include/block/block.h | ||
112 | @@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, | ||
113 | int64_t sector_num, int nb_sectors, int *pnum); | ||
114 | |||
115 | bool bdrv_is_read_only(BlockDriverState *bs); | ||
116 | +void bdrv_set_read_only(BlockDriverState *bs, bool read_only); | ||
117 | bool bdrv_is_sg(BlockDriverState *bs); | ||
118 | bool bdrv_is_inserted(BlockDriverState *bs); | ||
119 | int bdrv_media_changed(BlockDriverState *bs); | ||
120 | -- | 247 | -- |
121 | 2.9.3 | 248 | 2.36.1 |
122 | |||
123 | diff view generated by jsdifflib |
1 | This adds support for reopen in rbd, for changing between r/w and r/o. | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | Note that this is only a flag change, but we will block a change from | 3 | Define and register handlers for PCI config space accesses. |
4 | r/o to r/w if we are using an RBD internal snapshot. | ||
5 | 4 | ||
5 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
6 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
7 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
6 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
7 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 9 | Message-id: be9d2ccf9b1d24e50dcd9c23404dbf284142cec7.1655151679.git.jag.raman@oracle.com |
8 | Reviewed-by: John Snow <jsnow@redhat.com> | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
9 | Message-id: d4e87539167ec6527d44c97b164eabcccf96e4f3.1491597120.git.jcody@redhat.com | ||
10 | --- | 11 | --- |
11 | block/rbd.c | 21 +++++++++++++++++++++ | 12 | hw/remote/vfio-user-obj.c | 51 +++++++++++++++++++++++++++++++++++++++ |
12 | 1 file changed, 21 insertions(+) | 13 | hw/remote/trace-events | 2 ++ |
14 | 2 files changed, 53 insertions(+) | ||
13 | 15 | ||
14 | diff --git a/block/rbd.c b/block/rbd.c | 16 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c |
15 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/rbd.c | 18 | --- a/hw/remote/vfio-user-obj.c |
17 | +++ b/block/rbd.c | 19 | +++ b/hw/remote/vfio-user-obj.c |
18 | @@ -XXX,XX +XXX,XX @@ failed_opts: | 20 | @@ -XXX,XX +XXX,XX @@ |
19 | return r; | 21 | #include "qapi/qapi-events-misc.h" |
22 | #include "qemu/notify.h" | ||
23 | #include "qemu/thread.h" | ||
24 | +#include "qemu/main-loop.h" | ||
25 | #include "sysemu/sysemu.h" | ||
26 | #include "libvfio-user.h" | ||
27 | #include "hw/qdev-core.h" | ||
28 | @@ -XXX,XX +XXX,XX @@ retry_attach: | ||
29 | qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); | ||
20 | } | 30 | } |
21 | 31 | ||
32 | +static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, | ||
33 | + size_t count, loff_t offset, | ||
34 | + const bool is_write) | ||
35 | +{ | ||
36 | + VfuObject *o = vfu_get_private(vfu_ctx); | ||
37 | + uint32_t pci_access_width = sizeof(uint32_t); | ||
38 | + size_t bytes = count; | ||
39 | + uint32_t val = 0; | ||
40 | + char *ptr = buf; | ||
41 | + int len; | ||
22 | + | 42 | + |
23 | +/* Since RBD is currently always opened R/W via the API, | 43 | + /* |
24 | + * we just need to check if we are using a snapshot or not, in | 44 | + * Writes to the BAR registers would trigger an update to the |
25 | + * order to determine if we will allow it to be R/W */ | 45 | + * global Memory and IO AddressSpaces. But the remote device |
26 | +static int qemu_rbd_reopen_prepare(BDRVReopenState *state, | 46 | + * never uses the global AddressSpaces, therefore overlapping |
27 | + BlockReopenQueue *queue, Error **errp) | 47 | + * memory regions are not a problem |
28 | +{ | 48 | + */ |
29 | + BDRVRBDState *s = state->bs->opaque; | 49 | + while (bytes > 0) { |
30 | + int ret = 0; | 50 | + len = (bytes > pci_access_width) ? pci_access_width : bytes; |
31 | + | 51 | + if (is_write) { |
32 | + if (s->snap && state->flags & BDRV_O_RDWR) { | 52 | + memcpy(&val, ptr, len); |
33 | + error_setg(errp, | 53 | + pci_host_config_write_common(o->pci_dev, offset, |
34 | + "Cannot change node '%s' to r/w when using RBD snapshot", | 54 | + pci_config_size(o->pci_dev), |
35 | + bdrv_get_device_or_node_name(state->bs)); | 55 | + val, len); |
36 | + ret = -EINVAL; | 56 | + trace_vfu_cfg_write(offset, val); |
57 | + } else { | ||
58 | + val = pci_host_config_read_common(o->pci_dev, offset, | ||
59 | + pci_config_size(o->pci_dev), len); | ||
60 | + memcpy(ptr, &val, len); | ||
61 | + trace_vfu_cfg_read(offset, val); | ||
62 | + } | ||
63 | + offset += len; | ||
64 | + ptr += len; | ||
65 | + bytes -= len; | ||
37 | + } | 66 | + } |
38 | + | 67 | + |
39 | + return ret; | 68 | + return count; |
40 | +} | 69 | +} |
41 | + | 70 | + |
42 | static void qemu_rbd_close(BlockDriverState *bs) | 71 | /* |
43 | { | 72 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' |
44 | BDRVRBDState *s = bs->opaque; | 73 | * properties. It also depends on devices instantiated in QEMU. These |
45 | @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = { | 74 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) |
46 | .bdrv_parse_filename = qemu_rbd_parse_filename, | 75 | TYPE_VFU_OBJECT, o->device); |
47 | .bdrv_file_open = qemu_rbd_open, | 76 | qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker); |
48 | .bdrv_close = qemu_rbd_close, | 77 | |
49 | + .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, | 78 | + ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, |
50 | .bdrv_create = qemu_rbd_create, | 79 | + pci_config_size(o->pci_dev), &vfu_object_cfg_access, |
51 | .bdrv_has_zero_init = bdrv_has_zero_init_1, | 80 | + VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB, |
52 | .bdrv_get_info = qemu_rbd_getinfo, | 81 | + NULL, 0, -1, 0); |
82 | + if (ret < 0) { | ||
83 | + error_setg(errp, | ||
84 | + "vfu: Failed to setup config space handlers for %s- %s", | ||
85 | + o->device, strerror(errno)); | ||
86 | + goto fail; | ||
87 | + } | ||
88 | + | ||
89 | ret = vfu_realize_ctx(o->vfu_ctx); | ||
90 | if (ret < 0) { | ||
91 | error_setg(errp, "vfu: Failed to realize device %s- %s", | ||
92 | diff --git a/hw/remote/trace-events b/hw/remote/trace-events | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/hw/remote/trace-events | ||
95 | +++ b/hw/remote/trace-events | ||
96 | @@ -XXX,XX +XXX,XX @@ mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d, | ||
97 | |||
98 | # vfio-user-obj.c | ||
99 | vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s" | ||
100 | +vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x" | ||
101 | +vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x" | ||
53 | -- | 102 | -- |
54 | 2.9.3 | 103 | 2.36.1 |
55 | |||
56 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Jagannathan Raman <jag.raman@oracle.com> | |
2 | |||
3 | Assign a separate address space for each device in the remote processes. | ||
4 | |||
5 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
6 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
7 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Message-id: afe0b0a97582cdad42b5b25636a29c523265a10a.1655151679.git.jag.raman@oracle.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | MAINTAINERS | 2 + | ||
13 | include/hw/remote/iommu.h | 40 ++++++++++++ | ||
14 | hw/remote/iommu.c | 131 ++++++++++++++++++++++++++++++++++++++ | ||
15 | hw/remote/machine.c | 13 +++- | ||
16 | hw/remote/meson.build | 1 + | ||
17 | 5 files changed, 186 insertions(+), 1 deletion(-) | ||
18 | create mode 100644 include/hw/remote/iommu.h | ||
19 | create mode 100644 hw/remote/iommu.c | ||
20 | |||
21 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/MAINTAINERS | ||
24 | +++ b/MAINTAINERS | ||
25 | @@ -XXX,XX +XXX,XX @@ F: hw/remote/iohub.c | ||
26 | F: include/hw/remote/iohub.h | ||
27 | F: subprojects/libvfio-user | ||
28 | F: hw/remote/vfio-user-obj.c | ||
29 | +F: hw/remote/iommu.c | ||
30 | +F: include/hw/remote/iommu.h | ||
31 | |||
32 | EBPF: | ||
33 | M: Jason Wang <jasowang@redhat.com> | ||
34 | diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h | ||
35 | new file mode 100644 | ||
36 | index XXXXXXX..XXXXXXX | ||
37 | --- /dev/null | ||
38 | +++ b/include/hw/remote/iommu.h | ||
39 | @@ -XXX,XX +XXX,XX @@ | ||
40 | +/** | ||
41 | + * Copyright © 2022 Oracle and/or its affiliates. | ||
42 | + * | ||
43 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
44 | + * See the COPYING file in the top-level directory. | ||
45 | + * | ||
46 | + */ | ||
47 | + | ||
48 | +#ifndef REMOTE_IOMMU_H | ||
49 | +#define REMOTE_IOMMU_H | ||
50 | + | ||
51 | +#include "hw/pci/pci_bus.h" | ||
52 | +#include "hw/pci/pci.h" | ||
53 | + | ||
54 | +#ifndef INT2VOIDP | ||
55 | +#define INT2VOIDP(i) (void *)(uintptr_t)(i) | ||
56 | +#endif | ||
57 | + | ||
58 | +typedef struct RemoteIommuElem { | ||
59 | + MemoryRegion *mr; | ||
60 | + | ||
61 | + AddressSpace as; | ||
62 | +} RemoteIommuElem; | ||
63 | + | ||
64 | +#define TYPE_REMOTE_IOMMU "x-remote-iommu" | ||
65 | +OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU) | ||
66 | + | ||
67 | +struct RemoteIommu { | ||
68 | + Object parent; | ||
69 | + | ||
70 | + GHashTable *elem_by_devfn; | ||
71 | + | ||
72 | + QemuMutex lock; | ||
73 | +}; | ||
74 | + | ||
75 | +void remote_iommu_setup(PCIBus *pci_bus); | ||
76 | + | ||
77 | +void remote_iommu_unplug_dev(PCIDevice *pci_dev); | ||
78 | + | ||
79 | +#endif | ||
80 | diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c | ||
81 | new file mode 100644 | ||
82 | index XXXXXXX..XXXXXXX | ||
83 | --- /dev/null | ||
84 | +++ b/hw/remote/iommu.c | ||
85 | @@ -XXX,XX +XXX,XX @@ | ||
86 | +/** | ||
87 | + * IOMMU for remote device | ||
88 | + * | ||
89 | + * Copyright © 2022 Oracle and/or its affiliates. | ||
90 | + * | ||
91 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
92 | + * See the COPYING file in the top-level directory. | ||
93 | + * | ||
94 | + */ | ||
95 | + | ||
96 | +#include "qemu/osdep.h" | ||
97 | + | ||
98 | +#include "hw/remote/iommu.h" | ||
99 | +#include "hw/pci/pci_bus.h" | ||
100 | +#include "hw/pci/pci.h" | ||
101 | +#include "exec/memory.h" | ||
102 | +#include "exec/address-spaces.h" | ||
103 | +#include "trace.h" | ||
104 | + | ||
105 | +/** | ||
106 | + * IOMMU for TYPE_REMOTE_MACHINE - manages DMA address space isolation | ||
107 | + * for remote machine. It is used by TYPE_VFIO_USER_SERVER. | ||
108 | + * | ||
109 | + * - Each TYPE_VFIO_USER_SERVER instance handles one PCIDevice on a PCIBus. | ||
110 | + * There is one RemoteIommu per PCIBus, so the RemoteIommu tracks multiple | ||
111 | + * PCIDevices by maintaining a ->elem_by_devfn mapping. | ||
112 | + * | ||
113 | + * - memory_region_init_iommu() is not used because vfio-user MemoryRegions | ||
114 | + * will be added to the elem->mr container instead. This is more natural | ||
115 | + * than implementing the IOMMUMemoryRegionClass APIs since vfio-user | ||
116 | + * provides something that is close to a full-fledged MemoryRegion and | ||
117 | + * not like an IOMMU mapping. | ||
118 | + * | ||
119 | + * - When a device is hot unplugged, the elem->mr reference is dropped so | ||
120 | + * all vfio-user MemoryRegions associated with this vfio-user server are | ||
121 | + * destroyed. | ||
122 | + */ | ||
123 | + | ||
124 | +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, | ||
125 | + void *opaque, int devfn) | ||
126 | +{ | ||
127 | + RemoteIommu *iommu = opaque; | ||
128 | + RemoteIommuElem *elem = NULL; | ||
129 | + | ||
130 | + qemu_mutex_lock(&iommu->lock); | ||
131 | + | ||
132 | + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); | ||
133 | + | ||
134 | + if (!elem) { | ||
135 | + elem = g_malloc0(sizeof(RemoteIommuElem)); | ||
136 | + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); | ||
137 | + } | ||
138 | + | ||
139 | + if (!elem->mr) { | ||
140 | + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); | ||
141 | + memory_region_set_size(elem->mr, UINT64_MAX); | ||
142 | + address_space_init(&elem->as, elem->mr, NULL); | ||
143 | + } | ||
144 | + | ||
145 | + qemu_mutex_unlock(&iommu->lock); | ||
146 | + | ||
147 | + return &elem->as; | ||
148 | +} | ||
149 | + | ||
150 | +void remote_iommu_unplug_dev(PCIDevice *pci_dev) | ||
151 | +{ | ||
152 | + AddressSpace *as = pci_device_iommu_address_space(pci_dev); | ||
153 | + RemoteIommuElem *elem = NULL; | ||
154 | + | ||
155 | + if (as == &address_space_memory) { | ||
156 | + return; | ||
157 | + } | ||
158 | + | ||
159 | + elem = container_of(as, RemoteIommuElem, as); | ||
160 | + | ||
161 | + address_space_destroy(&elem->as); | ||
162 | + | ||
163 | + object_unref(elem->mr); | ||
164 | + | ||
165 | + elem->mr = NULL; | ||
166 | +} | ||
167 | + | ||
168 | +static void remote_iommu_init(Object *obj) | ||
169 | +{ | ||
170 | + RemoteIommu *iommu = REMOTE_IOMMU(obj); | ||
171 | + | ||
172 | + iommu->elem_by_devfn = g_hash_table_new_full(NULL, NULL, NULL, g_free); | ||
173 | + | ||
174 | + qemu_mutex_init(&iommu->lock); | ||
175 | +} | ||
176 | + | ||
177 | +static void remote_iommu_finalize(Object *obj) | ||
178 | +{ | ||
179 | + RemoteIommu *iommu = REMOTE_IOMMU(obj); | ||
180 | + | ||
181 | + qemu_mutex_destroy(&iommu->lock); | ||
182 | + | ||
183 | + g_hash_table_destroy(iommu->elem_by_devfn); | ||
184 | + | ||
185 | + iommu->elem_by_devfn = NULL; | ||
186 | +} | ||
187 | + | ||
188 | +void remote_iommu_setup(PCIBus *pci_bus) | ||
189 | +{ | ||
190 | + RemoteIommu *iommu = NULL; | ||
191 | + | ||
192 | + g_assert(pci_bus); | ||
193 | + | ||
194 | + iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU)); | ||
195 | + | ||
196 | + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu); | ||
197 | + | ||
198 | + object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu)); | ||
199 | + | ||
200 | + object_unref(OBJECT(iommu)); | ||
201 | +} | ||
202 | + | ||
203 | +static const TypeInfo remote_iommu_info = { | ||
204 | + .name = TYPE_REMOTE_IOMMU, | ||
205 | + .parent = TYPE_OBJECT, | ||
206 | + .instance_size = sizeof(RemoteIommu), | ||
207 | + .instance_init = remote_iommu_init, | ||
208 | + .instance_finalize = remote_iommu_finalize, | ||
209 | +}; | ||
210 | + | ||
211 | +static void remote_iommu_register_types(void) | ||
212 | +{ | ||
213 | + type_register_static(&remote_iommu_info); | ||
214 | +} | ||
215 | + | ||
216 | +type_init(remote_iommu_register_types) | ||
217 | diff --git a/hw/remote/machine.c b/hw/remote/machine.c | ||
218 | index XXXXXXX..XXXXXXX 100644 | ||
219 | --- a/hw/remote/machine.c | ||
220 | +++ b/hw/remote/machine.c | ||
221 | @@ -XXX,XX +XXX,XX @@ | ||
222 | #include "qapi/error.h" | ||
223 | #include "hw/pci/pci_host.h" | ||
224 | #include "hw/remote/iohub.h" | ||
225 | +#include "hw/remote/iommu.h" | ||
226 | #include "hw/qdev-core.h" | ||
227 | |||
228 | static void remote_machine_init(MachineState *machine) | ||
229 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_instance_init(Object *obj) | ||
230 | s->auto_shutdown = true; | ||
231 | } | ||
232 | |||
233 | +static void remote_machine_dev_unplug_cb(HotplugHandler *hotplug_dev, | ||
234 | + DeviceState *dev, Error **errp) | ||
235 | +{ | ||
236 | + qdev_unrealize(dev); | ||
237 | + | ||
238 | + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { | ||
239 | + remote_iommu_unplug_dev(PCI_DEVICE(dev)); | ||
240 | + } | ||
241 | +} | ||
242 | + | ||
243 | static void remote_machine_class_init(ObjectClass *oc, void *data) | ||
244 | { | ||
245 | MachineClass *mc = MACHINE_CLASS(oc); | ||
246 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_class_init(ObjectClass *oc, void *data) | ||
247 | mc->init = remote_machine_init; | ||
248 | mc->desc = "Experimental remote machine"; | ||
249 | |||
250 | - hc->unplug = qdev_simple_device_unplug_cb; | ||
251 | + hc->unplug = remote_machine_dev_unplug_cb; | ||
252 | |||
253 | object_class_property_add_bool(oc, "vfio-user", | ||
254 | remote_machine_get_vfio_user, | ||
255 | diff --git a/hw/remote/meson.build b/hw/remote/meson.build | ||
256 | index XXXXXXX..XXXXXXX 100644 | ||
257 | --- a/hw/remote/meson.build | ||
258 | +++ b/hw/remote/meson.build | ||
259 | @@ -XXX,XX +XXX,XX @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c')) | ||
260 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) | ||
261 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) | ||
262 | remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) | ||
263 | +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c')) | ||
264 | remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c')) | ||
265 | |||
266 | remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep) | ||
267 | -- | ||
268 | 2.36.1 | ||
269 | |||
270 | diff view generated by jsdifflib |
1 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | |||
3 | Define and register callbacks to manage the RAM regions used for | ||
4 | device DMA. | ||
5 | |||
6 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
7 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
8 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
2 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
3 | Reviewed-by: John Snow <jsnow@redhat.com> | 10 | Message-id: faacbcd45c4d02c591f0dbfdc19041fbb3eae7eb.1655151679.git.jag.raman@oracle.com |
4 | Message-id: 00aed7ffdd7be4b9ed9ce1007d50028a72b34ebe.1491597120.git.jcody@redhat.com | 11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
5 | --- | 12 | --- |
6 | block.c | 14 ++++++++------ | 13 | hw/remote/machine.c | 5 ++++ |
7 | 1 file changed, 8 insertions(+), 6 deletions(-) | 14 | hw/remote/vfio-user-obj.c | 55 +++++++++++++++++++++++++++++++++++++++ |
15 | hw/remote/trace-events | 2 ++ | ||
16 | 3 files changed, 62 insertions(+) | ||
8 | 17 | ||
9 | diff --git a/block.c b/block.c | 18 | diff --git a/hw/remote/machine.c b/hw/remote/machine.c |
10 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/block.c | 20 | --- a/hw/remote/machine.c |
12 | +++ b/block.c | 21 | +++ b/hw/remote/machine.c |
13 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, | 22 | @@ -XXX,XX +XXX,XX @@ |
14 | BlockDriver *drv; | 23 | #include "hw/remote/iohub.h" |
15 | QemuOpts *opts; | 24 | #include "hw/remote/iommu.h" |
16 | const char *value; | 25 | #include "hw/qdev-core.h" |
17 | + bool read_only; | 26 | +#include "hw/remote/iommu.h" |
18 | 27 | ||
19 | assert(reopen_state != NULL); | 28 | static void remote_machine_init(MachineState *machine) |
20 | assert(reopen_state->bs->drv != NULL); | 29 | { |
21 | @@ -XXX,XX +XXX,XX @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, | 30 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine) |
22 | qdict_put(reopen_state->options, "driver", qstring_from_str(value)); | 31 | |
32 | pci_host = PCI_HOST_BRIDGE(rem_host); | ||
33 | |||
34 | + if (s->vfio_user) { | ||
35 | + remote_iommu_setup(pci_host->bus); | ||
36 | + } | ||
37 | + | ||
38 | remote_iohub_init(&s->iohub); | ||
39 | |||
40 | pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, | ||
41 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/hw/remote/vfio-user-obj.c | ||
44 | +++ b/hw/remote/vfio-user-obj.c | ||
45 | @@ -XXX,XX +XXX,XX @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, | ||
46 | return count; | ||
47 | } | ||
48 | |||
49 | +static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) | ||
50 | +{ | ||
51 | + VfuObject *o = vfu_get_private(vfu_ctx); | ||
52 | + AddressSpace *dma_as = NULL; | ||
53 | + MemoryRegion *subregion = NULL; | ||
54 | + g_autofree char *name = NULL; | ||
55 | + struct iovec *iov = &info->iova; | ||
56 | + | ||
57 | + if (!info->vaddr) { | ||
58 | + return; | ||
59 | + } | ||
60 | + | ||
61 | + name = g_strdup_printf("mem-%s-%"PRIx64"", o->device, | ||
62 | + (uint64_t)info->vaddr); | ||
63 | + | ||
64 | + subregion = g_new0(MemoryRegion, 1); | ||
65 | + | ||
66 | + memory_region_init_ram_ptr(subregion, NULL, name, | ||
67 | + iov->iov_len, info->vaddr); | ||
68 | + | ||
69 | + dma_as = pci_device_iommu_address_space(o->pci_dev); | ||
70 | + | ||
71 | + memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion); | ||
72 | + | ||
73 | + trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len); | ||
74 | +} | ||
75 | + | ||
76 | +static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) | ||
77 | +{ | ||
78 | + VfuObject *o = vfu_get_private(vfu_ctx); | ||
79 | + AddressSpace *dma_as = NULL; | ||
80 | + MemoryRegion *mr = NULL; | ||
81 | + ram_addr_t offset; | ||
82 | + | ||
83 | + mr = memory_region_from_host(info->vaddr, &offset); | ||
84 | + if (!mr) { | ||
85 | + return; | ||
86 | + } | ||
87 | + | ||
88 | + dma_as = pci_device_iommu_address_space(o->pci_dev); | ||
89 | + | ||
90 | + memory_region_del_subregion(dma_as->root, mr); | ||
91 | + | ||
92 | + object_unparent((OBJECT(mr))); | ||
93 | + | ||
94 | + trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); | ||
95 | +} | ||
96 | + | ||
97 | /* | ||
98 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' | ||
99 | * properties. It also depends on devices instantiated in QEMU. These | ||
100 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
101 | goto fail; | ||
23 | } | 102 | } |
24 | 103 | ||
25 | - /* if we are to stay read-only, do not allow permission change | 104 | + ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister); |
26 | - * to r/w */ | 105 | + if (ret < 0) { |
27 | - if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && | 106 | + error_setg(errp, "vfu: Failed to setup DMA handlers for %s", |
28 | - reopen_state->flags & BDRV_O_RDWR) { | 107 | + o->device); |
29 | - error_setg(errp, "Node '%s' is read only", | 108 | + goto fail; |
30 | - bdrv_get_device_or_node_name(reopen_state->bs)); | 109 | + } |
31 | + /* If we are to stay read-only, do not allow permission change | 110 | + |
32 | + * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is | 111 | ret = vfu_realize_ctx(o->vfu_ctx); |
33 | + * not set, or if the BDS still has copy_on_read enabled */ | 112 | if (ret < 0) { |
34 | + read_only = !(reopen_state->flags & BDRV_O_RDWR); | 113 | error_setg(errp, "vfu: Failed to realize device %s- %s", |
35 | + ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err); | 114 | diff --git a/hw/remote/trace-events b/hw/remote/trace-events |
36 | + if (local_err) { | 115 | index XXXXXXX..XXXXXXX 100644 |
37 | + error_propagate(errp, local_err); | 116 | --- a/hw/remote/trace-events |
38 | goto error; | 117 | +++ b/hw/remote/trace-events |
39 | } | 118 | @@ -XXX,XX +XXX,XX @@ mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d, |
40 | 119 | vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s" | |
120 | vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x" | ||
121 | vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x" | ||
122 | +vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes" | ||
123 | +vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" | ||
41 | -- | 124 | -- |
42 | 2.9.3 | 125 | 2.36.1 |
43 | |||
44 | diff view generated by jsdifflib |
1 | From: Ashish Mittal <ashmit602@gmail.com> | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | These changes use a vxhs test server that is a part of the following | 3 | Determine the BARs used by the PCI device and register handlers to |
4 | repository: | 4 | manage the access to the same. |
5 | https://github.com/VeritasHyperScale/libqnio.git | ||
6 | 5 | ||
7 | Signed-off-by: Ashish Mittal <Ashish.Mittal@veritas.com> | 6 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> |
7 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
8 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 9 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
9 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 10 | Message-id: 3373e10b5be5f42846f0632d4382466e1698c505.1655151679.git.jag.raman@oracle.com |
10 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
11 | Message-id: 1491277689-24949-3-git-send-email-Ashish.Mittal@veritas.com | ||
12 | --- | 12 | --- |
13 | tests/qemu-iotests/common | 6 ++++++ | 13 | include/exec/memory.h | 3 + |
14 | tests/qemu-iotests/common.config | 13 +++++++++++++ | 14 | hw/remote/vfio-user-obj.c | 190 ++++++++++++++++++++++++++++++++ |
15 | tests/qemu-iotests/common.filter | 1 + | 15 | softmmu/physmem.c | 4 +- |
16 | tests/qemu-iotests/common.rc | 19 +++++++++++++++++++ | 16 | tests/qtest/fuzz/generic_fuzz.c | 9 +- |
17 | 4 files changed, 39 insertions(+) | 17 | hw/remote/trace-events | 3 + |
18 | 5 files changed, 203 insertions(+), 6 deletions(-) | ||
18 | 19 | ||
19 | diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common | 20 | diff --git a/include/exec/memory.h b/include/exec/memory.h |
20 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/tests/qemu-iotests/common | 22 | --- a/include/exec/memory.h |
22 | +++ b/tests/qemu-iotests/common | 23 | +++ b/include/exec/memory.h |
23 | @@ -XXX,XX +XXX,XX @@ check options | 24 | @@ -XXX,XX +XXX,XX @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache, |
24 | -ssh test ssh | 25 | hwaddr addr, const void *buf, |
25 | -nfs test nfs | 26 | hwaddr len); |
26 | -luks test luks | 27 | |
27 | + -vxhs test vxhs | 28 | +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr); |
28 | -xdiff graphical mode diff | 29 | +bool prepare_mmio_access(MemoryRegion *mr); |
29 | -nocache use O_DIRECT on backing file | 30 | + |
30 | -misalign misalign memory allocations | 31 | static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) |
31 | @@ -XXX,XX +XXX,XX @@ testlist options | ||
32 | xpand=false | ||
33 | ;; | ||
34 | |||
35 | + -vxhs) | ||
36 | + IMGPROTO=vxhs | ||
37 | + xpand=false | ||
38 | + ;; | ||
39 | + | ||
40 | -ssh) | ||
41 | IMGPROTO=ssh | ||
42 | xpand=false | ||
43 | diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/tests/qemu-iotests/common.config | ||
46 | +++ b/tests/qemu-iotests/common.config | ||
47 | @@ -XXX,XX +XXX,XX @@ if [ -z "$QEMU_NBD_PROG" ]; then | ||
48 | export QEMU_NBD_PROG="`set_prog_path qemu-nbd`" | ||
49 | fi | ||
50 | |||
51 | +if [ -z "$QEMU_VXHS_PROG" ]; then | ||
52 | + export QEMU_VXHS_PROG="`set_prog_path qnio_server`" | ||
53 | +fi | ||
54 | + | ||
55 | _qemu_wrapper() | ||
56 | { | 32 | { |
57 | ( | 33 | if (is_write) { |
58 | @@ -XXX,XX +XXX,XX @@ _qemu_nbd_wrapper() | 34 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c |
59 | ) | 35 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/hw/remote/vfio-user-obj.c | ||
37 | +++ b/hw/remote/vfio-user-obj.c | ||
38 | @@ -XXX,XX +XXX,XX @@ | ||
39 | #include "hw/qdev-core.h" | ||
40 | #include "hw/pci/pci.h" | ||
41 | #include "qemu/timer.h" | ||
42 | +#include "exec/memory.h" | ||
43 | |||
44 | #define TYPE_VFU_OBJECT "x-vfio-user-server" | ||
45 | OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) | ||
46 | @@ -XXX,XX +XXX,XX @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) | ||
47 | trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); | ||
60 | } | 48 | } |
61 | 49 | ||
62 | +_qemu_vxhs_wrapper() | 50 | +static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, |
51 | + hwaddr size, const bool is_write) | ||
63 | +{ | 52 | +{ |
64 | + ( | 53 | + uint8_t *ptr = buf; |
65 | + echo $BASHPID > "${TEST_DIR}/qemu-vxhs.pid" | 54 | + bool release_lock = false; |
66 | + exec "$QEMU_VXHS_PROG" $QEMU_VXHS_OPTIONS "$@" | 55 | + uint8_t *ram_ptr = NULL; |
67 | + ) | 56 | + MemTxResult result; |
57 | + int access_size; | ||
58 | + uint64_t val; | ||
59 | + | ||
60 | + if (memory_access_is_direct(mr, is_write)) { | ||
61 | + /** | ||
62 | + * Some devices expose a PCI expansion ROM, which could be buffer | ||
63 | + * based as compared to other regions which are primarily based on | ||
64 | + * MemoryRegionOps. memory_region_find() would already check | ||
65 | + * for buffer overflow, so we don't need to repeat it here. | ||
66 | + */ | ||
67 | + ram_ptr = memory_region_get_ram_ptr(mr); | ||
68 | + | ||
69 | + if (is_write) { | ||
70 | + memcpy((ram_ptr + offset), buf, size); | ||
71 | + } else { | ||
72 | + memcpy(buf, (ram_ptr + offset), size); | ||
73 | + } | ||
74 | + | ||
75 | + return 0; | ||
76 | + } | ||
77 | + | ||
78 | + while (size) { | ||
79 | + /** | ||
80 | + * The read/write logic used below is similar to the ones in | ||
81 | + * flatview_read/write_continue() | ||
82 | + */ | ||
83 | + release_lock = prepare_mmio_access(mr); | ||
84 | + | ||
85 | + access_size = memory_access_size(mr, size, offset); | ||
86 | + | ||
87 | + if (is_write) { | ||
88 | + val = ldn_he_p(ptr, access_size); | ||
89 | + | ||
90 | + result = memory_region_dispatch_write(mr, offset, val, | ||
91 | + size_memop(access_size), | ||
92 | + MEMTXATTRS_UNSPECIFIED); | ||
93 | + } else { | ||
94 | + result = memory_region_dispatch_read(mr, offset, &val, | ||
95 | + size_memop(access_size), | ||
96 | + MEMTXATTRS_UNSPECIFIED); | ||
97 | + | ||
98 | + stn_he_p(ptr, access_size, val); | ||
99 | + } | ||
100 | + | ||
101 | + if (release_lock) { | ||
102 | + qemu_mutex_unlock_iothread(); | ||
103 | + release_lock = false; | ||
104 | + } | ||
105 | + | ||
106 | + if (result != MEMTX_OK) { | ||
107 | + return -1; | ||
108 | + } | ||
109 | + | ||
110 | + size -= access_size; | ||
111 | + ptr += access_size; | ||
112 | + offset += access_size; | ||
113 | + } | ||
114 | + | ||
115 | + return 0; | ||
68 | +} | 116 | +} |
69 | + | 117 | + |
70 | export QEMU=_qemu_wrapper | 118 | +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, |
71 | export QEMU_IMG=_qemu_img_wrapper | 119 | + hwaddr bar_offset, char * const buf, |
72 | export QEMU_IO=_qemu_io_wrapper | 120 | + hwaddr len, const bool is_write) |
73 | export QEMU_NBD=_qemu_nbd_wrapper | 121 | +{ |
74 | +export QEMU_VXHS=_qemu_vxhs_wrapper | 122 | + MemoryRegionSection section = { 0 }; |
75 | 123 | + uint8_t *ptr = (uint8_t *)buf; | |
76 | QEMU_IMG_EXTRA_ARGS= | 124 | + MemoryRegion *section_mr = NULL; |
77 | if [ "$IMGOPTSSYNTAX" = "true" ]; then | 125 | + uint64_t section_size; |
78 | diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter | 126 | + hwaddr section_offset; |
79 | index XXXXXXX..XXXXXXX 100644 | 127 | + hwaddr size = 0; |
80 | --- a/tests/qemu-iotests/common.filter | 128 | + |
81 | +++ b/tests/qemu-iotests/common.filter | 129 | + while (len) { |
82 | @@ -XXX,XX +XXX,XX @@ _filter_img_info() | 130 | + section = memory_region_find(pci_dev->io_regions[pci_bar].memory, |
83 | -e "s#$TEST_DIR#TEST_DIR#g" \ | 131 | + bar_offset, len); |
84 | -e "s#$IMGFMT#IMGFMT#g" \ | 132 | + |
85 | -e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \ | 133 | + if (!section.mr) { |
86 | + -e 's#json.*vdisk-id.*vxhs"}}#TEST_DIR/t.IMGFMT#' \ | 134 | + warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset); |
87 | -e "/encrypted: yes/d" \ | 135 | + return size; |
88 | -e "/cluster_size: [0-9]\\+/d" \ | 136 | + } |
89 | -e "/table_size: [0-9]\\+/d" \ | 137 | + |
90 | diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc | 138 | + section_mr = section.mr; |
91 | index XXXXXXX..XXXXXXX 100644 | 139 | + section_offset = section.offset_within_region; |
92 | --- a/tests/qemu-iotests/common.rc | 140 | + section_size = int128_get64(section.size); |
93 | +++ b/tests/qemu-iotests/common.rc | 141 | + |
94 | @@ -XXX,XX +XXX,XX @@ else | 142 | + if (is_write && section_mr->readonly) { |
95 | elif [ "$IMGPROTO" = "nfs" ]; then | 143 | + warn_report("vfu: attempting to write to readonly region in " |
96 | TEST_DIR="nfs://127.0.0.1/$TEST_DIR" | 144 | + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", |
97 | TEST_IMG=$TEST_DIR/t.$IMGFMT | 145 | + pci_bar, bar_offset, |
98 | + elif [ "$IMGPROTO" = "vxhs" ]; then | 146 | + (bar_offset + section_size)); |
99 | + TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT | 147 | + memory_region_unref(section_mr); |
100 | + TEST_IMG="vxhs://127.0.0.1:9999/t.$IMGFMT" | 148 | + return size; |
101 | else | 149 | + } |
102 | TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT | 150 | + |
103 | fi | 151 | + if (vfu_object_mr_rw(section_mr, ptr, section_offset, |
104 | @@ -XXX,XX +XXX,XX @@ _make_test_img() | 152 | + section_size, is_write)) { |
105 | eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE >/dev/null &" | 153 | + warn_report("vfu: failed to %s " |
106 | sleep 1 # FIXME: qemu-nbd needs to be listening before we continue | 154 | + "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d", |
107 | fi | 155 | + is_write ? "write to" : "read from", bar_offset, |
108 | + | 156 | + (bar_offset + section_size), pci_bar); |
109 | + # Start QNIO server on image directory for vxhs protocol | 157 | + memory_region_unref(section_mr); |
110 | + if [ $IMGPROTO = "vxhs" ]; then | 158 | + return size; |
111 | + eval "$QEMU_VXHS -d $TEST_DIR > /dev/null &" | 159 | + } |
112 | + sleep 1 # Wait for server to come up. | 160 | + |
113 | + fi | 161 | + size += section_size; |
162 | + bar_offset += section_size; | ||
163 | + ptr += section_size; | ||
164 | + len -= section_size; | ||
165 | + | ||
166 | + memory_region_unref(section_mr); | ||
167 | + } | ||
168 | + | ||
169 | + return size; | ||
170 | +} | ||
171 | + | ||
172 | +/** | ||
173 | + * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs. | ||
174 | + * | ||
175 | + * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would | ||
176 | + * define vfu_object_bar2_handler | ||
177 | + */ | ||
178 | +#define VFU_OBJECT_BAR_HANDLER(BAR_NO) \ | ||
179 | + static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \ | ||
180 | + char * const buf, size_t count, \ | ||
181 | + loff_t offset, const bool is_write) \ | ||
182 | + { \ | ||
183 | + VfuObject *o = vfu_get_private(vfu_ctx); \ | ||
184 | + PCIDevice *pci_dev = o->pci_dev; \ | ||
185 | + \ | ||
186 | + return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \ | ||
187 | + buf, count, is_write); \ | ||
188 | + } \ | ||
189 | + | ||
190 | +VFU_OBJECT_BAR_HANDLER(0) | ||
191 | +VFU_OBJECT_BAR_HANDLER(1) | ||
192 | +VFU_OBJECT_BAR_HANDLER(2) | ||
193 | +VFU_OBJECT_BAR_HANDLER(3) | ||
194 | +VFU_OBJECT_BAR_HANDLER(4) | ||
195 | +VFU_OBJECT_BAR_HANDLER(5) | ||
196 | +VFU_OBJECT_BAR_HANDLER(6) | ||
197 | + | ||
198 | +static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { | ||
199 | + &vfu_object_bar0_handler, | ||
200 | + &vfu_object_bar1_handler, | ||
201 | + &vfu_object_bar2_handler, | ||
202 | + &vfu_object_bar3_handler, | ||
203 | + &vfu_object_bar4_handler, | ||
204 | + &vfu_object_bar5_handler, | ||
205 | + &vfu_object_bar6_handler, | ||
206 | +}; | ||
207 | + | ||
208 | +/** | ||
209 | + * vfu_object_register_bars - Identify active BAR regions of pdev and setup | ||
210 | + * callbacks to handle read/write accesses | ||
211 | + */ | ||
212 | +static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) | ||
213 | +{ | ||
214 | + int flags = VFU_REGION_FLAG_RW; | ||
215 | + int i; | ||
216 | + | ||
217 | + for (i = 0; i < PCI_NUM_REGIONS; i++) { | ||
218 | + if (!pdev->io_regions[i].size) { | ||
219 | + continue; | ||
220 | + } | ||
221 | + | ||
222 | + if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || | ||
223 | + pdev->io_regions[i].memory->readonly) { | ||
224 | + flags &= ~VFU_REGION_FLAG_WRITE; | ||
225 | + } | ||
226 | + | ||
227 | + vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, | ||
228 | + (size_t)pdev->io_regions[i].size, | ||
229 | + vfu_object_bar_handlers[i], | ||
230 | + flags, NULL, 0, -1, 0); | ||
231 | + | ||
232 | + trace_vfu_bar_register(i, pdev->io_regions[i].addr, | ||
233 | + pdev->io_regions[i].size); | ||
234 | + } | ||
235 | +} | ||
236 | + | ||
237 | /* | ||
238 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' | ||
239 | * properties. It also depends on devices instantiated in QEMU. These | ||
240 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
241 | goto fail; | ||
242 | } | ||
243 | |||
244 | + vfu_object_register_bars(o->vfu_ctx, o->pci_dev); | ||
245 | + | ||
246 | ret = vfu_realize_ctx(o->vfu_ctx); | ||
247 | if (ret < 0) { | ||
248 | error_setg(errp, "vfu: Failed to realize device %s- %s", | ||
249 | diff --git a/softmmu/physmem.c b/softmmu/physmem.c | ||
250 | index XXXXXXX..XXXXXXX 100644 | ||
251 | --- a/softmmu/physmem.c | ||
252 | +++ b/softmmu/physmem.c | ||
253 | @@ -XXX,XX +XXX,XX @@ void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) | ||
254 | invalidate_and_set_dirty(mr, addr, size); | ||
114 | } | 255 | } |
115 | 256 | ||
116 | _rm_test_img() | 257 | -static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) |
117 | @@ -XXX,XX +XXX,XX @@ _cleanup_test_img() | 258 | +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) |
118 | fi | 259 | { |
119 | rm -f "$TEST_IMG_FILE" | 260 | unsigned access_size_max = mr->ops->valid.max_access_size; |
120 | ;; | 261 | |
121 | + vxhs) | 262 | @@ -XXX,XX +XXX,XX @@ static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) |
122 | + if [ -f "${TEST_DIR}/qemu-vxhs.pid" ]; then | 263 | return l; |
123 | + local QEMU_VXHS_PID | 264 | } |
124 | + read QEMU_VXHS_PID < "${TEST_DIR}/qemu-vxhs.pid" | 265 | |
125 | + kill ${QEMU_VXHS_PID} >/dev/null 2>&1 | 266 | -static bool prepare_mmio_access(MemoryRegion *mr) |
126 | + rm -f "${TEST_DIR}/qemu-vxhs.pid" | 267 | +bool prepare_mmio_access(MemoryRegion *mr) |
127 | + fi | 268 | { |
128 | + rm -f "$TEST_IMG_FILE" | 269 | bool release_lock = false; |
129 | + ;; | 270 | |
130 | + | 271 | diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c |
131 | file) | 272 | index XXXXXXX..XXXXXXX 100644 |
132 | _rm_test_img "$TEST_DIR/t.$IMGFMT" | 273 | --- a/tests/qtest/fuzz/generic_fuzz.c |
133 | _rm_test_img "$TEST_DIR/t.$IMGFMT.orig" | 274 | +++ b/tests/qtest/fuzz/generic_fuzz.c |
275 | @@ -XXX,XX +XXX,XX @@ static void *pattern_alloc(pattern p, size_t len) | ||
276 | return buf; | ||
277 | } | ||
278 | |||
279 | -static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) | ||
280 | +static int fuzz_memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) | ||
281 | { | ||
282 | unsigned access_size_max = mr->ops->valid.max_access_size; | ||
283 | |||
284 | @@ -XXX,XX +XXX,XX @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr) | ||
285 | |||
286 | /* | ||
287 | * If mr1 isn't RAM, address_space_translate doesn't update l. Use | ||
288 | - * memory_access_size to identify the number of bytes that it is safe | ||
289 | - * to write without accidentally writing to another MemoryRegion. | ||
290 | + * fuzz_memory_access_size to identify the number of bytes that it | ||
291 | + * is safe to write without accidentally writing to another | ||
292 | + * MemoryRegion. | ||
293 | */ | ||
294 | if (!memory_region_is_ram(mr1)) { | ||
295 | - l = memory_access_size(mr1, l, addr1); | ||
296 | + l = fuzz_memory_access_size(mr1, l, addr1); | ||
297 | } | ||
298 | if (memory_region_is_ram(mr1) || | ||
299 | memory_region_is_romd(mr1) || | ||
300 | diff --git a/hw/remote/trace-events b/hw/remote/trace-events | ||
301 | index XXXXXXX..XXXXXXX 100644 | ||
302 | --- a/hw/remote/trace-events | ||
303 | +++ b/hw/remote/trace-events | ||
304 | @@ -XXX,XX +XXX,XX @@ vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x" | ||
305 | vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x" | ||
306 | vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes" | ||
307 | vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" | ||
308 | +vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" | ||
309 | +vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" | ||
310 | +vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64"" | ||
134 | -- | 311 | -- |
135 | 2.9.3 | 312 | 2.36.1 |
136 | |||
137 | diff view generated by jsdifflib |
1 | A few block drivers will set the BDS read_only flag from their | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | .bdrv_open() function. This means the bs->read_only flag could | ||
3 | be set after we enable copy_on_read, as the BDRV_O_COPY_ON_READ | ||
4 | flag check occurs prior to the call to bdrv->bdrv_open(). | ||
5 | 2 | ||
6 | This adds an error return to bdrv_set_read_only(), and an error will be | 3 | Forward remote device's interrupts to the guest |
7 | returned if we try to set the BDS to read_only while copy_on_read is | ||
8 | enabled. | ||
9 | 4 | ||
10 | This patch also changes the behavior of vvfat. Before, vvfat could | 5 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> |
11 | override the drive 'readonly' flag with its own, internal 'rw' flag. | 6 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> |
7 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
8 | Message-id: 9523479eaafe050677f4de2af5dd0df18c27cfd9.1655151679.git.jag.raman@oracle.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | MAINTAINERS | 1 + | ||
12 | include/hw/pci/msi.h | 1 + | ||
13 | include/hw/pci/msix.h | 1 + | ||
14 | include/hw/pci/pci.h | 13 +++ | ||
15 | include/hw/remote/vfio-user-obj.h | 6 ++ | ||
16 | hw/pci/msi.c | 49 +++++++-- | ||
17 | hw/pci/msix.c | 35 ++++++- | ||
18 | hw/pci/pci.c | 13 +++ | ||
19 | hw/remote/machine.c | 16 ++- | ||
20 | hw/remote/vfio-user-obj.c | 167 ++++++++++++++++++++++++++++++ | ||
21 | stubs/vfio-user-obj.c | 6 ++ | ||
22 | hw/remote/trace-events | 1 + | ||
23 | stubs/meson.build | 1 + | ||
24 | 13 files changed, 298 insertions(+), 12 deletions(-) | ||
25 | create mode 100644 include/hw/remote/vfio-user-obj.h | ||
26 | create mode 100644 stubs/vfio-user-obj.c | ||
12 | 27 | ||
13 | For instance, this -drive parameter would result in a writable image: | 28 | diff --git a/MAINTAINERS b/MAINTAINERS |
14 | 29 | index XXXXXXX..XXXXXXX 100644 | |
15 | "-drive format=vvfat,dir=/tmp/vvfat,rw,if=virtio,readonly=on" | 30 | --- a/MAINTAINERS |
16 | 31 | +++ b/MAINTAINERS | |
17 | This is not correct. Now, attempting to use the above -drive parameter | 32 | @@ -XXX,XX +XXX,XX @@ F: hw/remote/iohub.c |
18 | will result in an error (i.e., 'rw' is incompatible with 'readonly=on'). | 33 | F: include/hw/remote/iohub.h |
19 | 34 | F: subprojects/libvfio-user | |
20 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 35 | F: hw/remote/vfio-user-obj.c |
21 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 36 | +F: include/hw/remote/vfio-user-obj.h |
22 | Reviewed-by: John Snow <jsnow@redhat.com> | 37 | F: hw/remote/iommu.c |
23 | Message-id: 0c5b4c1cc2c651471b131f21376dfd5ea24d2196.1491597120.git.jcody@redhat.com | 38 | F: include/hw/remote/iommu.h |
24 | --- | 39 | |
25 | block.c | 10 +++++++++- | 40 | diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h |
26 | block/bochs.c | 5 ++++- | 41 | index XXXXXXX..XXXXXXX 100644 |
27 | block/cloop.c | 5 ++++- | 42 | --- a/include/hw/pci/msi.h |
28 | block/dmg.c | 6 +++++- | 43 | +++ b/include/hw/pci/msi.h |
29 | block/rbd.c | 11 ++++++++++- | 44 | @@ -XXX,XX +XXX,XX @@ void msi_notify(PCIDevice *dev, unsigned int vector); |
30 | block/vvfat.c | 19 +++++++++++++++---- | 45 | void msi_send_message(PCIDevice *dev, MSIMessage msg); |
31 | include/block/block.h | 2 +- | 46 | void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len); |
32 | 7 files changed, 48 insertions(+), 10 deletions(-) | 47 | unsigned int msi_nr_vectors_allocated(const PCIDevice *dev); |
33 | 48 | +void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp); | |
34 | diff --git a/block.c b/block.c | 49 | |
35 | index XXXXXXX..XXXXXXX 100644 | 50 | static inline bool msi_present(const PCIDevice *dev) |
36 | --- a/block.c | 51 | { |
37 | +++ b/block.c | 52 | diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h |
38 | @@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size, | 53 | index XXXXXXX..XXXXXXX 100644 |
39 | } | 54 | --- a/include/hw/pci/msix.h |
40 | } | 55 | +++ b/include/hw/pci/msix.h |
41 | 56 | @@ -XXX,XX +XXX,XX @@ void msix_clr_pending(PCIDevice *dev, int vector); | |
42 | -void bdrv_set_read_only(BlockDriverState *bs, bool read_only) | 57 | int msix_vector_use(PCIDevice *dev, unsigned vector); |
43 | +int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 58 | void msix_vector_unuse(PCIDevice *dev, unsigned vector); |
44 | { | 59 | void msix_unuse_all_vectors(PCIDevice *dev); |
45 | + /* Do not set read_only if copy_on_read is enabled */ | 60 | +void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp); |
46 | + if (bs->copy_on_read && read_only) { | 61 | |
47 | + error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", | 62 | void msix_notify(PCIDevice *dev, unsigned vector); |
48 | + bdrv_get_device_or_node_name(bs)); | 63 | |
49 | + return -EINVAL; | 64 | diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h |
50 | + } | 65 | index XXXXXXX..XXXXXXX 100644 |
51 | + | 66 | --- a/include/hw/pci/pci.h |
52 | bs->read_only = read_only; | 67 | +++ b/include/hw/pci/pci.h |
53 | + return 0; | 68 | @@ -XXX,XX +XXX,XX @@ extern bool pci_available; |
54 | } | 69 | #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) |
55 | 70 | #define PCI_FUNC(devfn) ((devfn) & 0x07) | |
56 | void bdrv_get_full_backing_filename_from_filename(const char *backed, | 71 | #define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) |
57 | diff --git a/block/bochs.c b/block/bochs.c | 72 | +#define PCI_BDF_TO_DEVFN(x) ((x) & 0xff) |
58 | index XXXXXXX..XXXXXXX 100644 | 73 | #define PCI_BUS_MAX 256 |
59 | --- a/block/bochs.c | 74 | #define PCI_DEVFN_MAX 256 |
60 | +++ b/block/bochs.c | 75 | #define PCI_SLOT_MAX 32 |
61 | @@ -XXX,XX +XXX,XX @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, | 76 | @@ -XXX,XX +XXX,XX @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, |
62 | return -EINVAL; | 77 | pcibus_t addr, pcibus_t size, int type); |
63 | } | 78 | typedef void PCIUnregisterFunc(PCIDevice *pci_dev); |
64 | 79 | ||
65 | - bdrv_set_read_only(bs, true); /* no write support yet */ | 80 | +typedef void MSITriggerFunc(PCIDevice *dev, MSIMessage msg); |
66 | + ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */ | 81 | +typedef MSIMessage MSIPrepareMessageFunc(PCIDevice *dev, unsigned vector); |
82 | +typedef MSIMessage MSIxPrepareMessageFunc(PCIDevice *dev, unsigned vector); | ||
83 | + | ||
84 | typedef struct PCIIORegion { | ||
85 | pcibus_t addr; /* current PCI mapping address. -1 means not mapped */ | ||
86 | #define PCI_BAR_UNMAPPED (~(pcibus_t)0) | ||
87 | @@ -XXX,XX +XXX,XX @@ struct PCIDevice { | ||
88 | /* Space to store MSIX table & pending bit array */ | ||
89 | uint8_t *msix_table; | ||
90 | uint8_t *msix_pba; | ||
91 | + | ||
92 | + /* May be used by INTx or MSI during interrupt notification */ | ||
93 | + void *irq_opaque; | ||
94 | + | ||
95 | + MSITriggerFunc *msi_trigger; | ||
96 | + MSIPrepareMessageFunc *msi_prepare_message; | ||
97 | + MSIxPrepareMessageFunc *msix_prepare_message; | ||
98 | + | ||
99 | /* MemoryRegion container for msix exclusive BAR setup */ | ||
100 | MemoryRegion msix_exclusive_bar; | ||
101 | /* Memory Regions for MSIX table and pending bit entries. */ | ||
102 | diff --git a/include/hw/remote/vfio-user-obj.h b/include/hw/remote/vfio-user-obj.h | ||
103 | new file mode 100644 | ||
104 | index XXXXXXX..XXXXXXX | ||
105 | --- /dev/null | ||
106 | +++ b/include/hw/remote/vfio-user-obj.h | ||
107 | @@ -XXX,XX +XXX,XX @@ | ||
108 | +#ifndef VFIO_USER_OBJ_H | ||
109 | +#define VFIO_USER_OBJ_H | ||
110 | + | ||
111 | +void vfu_object_set_bus_irq(PCIBus *pci_bus); | ||
112 | + | ||
113 | +#endif | ||
114 | diff --git a/hw/pci/msi.c b/hw/pci/msi.c | ||
115 | index XXXXXXX..XXXXXXX 100644 | ||
116 | --- a/hw/pci/msi.c | ||
117 | +++ b/hw/pci/msi.c | ||
118 | @@ -XXX,XX +XXX,XX @@ void msi_set_message(PCIDevice *dev, MSIMessage msg) | ||
119 | pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data); | ||
120 | } | ||
121 | |||
122 | -MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) | ||
123 | +static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector) | ||
124 | { | ||
125 | uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); | ||
126 | bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; | ||
127 | @@ -XXX,XX +XXX,XX @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) | ||
128 | return msg; | ||
129 | } | ||
130 | |||
131 | +MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) | ||
132 | +{ | ||
133 | + return dev->msi_prepare_message(dev, vector); | ||
134 | +} | ||
135 | + | ||
136 | bool msi_enabled(const PCIDevice *dev) | ||
137 | { | ||
138 | return msi_present(dev) && | ||
139 | @@ -XXX,XX +XXX,XX @@ int msi_init(struct PCIDevice *dev, uint8_t offset, | ||
140 | 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); | ||
141 | } | ||
142 | |||
143 | + dev->msi_prepare_message = msi_prepare_message; | ||
144 | + | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | @@ -XXX,XX +XXX,XX @@ void msi_uninit(struct PCIDevice *dev) | ||
149 | cap_size = msi_cap_sizeof(flags); | ||
150 | pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); | ||
151 | dev->cap_present &= ~QEMU_PCI_CAP_MSI; | ||
152 | + dev->msi_prepare_message = NULL; | ||
153 | |||
154 | MSI_DEV_PRINTF(dev, "uninit\n"); | ||
155 | } | ||
156 | @@ -XXX,XX +XXX,XX @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector) | ||
157 | return mask & (1U << vector); | ||
158 | } | ||
159 | |||
160 | +void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) | ||
161 | +{ | ||
162 | + ERRP_GUARD(); | ||
163 | + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); | ||
164 | + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; | ||
165 | + uint32_t irq_state, vector_mask, pending; | ||
166 | + | ||
167 | + if (vector > PCI_MSI_VECTORS_MAX) { | ||
168 | + error_setg(errp, "msi: vector %d not allocated. max vector is %d", | ||
169 | + vector, PCI_MSI_VECTORS_MAX); | ||
170 | + return; | ||
171 | + } | ||
172 | + | ||
173 | + vector_mask = (1U << vector); | ||
174 | + | ||
175 | + irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit)); | ||
176 | + | ||
177 | + if (mask) { | ||
178 | + irq_state |= vector_mask; | ||
179 | + } else { | ||
180 | + irq_state &= ~vector_mask; | ||
181 | + } | ||
182 | + | ||
183 | + pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state); | ||
184 | + | ||
185 | + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); | ||
186 | + if (!mask && (pending & vector_mask)) { | ||
187 | + pending &= ~vector_mask; | ||
188 | + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); | ||
189 | + msi_notify(dev, vector); | ||
190 | + } | ||
191 | +} | ||
192 | + | ||
193 | void msi_notify(PCIDevice *dev, unsigned int vector) | ||
194 | { | ||
195 | uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); | ||
196 | @@ -XXX,XX +XXX,XX @@ void msi_notify(PCIDevice *dev, unsigned int vector) | ||
197 | |||
198 | void msi_send_message(PCIDevice *dev, MSIMessage msg) | ||
199 | { | ||
200 | - MemTxAttrs attrs = {}; | ||
201 | - | ||
202 | - attrs.requester_id = pci_requester_id(dev); | ||
203 | - address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, | ||
204 | - attrs, NULL); | ||
205 | + dev->msi_trigger(dev, msg); | ||
206 | } | ||
207 | |||
208 | /* Normally called by pci_default_write_config(). */ | ||
209 | diff --git a/hw/pci/msix.c b/hw/pci/msix.c | ||
210 | index XXXXXXX..XXXXXXX 100644 | ||
211 | --- a/hw/pci/msix.c | ||
212 | +++ b/hw/pci/msix.c | ||
213 | @@ -XXX,XX +XXX,XX @@ | ||
214 | #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) | ||
215 | #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) | ||
216 | |||
217 | -MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) | ||
218 | +static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector) | ||
219 | { | ||
220 | uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; | ||
221 | MSIMessage msg; | ||
222 | @@ -XXX,XX +XXX,XX @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) | ||
223 | return msg; | ||
224 | } | ||
225 | |||
226 | +MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) | ||
227 | +{ | ||
228 | + return dev->msix_prepare_message(dev, vector); | ||
229 | +} | ||
230 | + | ||
231 | /* | ||
232 | * Special API for POWER to configure the vectors through | ||
233 | * a side channel. Should never be used by devices. | ||
234 | @@ -XXX,XX +XXX,XX @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) | ||
235 | } | ||
236 | } | ||
237 | |||
238 | +void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) | ||
239 | +{ | ||
240 | + ERRP_GUARD(); | ||
241 | + unsigned offset; | ||
242 | + bool was_masked; | ||
243 | + | ||
244 | + if (vector > dev->msix_entries_nr) { | ||
245 | + error_setg(errp, "msix: vector %d not allocated. max vector is %d", | ||
246 | + vector, dev->msix_entries_nr); | ||
247 | + return; | ||
248 | + } | ||
249 | + | ||
250 | + offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; | ||
251 | + | ||
252 | + was_masked = msix_is_masked(dev, vector); | ||
253 | + | ||
254 | + if (mask) { | ||
255 | + dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; | ||
256 | + } else { | ||
257 | + dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; | ||
258 | + } | ||
259 | + | ||
260 | + msix_handle_mask_update(dev, vector, was_masked); | ||
261 | +} | ||
262 | + | ||
263 | static bool msix_masked(PCIDevice *dev) | ||
264 | { | ||
265 | return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK; | ||
266 | @@ -XXX,XX +XXX,XX @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, | ||
267 | "msix-pba", pba_size); | ||
268 | memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); | ||
269 | |||
270 | + dev->msix_prepare_message = msix_prepare_message; | ||
271 | + | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | @@ -XXX,XX +XXX,XX @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) | ||
276 | g_free(dev->msix_entry_used); | ||
277 | dev->msix_entry_used = NULL; | ||
278 | dev->cap_present &= ~QEMU_PCI_CAP_MSIX; | ||
279 | + dev->msix_prepare_message = NULL; | ||
280 | } | ||
281 | |||
282 | void msix_uninit_exclusive_bar(PCIDevice *dev) | ||
283 | diff --git a/hw/pci/pci.c b/hw/pci/pci.c | ||
284 | index XXXXXXX..XXXXXXX 100644 | ||
285 | --- a/hw/pci/pci.c | ||
286 | +++ b/hw/pci/pci.c | ||
287 | @@ -XXX,XX +XXX,XX @@ void pci_device_deassert_intx(PCIDevice *dev) | ||
288 | } | ||
289 | } | ||
290 | |||
291 | +static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) | ||
292 | +{ | ||
293 | + MemTxAttrs attrs = {}; | ||
294 | + | ||
295 | + attrs.requester_id = pci_requester_id(dev); | ||
296 | + address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, | ||
297 | + attrs, NULL); | ||
298 | +} | ||
299 | + | ||
300 | static void pci_reset_regions(PCIDevice *dev) | ||
301 | { | ||
302 | int r; | ||
303 | @@ -XXX,XX +XXX,XX @@ static void pci_qdev_unrealize(DeviceState *dev) | ||
304 | |||
305 | pci_device_deassert_intx(pci_dev); | ||
306 | do_pci_unregister_device(pci_dev); | ||
307 | + | ||
308 | + pci_dev->msi_trigger = NULL; | ||
309 | } | ||
310 | |||
311 | void pci_register_bar(PCIDevice *pci_dev, int region_num, | ||
312 | @@ -XXX,XX +XXX,XX @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) | ||
313 | } | ||
314 | |||
315 | pci_set_power(pci_dev, true); | ||
316 | + | ||
317 | + pci_dev->msi_trigger = pci_msi_trigger; | ||
318 | } | ||
319 | |||
320 | PCIDevice *pci_new_multifunction(int devfn, bool multifunction, | ||
321 | diff --git a/hw/remote/machine.c b/hw/remote/machine.c | ||
322 | index XXXXXXX..XXXXXXX 100644 | ||
323 | --- a/hw/remote/machine.c | ||
324 | +++ b/hw/remote/machine.c | ||
325 | @@ -XXX,XX +XXX,XX @@ | ||
326 | #include "hw/remote/iommu.h" | ||
327 | #include "hw/qdev-core.h" | ||
328 | #include "hw/remote/iommu.h" | ||
329 | +#include "hw/remote/vfio-user-obj.h" | ||
330 | +#include "hw/pci/msi.h" | ||
331 | |||
332 | static void remote_machine_init(MachineState *machine) | ||
333 | { | ||
334 | @@ -XXX,XX +XXX,XX @@ static void remote_machine_init(MachineState *machine) | ||
335 | |||
336 | if (s->vfio_user) { | ||
337 | remote_iommu_setup(pci_host->bus); | ||
338 | + | ||
339 | + msi_nonbroken = true; | ||
340 | + | ||
341 | + vfu_object_set_bus_irq(pci_host->bus); | ||
342 | + } else { | ||
343 | + remote_iohub_init(&s->iohub); | ||
344 | + | ||
345 | + pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, | ||
346 | + &s->iohub, REMOTE_IOHUB_NB_PIRQS); | ||
347 | } | ||
348 | |||
349 | - remote_iohub_init(&s->iohub); | ||
350 | - | ||
351 | - pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, | ||
352 | - &s->iohub, REMOTE_IOHUB_NB_PIRQS); | ||
353 | - | ||
354 | qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s)); | ||
355 | } | ||
356 | |||
357 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c | ||
358 | index XXXXXXX..XXXXXXX 100644 | ||
359 | --- a/hw/remote/vfio-user-obj.c | ||
360 | +++ b/hw/remote/vfio-user-obj.c | ||
361 | @@ -XXX,XX +XXX,XX @@ | ||
362 | #include "hw/pci/pci.h" | ||
363 | #include "qemu/timer.h" | ||
364 | #include "exec/memory.h" | ||
365 | +#include "hw/pci/msi.h" | ||
366 | +#include "hw/pci/msix.h" | ||
367 | +#include "hw/remote/vfio-user-obj.h" | ||
368 | |||
369 | #define TYPE_VFU_OBJECT "x-vfio-user-server" | ||
370 | OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) | ||
371 | @@ -XXX,XX +XXX,XX @@ struct VfuObject { | ||
372 | Error *unplug_blocker; | ||
373 | |||
374 | int vfu_poll_fd; | ||
375 | + | ||
376 | + MSITriggerFunc *default_msi_trigger; | ||
377 | + MSIPrepareMessageFunc *default_msi_prepare_message; | ||
378 | + MSIxPrepareMessageFunc *default_msix_prepare_message; | ||
379 | }; | ||
380 | |||
381 | static void vfu_object_init_ctx(VfuObject *o, Error **errp); | ||
382 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) | ||
383 | } | ||
384 | } | ||
385 | |||
386 | +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) | ||
387 | +{ | ||
388 | + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), | ||
389 | + pci_dev->devfn); | ||
390 | + | ||
391 | + return pci_bdf; | ||
392 | +} | ||
393 | + | ||
394 | +static void vfu_object_set_irq(void *opaque, int pirq, int level) | ||
395 | +{ | ||
396 | + PCIBus *pci_bus = opaque; | ||
397 | + PCIDevice *pci_dev = NULL; | ||
398 | + vfu_ctx_t *vfu_ctx = NULL; | ||
399 | + int pci_bus_num, devfn; | ||
400 | + | ||
401 | + if (level) { | ||
402 | + pci_bus_num = PCI_BUS_NUM(pirq); | ||
403 | + devfn = PCI_BDF_TO_DEVFN(pirq); | ||
404 | + | ||
405 | + /* | ||
406 | + * pci_find_device() performs at O(1) if the device is attached | ||
407 | + * to the root PCI bus. Whereas, if the device is attached to a | ||
408 | + * secondary PCI bus (such as when a root port is involved), | ||
409 | + * finding the parent PCI bus could take O(n) | ||
410 | + */ | ||
411 | + pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn); | ||
412 | + | ||
413 | + vfu_ctx = pci_dev->irq_opaque; | ||
414 | + | ||
415 | + g_assert(vfu_ctx); | ||
416 | + | ||
417 | + vfu_irq_trigger(vfu_ctx, 0); | ||
418 | + } | ||
419 | +} | ||
420 | + | ||
421 | +static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev, | ||
422 | + unsigned int vector) | ||
423 | +{ | ||
424 | + MSIMessage msg; | ||
425 | + | ||
426 | + msg.address = 0; | ||
427 | + msg.data = vector; | ||
428 | + | ||
429 | + return msg; | ||
430 | +} | ||
431 | + | ||
432 | +static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg) | ||
433 | +{ | ||
434 | + vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque; | ||
435 | + | ||
436 | + vfu_irq_trigger(vfu_ctx, msg.data); | ||
437 | +} | ||
438 | + | ||
439 | +static void vfu_object_setup_msi_cbs(VfuObject *o) | ||
440 | +{ | ||
441 | + o->default_msi_trigger = o->pci_dev->msi_trigger; | ||
442 | + o->default_msi_prepare_message = o->pci_dev->msi_prepare_message; | ||
443 | + o->default_msix_prepare_message = o->pci_dev->msix_prepare_message; | ||
444 | + | ||
445 | + o->pci_dev->msi_trigger = vfu_object_msi_trigger; | ||
446 | + o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg; | ||
447 | + o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg; | ||
448 | +} | ||
449 | + | ||
450 | +static void vfu_object_restore_msi_cbs(VfuObject *o) | ||
451 | +{ | ||
452 | + o->pci_dev->msi_trigger = o->default_msi_trigger; | ||
453 | + o->pci_dev->msi_prepare_message = o->default_msi_prepare_message; | ||
454 | + o->pci_dev->msix_prepare_message = o->default_msix_prepare_message; | ||
455 | +} | ||
456 | + | ||
457 | +static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, | ||
458 | + uint32_t count, bool mask) | ||
459 | +{ | ||
460 | + VfuObject *o = vfu_get_private(vfu_ctx); | ||
461 | + Error *err = NULL; | ||
462 | + uint32_t vector; | ||
463 | + | ||
464 | + for (vector = start; vector < count; vector++) { | ||
465 | + msix_set_mask(o->pci_dev, vector, mask, &err); | ||
466 | + if (err) { | ||
467 | + VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, | ||
468 | + error_get_pretty(err)); | ||
469 | + error_free(err); | ||
470 | + err = NULL; | ||
471 | + } | ||
472 | + } | ||
473 | +} | ||
474 | + | ||
475 | +static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, | ||
476 | + uint32_t count, bool mask) | ||
477 | +{ | ||
478 | + VfuObject *o = vfu_get_private(vfu_ctx); | ||
479 | + Error *err = NULL; | ||
480 | + uint32_t vector; | ||
481 | + | ||
482 | + for (vector = start; vector < count; vector++) { | ||
483 | + msi_set_mask(o->pci_dev, vector, mask, &err); | ||
484 | + if (err) { | ||
485 | + VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, | ||
486 | + error_get_pretty(err)); | ||
487 | + error_free(err); | ||
488 | + err = NULL; | ||
489 | + } | ||
490 | + } | ||
491 | +} | ||
492 | + | ||
493 | +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) | ||
494 | +{ | ||
495 | + vfu_ctx_t *vfu_ctx = o->vfu_ctx; | ||
496 | + int ret; | ||
497 | + | ||
498 | + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); | ||
67 | + if (ret < 0) { | 499 | + if (ret < 0) { |
68 | + return ret; | 500 | + return ret; |
69 | + } | 501 | + } |
70 | 502 | + | |
71 | ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); | 503 | + if (msix_nr_vectors_allocated(pci_dev)) { |
72 | if (ret < 0) { | 504 | + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, |
73 | diff --git a/block/cloop.c b/block/cloop.c | 505 | + msix_nr_vectors_allocated(pci_dev)); |
74 | index XXXXXXX..XXXXXXX 100644 | 506 | + vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ, |
75 | --- a/block/cloop.c | 507 | + &vfu_msix_irq_state); |
76 | +++ b/block/cloop.c | 508 | + } else if (msi_nr_vectors_allocated(pci_dev)) { |
77 | @@ -XXX,XX +XXX,XX @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, | 509 | + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, |
78 | return -EINVAL; | 510 | + msi_nr_vectors_allocated(pci_dev)); |
79 | } | 511 | + vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ, |
80 | 512 | + &vfu_msi_irq_state); | |
81 | - bdrv_set_read_only(bs, true); | 513 | + } |
82 | + ret = bdrv_set_read_only(bs, true, errp); | 514 | + |
83 | + if (ret < 0) { | 515 | + if (ret < 0) { |
84 | + return ret; | 516 | + return ret; |
85 | + } | 517 | + } |
86 | 518 | + | |
87 | /* read header */ | 519 | + vfu_object_setup_msi_cbs(o); |
88 | ret = bdrv_pread(bs->file, 128, &s->block_size, 4); | 520 | + |
89 | diff --git a/block/dmg.c b/block/dmg.c | 521 | + pci_dev->irq_opaque = vfu_ctx; |
90 | index XXXXXXX..XXXXXXX 100644 | 522 | + |
91 | --- a/block/dmg.c | 523 | + return 0; |
92 | +++ b/block/dmg.c | 524 | +} |
93 | @@ -XXX,XX +XXX,XX @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, | 525 | + |
94 | return -EINVAL; | 526 | +void vfu_object_set_bus_irq(PCIBus *pci_bus) |
95 | } | 527 | +{ |
96 | 528 | + int bus_num = pci_bus_num(pci_bus); | |
97 | + ret = bdrv_set_read_only(bs, true, errp); | 529 | + int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1); |
530 | + | ||
531 | + pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus, | ||
532 | + max_bdf); | ||
533 | +} | ||
534 | + | ||
535 | /* | ||
536 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' | ||
537 | * properties. It also depends on devices instantiated in QEMU. These | ||
538 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) | ||
539 | |||
540 | vfu_object_register_bars(o->vfu_ctx, o->pci_dev); | ||
541 | |||
542 | + ret = vfu_object_setup_irqs(o, o->pci_dev); | ||
98 | + if (ret < 0) { | 543 | + if (ret < 0) { |
99 | + return ret; | 544 | + error_setg(errp, "vfu: Failed to setup interrupts for %s", |
100 | + } | 545 | + o->device); |
101 | + | 546 | + goto fail; |
102 | block_module_load_one("dmg-bz2"); | 547 | + } |
103 | - bdrv_set_read_only(bs, true); | 548 | + |
104 | 549 | ret = vfu_realize_ctx(o->vfu_ctx); | |
105 | s->n_chunks = 0; | 550 | if (ret < 0) { |
106 | s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; | 551 | error_setg(errp, "vfu: Failed to realize device %s- %s", |
107 | diff --git a/block/rbd.c b/block/rbd.c | 552 | @@ -XXX,XX +XXX,XX @@ fail: |
108 | index XXXXXXX..XXXXXXX 100644 | 553 | o->unplug_blocker = NULL; |
109 | --- a/block/rbd.c | 554 | } |
110 | +++ b/block/rbd.c | 555 | if (o->pci_dev) { |
111 | @@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, | 556 | + vfu_object_restore_msi_cbs(o); |
112 | goto failed_shutdown; | 557 | + o->pci_dev->irq_opaque = NULL; |
113 | } | 558 | object_unref(OBJECT(o->pci_dev)); |
114 | 559 | o->pci_dev = NULL; | |
115 | + /* rbd_open is always r/w */ | 560 | } |
116 | r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); | 561 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_finalize(Object *obj) |
117 | if (r < 0) { | 562 | } |
118 | error_setg_errno(errp, -r, "error reading header from %s", s->name); | 563 | |
119 | goto failed_open; | 564 | if (o->pci_dev) { |
120 | } | 565 | + vfu_object_restore_msi_cbs(o); |
121 | 566 | + o->pci_dev->irq_opaque = NULL; | |
122 | - bdrv_set_read_only(bs, (s->snap != NULL)); | 567 | object_unref(OBJECT(o->pci_dev)); |
123 | + /* If we are using an rbd snapshot, we must be r/o, otherwise | 568 | o->pci_dev = NULL; |
124 | + * leave as-is */ | 569 | } |
125 | + if (s->snap != NULL) { | 570 | diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c |
126 | + r = bdrv_set_read_only(bs, true, &local_err); | 571 | new file mode 100644 |
127 | + if (r < 0) { | 572 | index XXXXXXX..XXXXXXX |
128 | + error_propagate(errp, local_err); | 573 | --- /dev/null |
129 | + goto failed_open; | 574 | +++ b/stubs/vfio-user-obj.c |
130 | + } | 575 | @@ -XXX,XX +XXX,XX @@ |
131 | + } | 576 | +#include "qemu/osdep.h" |
132 | 577 | +#include "hw/remote/vfio-user-obj.h" | |
133 | qemu_opts_del(opts); | 578 | + |
134 | return 0; | 579 | +void vfu_object_set_bus_irq(PCIBus *pci_bus) |
135 | diff --git a/block/vvfat.c b/block/vvfat.c | 580 | +{ |
136 | index XXXXXXX..XXXXXXX 100644 | 581 | +} |
137 | --- a/block/vvfat.c | 582 | diff --git a/hw/remote/trace-events b/hw/remote/trace-events |
138 | +++ b/block/vvfat.c | 583 | index XXXXXXX..XXXXXXX 100644 |
139 | @@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, | 584 | --- a/hw/remote/trace-events |
140 | 585 | +++ b/hw/remote/trace-events | |
141 | s->current_cluster=0xffffffff; | 586 | @@ -XXX,XX +XXX,XX @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" |
142 | 587 | vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" | |
143 | - /* read only is the default for safety */ | 588 | vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" |
144 | - bdrv_set_read_only(bs, true); | 589 | vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64"" |
145 | s->qcow = NULL; | 590 | +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d" |
146 | s->qcow_filename = NULL; | 591 | diff --git a/stubs/meson.build b/stubs/meson.build |
147 | s->fat2 = NULL; | 592 | index XXXXXXX..XXXXXXX 100644 |
148 | @@ -XXX,XX +XXX,XX @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, | 593 | --- a/stubs/meson.build |
149 | s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1); | 594 | +++ b/stubs/meson.build |
150 | 595 | @@ -XXX,XX +XXX,XX @@ if have_system | |
151 | if (qemu_opt_get_bool(opts, "rw", false)) { | 596 | else |
152 | - ret = enable_write_target(bs, errp); | 597 | stub_ss.add(files('qdev.c')) |
153 | + if (!bdrv_is_read_only(bs)) { | 598 | endif |
154 | + ret = enable_write_target(bs, errp); | 599 | +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c')) |
155 | + if (ret < 0) { | ||
156 | + goto fail; | ||
157 | + } | ||
158 | + } else { | ||
159 | + ret = -EPERM; | ||
160 | + error_setg(errp, | ||
161 | + "Unable to set VVFAT to 'rw' when drive is read-only"); | ||
162 | + goto fail; | ||
163 | + } | ||
164 | + } else { | ||
165 | + /* read only is the default for safety */ | ||
166 | + ret = bdrv_set_read_only(bs, true, &local_err); | ||
167 | if (ret < 0) { | ||
168 | + error_propagate(errp, local_err); | ||
169 | goto fail; | ||
170 | } | ||
171 | - bdrv_set_read_only(bs, false); | ||
172 | } | ||
173 | |||
174 | bs->total_sectors = cyls * heads * secs; | ||
175 | diff --git a/include/block/block.h b/include/block/block.h | ||
176 | index XXXXXXX..XXXXXXX 100644 | ||
177 | --- a/include/block/block.h | ||
178 | +++ b/include/block/block.h | ||
179 | @@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, | ||
180 | int64_t sector_num, int nb_sectors, int *pnum); | ||
181 | |||
182 | bool bdrv_is_read_only(BlockDriverState *bs); | ||
183 | -void bdrv_set_read_only(BlockDriverState *bs, bool read_only); | ||
184 | +int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); | ||
185 | bool bdrv_is_sg(BlockDriverState *bs); | ||
186 | bool bdrv_is_inserted(BlockDriverState *bs); | ||
187 | int bdrv_media_changed(BlockDriverState *bs); | ||
188 | -- | 600 | -- |
189 | 2.9.3 | 601 | 2.36.1 |
190 | |||
191 | diff view generated by jsdifflib |
1 | Introduce check function for setting read_only flags. Will return < 0 on | 1 | From: Jagannathan Raman <jag.raman@oracle.com> |
---|---|---|---|
2 | error, with appropriate Error value set. Does not alter any flags. | ||
3 | 2 | ||
4 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 3 | Add a handler to reset a remote device. |
4 | |||
5 | Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> | ||
6 | Signed-off-by: John G Johnson <john.g.johnson@oracle.com> | ||
7 | Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> | ||
5 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | 8 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> |
6 | Reviewed-by: John Snow <jsnow@redhat.com> | 9 | Message-id: 112eeadf3bc4c6cdb100bc3f9a6fcfc20b467c1b.1655151679.git.jag.raman@oracle.com |
7 | Message-id: e2bba34ac3bc76a0c42adc390413f358ae0566e8.1491597120.git.jcody@redhat.com | 10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
8 | --- | 11 | --- |
9 | block.c | 14 +++++++++++++- | 12 | hw/remote/vfio-user-obj.c | 20 ++++++++++++++++++++ |
10 | include/block/block.h | 1 + | 13 | 1 file changed, 20 insertions(+) |
11 | 2 files changed, 14 insertions(+), 1 deletion(-) | ||
12 | 14 | ||
13 | diff --git a/block.c b/block.c | 15 | diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/block.c | 17 | --- a/hw/remote/vfio-user-obj.c |
16 | +++ b/block.c | 18 | +++ b/hw/remote/vfio-user-obj.c |
17 | @@ -XXX,XX +XXX,XX @@ bool bdrv_is_read_only(BlockDriverState *bs) | 19 | @@ -XXX,XX +XXX,XX @@ void vfu_object_set_bus_irq(PCIBus *pci_bus) |
18 | return bs->read_only; | 20 | max_bdf); |
19 | } | 21 | } |
20 | 22 | ||
21 | -int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 23 | +static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type) |
22 | +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 24 | +{ |
23 | { | 25 | + VfuObject *o = vfu_get_private(vfu_ctx); |
24 | /* Do not set read_only if copy_on_read is enabled */ | 26 | + |
25 | if (bs->copy_on_read && read_only) { | 27 | + /* vfu_object_ctx_run() handles lost connection */ |
26 | @@ -XXX,XX +XXX,XX @@ int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 28 | + if (type == VFU_RESET_LOST_CONN) { |
27 | return -EPERM; | 29 | + return 0; |
28 | } | 30 | + } |
29 | 31 | + | |
32 | + qdev_reset_all(DEVICE(o->pci_dev)); | ||
33 | + | ||
30 | + return 0; | 34 | + return 0; |
31 | +} | 35 | +} |
32 | + | 36 | + |
33 | +int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) | 37 | /* |
34 | +{ | 38 | * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' |
35 | + int ret = 0; | 39 | * properties. It also depends on devices instantiated in QEMU. These |
36 | + | 40 | @@ -XXX,XX +XXX,XX @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) |
37 | + ret = bdrv_can_set_read_only(bs, read_only, errp); | 41 | goto fail; |
42 | } | ||
43 | |||
44 | + ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset); | ||
38 | + if (ret < 0) { | 45 | + if (ret < 0) { |
39 | + return ret; | 46 | + error_setg(errp, "vfu: Failed to setup reset callback"); |
47 | + goto fail; | ||
40 | + } | 48 | + } |
41 | + | 49 | + |
42 | bs->read_only = read_only; | 50 | ret = vfu_realize_ctx(o->vfu_ctx); |
43 | return 0; | 51 | if (ret < 0) { |
44 | } | 52 | error_setg(errp, "vfu: Failed to realize device %s- %s", |
45 | diff --git a/include/block/block.h b/include/block/block.h | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/include/block/block.h | ||
48 | +++ b/include/block/block.h | ||
49 | @@ -XXX,XX +XXX,XX @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, | ||
50 | int64_t sector_num, int nb_sectors, int *pnum); | ||
51 | |||
52 | bool bdrv_is_read_only(BlockDriverState *bs); | ||
53 | +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); | ||
54 | int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); | ||
55 | bool bdrv_is_sg(BlockDriverState *bs); | ||
56 | bool bdrv_is_inserted(BlockDriverState *bs); | ||
57 | -- | 53 | -- |
58 | 2.9.3 | 54 | 2.36.1 |
59 | |||
60 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Every laio_io_plug() call has a matching laio_io_unplug() call. There is | ||
2 | a plugged counter that tracks the number of levels of plugging and | ||
3 | allows for nesting. | ||
1 | 4 | ||
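5 | The plugged counter must reflect the balance between laio_io_plug() and
6 | laio_io_unplug() calls accurately. Otherwise I/O stalls occur since
7 | io_submit(2) calls are skipped while plugged. Previously the decrement sat on the right-hand side of a short-circuiting ||, so it was skipped whenever the max batch check was already true.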
8 | |||
9 | Reported-by: Nikolay Tenev <nt@storpool.com> | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
12 | Message-id: 20220609164712.1539045-2-stefanha@redhat.com | ||
13 | Cc: Stefano Garzarella <sgarzare@redhat.com> | ||
14 | Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") | ||
15 | [Stefano Garzarella suggested adding a Fixes tag. | ||
16 | --Stefan] | ||
17 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
18 | --- | ||
19 | block/linux-aio.c | 4 +++- | ||
20 | 1 file changed, 3 insertions(+), 1 deletion(-) | ||
21 | |||
22 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/block/linux-aio.c | ||
25 | +++ b/block/linux-aio.c | ||
26 | @@ -XXX,XX +XXX,XX @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, | ||
27 | uint64_t dev_max_batch) | ||
28 | { | ||
29 | assert(s->io_q.plugged); | ||
30 | + s->io_q.plugged--; | ||
31 | + | ||
32 | if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || | ||
33 | - (--s->io_q.plugged == 0 && | ||
34 | + (!s->io_q.plugged && | ||
35 | !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { | ||
36 | ioq_submit(s); | ||
37 | } | ||
38 | -- | ||
39 | 2.36.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | It may not be obvious why laio_io_unplug() checks max batch. I discussed | ||
2 | this with Stefano and have added a comment summarizing the reason. | ||
1 | 3 | ||
4 | Cc: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Cc: Kevin Wolf <kwolf@redhat.com> | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> | ||
8 | Message-id: 20220609164712.1539045-3-stefanha@redhat.com | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | block/linux-aio.c | 6 ++++++ | ||
12 | 1 file changed, 6 insertions(+) | ||
13 | |||
14 | diff --git a/block/linux-aio.c b/block/linux-aio.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/linux-aio.c | ||
17 | +++ b/block/linux-aio.c | ||
18 | @@ -XXX,XX +XXX,XX @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, | ||
19 | assert(s->io_q.plugged); | ||
20 | s->io_q.plugged--; | ||
21 | |||
22 | + /* | ||
23 | + * Why max batch checking is performed here: | ||
24 | + * Another BDS may have queued requests with a higher dev_max_batch and | ||
25 | + * therefore in_queue could now exceed our dev_max_batch. Re-check the max | ||
26 | + * batch so we can honor our device's dev_max_batch. | ||
27 | + */ | ||
28 | if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || | ||
29 | (!s->io_q.plugged && | ||
30 | !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { | ||
31 | -- | ||
32 | 2.36.1 | diff view generated by jsdifflib |