1 | The following changes since commit e8a01102936286e012ed0f00bd7f3b7474d415c9: | 1 | The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32: |
2 | 2 | ||
3 | Merge tag 'ui-pull-request' of https://gitlab.com/marcandre.lureau/qemu into staging (2025-03-05 21:58:23 +0800) | 3 | Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/legoater/qemu/ tags/pull-vfio-20250306 | 7 | https://github.com/legoater/qemu/ tags/pull-vfio-20250311 |
8 | 8 | ||
9 | for you to fetch changes up to 59a67e70950bcc2002d3a8d22a17743e0f70da96: | 9 | for you to fetch changes up to 4d9607481560e6c8e1508a0aafe94f86a0503c8c: |
10 | 10 | ||
11 | hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property (2025-03-06 06:47:34 +0100) | 11 | vfio/pci: Drop debug commentary from x-device-dirty-page-tracking (2025-03-11 19:04:58 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | vfio queue: | 14 | vfio queue: |
15 | 15 | ||
16 | * Added property documentation | 16 | * Fixed endianness of VFIO device state packets |
17 | * Added minor fixes | 17 | * Improved IGD passthrough support with legacy mode |
18 | * Implemented basic PCI PM capability backing | 18 | * Improved build |
19 | * Promoted new IGD maintainer | 19 | * Added support for old AMD GPUs (x550) |
20 | * Deprecated vfio-platform | 20 | * Updated property documentation |
21 | * Extended VFIO migration with multifd support | ||
22 | 21 | ||
23 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
24 | Alex Williamson (5): | 23 | Joao Martins (1): |
25 | hw/pci: Basic support for PCI power management | 24 | vfio/pci: Drop debug commentary from x-device-dirty-page-tracking |
26 | pci: Use PCI PM capability initializer | ||
27 | vfio/pci: Delete local pm_cap | ||
28 | pcie, virtio: Remove redundant pm_cap | ||
29 | hw/vfio/pci: Re-order pre-reset | ||
30 | 25 | ||
31 | Cédric Le Goater (2): | 26 | Maciej S. Szmigiero (1): |
32 | vfio: Add property documentation | 27 | vfio/migration: Use BE byte order for device state wire packets |
33 | vfio/ccw: Replace warn_once_pfch() with warn_report_once() | ||
34 | 28 | ||
35 | Eric Auger (1): | 29 | Philippe Mathieu-Daudé (8): |
36 | vfio-platform: Deprecate all forms of vfio-platform devices | 30 | system: Declare qemu_[min/max]rampagesize() in 'system/hostmem.h' |
31 | hw/vfio/spapr: Do not include <linux/kvm.h> | ||
32 | hw/vfio/common: Include missing 'system/tcg.h' header | ||
33 | hw/vfio/common: Get target page size using runtime helpers | ||
34 | hw/vfio: Compile some common objects once | ||
35 | hw/vfio: Compile more objects once | ||
36 | hw/vfio: Compile iommufd.c once | ||
37 | hw/vfio: Compile display.c once | ||
37 | 38 | ||
38 | Maciej S. Szmigiero (32): | 39 | Tomita Moeko (10): |
39 | migration: Clarify that {load, save}_cleanup handlers can run without setup | 40 | vfio/igd: Remove GTT write quirk in IO BAR 4 |
40 | thread-pool: Remove thread_pool_submit() function | 41 | vfio/igd: Do not include GTT stolen size in etc/igd-bdsm-size |
41 | thread-pool: Rename AIO pool functions to *_aio() and data types to *Aio | 42 | vfio/igd: Consolidate OpRegion initialization into a single function |
42 | thread-pool: Implement generic (non-AIO) pool support | 43 | vfio/igd: Move LPC bridge initialization to a separate function |
43 | migration: Add MIG_CMD_SWITCHOVER_START and its load handler | 44 | vfio/pci: Add placeholder for device-specific config space quirks |
44 | migration: Add qemu_loadvm_load_state_buffer() and its handler | 45 | vfio/igd: Refactor vfio_probe_igd_bar4_quirk into pci config quirk |
45 | migration: Always take BQL for migration_incoming_state_destroy() | 46 | vfio/igd: Decouple common quirks from legacy mode |
46 | error: define g_autoptr() cleanup function for the Error type | 47 | vfio/igd: Handle x-igd-opregion option in config quirk |
47 | migration: Add thread pool of optional load threads | 48 | vfio/igd: Introduce x-igd-lpc option for LPC bridge ID quirk |
48 | migration/multifd: Split packet into header and RAM data | 49 | vfio/igd: Fix broken KVMGT OpRegion support |
49 | migration/multifd: Device state transfer support - receive side | ||
50 | migration/multifd: Make multifd_send() thread safe | ||
51 | migration/multifd: Add an explicit MultiFDSendData destructor | ||
52 | migration/multifd: Device state transfer support - send side | ||
53 | migration/multifd: Add multifd_device_state_supported() | ||
54 | migration: Add save_live_complete_precopy_thread handler | ||
55 | vfio/migration: Add load_device_config_state_start trace event | ||
56 | vfio/migration: Convert bytes_transferred counter to atomic | ||
57 | vfio/migration: Add vfio_add_bytes_transferred() | ||
58 | vfio/migration: Move migration channel flags to vfio-common.h header file | ||
59 | vfio/migration: Multifd device state transfer support - basic types | ||
60 | vfio/migration: Multifd device state transfer - add support checking function | ||
61 | vfio/migration: Multifd setup/cleanup functions and associated VFIOMultifd | ||
62 | vfio/migration: Setup and cleanup multifd transfer in these general methods | ||
63 | vfio/migration: Multifd device state transfer support - received buffers queuing | ||
64 | vfio/migration: Multifd device state transfer support - load thread | ||
65 | migration/qemu-file: Define g_autoptr() cleanup function for QEMUFile | ||
66 | vfio/migration: Multifd device state transfer support - config loading support | ||
67 | vfio/migration: Multifd device state transfer support - send side | ||
68 | vfio/migration: Add x-migration-multifd-transfer VFIO property | ||
69 | vfio/migration: Make x-migration-multifd-transfer VFIO property mutable | ||
70 | hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property | ||
71 | 50 | ||
72 | Peter Xu (1): | 51 | Vasilis Liaskovitis (1): |
73 | migration/multifd: Make MultiFDSendData a struct | 52 | vfio/pci-quirks: Exclude non-ioport BAR from ATI quirk |
74 | 53 | ||
75 | Tomita Moeko (1): | 54 | hw/vfio/pci.h | 11 +- |
76 | MAINTAINERS: Add myself as vfio-igd maintainer | 55 | include/exec/ram_addr.h | 3 - |
77 | 56 | include/system/hostmem.h | 3 + | |
78 | MAINTAINERS | 9 +- | 57 | hw/ppc/spapr_caps.c | 1 + |
79 | docs/about/deprecated.rst | 25 ++ | 58 | hw/s390x/s390-virtio-ccw.c | 1 + |
80 | docs/devel/migration/vfio.rst | 45 ++- | 59 | hw/vfio/common.c | 9 +- |
81 | hw/vfio/migration-multifd.h | 34 ++ | 60 | hw/vfio/igd.c | 529 +++++++++++++++++++------------------------- |
82 | hw/vfio/pci.h | 1 - | 61 | hw/vfio/iommufd.c | 1 - |
83 | include/block/aio.h | 8 +- | 62 | hw/vfio/migration-multifd.c | 15 +- |
84 | include/block/thread-pool.h | 62 +++- | 63 | hw/vfio/migration.c | 1 - |
85 | include/hw/pci/pci.h | 3 + | 64 | hw/vfio/pci-quirks.c | 53 +---- |
86 | include/hw/pci/pci_device.h | 3 + | 65 | hw/vfio/pci.c | 35 +-- |
87 | include/hw/pci/pcie.h | 2 - | 66 | hw/vfio/spapr.c | 4 +- |
88 | include/hw/vfio/vfio-common.h | 31 ++ | 67 | hw/vfio/meson.build | 27 ++- |
89 | include/migration/client-options.h | 4 + | 68 | 14 files changed, 288 insertions(+), 405 deletions(-) |
90 | include/migration/misc.h | 25 ++ | ||
91 | include/migration/register.h | 52 ++- | ||
92 | include/qapi/error.h | 2 + | ||
93 | include/qemu/typedefs.h | 5 + | ||
94 | migration/migration.h | 7 + | ||
95 | migration/multifd.h | 74 +++- | ||
96 | migration/qemu-file.h | 2 + | ||
97 | migration/savevm.h | 6 +- | ||
98 | hw/core/machine.c | 2 + | ||
99 | hw/net/e1000e.c | 3 +- | ||
100 | hw/net/eepro100.c | 4 +- | ||
101 | hw/net/igb.c | 3 +- | ||
102 | hw/nvme/ctrl.c | 3 +- | ||
103 | hw/pci-bridge/pcie_pci_bridge.c | 3 +- | ||
104 | hw/pci/pci.c | 93 ++++- | ||
105 | hw/vfio/amd-xgbe.c | 2 + | ||
106 | hw/vfio/ap.c | 9 + | ||
107 | hw/vfio/calxeda-xgmac.c | 2 + | ||
108 | hw/vfio/ccw.c | 27 +- | ||
109 | hw/vfio/migration-multifd.c | 679 +++++++++++++++++++++++++++++++++++++ | ||
110 | hw/vfio/migration.c | 106 ++++-- | ||
111 | hw/vfio/pci.c | 180 +++++++++- | ||
112 | hw/vfio/platform.c | 25 ++ | ||
113 | hw/virtio/virtio-pci.c | 11 +- | ||
114 | migration/colo.c | 3 + | ||
115 | migration/migration-hmp-cmds.c | 2 + | ||
116 | migration/migration.c | 17 +- | ||
117 | migration/multifd-device-state.c | 212 ++++++++++++ | ||
118 | migration/multifd-nocomp.c | 30 +- | ||
119 | migration/multifd.c | 248 +++++++++++--- | ||
120 | migration/options.c | 9 + | ||
121 | migration/savevm.c | 201 ++++++++++- | ||
122 | tests/unit/test-thread-pool.c | 6 +- | ||
123 | util/async.c | 6 +- | ||
124 | util/thread-pool.c | 184 ++++++++-- | ||
125 | hw/pci/trace-events | 2 + | ||
126 | hw/vfio/meson.build | 1 + | ||
127 | hw/vfio/trace-events | 13 +- | ||
128 | migration/meson.build | 1 + | ||
129 | migration/trace-events | 1 + | ||
130 | scripts/analyze-migration.py | 11 + | ||
131 | util/trace-events | 6 +- | ||
132 | 54 files changed, 2296 insertions(+), 209 deletions(-) | ||
133 | create mode 100644 hw/vfio/migration-multifd.h | ||
134 | create mode 100644 hw/vfio/migration-multifd.c | ||
135 | create mode 100644 migration/multifd-device-state.c | ||
136 | 69 | ||
Deleted patch | |||
1 | Investigate the git history to uncover when and why the VFIO | ||
2 | properties were introduced and update the models. This is mostly | ||
3 | targeting vfio-pci device, since vfio-platform, vfio-ap and vfio-ccw | ||
4 | devices are simpler. | ||
5 | 1 | ||
6 | Sort the properties based on the QEMU version in which they were | ||
7 | introduced. | ||
8 | |||
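The pattern applied throughout the patch below is one object_class_property_set_description() call per property in the device's class_init(), with a comment noting the QEMU version that introduced the property. A minimal sketch of that pattern, using a hypothetical "my-dev" class rather than code from this patch:

    #include "qemu/osdep.h"
    #include "qom/object.h"

    /* Sketch only: document an existing "sysfsdev" property so that
     * "-device my-dev,help" prints a meaningful description next to it. */
    static void my_dev_class_init(ObjectClass *klass, void *data)
    {
        object_class_property_set_description(klass, /* 2.6 */
                                              "sysfsdev",
                                              "Host sysfs path of assigned device");
    }
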
9 | Cc: Tony Krowiak <akrowiak@linux.ibm.com> | ||
10 | Cc: Eric Farman <farman@linux.ibm.com> | ||
11 | Cc: Eric Auger <eric.auger@redhat.com> | ||
12 | Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com> | ||
13 | Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com> | ||
14 | Reviewed-by: Eric Farman <farman@linux.ibm.com> # vfio-ccw | ||
15 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
16 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
17 | Link: https://lore.kernel.org/qemu-devel/20250217173455.449983-1-clg@redhat.com | ||
18 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
19 | --- | ||
20 | hw/vfio/ap.c | 9 ++++ | ||
21 | hw/vfio/ccw.c | 15 ++++++ | ||
22 | hw/vfio/pci.c | 125 +++++++++++++++++++++++++++++++++++++++++++++ | ||
23 | hw/vfio/platform.c | 24 +++++++++ | ||
24 | 4 files changed, 173 insertions(+) | ||
25 | |||
26 | diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/hw/vfio/ap.c | ||
29 | +++ b/hw/vfio/ap.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static void vfio_ap_class_init(ObjectClass *klass, void *data) | ||
31 | dc->hotpluggable = true; | ||
32 | device_class_set_legacy_reset(dc, vfio_ap_reset); | ||
33 | dc->bus_type = TYPE_AP_BUS; | ||
34 | + | ||
35 | + object_class_property_set_description(klass, /* 3.1 */ | ||
36 | + "sysfsdev", | ||
37 | + "Host sysfs path of assigned device"); | ||
38 | +#ifdef CONFIG_IOMMUFD | ||
39 | + object_class_property_set_description(klass, /* 9.0 */ | ||
40 | + "iommufd", | ||
41 | + "Set host IOMMUFD backend device"); | ||
42 | +#endif | ||
43 | } | ||
44 | |||
45 | static const TypeInfo vfio_ap_info = { | ||
46 | diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/hw/vfio/ccw.c | ||
49 | +++ b/hw/vfio/ccw.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data) | ||
51 | cdc->handle_halt = vfio_ccw_handle_halt; | ||
52 | cdc->handle_clear = vfio_ccw_handle_clear; | ||
53 | cdc->handle_store = vfio_ccw_handle_store; | ||
54 | + | ||
55 | + object_class_property_set_description(klass, /* 2.10 */ | ||
56 | + "sysfsdev", | ||
57 | + "Host sysfs path of assigned device"); | ||
58 | + object_class_property_set_description(klass, /* 3.0 */ | ||
59 | + "force-orb-pfch", | ||
60 | + "Force unlimited prefetch"); | ||
61 | +#ifdef CONFIG_IOMMUFD | ||
62 | + object_class_property_set_description(klass, /* 9.0 */ | ||
63 | + "iommufd", | ||
64 | + "Set host IOMMUFD backend device"); | ||
65 | +#endif | ||
66 | + object_class_property_set_description(klass, /* 9.2 */ | ||
67 | + "loadparm", | ||
68 | + "Define which devices that can be used for booting"); | ||
69 | } | ||
70 | |||
71 | static const TypeInfo vfio_ccw_info = { | ||
72 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/hw/vfio/pci.c | ||
75 | +++ b/hw/vfio/pci.c | ||
76 | @@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) | ||
77 | pdc->exit = vfio_exitfn; | ||
78 | pdc->config_read = vfio_pci_read_config; | ||
79 | pdc->config_write = vfio_pci_write_config; | ||
80 | + | ||
81 | + object_class_property_set_description(klass, /* 1.3 */ | ||
82 | + "host", | ||
83 | + "Host PCI address [domain:]<bus:slot.function> of assigned device"); | ||
84 | + object_class_property_set_description(klass, /* 1.3 */ | ||
85 | + "x-intx-mmap-timeout-ms", | ||
86 | + "When EOI is not provided by KVM/QEMU, wait time " | ||
87 | + "(milliseconds) to re-enable device direct access " | ||
88 | + "after INTx (DEBUG)"); | ||
89 | + object_class_property_set_description(klass, /* 1.5 */ | ||
90 | + "x-vga", | ||
91 | + "Expose VGA address spaces for device"); | ||
92 | + object_class_property_set_description(klass, /* 2.3 */ | ||
93 | + "x-req", | ||
94 | + "Disable device request notification support (DEBUG)"); | ||
95 | + object_class_property_set_description(klass, /* 2.4 and 2.5 */ | ||
96 | + "x-no-mmap", | ||
97 | + "Disable MMAP for device. Allows to trace MMIO " | ||
98 | + "accesses (DEBUG)"); | ||
99 | + object_class_property_set_description(klass, /* 2.5 */ | ||
100 | + "x-no-kvm-intx", | ||
101 | + "Disable direct VFIO->KVM INTx injection. Allows to " | ||
102 | + "trace INTx interrupts (DEBUG)"); | ||
103 | + object_class_property_set_description(klass, /* 2.5 */ | ||
104 | + "x-no-kvm-msi", | ||
105 | + "Disable direct VFIO->KVM MSI injection. Allows to " | ||
106 | + "trace MSI interrupts (DEBUG)"); | ||
107 | + object_class_property_set_description(klass, /* 2.5 */ | ||
108 | + "x-no-kvm-msix", | ||
109 | + "Disable direct VFIO->KVM MSIx injection. Allows to " | ||
110 | + "trace MSIx interrupts (DEBUG)"); | ||
111 | + object_class_property_set_description(klass, /* 2.5 */ | ||
112 | + "x-pci-vendor-id", | ||
113 | + "Override PCI Vendor ID with provided value (DEBUG)"); | ||
114 | + object_class_property_set_description(klass, /* 2.5 */ | ||
115 | + "x-pci-device-id", | ||
116 | + "Override PCI device ID with provided value (DEBUG)"); | ||
117 | + object_class_property_set_description(klass, /* 2.5 */ | ||
118 | + "x-pci-sub-vendor-id", | ||
119 | + "Override PCI Subsystem Vendor ID with provided value " | ||
120 | + "(DEBUG)"); | ||
121 | + object_class_property_set_description(klass, /* 2.5 */ | ||
122 | + "x-pci-sub-device-id", | ||
123 | + "Override PCI Subsystem Device ID with provided value " | ||
124 | + "(DEBUG)"); | ||
125 | + object_class_property_set_description(klass, /* 2.6 */ | ||
126 | + "sysfsdev", | ||
127 | + "Host sysfs path of assigned device"); | ||
128 | + object_class_property_set_description(klass, /* 2.7 */ | ||
129 | + "x-igd-opregion", | ||
130 | + "Expose host IGD OpRegion to guest"); | ||
131 | + object_class_property_set_description(klass, /* 2.7 (See c4c45e943e51) */ | ||
132 | + "x-igd-gms", | ||
133 | + "Override IGD data stolen memory size (32MiB units)"); | ||
134 | + object_class_property_set_description(klass, /* 2.11 */ | ||
135 | + "x-nv-gpudirect-clique", | ||
136 | + "Add NVIDIA GPUDirect capability indicating P2P DMA " | ||
137 | + "clique for device [0-15]"); | ||
138 | + object_class_property_set_description(klass, /* 2.12 */ | ||
139 | + "x-no-geforce-quirks", | ||
140 | + "Disable GeForce quirks (for NVIDIA Quadro/GRID/Tesla). " | ||
141 | + "Improves performance"); | ||
142 | + object_class_property_set_description(klass, /* 2.12 */ | ||
143 | + "display", | ||
144 | + "Enable display support for device, ex. vGPU"); | ||
145 | + object_class_property_set_description(klass, /* 2.12 */ | ||
146 | + "x-msix-relocation", | ||
147 | + "Specify MSI-X MMIO relocation to the end of specified " | ||
148 | + "existing BAR or new BAR to avoid virtualization overhead " | ||
149 | + "due to adjacent device registers"); | ||
150 | + object_class_property_set_description(klass, /* 3.0 */ | ||
151 | + "x-no-kvm-ioeventfd", | ||
152 | + "Disable registration of ioeventfds with KVM (DEBUG)"); | ||
153 | + object_class_property_set_description(klass, /* 3.0 */ | ||
154 | + "x-no-vfio-ioeventfd", | ||
155 | + "Disable linking of KVM ioeventfds to VFIO ioeventfds " | ||
156 | + "(DEBUG)"); | ||
157 | + object_class_property_set_description(klass, /* 3.1 */ | ||
158 | + "x-balloon-allowed", | ||
159 | + "Override allowing ballooning with device (DEBUG, DANGER)"); | ||
160 | + object_class_property_set_description(klass, /* 3.2 */ | ||
161 | + "xres", | ||
162 | + "Set X display resolution the vGPU should use"); | ||
163 | + object_class_property_set_description(klass, /* 3.2 */ | ||
164 | + "yres", | ||
165 | + "Set Y display resolution the vGPU should use"); | ||
166 | + object_class_property_set_description(klass, /* 5.2 */ | ||
167 | + "x-pre-copy-dirty-page-tracking", | ||
168 | + "Disable dirty pages tracking during iterative phase " | ||
169 | + "(DEBUG)"); | ||
170 | + object_class_property_set_description(klass, /* 5.2, 8.0 non-experimental */ | ||
171 | + "enable-migration", | ||
172 | + "Enable device migration. Also requires a host VFIO PCI " | ||
173 | + "variant or mdev driver with migration support enabled"); | ||
174 | + object_class_property_set_description(klass, /* 8.1 */ | ||
175 | + "vf-token", | ||
176 | + "Specify UUID VF token. Required for VF when PF is owned " | ||
177 | + "by another VFIO driver"); | ||
178 | +#ifdef CONFIG_IOMMUFD | ||
179 | + object_class_property_set_description(klass, /* 9.0 */ | ||
180 | + "iommufd", | ||
181 | + "Set host IOMMUFD backend device"); | ||
182 | +#endif | ||
183 | + object_class_property_set_description(klass, /* 9.1 */ | ||
184 | + "x-device-dirty-page-tracking", | ||
185 | + "Disable device dirty page tracking and use " | ||
186 | + "container-based dirty page tracking (DEBUG)"); | ||
187 | + object_class_property_set_description(klass, /* 9.1 */ | ||
188 | + "migration-events", | ||
189 | + "Emit VFIO migration QAPI event when a VFIO device " | ||
190 | + "changes its migration state. For management applications"); | ||
191 | + object_class_property_set_description(klass, /* 9.1 */ | ||
192 | + "skip-vsc-check", | ||
193 | + "Skip config space check for Vendor Specific Capability. " | ||
194 | + "Setting to false will enforce strict checking of VSC content " | ||
195 | + "(DEBUG)"); | ||
196 | } | ||
197 | |||
198 | static const TypeInfo vfio_pci_dev_info = { | ||
199 | @@ -XXX,XX +XXX,XX @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data) | ||
200 | |||
201 | device_class_set_props(dc, vfio_pci_dev_nohotplug_properties); | ||
202 | dc->hotpluggable = false; | ||
203 | + | ||
204 | + object_class_property_set_description(klass, /* 3.1 */ | ||
205 | + "ramfb", | ||
206 | + "Enable ramfb to provide pre-boot graphics for devices " | ||
207 | + "enabling display option"); | ||
208 | + object_class_property_set_description(klass, /* 8.2 */ | ||
209 | + "x-ramfb-migrate", | ||
210 | + "Override default migration support for ramfb support " | ||
211 | + "(DEBUG)"); | ||
212 | } | ||
213 | |||
214 | static const TypeInfo vfio_pci_nohotplug_dev_info = { | ||
215 | diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c | ||
216 | index XXXXXXX..XXXXXXX 100644 | ||
217 | --- a/hw/vfio/platform.c | ||
218 | +++ b/hw/vfio/platform.c | ||
219 | @@ -XXX,XX +XXX,XX @@ static void vfio_platform_class_init(ObjectClass *klass, void *data) | ||
220 | dc->desc = "VFIO-based platform device assignment"; | ||
221 | sbc->connect_irq_notifier = vfio_start_irqfd_injection; | ||
222 | set_bit(DEVICE_CATEGORY_MISC, dc->categories); | ||
223 | + | ||
224 | + object_class_property_set_description(klass, /* 2.4 */ | ||
225 | + "host", | ||
226 | + "Host device name of assigned device"); | ||
227 | + object_class_property_set_description(klass, /* 2.4 and 2.5 */ | ||
228 | + "x-no-mmap", | ||
229 | + "Disable MMAP for device. Allows to trace MMIO " | ||
230 | + "accesses (DEBUG)"); | ||
231 | + object_class_property_set_description(klass, /* 2.4 */ | ||
232 | + "mmap-timeout-ms", | ||
233 | + "When EOI is not provided by KVM/QEMU, wait time " | ||
234 | + "(milliseconds) to re-enable device direct access " | ||
235 | + "after level interrupt (DEBUG)"); | ||
236 | + object_class_property_set_description(klass, /* 2.4 */ | ||
237 | + "x-irqfd", | ||
238 | + "Allow disabling irqfd support (DEBUG)"); | ||
239 | + object_class_property_set_description(klass, /* 2.6 */ | ||
240 | + "sysfsdev", | ||
241 | + "Host sysfs path of assigned device"); | ||
242 | +#ifdef CONFIG_IOMMUFD | ||
243 | + object_class_property_set_description(klass, /* 9.0 */ | ||
244 | + "iommufd", | ||
245 | + "Set host IOMMUFD backend device"); | ||
246 | +#endif | ||
247 | } | ||
248 | |||
249 | static const TypeInfo vfio_platform_dev_info = { | ||
250 | -- | ||
251 | 2.48.1 | ||
252 | |||
Deleted patch | |||
1 | Use the common helper warn_report_once() instead of implementing its | ||
2 | own. | ||
3 | 1 | ||
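warn_report_once() is a printf-like helper that keeps a static flag at its call site, so the warning is emitted at most once without the device carrying its own bool. A minimal sketch (hypothetical function, not taken from this patch):

    #include "qemu/osdep.h"
    #include "qemu/error-report.h"

    /* Sketch only: the macro's internal static flag ensures the message is
     * printed once, however often this path runs. */
    static void note_forced_prefetch(unsigned devno)
    {
        warn_report_once("device %04x: PFCH flag forced", devno);
    }
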
4 | Cc: Eric Farman <farman@linux.ibm.com> | ||
5 | Reviewed-by: Eric Farman <farman@linux.ibm.com> | ||
6 | Link: https://lore.kernel.org/qemu-devel/20250214161936.1720039-1-clg@redhat.com | ||
7 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
8 | --- | ||
9 | hw/vfio/ccw.c | 12 ++---------- | ||
10 | 1 file changed, 2 insertions(+), 10 deletions(-) | ||
11 | |||
12 | diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/hw/vfio/ccw.c | ||
15 | +++ b/hw/vfio/ccw.c | ||
16 | @@ -XXX,XX +XXX,XX @@ struct VFIOCCWDevice { | ||
17 | EventNotifier crw_notifier; | ||
18 | EventNotifier req_notifier; | ||
19 | bool force_orb_pfch; | ||
20 | - bool warned_orb_pfch; | ||
21 | }; | ||
22 | |||
23 | -static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, | ||
24 | - const char *msg) | ||
25 | -{ | ||
26 | - warn_report_once_cond(&vcdev->warned_orb_pfch, | ||
27 | - "vfio-ccw (devno %x.%x.%04x): %s", | ||
28 | - sch->cssid, sch->ssid, sch->devno, msg); | ||
29 | -} | ||
30 | - | ||
31 | static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) | ||
32 | { | ||
33 | vdev->needs_reset = false; | ||
34 | @@ -XXX,XX +XXX,XX @@ static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) | ||
35 | |||
36 | if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) { | ||
37 | sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; | ||
38 | - warn_once_pfch(vcdev, sch, "PFCH flag forced"); | ||
39 | + warn_report_once("vfio-ccw (devno %x.%x.%04x): PFCH flag forced", | ||
40 | + sch->cssid, sch->ssid, sch->devno); | ||
41 | } | ||
42 | |||
43 | QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); | ||
44 | -- | ||
45 | 2.48.1 | ||
46 | |||
Deleted patch | |||
1 | From: Alex Williamson <alex.williamson@redhat.com> | ||
2 | 1 | ||
3 | The memory and IO BARs for devices are only accessible in the D0 power | ||
4 | state. In other power states the PCI spec defines that the device | ||
5 | responds to TLPs and messages with an Unsupported Request response. | ||
6 | |||
7 | To approximate this behavior, consider the BARs as unmapped when the | ||
8 | device is not in the D0 power state. This makes the BARs inaccessible | ||
9 | and has the additional bonus for vfio-pci that we don't attempt to DMA | ||
10 | map BARs for devices in a non-D0 power state. | ||
11 | |||
12 | To support this, an interface is added for devices to register the PM | ||
13 | capability, which allows central tracking to enforce valid transitions | ||
14 | and unmap BARs in non-D0 states. | ||
15 | |||
16 | NB. We currently have device models (eepro100 and pcie_pci_bridge) | ||
17 | that register a PM capability but do not set wmask to enable writes to | ||
18 | the power state field. In order to maintain migration compatibility, | ||
19 | this new helper does not manage the wmask to enable guest writes to | ||
20 | initiate a power state change. The contents and write access of the | ||
21 | PM capability are still managed by the caller. | ||
22 | |||
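A device model that does want the guest to initiate power state changes is expected to call the new helper and then open the PMCSR power state field in wmask itself. A rough sketch, with a hypothetical device and an arbitrarily chosen capability offset:

    #include "qemu/osdep.h"
    #include "hw/pci/pci.h"
    #include "hw/pci/pci_device.h"

    /* Sketch only: register the PM capability via the new helper, then allow
     * guest writes to the PMCSR power state bits, which pci_pm_init() does
     * not make guest-writable (see the note above about migration
     * compatibility). */
    static void mydev_init_pm(PCIDevice *pdev, Error **errp)
    {
        int pm_cap = pci_pm_init(pdev, 0x60, errp); /* 0x60: arbitrary offset */

        if (pm_cap < 0) {
            return;
        }

        pci_set_word(pdev->wmask + pm_cap + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
    }

With that in place, pci_update_mappings() treats the device's BARs as unmapped whenever the guest programs a non-D0 state, as implemented further down in this patch.
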
23 | Cc: Michael S. Tsirkin <mst@redhat.com> | ||
24 | Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> | ||
25 | Signed-off-by: Alex Williamson <alex.williamson@redhat.com> | ||
26 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
27 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
28 | Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-2-alex.williamson@redhat.com | ||
29 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
30 | --- | ||
31 | include/hw/pci/pci.h | 3 ++ | ||
32 | include/hw/pci/pci_device.h | 3 ++ | ||
33 | hw/pci/pci.c | 93 ++++++++++++++++++++++++++++++++++++- | ||
34 | hw/pci/trace-events | 2 + | ||
35 | 4 files changed, 99 insertions(+), 2 deletions(-) | ||
36 | |||
37 | diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/include/hw/pci/pci.h | ||
40 | +++ b/include/hw/pci/pci.h | ||
41 | @@ -XXX,XX +XXX,XX @@ enum { | ||
42 | QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR), | ||
43 | #define QEMU_PCIE_EXT_TAG_BITNR 13 | ||
44 | QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR), | ||
45 | +#define QEMU_PCI_CAP_PM_BITNR 14 | ||
46 | + QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR), | ||
47 | }; | ||
48 | |||
49 | typedef struct PCIINTxRoute { | ||
50 | @@ -XXX,XX +XXX,XX @@ static inline void pci_irq_deassert(PCIDevice *pci_dev) | ||
51 | MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); | ||
52 | void pci_set_enabled(PCIDevice *pci_dev, bool state); | ||
53 | void pci_set_power(PCIDevice *pci_dev, bool state); | ||
54 | +int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp); | ||
55 | |||
56 | #endif | ||
57 | diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/include/hw/pci/pci_device.h | ||
60 | +++ b/include/hw/pci/pci_device.h | ||
61 | @@ -XXX,XX +XXX,XX @@ struct PCIDevice { | ||
62 | /* Capability bits */ | ||
63 | uint32_t cap_present; | ||
64 | |||
65 | + /* Offset of PM capability in config space */ | ||
66 | + uint8_t pm_cap; | ||
67 | + | ||
68 | /* Offset of MSI-X capability in config space */ | ||
69 | uint8_t msix_cap; | ||
70 | |||
71 | diff --git a/hw/pci/pci.c b/hw/pci/pci.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/hw/pci/pci.c | ||
74 | +++ b/hw/pci/pci.c | ||
75 | @@ -XXX,XX +XXX,XX @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) | ||
76 | attrs, NULL); | ||
77 | } | ||
78 | |||
79 | +/* | ||
80 | + * Register and track a PM capability. If wmask is also enabled for the power | ||
81 | + * state field of the pmcsr register, guest writes may change the device PM | ||
82 | + * state. BAR access is only enabled while the device is in the D0 state. | ||
83 | + * Return the capability offset or negative error code. | ||
84 | + */ | ||
85 | +int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp) | ||
86 | +{ | ||
87 | + int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp); | ||
88 | + | ||
89 | + if (cap < 0) { | ||
90 | + return cap; | ||
91 | + } | ||
92 | + | ||
93 | + d->pm_cap = cap; | ||
94 | + d->cap_present |= QEMU_PCI_CAP_PM; | ||
95 | + | ||
96 | + return cap; | ||
97 | +} | ||
98 | + | ||
99 | +static uint8_t pci_pm_state(PCIDevice *d) | ||
100 | +{ | ||
101 | + uint16_t pmcsr; | ||
102 | + | ||
103 | + if (!(d->cap_present & QEMU_PCI_CAP_PM)) { | ||
104 | + return 0; | ||
105 | + } | ||
106 | + | ||
107 | + pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL); | ||
108 | + | ||
109 | + return pmcsr & PCI_PM_CTRL_STATE_MASK; | ||
110 | +} | ||
111 | + | ||
112 | +/* | ||
113 | + * Update the PM capability state based on the new value stored in config | ||
114 | + * space respective to the old, pre-write state provided. If the new value | ||
115 | + * is rejected (unsupported or invalid transition) restore the old value. | ||
116 | + * Return the resulting PM state. | ||
117 | + */ | ||
118 | +static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old) | ||
119 | +{ | ||
120 | + uint16_t pmc; | ||
121 | + uint8_t new; | ||
122 | + | ||
123 | + if (!(d->cap_present & QEMU_PCI_CAP_PM) || | ||
124 | + !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) { | ||
125 | + return old; | ||
126 | + } | ||
127 | + | ||
128 | + new = pci_pm_state(d); | ||
129 | + if (new == old) { | ||
130 | + return old; | ||
131 | + } | ||
132 | + | ||
133 | + pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC); | ||
134 | + | ||
135 | + /* | ||
136 | + * Transitions to D1 & D2 are only allowed if supported. Devices may | ||
137 | + * only transition to higher D-states or to D0. | ||
138 | + */ | ||
139 | + if ((!(pmc & PCI_PM_CAP_D1) && new == 1) || | ||
140 | + (!(pmc & PCI_PM_CAP_D2) && new == 2) || | ||
141 | + (old && new && new < old)) { | ||
142 | + pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL, | ||
143 | + PCI_PM_CTRL_STATE_MASK); | ||
144 | + pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL, | ||
145 | + old); | ||
146 | + trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d), | ||
147 | + PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), | ||
148 | + old, new); | ||
149 | + return old; | ||
150 | + } | ||
151 | + | ||
152 | + trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn), | ||
153 | + PCI_FUNC(d->devfn), old, new); | ||
154 | + return new; | ||
155 | +} | ||
156 | + | ||
157 | static void pci_reset_regions(PCIDevice *dev) | ||
158 | { | ||
159 | int r; | ||
160 | @@ -XXX,XX +XXX,XX @@ static void pci_do_device_reset(PCIDevice *dev) | ||
161 | pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) | | ||
162 | pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE)); | ||
163 | dev->config[PCI_CACHE_LINE_SIZE] = 0x0; | ||
164 | + /* Default PM state is D0 */ | ||
165 | + if (dev->cap_present & QEMU_PCI_CAP_PM) { | ||
166 | + pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL, | ||
167 | + PCI_PM_CTRL_STATE_MASK); | ||
168 | + } | ||
169 | pci_reset_regions(dev); | ||
170 | pci_update_mappings(dev); | ||
171 | |||
172 | @@ -XXX,XX +XXX,XX @@ static void pci_update_mappings(PCIDevice *d) | ||
173 | continue; | ||
174 | |||
175 | new_addr = pci_bar_address(d, i, r->type, r->size); | ||
176 | - if (!d->enabled) { | ||
177 | + if (!d->enabled || pci_pm_state(d)) { | ||
178 | new_addr = PCI_BAR_UNMAPPED; | ||
179 | } | ||
180 | |||
181 | @@ -XXX,XX +XXX,XX @@ uint32_t pci_default_read_config(PCIDevice *d, | ||
182 | |||
183 | void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l) | ||
184 | { | ||
185 | + uint8_t new_pm_state, old_pm_state = pci_pm_state(d); | ||
186 | int i, was_irq_disabled = pci_irq_disabled(d); | ||
187 | uint32_t val = val_in; | ||
188 | |||
189 | @@ -XXX,XX +XXX,XX @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int | ||
190 | d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask); | ||
191 | d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ | ||
192 | } | ||
193 | + | ||
194 | + new_pm_state = pci_pm_update(d, addr, l, old_pm_state); | ||
195 | + | ||
196 | if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) || | ||
197 | ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) || | ||
198 | ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) || | ||
199 | - range_covers_byte(addr, l, PCI_COMMAND)) | ||
200 | + range_covers_byte(addr, l, PCI_COMMAND) || | ||
201 | + !!new_pm_state != !!old_pm_state) { | ||
202 | pci_update_mappings(d); | ||
203 | + } | ||
204 | |||
205 | if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { | ||
206 | pci_update_irq_disabled(d, was_irq_disabled); | ||
207 | diff --git a/hw/pci/trace-events b/hw/pci/trace-events | ||
208 | index XXXXXXX..XXXXXXX 100644 | ||
209 | --- a/hw/pci/trace-events | ||
210 | +++ b/hw/pci/trace-events | ||
211 | @@ -XXX,XX +XXX,XX @@ | ||
212 | # See docs/devel/tracing.rst for syntax documentation. | ||
213 | |||
214 | # pci.c | ||
215 | +pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d" | ||
216 | +pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d" | ||
217 | pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 | ||
218 | pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 | ||
219 | pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" | ||
220 | -- | ||
221 | 2.48.1 | ||
222 | |||
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | A new function multifd_queue_device_state() is provided for device to queue | 3 | The IO BAR4 of IGD devices contains a pair of 32-bit address/data |
4 | its state for transmission via a multifd channel. | 4 | registers, MMIO_Index (0x0) and MMIO_Data (0x4), which provide access |
5 | 5 | to the MMIO BAR0 (GTTMMADR) from IO space. These registers are probably | |
6 | Reviewed-by: Peter Xu <peterx@redhat.com> | 6 | only used by the VBIOS, and are not documented by intel. The observed |
7 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 7 | layout of MMIO_Index register is: |
8 | Link: https://lore.kernel.org/qemu-devel/ebd55768d3e5fecb5eb3f197bad9c0c07e5bc084.1741124640.git.maciej.szmigiero@oracle.com | 8 | 31 2 1 0 |
9 | +-------------------------------------------------------------------+ | ||
10 | | Offset | Rsvd | Sel | | ||
11 | +-------------------------------------------------------------------+ | ||
12 | - Offset: Byte offset in specified region, 4-byte aligned. | ||
13 | - Sel: Region selector | ||
14 | 0: MMIO register region (first half of MMIO BAR0) | ||
15 | 1: GTT region (second half of MMIO BAR0). Pre Gen11 only. | ||
16 | |||
17 | Currently, QEMU implements a quirk that adjusts the guest Data Stolen | ||
18 | Memory (DSM) region address to be (addr - host BDSM + guest BDSM) when | ||
19 | programming GTT entries via IO BAR4, assuming guest still programs GTT | ||
20 | with host DSM address, which is not the case. Guest's BDSM register is | ||
21 | emulated and initialized to 0 at startup by QEMU, then SeaBIOS programs | ||
22 | its value[1]. As result, the address programmed to GTT entries by VBIOS | ||
23 | running in guest are valid GPA, and this unnecessary adjustment brings | ||
24 | inconsistency. | ||
25 | |||
26 | [1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332 | ||
27 | |||
28 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | ||
29 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
30 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
31 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
32 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-2-tomitamoeko@gmail.com | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 33 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 34 | --- |
11 | include/migration/misc.h | 4 ++ | 35 | hw/vfio/igd.c | 191 +------------------------------------------------- |
12 | migration/multifd.h | 34 ++++++--- | 36 | 1 file changed, 1 insertion(+), 190 deletions(-) |
13 | migration/multifd-device-state.c | 118 +++++++++++++++++++++++++++++++ | 37 | |
14 | migration/multifd-nocomp.c | 14 +++- | 38 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
15 | migration/multifd.c | 42 +++++++++-- | ||
16 | migration/meson.build | 1 + | ||
17 | 6 files changed, 197 insertions(+), 16 deletions(-) | ||
18 | create mode 100644 migration/multifd-device-state.c | ||
19 | |||
20 | diff --git a/include/migration/misc.h b/include/migration/misc.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/include/migration/misc.h | 40 | --- a/hw/vfio/igd.c |
23 | +++ b/include/migration/misc.h | 41 | +++ b/hw/vfio/igd.c |
24 | @@ -XXX,XX +XXX,XX @@ bool migrate_is_uri(const char *uri); | 42 | @@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev) |
25 | bool migrate_uri_parse(const char *uri, MigrationChannel **channel, | 43 | return -1; |
26 | Error **errp); | ||
27 | |||
28 | +/* migration/multifd-device-state.c */ | ||
29 | +bool multifd_queue_device_state(char *idstr, uint32_t instance_id, | ||
30 | + char *data, size_t len); | ||
31 | + | ||
32 | #endif | ||
33 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/migration/multifd.h | ||
36 | +++ b/migration/multifd.h | ||
37 | @@ -XXX,XX +XXX,XX @@ struct MultiFDRecvData { | ||
38 | off_t file_offset; | ||
39 | }; | ||
40 | |||
41 | +typedef struct { | ||
42 | + char *idstr; | ||
43 | + uint32_t instance_id; | ||
44 | + char *buf; | ||
45 | + size_t buf_len; | ||
46 | +} MultiFDDeviceState_t; | ||
47 | + | ||
48 | typedef enum { | ||
49 | MULTIFD_PAYLOAD_NONE, | ||
50 | MULTIFD_PAYLOAD_RAM, | ||
51 | + MULTIFD_PAYLOAD_DEVICE_STATE, | ||
52 | } MultiFDPayloadType; | ||
53 | |||
54 | typedef union MultiFDPayload { | ||
55 | MultiFDPages_t ram; | ||
56 | + MultiFDDeviceState_t device_state; | ||
57 | } MultiFDPayload; | ||
58 | |||
59 | struct MultiFDSendData { | ||
60 | @@ -XXX,XX +XXX,XX @@ static inline bool multifd_payload_empty(MultiFDSendData *data) | ||
61 | return data->type == MULTIFD_PAYLOAD_NONE; | ||
62 | } | 44 | } |
63 | 45 | ||
64 | +static inline bool multifd_payload_device_state(MultiFDSendData *data) | 46 | -typedef struct VFIOIGDQuirk { |
65 | +{ | 47 | - struct VFIOPCIDevice *vdev; |
66 | + return data->type == MULTIFD_PAYLOAD_DEVICE_STATE; | 48 | - uint32_t index; |
67 | +} | 49 | - uint64_t bdsm; |
68 | + | 50 | -} VFIOIGDQuirk; |
69 | static inline void multifd_set_payload_type(MultiFDSendData *data, | 51 | - |
70 | MultiFDPayloadType type) | 52 | #define IGD_GMCH 0x50 /* Graphics Control Register */ |
71 | { | 53 | #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ |
72 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 54 | #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ |
73 | 55 | @@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, | |
74 | /* thread local variables. No locking required */ | 56 | return ret; |
75 | |||
76 | - /* pointer to the packet */ | ||
77 | + /* pointers to the possible packet types */ | ||
78 | MultiFDPacket_t *packet; | ||
79 | + MultiFDPacketDeviceState_t *packet_device_state; | ||
80 | /* size of the next packet that contains pages */ | ||
81 | uint32_t next_packet_size; | ||
82 | /* packets sent through this channel */ | ||
83 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_prepare_common(MultiFDSendParams *p); | ||
84 | void multifd_send_zero_page_detect(MultiFDSendParams *p); | ||
85 | void multifd_recv_zero_page_process(MultiFDRecvParams *p); | ||
86 | |||
87 | -static inline void multifd_send_prepare_header(MultiFDSendParams *p) | ||
88 | -{ | ||
89 | - p->iov[0].iov_len = p->packet_len; | ||
90 | - p->iov[0].iov_base = p->packet; | ||
91 | - p->iovs_num++; | ||
92 | -} | ||
93 | - | ||
94 | void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc); | ||
95 | bool multifd_send(MultiFDSendData **send_data); | ||
96 | MultiFDSendData *multifd_send_data_alloc(void); | ||
97 | @@ -XXX,XX +XXX,XX @@ bool multifd_ram_sync_per_section(void); | ||
98 | size_t multifd_ram_payload_size(void); | ||
99 | void multifd_ram_fill_packet(MultiFDSendParams *p); | ||
100 | int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp); | ||
101 | + | ||
102 | +size_t multifd_device_state_payload_size(void); | ||
103 | + | ||
104 | +void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state); | ||
105 | + | ||
106 | +void multifd_device_state_send_setup(void); | ||
107 | +void multifd_device_state_send_cleanup(void); | ||
108 | + | ||
109 | +void multifd_device_state_send_prepare(MultiFDSendParams *p); | ||
110 | + | ||
111 | #endif | ||
112 | diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c | ||
113 | new file mode 100644 | ||
114 | index XXXXXXX..XXXXXXX | ||
115 | --- /dev/null | ||
116 | +++ b/migration/multifd-device-state.c | ||
117 | @@ -XXX,XX +XXX,XX @@ | ||
118 | +/* | ||
119 | + * Multifd device state migration | ||
120 | + * | ||
121 | + * Copyright (C) 2024,2025 Oracle and/or its affiliates. | ||
122 | + * | ||
123 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
124 | + * See the COPYING file in the top-level directory. | ||
125 | + * | ||
126 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
127 | + */ | ||
128 | + | ||
129 | +#include "qemu/osdep.h" | ||
130 | +#include "qemu/lockable.h" | ||
131 | +#include "migration/misc.h" | ||
132 | +#include "multifd.h" | ||
133 | + | ||
134 | +static struct { | ||
135 | + QemuMutex queue_job_mutex; | ||
136 | + | ||
137 | + MultiFDSendData *send_data; | ||
138 | +} *multifd_send_device_state; | ||
139 | + | ||
140 | +size_t multifd_device_state_payload_size(void) | ||
141 | +{ | ||
142 | + return sizeof(MultiFDDeviceState_t); | ||
143 | +} | ||
144 | + | ||
145 | +void multifd_device_state_send_setup(void) | ||
146 | +{ | ||
147 | + assert(!multifd_send_device_state); | ||
148 | + multifd_send_device_state = g_malloc(sizeof(*multifd_send_device_state)); | ||
149 | + | ||
150 | + qemu_mutex_init(&multifd_send_device_state->queue_job_mutex); | ||
151 | + | ||
152 | + multifd_send_device_state->send_data = multifd_send_data_alloc(); | ||
153 | +} | ||
154 | + | ||
155 | +void multifd_device_state_send_cleanup(void) | ||
156 | +{ | ||
157 | + g_clear_pointer(&multifd_send_device_state->send_data, | ||
158 | + multifd_send_data_free); | ||
159 | + | ||
160 | + qemu_mutex_destroy(&multifd_send_device_state->queue_job_mutex); | ||
161 | + | ||
162 | + g_clear_pointer(&multifd_send_device_state, g_free); | ||
163 | +} | ||
164 | + | ||
165 | +void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state) | ||
166 | +{ | ||
167 | + g_clear_pointer(&device_state->idstr, g_free); | ||
168 | + g_clear_pointer(&device_state->buf, g_free); | ||
169 | +} | ||
170 | + | ||
171 | +static void multifd_device_state_fill_packet(MultiFDSendParams *p) | ||
172 | +{ | ||
173 | + MultiFDDeviceState_t *device_state = &p->data->u.device_state; | ||
174 | + MultiFDPacketDeviceState_t *packet = p->packet_device_state; | ||
175 | + | ||
176 | + packet->hdr.flags = cpu_to_be32(p->flags); | ||
177 | + strncpy(packet->idstr, device_state->idstr, sizeof(packet->idstr) - 1); | ||
178 | + packet->idstr[sizeof(packet->idstr) - 1] = 0; | ||
179 | + packet->instance_id = cpu_to_be32(device_state->instance_id); | ||
180 | + packet->next_packet_size = cpu_to_be32(p->next_packet_size); | ||
181 | +} | ||
182 | + | ||
183 | +static void multifd_prepare_header_device_state(MultiFDSendParams *p) | ||
184 | +{ | ||
185 | + p->iov[0].iov_len = sizeof(*p->packet_device_state); | ||
186 | + p->iov[0].iov_base = p->packet_device_state; | ||
187 | + p->iovs_num++; | ||
188 | +} | ||
189 | + | ||
190 | +void multifd_device_state_send_prepare(MultiFDSendParams *p) | ||
191 | +{ | ||
192 | + MultiFDDeviceState_t *device_state = &p->data->u.device_state; | ||
193 | + | ||
194 | + assert(multifd_payload_device_state(p->data)); | ||
195 | + | ||
196 | + multifd_prepare_header_device_state(p); | ||
197 | + | ||
198 | + assert(!(p->flags & MULTIFD_FLAG_SYNC)); | ||
199 | + | ||
200 | + p->next_packet_size = device_state->buf_len; | ||
201 | + if (p->next_packet_size > 0) { | ||
202 | + p->iov[p->iovs_num].iov_base = device_state->buf; | ||
203 | + p->iov[p->iovs_num].iov_len = p->next_packet_size; | ||
204 | + p->iovs_num++; | ||
205 | + } | ||
206 | + | ||
207 | + p->flags |= MULTIFD_FLAG_NOCOMP | MULTIFD_FLAG_DEVICE_STATE; | ||
208 | + | ||
209 | + multifd_device_state_fill_packet(p); | ||
210 | +} | ||
211 | + | ||
212 | +bool multifd_queue_device_state(char *idstr, uint32_t instance_id, | ||
213 | + char *data, size_t len) | ||
214 | +{ | ||
215 | + /* Device state submissions can come from multiple threads */ | ||
216 | + QEMU_LOCK_GUARD(&multifd_send_device_state->queue_job_mutex); | ||
217 | + MultiFDDeviceState_t *device_state; | ||
218 | + | ||
219 | + assert(multifd_payload_empty(multifd_send_device_state->send_data)); | ||
220 | + | ||
221 | + multifd_set_payload_type(multifd_send_device_state->send_data, | ||
222 | + MULTIFD_PAYLOAD_DEVICE_STATE); | ||
223 | + device_state = &multifd_send_device_state->send_data->u.device_state; | ||
224 | + device_state->idstr = g_strdup(idstr); | ||
225 | + device_state->instance_id = instance_id; | ||
226 | + device_state->buf = g_memdup2(data, len); | ||
227 | + device_state->buf_len = len; | ||
228 | + | ||
229 | + if (!multifd_send(&multifd_send_device_state->send_data)) { | ||
230 | + multifd_send_data_clear(multifd_send_device_state->send_data); | ||
231 | + return false; | ||
232 | + } | ||
233 | + | ||
234 | + return true; | ||
235 | +} | ||
236 | diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c | ||
237 | index XXXXXXX..XXXXXXX 100644 | ||
238 | --- a/migration/multifd-nocomp.c | ||
239 | +++ b/migration/multifd-nocomp.c | ||
240 | @@ -XXX,XX +XXX,XX @@ | ||
241 | #include "exec/ramblock.h" | ||
242 | #include "exec/target_page.h" | ||
243 | #include "file.h" | ||
244 | +#include "migration-stats.h" | ||
245 | #include "multifd.h" | ||
246 | #include "options.h" | ||
247 | #include "qapi/error.h" | ||
248 | @@ -XXX,XX +XXX,XX @@ static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) | ||
249 | return; | ||
250 | } | 57 | } |
251 | 58 | ||
252 | +static void multifd_ram_prepare_header(MultiFDSendParams *p) | 59 | -/* |
253 | +{ | 60 | - * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE |
254 | + p->iov[0].iov_len = p->packet_len; | 61 | - * entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore |
255 | + p->iov[0].iov_base = p->packet; | 62 | - * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index |
256 | + p->iovs_num++; | 63 | - * for programming the GTT. |
257 | +} | 64 | - * |
258 | + | 65 | - * See linux:include/drm/i915_drm.h for shift and mask values. |
259 | static void multifd_send_prepare_iovs(MultiFDSendParams *p) | 66 | - */ |
260 | { | 67 | -static int vfio_igd_gtt_max(VFIOPCIDevice *vdev) |
261 | MultiFDPages_t *pages = &p->data->u.ram; | 68 | -{ |
262 | @@ -XXX,XX +XXX,XX @@ static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp) | 69 | - uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch)); |
263 | * Only !zerocopy needs the header in IOV; zerocopy will | 70 | - int gen = igd_gen(vdev); |
264 | * send it separately. | 71 | - uint64_t ggms_size = igd_gtt_memory_size(gen, gmch); |
265 | */ | 72 | - |
266 | - multifd_send_prepare_header(p); | 73 | - return (ggms_size / (4 * KiB)) * (gen < 8 ? 4 : 8); |
267 | + multifd_ram_prepare_header(p); | 74 | -} |
75 | - | ||
76 | -/* | ||
77 | - * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes. | ||
78 | - * Somehow the host stolen memory range is used for this, but how the ROM gets | ||
79 | - * it is a mystery, perhaps it's hardcoded into the ROM. Thankfully though, it | ||
80 | - * reprograms the GTT through the IOBAR where we can trap it and transpose the | ||
81 | - * programming to the VM allocated buffer. That buffer gets reserved by the VM | ||
82 | - * firmware via the fw_cfg entry added below. Here we're just monitoring the | ||
83 | - * IOBAR address and data registers to detect a write sequence targeting the | ||
84 | - * GTTADR. This code is developed by observed behavior and doesn't have a | ||
85 | - * direct spec reference, unfortunately. | ||
86 | - */ | ||
87 | -static uint64_t vfio_igd_quirk_data_read(void *opaque, | ||
88 | - hwaddr addr, unsigned size) | ||
89 | -{ | ||
90 | - VFIOIGDQuirk *igd = opaque; | ||
91 | - VFIOPCIDevice *vdev = igd->vdev; | ||
92 | - | ||
93 | - igd->index = ~0; | ||
94 | - | ||
95 | - return vfio_region_read(&vdev->bars[4].region, addr + 4, size); | ||
96 | -} | ||
97 | - | ||
98 | -static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr, | ||
99 | - uint64_t data, unsigned size) | ||
100 | -{ | ||
101 | - VFIOIGDQuirk *igd = opaque; | ||
102 | - VFIOPCIDevice *vdev = igd->vdev; | ||
103 | - uint64_t val = data; | ||
104 | - int gen = igd_gen(vdev); | ||
105 | - | ||
106 | - /* | ||
107 | - * Programming the GGMS starts at index 0x1 and uses every 4th index (ie. | ||
108 | - * 0x1, 0x5, 0x9, 0xd,...). For pre-Gen8 each 4-byte write is a whole PTE | ||
109 | - * entry, with 0th bit enable set. For Gen8 and up, PTEs are 64bit, so | ||
110 | - * entries 0x5 & 0xd are the high dword, in our case zero. Each PTE points | ||
111 | - * to a 4k page, which we translate to a page from the VM allocated region, | ||
112 | - * pointed to by the BDSM register. If this is not set, we fail. | ||
113 | - * | ||
114 | - * We trap writes to the full configured GTT size, but we typically only | ||
115 | - * see the vBIOS writing up to (nearly) the 1MB barrier. In fact it often | ||
116 | - * seems to miss the last entry for an even 1MB GTT. Doing a gratuitous | ||
117 | - * write of that last entry does work, but is hopefully unnecessary since | ||
118 | - * we clear the previous GTT on initialization. | ||
119 | - */ | ||
120 | - if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) { | ||
121 | - if (gen < 8 || (igd->index % 8 == 1)) { | ||
122 | - uint64_t base; | ||
123 | - | ||
124 | - if (gen < 11) { | ||
125 | - base = pci_get_long(vdev->pdev.config + IGD_BDSM); | ||
126 | - } else { | ||
127 | - base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11); | ||
128 | - } | ||
129 | - if (!base) { | ||
130 | - hw_error("vfio-igd: Guest attempted to program IGD GTT before " | ||
131 | - "BIOS reserved stolen memory. Unsupported BIOS?"); | ||
132 | - } | ||
133 | - | ||
134 | - val = data - igd->bdsm + base; | ||
135 | - } else { | ||
136 | - val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */ | ||
137 | - } | ||
138 | - | ||
139 | - trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name, | ||
140 | - igd->index, data, val); | ||
141 | - } | ||
142 | - | ||
143 | - vfio_region_write(&vdev->bars[4].region, addr + 4, val, size); | ||
144 | - | ||
145 | - igd->index = ~0; | ||
146 | -} | ||
147 | - | ||
148 | -static const MemoryRegionOps vfio_igd_data_quirk = { | ||
149 | - .read = vfio_igd_quirk_data_read, | ||
150 | - .write = vfio_igd_quirk_data_write, | ||
151 | - .endianness = DEVICE_LITTLE_ENDIAN, | ||
152 | -}; | ||
153 | - | ||
154 | -static uint64_t vfio_igd_quirk_index_read(void *opaque, | ||
155 | - hwaddr addr, unsigned size) | ||
156 | -{ | ||
157 | - VFIOIGDQuirk *igd = opaque; | ||
158 | - VFIOPCIDevice *vdev = igd->vdev; | ||
159 | - | ||
160 | - igd->index = ~0; | ||
161 | - | ||
162 | - return vfio_region_read(&vdev->bars[4].region, addr, size); | ||
163 | -} | ||
164 | - | ||
165 | -static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr, | ||
166 | - uint64_t data, unsigned size) | ||
167 | -{ | ||
168 | - VFIOIGDQuirk *igd = opaque; | ||
169 | - VFIOPCIDevice *vdev = igd->vdev; | ||
170 | - | ||
171 | - igd->index = data; | ||
172 | - | ||
173 | - vfio_region_write(&vdev->bars[4].region, addr, data, size); | ||
174 | -} | ||
175 | - | ||
176 | -static const MemoryRegionOps vfio_igd_index_quirk = { | ||
177 | - .read = vfio_igd_quirk_index_read, | ||
178 | - .write = vfio_igd_quirk_index_write, | ||
179 | - .endianness = DEVICE_LITTLE_ENDIAN, | ||
180 | -}; | ||
181 | - | ||
182 | #define IGD_GGC_MMIO_OFFSET 0x108040 | ||
183 | #define IGD_BDSM_MMIO_OFFSET 0x1080C0 | ||
184 | |||
185 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
186 | g_autofree struct vfio_region_info *opregion = NULL; | ||
187 | g_autofree struct vfio_region_info *host = NULL; | ||
188 | g_autofree struct vfio_region_info *lpc = NULL; | ||
189 | - VFIOQuirk *quirk; | ||
190 | - VFIOIGDQuirk *igd; | ||
191 | PCIDevice *lpc_bridge; | ||
192 | - int i, ret, gen; | ||
193 | + int ret, gen; | ||
194 | uint64_t ggms_size, gms_size; | ||
195 | uint64_t *bdsm_size; | ||
196 | uint32_t gmch; | ||
197 | - uint16_t cmd_orig, cmd; | ||
198 | Error *err = NULL; | ||
199 | |||
200 | /* | ||
201 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
202 | return; | ||
268 | } | 203 | } |
269 | 204 | ||
270 | multifd_send_prepare_iovs(p); | 205 | - /* Setup our quirk to munge GTT addresses to the VM allocated buffer */ |
271 | @@ -XXX,XX +XXX,XX @@ static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp) | 206 | - quirk = vfio_quirk_alloc(2); |
272 | if (ret != 0) { | 207 | - igd = quirk->data = g_malloc0(sizeof(*igd)); |
273 | return -1; | 208 | - igd->vdev = vdev; |
274 | } | 209 | - igd->index = ~0; |
275 | + | 210 | - if (gen < 11) { |
276 | + stat64_add(&mig_stats.multifd_bytes, p->packet_len); | 211 | - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4); |
212 | - } else { | ||
213 | - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4); | ||
214 | - igd->bdsm |= | ||
215 | - (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32; | ||
216 | - } | ||
217 | - igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */ | ||
218 | - | ||
219 | - memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk, | ||
220 | - igd, "vfio-igd-index-quirk", 4); | ||
221 | - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, | ||
222 | - 0, &quirk->mem[0], 1); | ||
223 | - | ||
224 | - memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk, | ||
225 | - igd, "vfio-igd-data-quirk", 4); | ||
226 | - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, | ||
227 | - 4, &quirk->mem[1], 1); | ||
228 | - | ||
229 | - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); | ||
230 | - | ||
231 | /* | ||
232 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
233 | * 32MiB. This option should only be used when the desired size cannot be | ||
234 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
235 | pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); | ||
277 | } | 236 | } |
278 | 237 | ||
279 | return 0; | 238 | - /* |
280 | @@ -XXX,XX +XXX,XX @@ int multifd_ram_flush_and_sync(QEMUFile *f) | 239 | - * This IOBAR gives us access to GTTADR, which allows us to write to |
281 | bool multifd_send_prepare_common(MultiFDSendParams *p) | 240 | - * the GTT itself. So let's go ahead and write zero to all the GTT |
282 | { | 241 | - * entries to avoid spurious DMA faults. Be sure I/O access is enabled |
283 | MultiFDPages_t *pages = &p->data->u.ram; | 242 | - * before talking to the device. |
284 | - multifd_send_prepare_header(p); | 243 | - */ |
285 | + multifd_ram_prepare_header(p); | 244 | - if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig), |
286 | multifd_send_zero_page_detect(p); | 245 | - vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) { |
287 | 246 | - error_report("IGD device %s - failed to read PCI command register", | |
288 | if (!pages->normal_num) { | 247 | - vdev->vbasedev.name); |
289 | diff --git a/migration/multifd.c b/migration/multifd.c | 248 | - } |
290 | index XXXXXXX..XXXXXXX 100644 | 249 | - |
291 | --- a/migration/multifd.c | 250 | - cmd = cmd_orig | PCI_COMMAND_IO; |
292 | +++ b/migration/multifd.c | 251 | - |
293 | @@ -XXX,XX +XXX,XX @@ | 252 | - if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd), |
294 | 253 | - vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) { | |
295 | #include "qemu/osdep.h" | 254 | - error_report("IGD device %s - failed to write PCI command register", |
296 | #include "qemu/cutils.h" | 255 | - vdev->vbasedev.name); |
297 | +#include "qemu/iov.h" | 256 | - } |
298 | #include "qemu/rcu.h" | 257 | - |
299 | #include "exec/target_page.h" | 258 | - for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) { |
300 | #include "system/system.h" | 259 | - vfio_region_write(&vdev->bars[4].region, 0, i, 4); |
301 | @@ -XXX,XX +XXX,XX @@ | 260 | - vfio_region_write(&vdev->bars[4].region, 4, 0, 4); |
302 | #include "qemu/error-report.h" | 261 | - } |
303 | #include "qapi/error.h" | 262 | - |
304 | #include "file.h" | 263 | - if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig), |
305 | +#include "migration/misc.h" | 264 | - vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) { |
306 | #include "migration.h" | 265 | - error_report("IGD device %s - failed to restore PCI command register", |
307 | #include "migration-stats.h" | 266 | - vdev->vbasedev.name); |
308 | #include "savevm.h" | 267 | - } |
309 | @@ -XXX,XX +XXX,XX @@ MultiFDSendData *multifd_send_data_alloc(void) | 268 | - |
310 | * added to the union in the future are larger than | 269 | trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, |
311 | * (MultiFDPages_t + flex array). | 270 | (ggms_size + gms_size) / MiB); |
312 | */ | ||
313 | - max_payload_size = MAX(multifd_ram_payload_size(), sizeof(MultiFDPayload)); | ||
314 | + max_payload_size = MAX(multifd_ram_payload_size(), | ||
315 | + multifd_device_state_payload_size()); | ||
316 | + max_payload_size = MAX(max_payload_size, sizeof(MultiFDPayload)); | ||
317 | |||
318 | /* | ||
319 | * Account for any holes the compiler might insert. We can't pack | ||
320 | @@ -XXX,XX +XXX,XX @@ void multifd_send_data_clear(MultiFDSendData *data) | ||
321 | } | ||
322 | |||
323 | switch (data->type) { | ||
324 | + case MULTIFD_PAYLOAD_DEVICE_STATE: | ||
325 | + multifd_send_data_clear_device_state(&data->u.device_state); | ||
326 | + break; | ||
327 | default: | ||
328 | /* Nothing to do */ | ||
329 | break; | ||
330 | @@ -XXX,XX +XXX,XX @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) | ||
331 | return msg.id; | ||
332 | } | 271 | } |
333 | |||
334 | +/* Fills a RAM multifd packet */ | ||
335 | void multifd_send_fill_packet(MultiFDSendParams *p) | ||
336 | { | ||
337 | MultiFDPacket_t *packet = p->packet; | ||
338 | @@ -XXX,XX +XXX,XX @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) | ||
339 | p->name = NULL; | ||
340 | g_clear_pointer(&p->data, multifd_send_data_free); | ||
341 | p->packet_len = 0; | ||
342 | + g_clear_pointer(&p->packet_device_state, g_free); | ||
343 | g_free(p->packet); | ||
344 | p->packet = NULL; | ||
345 | multifd_send_state->ops->send_cleanup(p, errp); | ||
346 | @@ -XXX,XX +XXX,XX @@ static void multifd_send_cleanup_state(void) | ||
347 | { | ||
348 | file_cleanup_outgoing_migration(); | ||
349 | socket_cleanup_outgoing_migration(); | ||
350 | + multifd_device_state_send_cleanup(); | ||
351 | qemu_sem_destroy(&multifd_send_state->channels_created); | ||
352 | qemu_sem_destroy(&multifd_send_state->channels_ready); | ||
353 | qemu_mutex_destroy(&multifd_send_state->multifd_send_mutex); | ||
354 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
355 | * qatomic_store_release() in multifd_send(). | ||
356 | */ | ||
357 | if (qatomic_load_acquire(&p->pending_job)) { | ||
358 | + bool is_device_state = multifd_payload_device_state(p->data); | ||
359 | + size_t total_size; | ||
360 | + | ||
361 | p->flags = 0; | ||
362 | p->iovs_num = 0; | ||
363 | assert(!multifd_payload_empty(p->data)); | ||
364 | |||
365 | - ret = multifd_send_state->ops->send_prepare(p, &local_err); | ||
366 | - if (ret != 0) { | ||
367 | - break; | ||
368 | + if (is_device_state) { | ||
369 | + multifd_device_state_send_prepare(p); | ||
370 | + } else { | ||
371 | + ret = multifd_send_state->ops->send_prepare(p, &local_err); | ||
372 | + if (ret != 0) { | ||
373 | + break; | ||
374 | + } | ||
375 | } | ||
376 | |||
377 | + /* | ||
378 | + * The packet header in the zerocopy RAM case is accounted for | ||
379 | + * in multifd_nocomp_send_prepare() - where it is actually | ||
380 | + * being sent. | ||
381 | + */ | ||
382 | + total_size = iov_size(p->iov, p->iovs_num); | ||
383 | + | ||
384 | if (migrate_mapped_ram()) { | ||
385 | + assert(!is_device_state); | ||
386 | + | ||
387 | ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num, | ||
388 | &p->data->u.ram, &local_err); | ||
389 | } else { | ||
390 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
391 | break; | ||
392 | } | ||
393 | |||
394 | - stat64_add(&mig_stats.multifd_bytes, | ||
395 | - (uint64_t)p->next_packet_size + p->packet_len); | ||
396 | + stat64_add(&mig_stats.multifd_bytes, total_size); | ||
397 | |||
398 | p->next_packet_size = 0; | ||
399 | multifd_send_data_clear(p->data); | ||
400 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void) | ||
401 | p->packet_len = sizeof(MultiFDPacket_t) | ||
402 | + sizeof(uint64_t) * page_count; | ||
403 | p->packet = g_malloc0(p->packet_len); | ||
404 | + p->packet_device_state = g_malloc0(sizeof(*p->packet_device_state)); | ||
405 | + p->packet_device_state->hdr.magic = cpu_to_be32(MULTIFD_MAGIC); | ||
406 | + p->packet_device_state->hdr.version = cpu_to_be32(MULTIFD_VERSION); | ||
407 | } | ||
408 | p->name = g_strdup_printf(MIGRATION_THREAD_SRC_MULTIFD, i); | ||
409 | p->write_flags = 0; | ||
410 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void) | ||
411 | assert(p->iov); | ||
412 | } | ||
413 | |||
414 | + multifd_device_state_send_setup(); | ||
415 | + | ||
416 | return true; | ||
417 | |||
418 | err: | ||
419 | diff --git a/migration/meson.build b/migration/meson.build | ||
420 | index XXXXXXX..XXXXXXX 100644 | ||
421 | --- a/migration/meson.build | ||
422 | +++ b/migration/meson.build | ||
423 | @@ -XXX,XX +XXX,XX @@ system_ss.add(files( | ||
424 | 'migration-hmp-cmds.c', | ||
425 | 'migration.c', | ||
426 | 'multifd.c', | ||
427 | + 'multifd-device-state.c', | ||
428 | 'multifd-nocomp.c', | ||
429 | 'multifd-zlib.c', | ||
430 | 'multifd-zero-page.c', | ||
431 | -- | 272 | -- |
432 | 2.48.1 | 273 | 2.48.1 |
433 | 274 | ||
1 | From: Tomita Moeko <tomitamoeko@gmail.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | As suggested by Cédric, I'm glad to be a maintainer of vfio-igd. | 3 | Though GTT Stolen Memory (GSM) is right below Data Stolen Memory (DSM) |
4 | in host address space, direct access to GSM is prohibited, and it is | ||
5 | not mapped to guest address space. Both host and guest access GSM | ||
6 | indirectly through the second half of MMIO BAR0 (GTTMMADR). | ||
7 | |||
8 | Guest firmware only needs to reserve a memory region for DSM and program | ||
9 | the BDSM register with the base address of that region; that's actually | ||
10 | what both SeaBIOS[1] and IgdAssignmentDxe do now. | ||
11 | |||
12 | [1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332 | ||
4 | 13 | ||
5 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | 14 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
6 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | 15 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
7 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 16 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
8 | Link: https://lore.kernel.org/qemu-devel/20250227162741.9860-1-tomitamoeko@gmail.com | 17 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
18 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-3-tomitamoeko@gmail.com | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 20 | --- |
11 | MAINTAINERS | 9 ++++++++- | 21 | hw/vfio/igd.c | 28 +++------------------------- |
12 | 1 file changed, 8 insertions(+), 1 deletion(-) | 22 | 1 file changed, 3 insertions(+), 25 deletions(-) |
13 | 23 | ||
14 | diff --git a/MAINTAINERS b/MAINTAINERS | 24 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
15 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/MAINTAINERS | 26 | --- a/hw/vfio/igd.c |
17 | +++ b/MAINTAINERS | 27 | +++ b/hw/vfio/igd.c |
18 | @@ -XXX,XX +XXX,XX @@ M: Cédric Le Goater <clg@redhat.com> | 28 | @@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev) |
19 | S: Supported | 29 | |
20 | F: hw/vfio/* | 30 | #define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */ |
21 | F: include/hw/vfio/ | 31 | #define IGD_GMCH_GEN6_GMS_MASK 0x1f |
22 | -F: docs/igd-assign.txt | 32 | -#define IGD_GMCH_GEN6_GGMS_SHIFT 8 |
23 | F: docs/devel/migration/vfio.rst | 33 | -#define IGD_GMCH_GEN6_GGMS_MASK 0x3 |
24 | F: qapi/vfio.json | 34 | #define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */ |
25 | 35 | #define IGD_GMCH_GEN8_GMS_MASK 0xff | |
26 | +vfio-igd | 36 | -#define IGD_GMCH_GEN8_GGMS_SHIFT 6 |
27 | +M: Alex Williamson <alex.williamson@redhat.com> | 37 | -#define IGD_GMCH_GEN8_GGMS_MASK 0x3 |
28 | +M: Cédric Le Goater <clg@redhat.com> | 38 | - |
29 | +M: Tomita Moeko <tomitamoeko@gmail.com> | 39 | -static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch) |
30 | +S: Supported | 40 | -{ |
31 | +F: hw/vfio/igd.c | 41 | - uint64_t ggms; |
32 | +F: docs/igd-assign.txt | 42 | - |
33 | + | 43 | - if (gen < 8) { |
34 | vfio-ccw | 44 | - ggms = (gmch >> IGD_GMCH_GEN6_GGMS_SHIFT) & IGD_GMCH_GEN6_GGMS_MASK; |
35 | M: Eric Farman <farman@linux.ibm.com> | 45 | - } else { |
36 | M: Matthew Rosato <mjrosato@linux.ibm.com> | 46 | - ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK; |
47 | - if (ggms != 0) { | ||
48 | - ggms = 1ULL << ggms; | ||
49 | - } | ||
50 | - } | ||
51 | - | ||
52 | - return ggms * MiB; | ||
53 | -} | ||
54 | |||
55 | static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) | ||
56 | { | ||
57 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
58 | g_autofree struct vfio_region_info *lpc = NULL; | ||
59 | PCIDevice *lpc_bridge; | ||
60 | int ret, gen; | ||
61 | - uint64_t ggms_size, gms_size; | ||
62 | + uint64_t gms_size; | ||
63 | uint64_t *bdsm_size; | ||
64 | uint32_t gmch; | ||
65 | Error *err = NULL; | ||
66 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
67 | } | ||
68 | } | ||
69 | |||
70 | - ggms_size = igd_gtt_memory_size(gen, gmch); | ||
71 | gms_size = igd_stolen_memory_size(gen, gmch); | ||
72 | |||
73 | /* | ||
74 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
75 | * config offset 0x5C. | ||
76 | */ | ||
77 | bdsm_size = g_malloc(sizeof(*bdsm_size)); | ||
78 | - *bdsm_size = cpu_to_le64(ggms_size + gms_size); | ||
79 | + *bdsm_size = cpu_to_le64(gms_size); | ||
80 | fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", | ||
81 | bdsm_size, sizeof(*bdsm_size)); | ||
82 | |||
83 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
84 | pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); | ||
85 | } | ||
86 | |||
87 | - trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, | ||
88 | - (ggms_size + gms_size) / MiB); | ||
89 | + trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); | ||
90 | } | ||
37 | -- | 91 | -- |
38 | 2.48.1 | 92 | 2.48.1 |
39 | 93 | ||
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Implement the multifd device state transfer via an additional per-device | 3 | Both the x-igd-opregion option and legacy mode require identical steps to |
4 | thread inside the save_live_complete_precopy_thread handler. | 4 | set up OpRegion for IGD devices. Consolidate these steps into a single |
5 | 5 | vfio_pci_igd_setup_opregion function. | |
6 | Switch between doing the data transfer in the new handler and doing it | 6 | |
7 | in the old save_state handler depending on whether VFIO multifd transfer is | 7 | The function call in pci.c is wrapped with an ifdef temporarily to prevent |
8 | enabled. | 8 | a build error on non-x86 archs; it will be removed after we decouple it |
9 | 9 | from legacy mode. | |
10 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 10 | |
11 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 11 | Additionally, move vfio_pci_igd_opregion_init to igd.c to prevent it |
12 | Link: https://lore.kernel.org/qemu-devel/4d727e2e0435e0022d50004e474077632830e08d.1741124640.git.maciej.szmigiero@oracle.com | 12 | from being compiled in non-x86 builds. |
13 | [ clg: - Reordered savevm_vfio_handlers | 13 | |
14 | - Updated save_live_complete_precopy* documentation ] | 14 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
15 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
16 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
17 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
18 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-4-tomitamoeko@gmail.com | ||
19 | [ clg: Fixed spelling in vfio_pci_igd_setup_opregion() ] | ||
15 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 20 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
16 | --- | 21 | --- |
17 | docs/devel/migration/vfio.rst | 19 ++++- | 22 | hw/vfio/pci.h | 4 +- |
18 | hw/vfio/migration-multifd.h | 6 ++ | 23 | hw/vfio/igd.c | 101 +++++++++++++++++++++++++++++++++++-------- |
19 | include/hw/vfio/vfio-common.h | 6 ++ | 24 | hw/vfio/pci-quirks.c | 50 --------------------- |
20 | hw/vfio/migration-multifd.c | 142 ++++++++++++++++++++++++++++++++++ | 25 | hw/vfio/pci.c | 22 ++-------- |
21 | hw/vfio/migration.c | 22 ++++-- | 26 | 4 files changed, 88 insertions(+), 89 deletions(-) |
22 | hw/vfio/trace-events | 2 + | 27 | |
23 | 6 files changed, 189 insertions(+), 8 deletions(-) | 28 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
24 | 29 | index XXXXXXX..XXXXXXX 100644 | |
25 | diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst | 30 | --- a/hw/vfio/pci.h |
26 | index XXXXXXX..XXXXXXX 100644 | 31 | +++ b/hw/vfio/pci.h |
27 | --- a/docs/devel/migration/vfio.rst | 32 | @@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, |
28 | +++ b/docs/devel/migration/vfio.rst | 33 | |
29 | @@ -XXX,XX +XXX,XX @@ VFIO implements the device hooks for the iterative approach as follows: | 34 | bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); |
30 | reassembles the multifd received data and loads it in-order into the device. | 35 | |
31 | In the non-multifd mode this function is a NOP. | 36 | -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, |
32 | 37 | - struct vfio_region_info *info, | |
33 | -* A ``save_state`` function to save the device config space if it is present. | 38 | - Error **errp); |
34 | +* A ``save_state`` function to save the device config space if it is present | 39 | +bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp); |
35 | + in the non-multifd mode. | 40 | |
36 | + In the multifd mode it just emits either a dummy EOS marker. | 41 | void vfio_display_reset(VFIOPCIDevice *vdev); |
37 | 42 | bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); | |
38 | * A ``save_live_complete_precopy`` function that sets the VFIO device in | 43 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
39 | _STOP_COPY state and iteratively copies the data for the VFIO device until | 44 | index XXXXXXX..XXXXXXX 100644 |
40 | the vendor driver indicates that no data remains. | 45 | --- a/hw/vfio/igd.c |
41 | + In the multifd mode it just emits a dummy EOS marker. | 46 | +++ b/hw/vfio/igd.c |
42 | + | 47 | @@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev) |
43 | +* A ``save_live_complete_precopy_thread`` function that in the multifd mode | 48 | return -1; |
44 | + provides thread handler performing multifd device state transfer. | ||
45 | + It sets the VFIO device to _STOP_COPY state, iteratively reads the data | ||
46 | + from the VFIO device and queues it for multifd transmission until the vendor | ||
47 | + driver indicates that no data remains. | ||
48 | + After that, it saves the device config space and queues it for multifd | ||
49 | + transfer too. | ||
50 | + In the non-multifd mode this thread is a NOP. | ||
51 | |||
52 | * A ``load_state`` function that loads the config section and the data | ||
53 | sections that are generated by the save functions above. | ||
54 | @@ -XXX,XX +XXX,XX @@ Live migration save path | ||
55 | Then the VFIO device is put in _STOP_COPY state | ||
56 | (FINISH_MIGRATE, _ACTIVE, _STOP_COPY) | ||
57 | .save_live_complete_precopy() is called for each active device | ||
58 | - For the VFIO device, iterate in .save_live_complete_precopy() until | ||
59 | + For the VFIO device: in the non-multifd mode iterate in | ||
60 | + .save_live_complete_precopy() until | ||
61 | pending data is 0 | ||
62 | + In the multifd mode this iteration is done in | ||
63 | + .save_live_complete_precopy_thread() instead. | ||
64 | | | ||
65 | (POSTMIGRATE, _COMPLETED, _STOP_COPY) | ||
66 | Migraton thread schedules cleanup bottom half and exits | ||
67 | diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/hw/vfio/migration-multifd.h | ||
70 | +++ b/hw/vfio/migration-multifd.h | ||
71 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); | ||
72 | bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, | ||
73 | Error **errp); | ||
74 | |||
75 | +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); | ||
76 | + | ||
77 | +bool | ||
78 | +vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, | ||
79 | + Error **errp); | ||
80 | + | ||
81 | int vfio_multifd_switchover_start(VFIODevice *vbasedev); | ||
82 | |||
83 | #endif | ||
84 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/hw/vfio/vfio-common.h | ||
87 | +++ b/include/hw/vfio/vfio-common.h | ||
88 | @@ -XXX,XX +XXX,XX @@ void vfio_mig_add_bytes_transferred(unsigned long val); | ||
89 | bool vfio_device_state_is_running(VFIODevice *vbasedev); | ||
90 | bool vfio_device_state_is_precopy(VFIODevice *vbasedev); | ||
91 | |||
92 | +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp); | ||
93 | int vfio_load_device_config_state(QEMUFile *f, void *opaque); | ||
94 | |||
95 | #ifdef CONFIG_LINUX | ||
96 | @@ -XXX,XX +XXX,XX @@ struct vfio_info_cap_header * | ||
97 | vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); | ||
98 | struct vfio_info_cap_header * | ||
99 | vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); | ||
100 | + | ||
101 | +int vfio_migration_set_state(VFIODevice *vbasedev, | ||
102 | + enum vfio_device_mig_state new_state, | ||
103 | + enum vfio_device_mig_state recover_state, | ||
104 | + Error **errp); | ||
105 | #endif | ||
106 | |||
107 | bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); | ||
108 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/hw/vfio/migration-multifd.c | ||
111 | +++ b/hw/vfio/migration-multifd.c | ||
112 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) | ||
113 | return true; | ||
114 | } | 49 | } |
115 | 50 | ||
116 | +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f) | 51 | +#define IGD_ASLS 0xfc /* ASL Storage Register */ |
52 | #define IGD_GMCH 0x50 /* Graphics Control Register */ | ||
53 | #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ | ||
54 | #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ | ||
55 | @@ -XXX,XX +XXX,XX @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | +/* | ||
60 | + * The OpRegion includes the Video BIOS Table, which seems important for | ||
61 | + * telling the driver what sort of outputs it has. Without this, the device | ||
62 | + * may work in the guest, but we may not get output. This also requires BIOS | ||
63 | + * support to reserve and populate a section of guest memory sufficient for | ||
64 | + * the table and to write the base address of that memory to the ASLS register | ||
65 | + * of the IGD device. | ||
66 | + */ | ||
67 | +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, | ||
68 | + struct vfio_region_info *info, | ||
69 | + Error **errp) | ||
117 | +{ | 70 | +{ |
118 | + assert(vfio_multifd_transfer_enabled(vbasedev)); | 71 | + int ret; |
72 | + | ||
73 | + vdev->igd_opregion = g_malloc0(info->size); | ||
74 | + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, | ||
75 | + info->size, info->offset); | ||
76 | + if (ret != info->size) { | ||
77 | + error_setg(errp, "failed to read IGD OpRegion"); | ||
78 | + g_free(vdev->igd_opregion); | ||
79 | + vdev->igd_opregion = NULL; | ||
80 | + return false; | ||
81 | + } | ||
119 | + | 82 | + |
120 | + /* | 83 | + /* |
121 | + * Emit dummy NOP data on the main migration channel since the actual | 84 | + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to |
122 | + * device state transfer is done via multifd channels. | 85 | + * allocate 32bit reserved memory for, copy these contents into, and write |
86 | + * the reserved memory base address to the device ASLS register at 0xFC. | ||
87 | + * Alignment of this reserved region seems flexible, but using a 4k page | ||
88 | + * alignment seems to work well. This interface assumes a single IGD | ||
89 | + * device, which may be at VM address 00:02.0 in legacy mode or another | ||
90 | + * address in UPT mode. | ||
91 | + * | ||
92 | + * NB, there may be future use cases discovered where the VM should have | ||
93 | + * direct interaction with the host OpRegion, in which case the write to | ||
94 | + * the ASLS register would trigger MemoryRegion setup to enable that. | ||
123 | + */ | 95 | + */ |
124 | + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); | 96 | + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", |
125 | +} | 97 | + vdev->igd_opregion, info->size); |
126 | + | 98 | + |
127 | +static bool | 99 | + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); |
128 | +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, | 100 | + |
129 | + char *idstr, | 101 | + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); |
130 | + uint32_t instance_id, | 102 | + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); |
131 | + uint32_t idx, | 103 | + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); |
132 | + Error **errp) | ||
133 | +{ | ||
134 | + g_autoptr(QIOChannelBuffer) bioc = NULL; | ||
135 | + g_autoptr(QEMUFile) f = NULL; | ||
136 | + int ret; | ||
137 | + g_autofree VFIODeviceStatePacket *packet = NULL; | ||
138 | + size_t packet_len; | ||
139 | + | ||
140 | + bioc = qio_channel_buffer_new(0); | ||
141 | + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save"); | ||
142 | + | ||
143 | + f = qemu_file_new_output(QIO_CHANNEL(bioc)); | ||
144 | + | ||
145 | + if (vfio_save_device_config_state(f, vbasedev, errp)) { | ||
146 | + return false; | ||
147 | + } | ||
148 | + | ||
149 | + ret = qemu_fflush(f); | ||
150 | + if (ret) { | ||
151 | + error_setg(errp, "%s: save config state flush failed: %d", | ||
152 | + vbasedev->name, ret); | ||
153 | + return false; | ||
154 | + } | ||
155 | + | ||
156 | + packet_len = sizeof(*packet) + bioc->usage; | ||
157 | + packet = g_malloc0(packet_len); | ||
158 | + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; | ||
159 | + packet->idx = idx; | ||
160 | + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; | ||
161 | + memcpy(&packet->data, bioc->data, bioc->usage); | ||
162 | + | ||
163 | + if (!multifd_queue_device_state(idstr, instance_id, | ||
164 | + (char *)packet, packet_len)) { | ||
165 | + error_setg(errp, "%s: multifd config data queuing failed", | ||
166 | + vbasedev->name); | ||
167 | + return false; | ||
168 | + } | ||
169 | + | ||
170 | + vfio_mig_add_bytes_transferred(packet_len); | ||
171 | + | 104 | + |
172 | + return true; | 105 | + return true; |
173 | +} | 106 | +} |
174 | + | 107 | + |
175 | +/* | 108 | +bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) |
176 | + * This thread is spawned by the migration core directly via | ||
177 | + * .save_live_complete_precopy_thread SaveVMHandler. | ||
178 | + * | ||
179 | + * It exits after either: | ||
180 | + * * completing saving the remaining device state and device config, OR: | ||
181 | + * * encountering some error while doing the above, OR: | ||
182 | + * * being forcefully aborted by the migration core by | ||
183 | + * multifd_device_state_save_thread_should_exit() returning true. | ||
184 | + */ | ||
185 | +bool | ||
186 | +vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, | ||
187 | + Error **errp) | ||
188 | +{ | 109 | +{ |
189 | + VFIODevice *vbasedev = d->handler_opaque; | 110 | + g_autofree struct vfio_region_info *opregion = NULL; |
190 | + VFIOMigration *migration = vbasedev->migration; | 111 | + int ret; |
191 | + bool ret = false; | 112 | + |
192 | + g_autofree VFIODeviceStatePacket *packet = NULL; | 113 | + /* Hotplugging is not supported for opregion access */ |
193 | + uint32_t idx; | 114 | + if (vdev->pdev.qdev.hotplugged) { |
194 | + | 115 | + error_setg(errp, "IGD OpRegion is not supported on hotplugged device"); |
195 | + if (!vfio_multifd_transfer_enabled(vbasedev)) { | 116 | + return false; |
196 | + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ | 117 | + } |
197 | + return true; | 118 | + |
198 | + } | 119 | + ret = vfio_get_dev_region_info(&vdev->vbasedev, |
199 | + | 120 | + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, |
200 | + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, | 121 | + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); |
201 | + d->idstr, d->instance_id); | 122 | + if (ret) { |
202 | + | 123 | + error_setg_errno(errp, -ret, |
203 | + /* We reach here with device state STOP or STOP_COPY only */ | 124 | + "Device does not supports IGD OpRegion feature"); |
204 | + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, | 125 | + return false; |
205 | + VFIO_DEVICE_STATE_STOP, errp)) { | 126 | + } |
206 | + goto thread_exit; | 127 | + |
207 | + } | 128 | + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { |
208 | + | 129 | + return false; |
209 | + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); | 130 | + } |
210 | + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; | 131 | + |
211 | + | 132 | + return true; |
212 | + for (idx = 0; ; idx++) { | ||
213 | + ssize_t data_size; | ||
214 | + size_t packet_size; | ||
215 | + | ||
216 | + if (multifd_device_state_save_thread_should_exit()) { | ||
217 | + error_setg(errp, "operation cancelled"); | ||
218 | + goto thread_exit; | ||
219 | + } | ||
220 | + | ||
221 | + data_size = read(migration->data_fd, &packet->data, | ||
222 | + migration->data_buffer_size); | ||
223 | + if (data_size < 0) { | ||
224 | + error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d", | ||
225 | + vbasedev->name, idx, errno); | ||
226 | + goto thread_exit; | ||
227 | + } else if (data_size == 0) { | ||
228 | + break; | ||
229 | + } | ||
230 | + | ||
231 | + packet->idx = idx; | ||
232 | + packet_size = sizeof(*packet) + data_size; | ||
233 | + | ||
234 | + if (!multifd_queue_device_state(d->idstr, d->instance_id, | ||
235 | + (char *)packet, packet_size)) { | ||
236 | + error_setg(errp, "%s: multifd data queuing failed", vbasedev->name); | ||
237 | + goto thread_exit; | ||
238 | + } | ||
239 | + | ||
240 | + vfio_mig_add_bytes_transferred(packet_size); | ||
241 | + } | ||
242 | + | ||
243 | + if (!vfio_save_complete_precopy_thread_config_state(vbasedev, | ||
244 | + d->idstr, | ||
245 | + d->instance_id, | ||
246 | + idx, errp)) { | ||
247 | + goto thread_exit; | ||
248 | + } | ||
249 | + | ||
250 | + ret = true; | ||
251 | + | ||
252 | +thread_exit: | ||
253 | + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret); | ||
254 | + | ||
255 | + return ret; | ||
256 | +} | 133 | +} |
257 | + | 134 | + |
258 | int vfio_multifd_switchover_start(VFIODevice *vbasedev) | 135 | /* |
136 | * The rather short list of registers that we copy from the host devices. | ||
137 | * The LPC/ISA bridge values are definitely needed to support the vBIOS, the | ||
138 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
139 | void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
259 | { | 140 | { |
260 | VFIOMigration *migration = vbasedev->migration; | 141 | g_autofree struct vfio_region_info *rom = NULL; |
261 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 142 | - g_autofree struct vfio_region_info *opregion = NULL; |
262 | index XXXXXXX..XXXXXXX 100644 | 143 | g_autofree struct vfio_region_info *host = NULL; |
263 | --- a/hw/vfio/migration.c | 144 | g_autofree struct vfio_region_info *lpc = NULL; |
264 | +++ b/hw/vfio/migration.c | 145 | PCIDevice *lpc_bridge; |
265 | @@ -XXX,XX +XXX,XX @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev, | 146 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
266 | vfio_migration_send_event(vbasedev); | 147 | * Check whether we have all the vfio device specific regions to |
148 | * support legacy mode (added in Linux v4.6). If not, bail. | ||
149 | */ | ||
150 | - ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
151 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
152 | - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); | ||
153 | - if (ret) { | ||
154 | - error_report("IGD device %s does not support OpRegion access," | ||
155 | - "legacy mode disabled", vdev->vbasedev.name); | ||
156 | - return; | ||
157 | - } | ||
158 | - | ||
159 | ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
160 | VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
161 | VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); | ||
162 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
163 | return; | ||
164 | } | ||
165 | |||
166 | + /* Setup OpRegion access */ | ||
167 | + if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
168 | + error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
169 | + error_report_err(err); | ||
170 | + return; | ||
171 | + } | ||
172 | + | ||
173 | /* Create our LPC/ISA bridge */ | ||
174 | ret = vfio_pci_igd_lpc_init(vdev, lpc); | ||
175 | if (ret) { | ||
176 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
177 | return; | ||
178 | } | ||
179 | |||
180 | - /* Setup OpRegion access */ | ||
181 | - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { | ||
182 | - error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
183 | - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); | ||
184 | - return; | ||
185 | - } | ||
186 | - | ||
187 | /* | ||
188 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
189 | * 32MiB. This option should only be used when the desired size cannot be | ||
190 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c | ||
191 | index XXXXXXX..XXXXXXX 100644 | ||
192 | --- a/hw/vfio/pci-quirks.c | ||
193 | +++ b/hw/vfio/pci-quirks.c | ||
194 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) | ||
195 | trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); | ||
267 | } | 196 | } |
268 | 197 | ||
269 | -static int vfio_migration_set_state(VFIODevice *vbasedev, | 198 | -#define IGD_ASLS 0xfc /* ASL Storage Register */ |
270 | - enum vfio_device_mig_state new_state, | 199 | - |
271 | - enum vfio_device_mig_state recover_state, | 200 | -/* |
272 | - Error **errp) | 201 | - * The OpRegion includes the Video BIOS Table, which seems important for |
273 | +int vfio_migration_set_state(VFIODevice *vbasedev, | 202 | - * telling the driver what sort of outputs it has. Without this, the device |
274 | + enum vfio_device_mig_state new_state, | 203 | - * may work in the guest, but we may not get output. This also requires BIOS |
275 | + enum vfio_device_mig_state recover_state, | 204 | - * support to reserve and populate a section of guest memory sufficient for |
276 | + Error **errp) | 205 | - * the table and to write the base address of that memory to the ASLS register |
277 | { | 206 | - * of the IGD device. |
278 | VFIOMigration *migration = vbasedev->migration; | 207 | - */ |
279 | uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + | 208 | -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, |
280 | @@ -XXX,XX +XXX,XX @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, | 209 | - struct vfio_region_info *info, Error **errp) |
281 | return ret; | 210 | -{ |
282 | } | 211 | - int ret; |
283 | 212 | - | |
284 | -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, | 213 | - vdev->igd_opregion = g_malloc0(info->size); |
285 | - Error **errp) | 214 | - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, |
286 | +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) | 215 | - info->size, info->offset); |
287 | { | 216 | - if (ret != info->size) { |
288 | VFIODevice *vbasedev = opaque; | 217 | - error_setg(errp, "failed to read IGD OpRegion"); |
289 | int ret; | 218 | - g_free(vdev->igd_opregion); |
290 | @@ -XXX,XX +XXX,XX @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) | 219 | - vdev->igd_opregion = NULL; |
291 | int ret; | 220 | - return false; |
292 | Error *local_err = NULL; | 221 | - } |
293 | 222 | - | |
294 | + if (vfio_multifd_transfer_enabled(vbasedev)) { | 223 | - /* |
295 | + vfio_multifd_emit_dummy_eos(vbasedev, f); | 224 | - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to |
296 | + return 0; | 225 | - * allocate 32bit reserved memory for, copy these contents into, and write |
297 | + } | 226 | - * the reserved memory base address to the device ASLS register at 0xFC. |
298 | + | 227 | - * Alignment of this reserved region seems flexible, but using a 4k page |
299 | trace_vfio_save_complete_precopy_start(vbasedev->name); | 228 | - * alignment seems to work well. This interface assumes a single IGD |
300 | 229 | - * device, which may be at VM address 00:02.0 in legacy mode or another | |
301 | /* We reach here with device state STOP or STOP_COPY only */ | 230 | - * address in UPT mode. |
302 | @@ -XXX,XX +XXX,XX @@ static void vfio_save_state(QEMUFile *f, void *opaque) | 231 | - * |
303 | Error *local_err = NULL; | 232 | - * NB, there may be future use cases discovered where the VM should have |
304 | int ret; | 233 | - * direct interaction with the host OpRegion, in which case the write to |
305 | 234 | - * the ASLS register would trigger MemoryRegion setup to enable that. | |
306 | + if (vfio_multifd_transfer_enabled(vbasedev)) { | 235 | - */ |
307 | + vfio_multifd_emit_dummy_eos(vbasedev, f); | 236 | - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", |
308 | + return; | 237 | - vdev->igd_opregion, info->size); |
309 | + } | 238 | - |
310 | + | 239 | - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); |
311 | ret = vfio_save_device_config_state(f, opaque, &local_err); | 240 | - |
312 | if (ret) { | 241 | - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); |
313 | error_prepend(&local_err, | 242 | - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); |
314 | @@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = { | 243 | - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); |
315 | */ | 244 | - |
316 | .load_state_buffer = vfio_multifd_load_state_buffer, | 245 | - return true; |
317 | .switchover_start = vfio_switchover_start, | 246 | -} |
318 | + .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread, | 247 | - |
319 | }; | 248 | /* |
320 | 249 | * Common quirk probe entry points. | |
321 | /* ---------------------------------------------------------------------- */ | 250 | */ |
322 | diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events | 251 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
323 | index XXXXXXX..XXXXXXX 100644 | 252 | index XXXXXXX..XXXXXXX 100644 |
324 | --- a/hw/vfio/trace-events | 253 | --- a/hw/vfio/pci.c |
325 | +++ b/hw/vfio/trace-events | 254 | +++ b/hw/vfio/pci.c |
326 | @@ -XXX,XX +XXX,XX @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)" | 255 | @@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp) |
327 | vfio_save_cleanup(const char *name) " (%s)" | 256 | vfio_bar_quirk_setup(vdev, i); |
328 | vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" | 257 | } |
329 | vfio_save_complete_precopy_start(const char *name) " (%s)" | 258 | |
330 | +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32 | 259 | +#ifdef CONFIG_VFIO_IGD |
331 | +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d" | 260 | if (!vdev->igd_opregion && |
332 | vfio_save_device_config_state(const char *name) " (%s)" | 261 | vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { |
333 | vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64 | 262 | - g_autofree struct vfio_region_info *opregion = NULL; |
334 | vfio_save_iterate_start(const char *name) " (%s)" | 263 | - |
264 | - if (vdev->pdev.qdev.hotplugged) { | ||
265 | - error_setg(errp, | ||
266 | - "cannot support IGD OpRegion feature on hotplugged " | ||
267 | - "device"); | ||
268 | - goto out_unset_idev; | ||
269 | - } | ||
270 | - | ||
271 | - ret = vfio_get_dev_region_info(vbasedev, | ||
272 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
273 | - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); | ||
274 | - if (ret) { | ||
275 | - error_setg_errno(errp, -ret, | ||
276 | - "does not support requested IGD OpRegion feature"); | ||
277 | - goto out_unset_idev; | ||
278 | - } | ||
279 | - | ||
280 | - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { | ||
281 | + if (!vfio_pci_igd_setup_opregion(vdev, errp)) { | ||
282 | goto out_unset_idev; | ||
283 | } | ||
284 | } | ||
285 | +#endif | ||
286 | |||
287 | /* QEMU emulates all of MSI & MSIX */ | ||
288 | if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { | ||
335 | -- | 289 | -- |
336 | 2.48.1 | 290 | 2.48.1 |
337 | 291 | ||
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | The multifd received data needs to be reassembled since device state | 3 | A new option will soon be introduced to decouple the LPC bridge/Host |
4 | packets sent via different multifd channels can arrive out-of-order. | 4 | bridge ID quirk from legacy mode. To prepare for this, move the LPC |
5 | bridge initialization into a separate function. | ||
5 | 6 | ||
6 | Therefore, each VFIO device state packet carries a header indicating its | 7 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
7 | position in the stream. | 8 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
8 | The raw device state data is saved into a VFIOStateBuffer for later | 9 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
9 | in-order loading into the device. | 10 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
10 | 11 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-5-tomitamoeko@gmail.com | |
11 | The last such VFIO device state packet should have the | 12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
12 | VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. | ||
13 | |||
14 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
15 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
16 | Link: https://lore.kernel.org/qemu-devel/e3bff515a8d61c582b94b409eb12a45b1a143a69.1741124640.git.maciej.szmigiero@oracle.com | ||
17 | [ clg: - Reordered savevm_vfio_handlers | ||
18 | - Added load_state_buffer documentation ] | ||
19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
20 | --- | 13 | --- |
21 | docs/devel/migration/vfio.rst | 7 ++ | 14 | hw/vfio/igd.c | 122 +++++++++++++++++++++++++++++--------------------- |
22 | hw/vfio/migration-multifd.h | 3 + | 15 | 1 file changed, 70 insertions(+), 52 deletions(-) |
23 | hw/vfio/migration-multifd.c | 163 ++++++++++++++++++++++++++++++++++ | ||
24 | hw/vfio/migration.c | 4 + | ||
25 | hw/vfio/trace-events | 1 + | ||
26 | 5 files changed, 178 insertions(+) | ||
27 | 16 | ||
28 | diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst | 17 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
29 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/docs/devel/migration/vfio.rst | 19 | --- a/hw/vfio/igd.c |
31 | +++ b/docs/devel/migration/vfio.rst | 20 | +++ b/hw/vfio/igd.c |
32 | @@ -XXX,XX +XXX,XX @@ VFIO implements the device hooks for the iterative approach as follows: | 21 | @@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, |
33 | * A ``load_state`` function that loads the config section and the data | 22 | return ret; |
34 | sections that are generated by the save functions above. | 23 | } |
35 | 24 | ||
36 | +* A ``load_state_buffer`` function that loads the device state and the device | 25 | +static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) |
37 | + config that arrived via multifd channels. | 26 | +{ |
38 | + It's used only in the multifd mode. | 27 | + g_autofree struct vfio_region_info *host = NULL; |
28 | + g_autofree struct vfio_region_info *lpc = NULL; | ||
29 | + PCIDevice *lpc_bridge; | ||
30 | + int ret; | ||
39 | + | 31 | + |
40 | * ``cleanup`` functions for both save and load that perform any migration | 32 | + /* |
41 | related cleanup. | 33 | + * Copying IDs or creating new devices are not supported on hotplug |
42 | 34 | + */ | |
43 | @@ -XXX,XX +XXX,XX @@ Live migration resume path | 35 | + if (vdev->pdev.qdev.hotplugged) { |
44 | (RESTORE_VM, _ACTIVE, _STOP) | 36 | + error_setg(errp, "IGD LPC is not supported on hotplugged device"); |
45 | | | ||
46 | For each device, .load_state() is called for that device section data | ||
47 | + transmitted via the main migration channel. | ||
48 | + For data transmitted via multifd channels .load_state_buffer() is called | ||
49 | + instead. | ||
50 | (RESTORE_VM, _ACTIVE, _RESUMING) | ||
51 | | | ||
52 | At the end, .load_cleanup() is called for each device and vCPUs are started | ||
53 | diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/hw/vfio/migration-multifd.h | ||
56 | +++ b/hw/vfio/migration-multifd.h | ||
57 | @@ -XXX,XX +XXX,XX @@ void vfio_multifd_cleanup(VFIODevice *vbasedev); | ||
58 | bool vfio_multifd_transfer_supported(void); | ||
59 | bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); | ||
60 | |||
61 | +bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, | ||
62 | + Error **errp); | ||
63 | + | ||
64 | #endif | ||
65 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/hw/vfio/migration-multifd.c | ||
68 | +++ b/hw/vfio/migration-multifd.c | ||
69 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIODeviceStatePacket { | ||
70 | uint8_t data[0]; | ||
71 | } QEMU_PACKED VFIODeviceStatePacket; | ||
72 | |||
73 | +/* type safety */ | ||
74 | +typedef struct VFIOStateBuffers { | ||
75 | + GArray *array; | ||
76 | +} VFIOStateBuffers; | ||
77 | + | ||
78 | +typedef struct VFIOStateBuffer { | ||
79 | + bool is_present; | ||
80 | + char *data; | ||
81 | + size_t len; | ||
82 | +} VFIOStateBuffer; | ||
83 | + | ||
84 | typedef struct VFIOMultifd { | ||
85 | + VFIOStateBuffers load_bufs; | ||
86 | + QemuCond load_bufs_buffer_ready_cond; | ||
87 | + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ | ||
88 | + uint32_t load_buf_idx; | ||
89 | + uint32_t load_buf_idx_last; | ||
90 | } VFIOMultifd; | ||
91 | |||
92 | +static void vfio_state_buffer_clear(gpointer data) | ||
93 | +{ | ||
94 | + VFIOStateBuffer *lb = data; | ||
95 | + | ||
96 | + if (!lb->is_present) { | ||
97 | + return; | ||
98 | + } | ||
99 | + | ||
100 | + g_clear_pointer(&lb->data, g_free); | ||
101 | + lb->is_present = false; | ||
102 | +} | ||
103 | + | ||
104 | +static void vfio_state_buffers_init(VFIOStateBuffers *bufs) | ||
105 | +{ | ||
106 | + bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer)); | ||
107 | + g_array_set_clear_func(bufs->array, vfio_state_buffer_clear); | ||
108 | +} | ||
109 | + | ||
110 | +static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs) | ||
111 | +{ | ||
112 | + g_clear_pointer(&bufs->array, g_array_unref); | ||
113 | +} | ||
114 | + | ||
115 | +static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs) | ||
116 | +{ | ||
117 | + assert(bufs->array); | ||
118 | +} | ||
119 | + | ||
120 | +static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs) | ||
121 | +{ | ||
122 | + return bufs->array->len; | ||
123 | +} | ||
124 | + | ||
125 | +static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs, | ||
126 | + unsigned int size) | ||
127 | +{ | ||
128 | + g_array_set_size(bufs->array, size); | ||
129 | +} | ||
130 | + | ||
131 | +static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, | ||
132 | + unsigned int idx) | ||
133 | +{ | ||
134 | + return &g_array_index(bufs->array, VFIOStateBuffer, idx); | ||
135 | +} | ||
136 | + | ||
137 | +/* called with load_bufs_mutex locked */ | ||
138 | +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, | ||
139 | + VFIODeviceStatePacket *packet, | ||
140 | + size_t packet_total_size, | ||
141 | + Error **errp) | ||
142 | +{ | ||
143 | + VFIOMigration *migration = vbasedev->migration; | ||
144 | + VFIOMultifd *multifd = migration->multifd; | ||
145 | + VFIOStateBuffer *lb; | ||
146 | + | ||
147 | + vfio_state_buffers_assert_init(&multifd->load_bufs); | ||
148 | + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { | ||
149 | + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); | ||
150 | + } | ||
151 | + | ||
152 | + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); | ||
153 | + if (lb->is_present) { | ||
154 | + error_setg(errp, "%s: state buffer %" PRIu32 " already filled", | ||
155 | + vbasedev->name, packet->idx); | ||
156 | + return false; | 37 | + return false; |
157 | + } | 38 | + } |
158 | + | 39 | + |
159 | + assert(packet->idx >= multifd->load_buf_idx); | 40 | + /* |
160 | + | 41 | + * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we |
161 | + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); | 42 | + * can stuff host values into, so if there's already one there and it's not |
162 | + lb->len = packet_total_size - sizeof(*packet); | 43 | + * one we can hack on, this quirk is no-go. Sorry Q35. |
163 | + lb->is_present = true; | 44 | + */ |
164 | + | 45 | + lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), |
165 | + return true; | 46 | + 0, PCI_DEVFN(0x1f, 0)); |
166 | +} | 47 | + if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), |
167 | + | 48 | + "vfio-pci-igd-lpc-bridge")) { |
168 | +bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, | ||
169 | + Error **errp) | ||
170 | +{ | ||
171 | + VFIODevice *vbasedev = opaque; | ||
172 | + VFIOMigration *migration = vbasedev->migration; | ||
173 | + VFIOMultifd *multifd = migration->multifd; | ||
174 | + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; | ||
175 | + | ||
176 | + if (!vfio_multifd_transfer_enabled(vbasedev)) { | ||
177 | + error_setg(errp, | 49 | + error_setg(errp, |
178 | + "%s: got device state packet but not doing multifd transfer", | 50 | + "Cannot create LPC bridge due to existing device at 1f.0"); |
179 | + vbasedev->name); | ||
180 | + return false; | 51 | + return false; |
181 | + } | 52 | + } |
182 | + | 53 | + |
183 | + assert(multifd); | 54 | + /* |
184 | + | 55 | + * Check whether we have all the vfio device specific regions to |
185 | + if (data_size < sizeof(*packet)) { | 56 | + * support LPC quirk (added in Linux v4.6). |
186 | + error_setg(errp, "%s: packet too short at %zu (min is %zu)", | 57 | + */ |
187 | + vbasedev->name, data_size, sizeof(*packet)); | 58 | + ret = vfio_get_dev_region_info(&vdev->vbasedev, |
59 | + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
60 | + VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc); | ||
61 | + if (ret) { | ||
62 | + error_setg(errp, "IGD LPC bridge access is not supported by kernel"); | ||
188 | + return false; | 63 | + return false; |
189 | + } | 64 | + } |
190 | + | 65 | + |
191 | + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { | 66 | + ret = vfio_get_dev_region_info(&vdev->vbasedev, |
192 | + error_setg(errp, "%s: packet has unknown version %" PRIu32, | 67 | + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, |
193 | + vbasedev->name, packet->version); | 68 | + VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); |
69 | + if (ret) { | ||
70 | + error_setg(errp, "IGD host bridge access is not supported by kernel"); | ||
194 | + return false; | 71 | + return false; |
195 | + } | 72 | + } |
196 | + | 73 | + |
197 | + if (packet->idx == UINT32_MAX) { | 74 | + /* Create/modify LPC bridge */ |
198 | + error_setg(errp, "%s: packet index is invalid", vbasedev->name); | 75 | + ret = vfio_pci_igd_lpc_init(vdev, lpc); |
76 | + if (ret) { | ||
77 | + error_setg(errp, "Failed to create/modify LPC bridge for IGD"); | ||
199 | + return false; | 78 | + return false; |
200 | + } | 79 | + } |
201 | + | 80 | + |
202 | + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); | 81 | + /* Stuff some host values into the VM PCI host bridge */ |
203 | + | 82 | + ret = vfio_pci_igd_host_init(vdev, host); |
204 | + /* | 83 | + if (ret) { |
205 | + * Holding BQL here would violate the lock order and can cause | 84 | + error_setg(errp, "Failed to modify host bridge for IGD"); |
206 | + * a deadlock once we attempt to lock load_bufs_mutex below. | 85 | + return false; |
207 | + */ | ||
208 | + assert(!bql_locked()); | ||
209 | + | ||
210 | + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { | ||
211 | + /* config state packet should be the last one in the stream */ | ||
212 | + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { | ||
213 | + multifd->load_buf_idx_last = packet->idx; | ||
214 | + } | ||
215 | + | ||
216 | + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, | ||
217 | + errp)) { | ||
218 | + return false; | ||
219 | + } | ||
220 | + | ||
221 | + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); | ||
222 | + } | 86 | + } |
223 | + | 87 | + |
224 | + return true; | 88 | + return true; |
225 | +} | 89 | +} |
226 | + | 90 | + |
227 | static VFIOMultifd *vfio_multifd_new(void) | 91 | #define IGD_GGC_MMIO_OFFSET 0x108040 |
92 | #define IGD_BDSM_MMIO_OFFSET 0x1080C0 | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
95 | void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
228 | { | 96 | { |
229 | VFIOMultifd *multifd = g_new(VFIOMultifd, 1); | 97 | g_autofree struct vfio_region_info *rom = NULL; |
230 | 98 | - g_autofree struct vfio_region_info *host = NULL; | |
231 | + vfio_state_buffers_init(&multifd->load_bufs); | 99 | - g_autofree struct vfio_region_info *lpc = NULL; |
232 | + | 100 | - PCIDevice *lpc_bridge; |
233 | + qemu_mutex_init(&multifd->load_bufs_mutex); | 101 | int ret, gen; |
234 | + | 102 | uint64_t gms_size; |
235 | + multifd->load_buf_idx = 0; | 103 | uint64_t *bdsm_size; |
236 | + multifd->load_buf_idx_last = UINT32_MAX; | 104 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
237 | + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); | 105 | return; |
238 | + | 106 | } |
239 | return multifd; | 107 | |
240 | } | 108 | - /* |
241 | 109 | - * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we | |
242 | static void vfio_multifd_free(VFIOMultifd *multifd) | 110 | - * can stuff host values into, so if there's already one there and it's not |
243 | { | 111 | - * one we can hack on, legacy mode is no-go. Sorry Q35. |
244 | + vfio_state_buffers_destroy(&multifd->load_bufs); | 112 | - */ |
245 | + qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); | 113 | - lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), |
246 | + qemu_mutex_destroy(&multifd->load_bufs_mutex); | 114 | - 0, PCI_DEVFN(0x1f, 0)); |
247 | + | 115 | - if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), |
248 | g_free(multifd); | 116 | - "vfio-pci-igd-lpc-bridge")) { |
249 | } | 117 | - error_report("IGD device %s cannot support legacy mode due to existing " |
250 | 118 | - "devices at address 1f.0", vdev->vbasedev.name); | |
251 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 119 | - return; |
252 | index XXXXXXX..XXXXXXX 100644 | 120 | - } |
253 | --- a/hw/vfio/migration.c | 121 | - |
254 | +++ b/hw/vfio/migration.c | 122 | /* |
255 | @@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = { | 123 | * IGD is not a standard, they like to change their specs often. We |
256 | .load_cleanup = vfio_load_cleanup, | 124 | * only attempt to support back to SandBridge and we hope that newer |
257 | .load_state = vfio_load_state, | 125 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
258 | .switchover_ack_needed = vfio_switchover_ack_needed, | 126 | return; |
259 | + /* | 127 | } |
260 | + * Multifd support | 128 | |
261 | + */ | 129 | - /* |
262 | + .load_state_buffer = vfio_multifd_load_state_buffer, | 130 | - * Check whether we have all the vfio device specific regions to |
263 | }; | 131 | - * support legacy mode (added in Linux v4.6). If not, bail. |
264 | 132 | - */ | |
265 | /* ---------------------------------------------------------------------- */ | 133 | - ret = vfio_get_dev_region_info(&vdev->vbasedev, |
266 | diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events | 134 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, |
267 | index XXXXXXX..XXXXXXX 100644 | 135 | - VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); |
268 | --- a/hw/vfio/trace-events | 136 | - if (ret) { |
269 | +++ b/hw/vfio/trace-events | 137 | - error_report("IGD device %s does not support host bridge access," |
270 | @@ -XXX,XX +XXX,XX @@ vfio_load_device_config_state_start(const char *name) " (%s)" | 138 | - "legacy mode disabled", vdev->vbasedev.name); |
271 | vfio_load_device_config_state_end(const char *name) " (%s)" | 139 | - return; |
272 | vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 | 140 | - } |
273 | vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" | 141 | - |
274 | +vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 | 142 | - ret = vfio_get_dev_region_info(&vdev->vbasedev, |
275 | vfio_migration_realize(const char *name) " (%s)" | 143 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, |
276 | vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" | 144 | - VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc); |
277 | vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s" | 145 | - if (ret) { |
146 | - error_report("IGD device %s does not support LPC bridge access," | ||
147 | - "legacy mode disabled", vdev->vbasedev.name); | ||
148 | - return; | ||
149 | - } | ||
150 | - | ||
151 | gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); | ||
152 | |||
153 | /* | ||
154 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
155 | return; | ||
156 | } | ||
157 | |||
158 | - /* Create our LPC/ISA bridge */ | ||
159 | - ret = vfio_pci_igd_lpc_init(vdev, lpc); | ||
160 | - if (ret) { | ||
161 | - error_report("IGD device %s failed to create LPC bridge, " | ||
162 | - "legacy mode disabled", vdev->vbasedev.name); | ||
163 | - return; | ||
164 | - } | ||
165 | - | ||
166 | - /* Stuff some host values into the VM PCI host bridge */ | ||
167 | - ret = vfio_pci_igd_host_init(vdev, host); | ||
168 | - if (ret) { | ||
169 | - error_report("IGD device %s failed to modify host bridge, " | ||
170 | - "legacy mode disabled", vdev->vbasedev.name); | ||
171 | + /* Setup LPC bridge / Host bridge PCI IDs */ | ||
172 | + if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
173 | + error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
174 | + error_report_err(err); | ||
175 | return; | ||
176 | } | ||
177 | |||
278 | -- | 178 | -- |
279 | 2.48.1 | 179 | 2.48.1 |
280 | 180 | ||
281 | 181 | diff view generated by jsdifflib |
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | DEFINE_PROP_ON_OFF_AUTO() property isn't runtime-mutable so using it | 3 | IGD devices require a device-specific quirk to be applied to their PCI |
4 | would mean that the source VM would need to decide upfront at startup | 4 | config space. Currently, it is put in the BAR4 quirk that does nothing |
5 | time whether it wants to do a multifd device state transfer at some | 5 | to BAR4 itself. Add a placeholder for PCI config space quirks to hold |
6 | point. | 6 | that quirk later. |
7 | 7 | ||
8 | Source VM can run for a long time before being migrated so it is | 8 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
9 | desirable to have a fallback mechanism to the old way of transferring | 9 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
10 | VFIO device state if it turns out to be necessary. | 10 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
11 | 11 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | |
12 | This brings this property to the same mutability level as ordinary | 12 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-6-tomitamoeko@gmail.com |
13 | migration parameters, which can also be adjusted at runtime. | ||
14 | |||
15 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
16 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
17 | Link: https://lore.kernel.org/qemu-devel/f2f2d66bda477da3e6cb8c0311006cff36e8651d.1741124640.git.maciej.szmigiero@oracle.com | ||
18 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
19 | --- | 14 | --- |
20 | hw/vfio/migration-multifd.c | 4 ++++ | 15 | hw/vfio/pci.h | 1 + |
21 | hw/vfio/pci.c | 20 +++++++++++++++++--- | 16 | hw/vfio/pci-quirks.c | 5 +++++ |
22 | 2 files changed, 21 insertions(+), 3 deletions(-) | 17 | hw/vfio/pci.c | 4 ++++ |
18 | 3 files changed, 10 insertions(+) | ||
23 | 19 | ||
24 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | 20 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
25 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/hw/vfio/migration-multifd.c | 22 | --- a/hw/vfio/pci.h |
27 | +++ b/hw/vfio/migration-multifd.c | 23 | +++ b/hw/vfio/pci.h |
28 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) | 24 | @@ -XXX,XX +XXX,XX @@ uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); |
25 | void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); | ||
26 | |||
27 | bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); | ||
28 | +bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp); | ||
29 | void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); | ||
30 | void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); | ||
31 | void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); | ||
32 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/hw/vfio/pci-quirks.c | ||
35 | +++ b/hw/vfio/pci-quirks.c | ||
36 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) | ||
37 | /* | ||
38 | * Common quirk probe entry points. | ||
39 | */ | ||
40 | +bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp) | ||
41 | +{ | ||
42 | + return true; | ||
43 | +} | ||
44 | + | ||
45 | void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) | ||
29 | { | 46 | { |
30 | VFIOMigration *migration = vbasedev->migration; | 47 | vfio_vga_probe_ati_3c3_quirk(vdev); |
31 | |||
32 | + /* | ||
33 | + * Make a copy of this setting at the start in case it is changed | ||
34 | + * mid-migration. | ||
35 | + */ | ||
36 | if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) { | ||
37 | migration->multifd_transfer = vfio_multifd_transfer_supported(); | ||
38 | } else { | ||
39 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | 48 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
40 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/hw/vfio/pci.c | 50 | --- a/hw/vfio/pci.c |
42 | +++ b/hw/vfio/pci.c | 51 | +++ b/hw/vfio/pci.c |
43 | @@ -XXX,XX +XXX,XX @@ static void vfio_instance_init(Object *obj) | 52 | @@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp) |
44 | pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; | 53 | goto out_unset_idev; |
45 | } | 54 | } |
46 | 55 | ||
47 | +static PropertyInfo vfio_pci_migration_multifd_transfer_prop; | 56 | + if (!vfio_config_quirk_setup(vdev, errp)) { |
57 | + goto out_unset_idev; | ||
58 | + } | ||
48 | + | 59 | + |
49 | static const Property vfio_pci_dev_properties[] = { | 60 | if (vdev->vga) { |
50 | DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), | 61 | vfio_vga_quirk_setup(vdev); |
51 | DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), | 62 | } |
52 | @@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = { | ||
53 | VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), | ||
54 | DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, | ||
55 | vbasedev.enable_migration, ON_OFF_AUTO_AUTO), | ||
56 | - DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice, | ||
57 | - vbasedev.migration_multifd_transfer, | ||
58 | - ON_OFF_AUTO_AUTO), | ||
59 | + DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice, | ||
60 | + vbasedev.migration_multifd_transfer, | ||
61 | + vfio_pci_migration_multifd_transfer_prop, OnOffAuto, | ||
62 | + .set_default = true, .defval.i = ON_OFF_AUTO_AUTO), | ||
63 | DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, | ||
64 | vbasedev.migration_events, false), | ||
65 | DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), | ||
66 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { | ||
67 | |||
68 | static void register_vfio_pci_dev_type(void) | ||
69 | { | ||
70 | + /* | ||
71 | + * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can | ||
72 | + * run for a long time before being migrated so it is desirable to have a | ||
73 | + * fallback mechanism to the old way of transferring VFIO device state if | ||
74 | + * it turns to be necessary. | ||
75 | + * The following makes this type of property have the same mutability level | ||
76 | + * as ordinary migration parameters. | ||
77 | + */ | ||
78 | + vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto; | ||
79 | + vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true; | ||
80 | + | ||
81 | type_register_static(&vfio_pci_dev_info); | ||
82 | type_register_static(&vfio_pci_nohotplug_dev_info); | ||
83 | } | ||
84 | -- | 63 | -- |
85 | 2.48.1 | 64 | 2.48.1 |
86 | 65 | ||
87 | 66 | diff view generated by jsdifflib |
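
As an aside for readers of this series: once the property is settable after realize, a management layer can flip it on the source right before migration instead of deciding at VM startup. The sketch below is illustrative only and not part of the patches; the QMP socket path /tmp/qmp.sock and the device id vfio0 are invented example names, and it assumes QEMU accepts qom-set on this property once the patch is applied.

#!/usr/bin/env python3
# Illustrative only: toggle x-migration-multifd-transfer on a running
# vfio-pci device over QMP.  "vfio0" and "/tmp/qmp.sock" are made-up names.
import json
import socket

def qmp_command(sock_file, cmd):
    """Send one QMP command and return the parsed reply, skipping events."""
    sock_file.write(json.dumps(cmd) + "\r\n")
    sock_file.flush()
    while True:
        reply = json.loads(sock_file.readline())
        if "return" in reply or "error" in reply:
            return reply

with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
    s.connect("/tmp/qmp.sock")
    f = s.makefile("rw", encoding="utf-8")
    json.loads(f.readline())                     # QMP greeting banner
    qmp_command(f, {"execute": "qmp_capabilities"})
    print(qmp_command(f, {"execute": "qom-set",
                          "arguments": {"path": "/machine/peripheral/vfio0",
                                        "property": "x-migration-multifd-transfer",
                                        "value": "on"}}))

Any QMP client would do equally well; the point is only that the value no longer has to be fixed on the command line when the source VM is started.
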
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | This QEMU_VM_COMMAND sub-command and its switchover_start SaveVMHandler are | 3 | The actual IO BAR4 write quirk in vfio_probe_igd_bar4_quirk was removed |
4 | used to mark the switchover point in the main migration stream. | 4 | in a previous change, leaving the function no longer matching its name, so move |
5 | it into the newly introduced vfio_config_quirk_setup. There is no | ||
6 | functional change in this commit. | ||
5 | 7 | ||
6 | It can be used to inform the destination that all pre-switchover main | 8 | For now, to align with current legacy mode behavior, it returns and |
7 | migration stream data has been sent/received so it can start to process | 9 | proceeds on error. Later it will fail on error after decoupling the |
8 | post-switchover data that it might have received via other migration | 10 | quirks from legacy mode. |
9 | channels like the multifd ones. | ||
10 | 11 | ||
11 | Add also the relevant MigrationState bit stream compatibility property and | 12 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
12 | its hw_compat entry. | 13 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
13 | 14 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | |
14 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | 15 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
15 | Reviewed-by: Zhang Chen <zhangckid@gmail.com> # for the COLO part | 16 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-7-tomitamoeko@gmail.com |
16 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
17 | Link: https://lore.kernel.org/qemu-devel/311be6da85fc7e49a7598684d80aa631778dcbce.1741124640.git.maciej.szmigiero@oracle.com | ||
18 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 17 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
19 | --- | 18 | --- |
20 | include/migration/client-options.h | 4 +++ | 19 | hw/vfio/pci.h | 2 +- |
21 | include/migration/register.h | 12 +++++++++ | 20 | hw/vfio/igd.c | 21 ++++++++++++--------- |
22 | migration/migration.h | 2 ++ | 21 | hw/vfio/pci-quirks.c | 6 +++++- |
23 | migration/savevm.h | 1 + | 22 | 3 files changed, 18 insertions(+), 11 deletions(-) |
24 | hw/core/machine.c | 1 + | ||
25 | migration/colo.c | 3 +++ | ||
26 | migration/migration-hmp-cmds.c | 2 ++ | ||
27 | migration/migration.c | 2 ++ | ||
28 | migration/options.c | 9 +++++++ | ||
29 | migration/savevm.c | 39 ++++++++++++++++++++++++++++++ | ||
30 | migration/trace-events | 1 + | ||
31 | scripts/analyze-migration.py | 11 +++++++++ | ||
32 | 12 files changed, 87 insertions(+) | ||
33 | 23 | ||
34 | diff --git a/include/migration/client-options.h b/include/migration/client-options.h | 24 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
35 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/include/migration/client-options.h | 26 | --- a/hw/vfio/pci.h |
37 | +++ b/include/migration/client-options.h | 27 | +++ b/hw/vfio/pci.h |
38 | @@ -XXX,XX +XXX,XX @@ | 28 | @@ -XXX,XX +XXX,XX @@ bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); |
39 | #ifndef QEMU_MIGRATION_CLIENT_OPTIONS_H | 29 | void vfio_quirk_reset(VFIOPCIDevice *vdev); |
40 | #define QEMU_MIGRATION_CLIENT_OPTIONS_H | 30 | VFIOQuirk *vfio_quirk_alloc(int nr_mem); |
41 | 31 | void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr); | |
32 | -void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); | ||
33 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp); | ||
34 | |||
35 | extern const PropertyInfo qdev_prop_nv_gpudirect_clique; | ||
36 | |||
37 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/hw/vfio/igd.c | ||
40 | +++ b/hw/vfio/igd.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
42 | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); | ||
43 | } | ||
44 | |||
45 | -void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
46 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
47 | + Error **errp G_GNUC_UNUSED) | ||
48 | { | ||
49 | g_autofree struct vfio_region_info *rom = NULL; | ||
50 | int ret, gen; | ||
51 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
52 | * PCI bus address. | ||
53 | */ | ||
54 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || | ||
55 | - !vfio_is_vga(vdev) || nr != 4 || | ||
56 | + !vfio_is_vga(vdev) || | ||
57 | &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), | ||
58 | 0, PCI_DEVFN(0x2, 0))) { | ||
59 | - return; | ||
60 | + return true; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
65 | if (gen == -1) { | ||
66 | error_report("IGD device %s is unsupported in legacy mode, " | ||
67 | "try SandyBridge or newer", vdev->vbasedev.name); | ||
68 | - return; | ||
69 | + return true; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
74 | if ((ret || !rom->size) && !vdev->pdev.romfile) { | ||
75 | error_report("IGD device %s has no ROM, legacy mode disabled", | ||
76 | vdev->vbasedev.name); | ||
77 | - return; | ||
78 | + return true; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
83 | error_report("IGD device %s hotplugged, ROM disabled, " | ||
84 | "legacy mode disabled", vdev->vbasedev.name); | ||
85 | vdev->rom_read_failed = true; | ||
86 | - return; | ||
87 | + return true; | ||
88 | } | ||
89 | |||
90 | gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); | ||
91 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
92 | error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); | ||
93 | error_report("IGD device %s failed to enable VGA access, " | ||
94 | "legacy mode disabled", vdev->vbasedev.name); | ||
95 | - return; | ||
96 | + return true; | ||
97 | } | ||
98 | |||
99 | /* Setup OpRegion access */ | ||
100 | if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
101 | error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
102 | error_report_err(err); | ||
103 | - return; | ||
104 | + return true; | ||
105 | } | ||
106 | |||
107 | /* Setup LPC bridge / Host bridge PCI IDs */ | ||
108 | if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
109 | error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
110 | error_report_err(err); | ||
111 | - return; | ||
112 | + return true; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
117 | } | ||
118 | |||
119 | trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); | ||
42 | + | 120 | + |
43 | +/* properties */ | 121 | + return true; |
44 | +bool migrate_send_switchover_start(void); | 122 | } |
45 | + | 123 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c |
46 | /* capabilities */ | ||
47 | |||
48 | bool migrate_background_snapshot(void); | ||
49 | diff --git a/include/migration/register.h b/include/migration/register.h | ||
50 | index XXXXXXX..XXXXXXX 100644 | 124 | index XXXXXXX..XXXXXXX 100644 |
51 | --- a/include/migration/register.h | 125 | --- a/hw/vfio/pci-quirks.c |
52 | +++ b/include/migration/register.h | 126 | +++ b/hw/vfio/pci-quirks.c |
53 | @@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers { | 127 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) |
54 | * otherwise | 128 | */ |
55 | */ | 129 | bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp) |
56 | bool (*switchover_ack_needed)(void *opaque); | 130 | { |
57 | + | 131 | +#ifdef CONFIG_VFIO_IGD |
58 | + /** | 132 | + if (!vfio_probe_igd_config_quirk(vdev, errp)) { |
59 | + * @switchover_start | 133 | + return false; |
60 | + * | 134 | + } |
61 | + * Notifies that the switchover has started. Called only on | 135 | +#endif |
62 | + * the destination. | ||
63 | + * | ||
64 | + * @opaque: data pointer passed to register_savevm_live() | ||
65 | + * | ||
66 | + * Returns zero to indicate success and negative for error | ||
67 | + */ | ||
68 | + int (*switchover_start)(void *opaque); | ||
69 | } SaveVMHandlers; | ||
70 | |||
71 | /** | ||
72 | diff --git a/migration/migration.h b/migration/migration.h | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/migration/migration.h | ||
75 | +++ b/migration/migration.h | ||
76 | @@ -XXX,XX +XXX,XX @@ struct MigrationState { | ||
77 | bool send_configuration; | ||
78 | /* Whether we send section footer during migration */ | ||
79 | bool send_section_footer; | ||
80 | + /* Whether we send switchover start notification during migration */ | ||
81 | + bool send_switchover_start; | ||
82 | |||
83 | /* Needed by postcopy-pause state */ | ||
84 | QemuSemaphore postcopy_pause_sem; | ||
85 | diff --git a/migration/savevm.h b/migration/savevm.h | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/migration/savevm.h | ||
88 | +++ b/migration/savevm.h | ||
89 | @@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_listen(QEMUFile *f); | ||
90 | void qemu_savevm_send_postcopy_run(QEMUFile *f); | ||
91 | void qemu_savevm_send_postcopy_resume(QEMUFile *f); | ||
92 | void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name); | ||
93 | +void qemu_savevm_maybe_send_switchover_start(QEMUFile *f); | ||
94 | |||
95 | void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, | ||
96 | uint16_t len, | ||
97 | diff --git a/hw/core/machine.c b/hw/core/machine.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/hw/core/machine.c | ||
100 | +++ b/hw/core/machine.c | ||
101 | @@ -XXX,XX +XXX,XX @@ GlobalProperty hw_compat_9_2[] = { | ||
102 | { "virtio-balloon-pci-non-transitional", "vectors", "0" }, | ||
103 | { "virtio-mem-pci", "vectors", "0" }, | ||
104 | { "migration", "multifd-clean-tls-termination", "false" }, | ||
105 | + { "migration", "send-switchover-start", "off"}, | ||
106 | }; | ||
107 | const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); | ||
108 | |||
109 | diff --git a/migration/colo.c b/migration/colo.c | ||
110 | index XXXXXXX..XXXXXXX 100644 | ||
111 | --- a/migration/colo.c | ||
112 | +++ b/migration/colo.c | ||
113 | @@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s, | ||
114 | bql_unlock(); | ||
115 | goto out; | ||
116 | } | ||
117 | + | ||
118 | + qemu_savevm_maybe_send_switchover_start(s->to_dst_file); | ||
119 | + | ||
120 | /* Note: device state is saved into buffer */ | ||
121 | ret = qemu_save_device_state(fb); | ||
122 | |||
123 | diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c | ||
124 | index XXXXXXX..XXXXXXX 100644 | ||
125 | --- a/migration/migration-hmp-cmds.c | ||
126 | +++ b/migration/migration-hmp-cmds.c | ||
127 | @@ -XXX,XX +XXX,XX @@ static void migration_global_dump(Monitor *mon) | ||
128 | ms->send_configuration ? "on" : "off"); | ||
129 | monitor_printf(mon, "send-section-footer: %s\n", | ||
130 | ms->send_section_footer ? "on" : "off"); | ||
131 | + monitor_printf(mon, "send-switchover-start: %s\n", | ||
132 | + ms->send_switchover_start ? "on" : "off"); | ||
133 | monitor_printf(mon, "clear-bitmap-shift: %u\n", | ||
134 | ms->clear_bitmap_shift); | ||
135 | } | ||
136 | diff --git a/migration/migration.c b/migration/migration.c | ||
137 | index XXXXXXX..XXXXXXX 100644 | ||
138 | --- a/migration/migration.c | ||
139 | +++ b/migration/migration.c | ||
140 | @@ -XXX,XX +XXX,XX @@ static bool migration_switchover_start(MigrationState *s, Error **errp) | ||
141 | |||
142 | precopy_notify_complete(); | ||
143 | |||
144 | + qemu_savevm_maybe_send_switchover_start(s->to_dst_file); | ||
145 | + | ||
146 | return true; | 136 | return true; |
147 | } | 137 | } |
148 | 138 | ||
149 | diff --git a/migration/options.c b/migration/options.c | 139 | @@ -XXX,XX +XXX,XX @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) |
150 | index XXXXXXX..XXXXXXX 100644 | 140 | vfio_probe_rtl8168_bar2_quirk(vdev, nr); |
151 | --- a/migration/options.c | 141 | #ifdef CONFIG_VFIO_IGD |
152 | +++ b/migration/options.c | 142 | vfio_probe_igd_bar0_quirk(vdev, nr); |
153 | @@ -XXX,XX +XXX,XX @@ const Property migration_properties[] = { | 143 | - vfio_probe_igd_bar4_quirk(vdev, nr); |
154 | send_configuration, true), | 144 | #endif |
155 | DEFINE_PROP_BOOL("send-section-footer", MigrationState, | ||
156 | send_section_footer, true), | ||
157 | + DEFINE_PROP_BOOL("send-switchover-start", MigrationState, | ||
158 | + send_switchover_start, true), | ||
159 | DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, | ||
160 | multifd_flush_after_each_section, false), | ||
161 | DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, | ||
162 | @@ -XXX,XX +XXX,XX @@ bool migrate_auto_converge(void) | ||
163 | return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; | ||
164 | } | 145 | } |
165 | 146 | ||
166 | +bool migrate_send_switchover_start(void) | ||
167 | +{ | ||
168 | + MigrationState *s = migrate_get_current(); | ||
169 | + | ||
170 | + return s->send_switchover_start; | ||
171 | +} | ||
172 | + | ||
173 | bool migrate_background_snapshot(void) | ||
174 | { | ||
175 | MigrationState *s = migrate_get_current(); | ||
176 | diff --git a/migration/savevm.c b/migration/savevm.c | ||
177 | index XXXXXXX..XXXXXXX 100644 | ||
178 | --- a/migration/savevm.c | ||
179 | +++ b/migration/savevm.c | ||
180 | @@ -XXX,XX +XXX,XX @@ enum qemu_vm_cmd { | ||
181 | MIG_CMD_ENABLE_COLO, /* Enable COLO */ | ||
182 | MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */ | ||
183 | MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ | ||
184 | + MIG_CMD_SWITCHOVER_START, /* Switchover start notification */ | ||
185 | MIG_CMD_MAX | ||
186 | }; | ||
187 | |||
188 | @@ -XXX,XX +XXX,XX @@ static struct mig_cmd_args { | ||
189 | [MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" }, | ||
190 | [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" }, | ||
191 | [MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, | ||
192 | + [MIG_CMD_SWITCHOVER_START] = { .len = 0, .name = "SWITCHOVER_START" }, | ||
193 | [MIG_CMD_MAX] = { .len = -1, .name = "MAX" }, | ||
194 | }; | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name) | ||
197 | qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf); | ||
198 | } | ||
199 | |||
200 | +static void qemu_savevm_send_switchover_start(QEMUFile *f) | ||
201 | +{ | ||
202 | + trace_savevm_send_switchover_start(); | ||
203 | + qemu_savevm_command_send(f, MIG_CMD_SWITCHOVER_START, 0, NULL); | ||
204 | +} | ||
205 | + | ||
206 | +void qemu_savevm_maybe_send_switchover_start(QEMUFile *f) | ||
207 | +{ | ||
208 | + if (migrate_send_switchover_start()) { | ||
209 | + qemu_savevm_send_switchover_start(f); | ||
210 | + } | ||
211 | +} | ||
212 | + | ||
213 | bool qemu_savevm_state_blocked(Error **errp) | ||
214 | { | ||
215 | SaveStateEntry *se; | ||
216 | @@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) | ||
217 | |||
218 | ret = qemu_file_get_error(f); | ||
219 | if (ret == 0) { | ||
220 | + qemu_savevm_maybe_send_switchover_start(f); | ||
221 | qemu_savevm_state_complete_precopy(f, false); | ||
222 | ret = qemu_file_get_error(f); | ||
223 | } | ||
224 | @@ -XXX,XX +XXX,XX @@ static int loadvm_process_enable_colo(MigrationIncomingState *mis) | ||
225 | return ret; | ||
226 | } | ||
227 | |||
228 | +static int loadvm_postcopy_handle_switchover_start(void) | ||
229 | +{ | ||
230 | + SaveStateEntry *se; | ||
231 | + | ||
232 | + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { | ||
233 | + int ret; | ||
234 | + | ||
235 | + if (!se->ops || !se->ops->switchover_start) { | ||
236 | + continue; | ||
237 | + } | ||
238 | + | ||
239 | + ret = se->ops->switchover_start(se->opaque); | ||
240 | + if (ret < 0) { | ||
241 | + return ret; | ||
242 | + } | ||
243 | + } | ||
244 | + | ||
245 | + return 0; | ||
246 | +} | ||
247 | + | ||
248 | /* | ||
249 | * Process an incoming 'QEMU_VM_COMMAND' | ||
250 | * 0 just a normal return | ||
251 | @@ -XXX,XX +XXX,XX @@ static int loadvm_process_command(QEMUFile *f) | ||
252 | |||
253 | case MIG_CMD_ENABLE_COLO: | ||
254 | return loadvm_process_enable_colo(mis); | ||
255 | + | ||
256 | + case MIG_CMD_SWITCHOVER_START: | ||
257 | + return loadvm_postcopy_handle_switchover_start(); | ||
258 | } | ||
259 | |||
260 | return 0; | ||
261 | diff --git a/migration/trace-events b/migration/trace-events | ||
262 | index XXXXXXX..XXXXXXX 100644 | ||
263 | --- a/migration/trace-events | ||
264 | +++ b/migration/trace-events | ||
265 | @@ -XXX,XX +XXX,XX @@ savevm_send_postcopy_run(void) "" | ||
266 | savevm_send_postcopy_resume(void) "" | ||
267 | savevm_send_colo_enable(void) "" | ||
268 | savevm_send_recv_bitmap(char *name) "%s" | ||
269 | +savevm_send_switchover_start(void) "" | ||
270 | savevm_state_setup(void) "" | ||
271 | savevm_state_resume_prepare(void) "" | ||
272 | savevm_state_header(void) "" | ||
273 | diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py | ||
274 | index XXXXXXX..XXXXXXX 100755 | ||
275 | --- a/scripts/analyze-migration.py | ||
276 | +++ b/scripts/analyze-migration.py | ||
277 | @@ -XXX,XX +XXX,XX @@ class MigrationDump(object): | ||
278 | QEMU_VM_SUBSECTION = 0x05 | ||
279 | QEMU_VM_VMDESCRIPTION = 0x06 | ||
280 | QEMU_VM_CONFIGURATION = 0x07 | ||
281 | + QEMU_VM_COMMAND = 0x08 | ||
282 | QEMU_VM_SECTION_FOOTER= 0x7e | ||
283 | + QEMU_MIG_CMD_SWITCHOVER_START = 0x0b | ||
284 | |||
285 | def __init__(self, filename): | ||
286 | self.section_classes = { | ||
287 | @@ -XXX,XX +XXX,XX @@ def read(self, desc_only = False, dump_memory = False, | ||
288 | elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END: | ||
289 | section_id = file.read32() | ||
290 | self.sections[section_id].read() | ||
291 | + elif section_type == self.QEMU_VM_COMMAND: | ||
292 | + command_type = file.read16() | ||
293 | + command_data_len = file.read16() | ||
294 | + if command_type != self.QEMU_MIG_CMD_SWITCHOVER_START: | ||
295 | + raise Exception("Unknown QEMU_VM_COMMAND: %x" % | ||
296 | + (command_type)) | ||
297 | + if command_data_len != 0: | ||
298 | + raise Exception("Invalid SWITCHOVER_START length: %x" % | ||
299 | + (command_data_len)) | ||
300 | elif section_type == self.QEMU_VM_SECTION_FOOTER: | ||
301 | read_section_id = file.read32() | ||
302 | if read_section_id != section_id: | ||
303 | -- | 147 | -- |
304 | 2.48.1 | 148 | 2.48.1 |
305 | 149 | ||
306 | 150 | diff view generated by jsdifflib |
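
Illustrative sketch, not part of the series: going by the constants added to scripts/analyze-migration.py above (QEMU_VM_COMMAND = 0x08, SWITCHOVER_START command = 0x0b, zero-length payload) and assuming the usual big-endian framing of the migration stream, the new command can be recognized with a few lines of standalone code:

#!/usr/bin/env python3
# Illustrative only: recognize a SWITCHOVER_START command in a migration
# stream, mirroring the analyze-migration.py change in this patch.
# Constants come from the hunk above; big-endian framing is assumed.
import io
import struct

QEMU_VM_COMMAND = 0x08
QEMU_MIG_CMD_SWITCHOVER_START = 0x0B

def read_switchover_start(stream):
    """Consume one QEMU_VM_COMMAND section and check it is SWITCHOVER_START."""
    (section_type,) = struct.unpack(">B", stream.read(1))
    if section_type != QEMU_VM_COMMAND:
        raise ValueError("not a QEMU_VM_COMMAND section: %#x" % section_type)
    command_type, command_data_len = struct.unpack(">HH", stream.read(4))
    if command_type != QEMU_MIG_CMD_SWITCHOVER_START:
        raise ValueError("unexpected command: %#x" % command_type)
    if command_data_len != 0:
        raise ValueError("SWITCHOVER_START carries no payload")
    return True

# The command as it would appear on the wire: 0x08, 0x000b, 0x0000.
wire = io.BytesIO(bytes([0x08, 0x00, 0x0B, 0x00, 0x00]))
assert read_switchover_start(wire)
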
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | This property allows configuring whether to transfer the particular device | 3 | So far, IGD-specific quirks all require enabling legacy mode, which is |
4 | state via multifd channels when live migrating that device. | 4 | toggled by assigning IGD to 00:02.0. However, some quirks, like the BDSM |
5 | 5 | and GGC register quirks, should be applied to all supported IGD devices. | |
6 | It defaults to AUTO, which means that VFIO device state transfer via | 6 | A new config option, x-igd-legacy-mode=[on|off|auto], is introduced to |
7 | multifd channels is attempted in configurations that otherwise support it. | 7 | control the legacy-mode-only quirks. The default value is "auto", which |
8 | 8 | keeps current behavior that enables legacy mode implicitly and continues | |
9 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 9 | on error when all of the following conditions are met. |
10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 10 | * Machine type is i440fx |
11 | Link: https://lore.kernel.org/qemu-devel/d6dbb326e3d53c7104d62c96c9e3dd64e1c7b940.1741124640.git.maciej.szmigiero@oracle.com | 11 | * IGD device is at guest BDF 00:02.0 |
12 | [ clg: Added documentation ] | 12 | |
13 | If any one of the conditions above is not met, the default behavior is | ||
14 | equivalent to "off", QEMU will fail immediately if any error occurs. | ||
15 | |||
16 | Users can also use "on" to force enabling legacy mode. It checks if all | ||
17 | the conditions above are met and set up legacy mode. QEMU will also fail | ||
18 | immediately on error in this case. | ||
19 | |||
20 | Additionally, the hotplug check in legacy mode is removed as hotplugging | ||
21 | IGD device is never supported, and it will be checked when enabling the | ||
22 | OpRegion quirk. | ||
23 | |||
24 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | ||
25 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
26 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
27 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
28 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-8-tomitamoeko@gmail.com | ||
29 | [ clg: - Changed warn_report() by info_report() in | ||
30 | vfio_probe_igd_config_quirk() as suggested by Alex W. | ||
31 | - Fixed spelling in vfio_probe_igd_config_quirk () ] | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 32 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
14 | --- | 33 | --- |
15 | docs/devel/migration/vfio.rst | 15 +++++++++++++++ | 34 | hw/vfio/pci.h | 1 + |
16 | include/hw/vfio/vfio-common.h | 2 ++ | 35 | hw/vfio/igd.c | 127 +++++++++++++++++++++++++++++--------------------- |
17 | hw/vfio/migration-multifd.c | 18 +++++++++++++++++- | 36 | hw/vfio/pci.c | 2 + |
18 | hw/vfio/pci.c | 7 +++++++ | 37 | 3 files changed, 77 insertions(+), 53 deletions(-) |
19 | 4 files changed, 41 insertions(+), 1 deletion(-) | 38 | |
20 | 39 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h | |
21 | diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst | ||
22 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/docs/devel/migration/vfio.rst | 41 | --- a/hw/vfio/pci.h |
24 | +++ b/docs/devel/migration/vfio.rst | 42 | +++ b/hw/vfio/pci.h |
25 | @@ -XXX,XX +XXX,XX @@ Postcopy | 43 | @@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice { |
26 | ======== | 44 | uint32_t display_xres; |
27 | 45 | uint32_t display_yres; | |
28 | Postcopy migration is currently not supported for VFIO devices. | 46 | int32_t bootindex; |
29 | + | 47 | + OnOffAuto igd_legacy_mode; |
30 | +Multifd | 48 | uint32_t igd_gms; |
31 | +======= | 49 | OffAutoPCIBAR msix_relo; |
32 | + | 50 | uint8_t nv_gpudirect_clique; |
33 | +Starting from QEMU version 10.0 there's a possibility to transfer VFIO device | 51 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
34 | +_STOP_COPY state via multifd channels. This helps reduce downtime - especially | ||
35 | +with multiple VFIO devices or with devices having a large migration state. | ||
36 | +As an additional benefit, setting the VFIO device to _STOP_COPY state and | ||
37 | +saving its config space is also parallelized (run in a separate thread) in | ||
38 | +such migration mode. | ||
39 | + | ||
40 | +The multifd VFIO device state transfer is controlled by | ||
41 | +"x-migration-multifd-transfer" VFIO device property. This property defaults to | ||
42 | +AUTO, which means that VFIO device state transfer via multifd channels is | ||
43 | +attempted in configurations that otherwise support it. | ||
44 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | ||
45 | index XXXXXXX..XXXXXXX 100644 | 52 | index XXXXXXX..XXXXXXX 100644 |
46 | --- a/include/hw/vfio/vfio-common.h | 53 | --- a/hw/vfio/igd.c |
47 | +++ b/include/hw/vfio/vfio-common.h | 54 | +++ b/hw/vfio/igd.c |
48 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIOMigration { | 55 | @@ -XXX,XX +XXX,XX @@ |
49 | uint64_t mig_flags; | 56 | #include "qemu/error-report.h" |
50 | uint64_t precopy_init_size; | 57 | #include "qapi/error.h" |
51 | uint64_t precopy_dirty_size; | 58 | #include "qapi/qmp/qerror.h" |
52 | + bool multifd_transfer; | 59 | +#include "hw/boards.h" |
53 | VFIOMultifd *multifd; | 60 | #include "hw/hw.h" |
54 | bool initial_data_sent; | 61 | #include "hw/nvram/fw_cfg.h" |
55 | 62 | #include "pci.h" | |
56 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIODevice { | 63 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) |
57 | bool no_mmap; | 64 | * bus address. |
58 | bool ram_block_discard_allowed; | 65 | */ |
59 | OnOffAuto enable_migration; | 66 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || |
60 | + OnOffAuto migration_multifd_transfer; | 67 | - !vfio_is_vga(vdev) || nr != 0 || |
61 | bool migration_events; | 68 | - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), |
62 | VFIODeviceOps *ops; | 69 | - 0, PCI_DEVFN(0x2, 0))) { |
63 | unsigned int num_irqs; | 70 | + !vfio_is_vga(vdev) || nr != 0) { |
64 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | 71 | return; |
65 | index XXXXXXX..XXXXXXX 100644 | 72 | } |
66 | --- a/hw/vfio/migration-multifd.c | 73 | |
67 | +++ b/hw/vfio/migration-multifd.c | 74 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) |
68 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_transfer_supported(void) | 75 | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); |
69 | 76 | } | |
70 | bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev) | 77 | |
78 | -bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
79 | - Error **errp G_GNUC_UNUSED) | ||
80 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
71 | { | 81 | { |
72 | - return false; | 82 | - g_autofree struct vfio_region_info *rom = NULL; |
73 | + VFIOMigration *migration = vbasedev->migration; | 83 | int ret, gen; |
74 | + | 84 | uint64_t gms_size; |
75 | + return migration->multifd_transfer; | 85 | uint64_t *bdsm_size; |
86 | uint32_t gmch; | ||
87 | + bool legacy_mode_enabled = false; | ||
88 | Error *err = NULL; | ||
89 | |||
90 | /* | ||
91 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
92 | * PCI bus address. | ||
93 | */ | ||
94 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || | ||
95 | - !vfio_is_vga(vdev) || | ||
96 | - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), | ||
97 | - 0, PCI_DEVFN(0x2, 0))) { | ||
98 | + !vfio_is_vga(vdev)) { | ||
99 | return true; | ||
100 | } | ||
101 | |||
102 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
103 | return true; | ||
104 | } | ||
105 | |||
106 | - /* | ||
107 | - * Most of what we're doing here is to enable the ROM to run, so if | ||
108 | - * there's no ROM, there's no point in setting up this quirk. | ||
109 | - * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support. | ||
110 | - */ | ||
111 | - ret = vfio_get_region_info(&vdev->vbasedev, | ||
112 | - VFIO_PCI_ROM_REGION_INDEX, &rom); | ||
113 | - if ((ret || !rom->size) && !vdev->pdev.romfile) { | ||
114 | - error_report("IGD device %s has no ROM, legacy mode disabled", | ||
115 | - vdev->vbasedev.name); | ||
116 | - return true; | ||
117 | - } | ||
118 | - | ||
119 | - /* | ||
120 | - * Ignore the hotplug corner case, mark the ROM failed, we can't | ||
121 | - * create the devices we need for legacy mode in the hotplug scenario. | ||
122 | - */ | ||
123 | - if (vdev->pdev.qdev.hotplugged) { | ||
124 | - error_report("IGD device %s hotplugged, ROM disabled, " | ||
125 | - "legacy mode disabled", vdev->vbasedev.name); | ||
126 | - vdev->rom_read_failed = true; | ||
127 | - return true; | ||
128 | - } | ||
129 | - | ||
130 | gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); | ||
131 | |||
132 | /* | ||
133 | - * If IGD VGA Disable is clear (expected) and VGA is not already enabled, | ||
134 | - * try to enable it. Probably shouldn't be using legacy mode without VGA, | ||
135 | - * but also no point in us enabling VGA if disabled in hardware. | ||
136 | + * For backward compatibility, enable legacy mode when | ||
137 | + * - Machine type is i440fx (pc_piix) | ||
138 | + * - IGD device is at guest BDF 00:02.0 | ||
139 | + * - Not manually disabled by x-igd-legacy-mode=off | ||
140 | */ | ||
141 | - if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { | ||
142 | - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); | ||
143 | - error_report("IGD device %s failed to enable VGA access, " | ||
144 | - "legacy mode disabled", vdev->vbasedev.name); | ||
145 | - return true; | ||
146 | - } | ||
147 | + if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && | ||
148 | + !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && | ||
149 | + (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), | ||
150 | + 0, PCI_DEVFN(0x2, 0)))) { | ||
151 | + /* | ||
152 | + * IGD legacy mode requires: | ||
153 | + * - VBIOS in ROM BAR or file | ||
154 | + * - VGA IO/MMIO ranges are claimed by IGD | ||
155 | + * - OpRegion | ||
156 | + * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host | ||
157 | + */ | ||
158 | + g_autofree struct vfio_region_info *rom = NULL; | ||
159 | + | ||
160 | + legacy_mode_enabled = true; | ||
161 | + info_report("IGD legacy mode enabled, " | ||
162 | + "use x-igd-legacy-mode=off to disable it if unwanted."); | ||
163 | + | ||
164 | + /* | ||
165 | + * Most of what we're doing here is to enable the ROM to run, so if | ||
166 | + * there's no ROM, there's no point in setting up this quirk. | ||
167 | + * NB. We only seem to get BIOS ROMs, so UEFI VM would need CSM support. | ||
168 | + */ | ||
169 | + ret = vfio_get_region_info(&vdev->vbasedev, | ||
170 | + VFIO_PCI_ROM_REGION_INDEX, &rom); | ||
171 | + if ((ret || !rom->size) && !vdev->pdev.romfile) { | ||
172 | + error_setg(&err, "Device has no ROM"); | ||
173 | + goto error; | ||
174 | + } | ||
175 | |||
176 | - /* Setup OpRegion access */ | ||
177 | - if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
178 | - error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
179 | - error_report_err(err); | ||
180 | - return true; | ||
181 | - } | ||
182 | + /* | ||
183 | + * If IGD VGA Disable is clear (expected) and VGA is not already | ||
184 | + * enabled, try to enable it. Probably shouldn't be using legacy mode | ||
185 | + * without VGA, but also no point in us enabling VGA if disabled in | ||
186 | + * hardware. | ||
187 | + */ | ||
188 | + if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { | ||
189 | + error_setg(&err, "Unable to enable VGA access"); | ||
190 | + goto error; | ||
191 | + } | ||
192 | |||
193 | - /* Setup LPC bridge / Host bridge PCI IDs */ | ||
194 | - if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
195 | - error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
196 | - error_report_err(err); | ||
197 | - return true; | ||
198 | + /* Setup OpRegion access */ | ||
199 | + if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
200 | + goto error; | ||
201 | + } | ||
202 | + | ||
203 | + /* Setup LPC bridge / Host bridge PCI IDs */ | ||
204 | + if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
205 | + goto error; | ||
206 | + } | ||
207 | + } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { | ||
208 | + error_setg(&err, | ||
209 | + "Machine is not i440fx or assigned BDF is not 00:02.0"); | ||
210 | + goto error; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
215 | trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); | ||
216 | |||
217 | return true; | ||
218 | + | ||
219 | +error: | ||
220 | + /* | ||
221 | + * When legacy mode is implicity enabled, continue on error, | ||
222 | + * to keep compatibility | ||
223 | + */ | ||
224 | + if (legacy_mode_enabled && (vdev->igd_legacy_mode == ON_OFF_AUTO_AUTO)) { | ||
225 | + error_report_err(err); | ||
226 | + error_report("IGD legacy mode disabled"); | ||
227 | + return true; | ||
228 | + } | ||
229 | + | ||
230 | + error_propagate(errp, err); | ||
231 | + return false; | ||
76 | } | 232 | } |
77 | |||
78 | bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) | ||
79 | { | ||
80 | VFIOMigration *migration = vbasedev->migration; | ||
81 | |||
82 | + if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) { | ||
83 | + migration->multifd_transfer = vfio_multifd_transfer_supported(); | ||
84 | + } else { | ||
85 | + migration->multifd_transfer = | ||
86 | + vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON; | ||
87 | + } | ||
88 | + | ||
89 | if (!vfio_multifd_transfer_enabled(vbasedev)) { | ||
90 | /* Nothing further to check or do */ | ||
91 | return true; | ||
92 | } | ||
93 | |||
94 | + if (!vfio_multifd_transfer_supported()) { | ||
95 | + error_setg(errp, | ||
96 | + "%s: Multifd device transfer requested but unsupported in the current config", | ||
97 | + vbasedev->name); | ||
98 | + return false; | ||
99 | + } | ||
100 | + | ||
101 | if (alloc_multifd) { | ||
102 | assert(!migration->multifd); | ||
103 | migration->multifd = vfio_multifd_new(); | ||
104 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | 233 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
105 | index XXXXXXX..XXXXXXX 100644 | 234 | index XXXXXXX..XXXXXXX 100644 |
106 | --- a/hw/vfio/pci.c | 235 | --- a/hw/vfio/pci.c |
107 | +++ b/hw/vfio/pci.c | 236 | +++ b/hw/vfio/pci.c |
108 | @@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = { | 237 | @@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = { |
238 | VFIO_FEATURE_ENABLE_REQ_BIT, true), | ||
239 | DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, | ||
109 | VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), | 240 | VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), |
241 | + DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, | ||
242 | + igd_legacy_mode, ON_OFF_AUTO_AUTO), | ||
110 | DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, | 243 | DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, |
111 | vbasedev.enable_migration, ON_OFF_AUTO_AUTO), | 244 | vbasedev.enable_migration, ON_OFF_AUTO_AUTO), |
112 | + DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice, | 245 | DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice, |
113 | + vbasedev.migration_multifd_transfer, | ||
114 | + ON_OFF_AUTO_AUTO), | ||
115 | DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, | ||
116 | vbasedev.migration_events, false), | ||
117 | DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), | ||
118 | @@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) | ||
119 | "Skip config space check for Vendor Specific Capability. " | ||
120 | "Setting to false will enforce strict checking of VSC content " | ||
121 | "(DEBUG)"); | ||
122 | + object_class_property_set_description(klass, /* 10.0 */ | ||
123 | + "x-migration-multifd-transfer", | ||
124 | + "Transfer this device state via " | ||
125 | + "multifd channels when live migrating it"); | ||
126 | } | ||
127 | |||
128 | static const TypeInfo vfio_pci_dev_info = { | ||
129 | -- | 246 | -- |
130 | 2.48.1 | 247 | 2.48.1 |
131 | 248 | ||
132 | 249 | diff view generated by jsdifflib |
1 | From: Alex Williamson <alex.williamson@redhat.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Switch callers directly initializing the PCI PM capability with | 3 | Both the enable-OpRegion option (x-igd-opregion) and legacy mode require |
4 | pci_add_capability() to use pci_pm_init(). | 4 | setting up an OpRegion copy for IGD devices. As the config quirk no longer |
5 | depends on legacy mode, we can now handle x-igd-opregion option there | ||
6 | instead of in vfio_realize. | ||
5 | 7 | ||
6 | Cc: Dmitry Fleytman <dmitry.fleytman@gmail.com> | 8 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
7 | Cc: Akihiko Odaki <akihiko.odaki@daynix.com> | 9 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
8 | Cc: Jason Wang <jasowang@redhat.com> | 10 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
9 | Cc: Stefan Weil <sw@weilnetz.de> | 11 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
10 | Cc: Sriram Yagnaraman <sriram.yagnaraman@ericsson.com> | 12 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-9-tomitamoeko@gmail.com |
11 | Cc: Keith Busch <kbusch@kernel.org> | ||
12 | Cc: Klaus Jensen <its@irrelevant.dk> | ||
13 | Cc: Jesper Devantier <foss@defmacro.it> | ||
14 | Cc: Michael S. Tsirkin <mst@redhat.com> | ||
15 | Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> | ||
16 | Cc: Cédric Le Goater <clg@redhat.com> | ||
17 | Signed-off-by: Alex Williamson <alex.williamson@redhat.com> | ||
18 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
19 | Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
20 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
21 | Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-3-alex.williamson@redhat.com | ||
22 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
23 | --- | 14 | --- |
24 | hw/net/e1000e.c | 3 +-- | 15 | hw/vfio/pci.h | 2 -- |
25 | hw/net/eepro100.c | 4 +--- | 16 | hw/vfio/igd.c | 14 +++++++++----- |
26 | hw/net/igb.c | 3 +-- | 17 | hw/vfio/pci.c | 9 --------- |
27 | hw/nvme/ctrl.c | 3 +-- | 18 | 3 files changed, 9 insertions(+), 16 deletions(-) |
28 | hw/pci-bridge/pcie_pci_bridge.c | 2 +- | ||
29 | hw/vfio/pci.c | 7 ++++++- | ||
30 | hw/virtio/virtio-pci.c | 3 +-- | ||
31 | 7 files changed, 12 insertions(+), 13 deletions(-) | ||
32 | 19 | ||
33 | diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c | 20 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
34 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/hw/net/e1000e.c | 22 | --- a/hw/vfio/pci.h |
36 | +++ b/hw/net/e1000e.c | 23 | +++ b/hw/vfio/pci.h |
37 | @@ -XXX,XX +XXX,XX @@ static int | 24 | @@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, |
38 | e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) | 25 | |
26 | bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); | ||
27 | |||
28 | -bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp); | ||
29 | - | ||
30 | void vfio_display_reset(VFIOPCIDevice *vdev); | ||
31 | bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); | ||
32 | void vfio_display_finalize(VFIOPCIDevice *vdev); | ||
33 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/hw/vfio/igd.c | ||
36 | +++ b/hw/vfio/igd.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, | ||
38 | return true; | ||
39 | } | ||
40 | |||
41 | -bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) | ||
42 | +static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) | ||
39 | { | 43 | { |
40 | Error *local_err = NULL; | 44 | g_autofree struct vfio_region_info *opregion = NULL; |
41 | - int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, | 45 | int ret; |
42 | - PCI_PM_SIZEOF, &local_err); | 46 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
43 | + int ret = pci_pm_init(pdev, offset, &local_err); | 47 | goto error; |
44 | |||
45 | if (local_err) { | ||
46 | error_report_err(local_err); | ||
47 | diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/hw/net/eepro100.c | ||
50 | +++ b/hw/net/eepro100.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static void e100_pci_reset(EEPRO100State *s, Error **errp) | ||
52 | if (info->power_management) { | ||
53 | /* Power Management Capabilities */ | ||
54 | int cfg_offset = 0xdc; | ||
55 | - int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM, | ||
56 | - cfg_offset, PCI_PM_SIZEOF, | ||
57 | - errp); | ||
58 | + int r = pci_pm_init(&s->dev, cfg_offset, errp); | ||
59 | if (r < 0) { | ||
60 | return; | ||
61 | } | 48 | } |
62 | diff --git a/hw/net/igb.c b/hw/net/igb.c | 49 | |
63 | index XXXXXXX..XXXXXXX 100644 | 50 | - /* Setup OpRegion access */ |
64 | --- a/hw/net/igb.c | 51 | - if (!vfio_pci_igd_setup_opregion(vdev, &err)) { |
65 | +++ b/hw/net/igb.c | 52 | - goto error; |
66 | @@ -XXX,XX +XXX,XX @@ static int | 53 | - } |
67 | igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) | 54 | + /* Enable OpRegion quirk */ |
68 | { | 55 | + vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION; |
69 | Error *local_err = NULL; | 56 | |
70 | - int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, | 57 | /* Setup LPC bridge / Host bridge PCI IDs */ |
71 | - PCI_PM_SIZEOF, &local_err); | 58 | if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { |
72 | + int ret = pci_pm_init(pdev, offset, &local_err); | 59 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
73 | 60 | goto error; | |
74 | if (local_err) { | ||
75 | error_report_err(local_err); | ||
76 | diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/hw/nvme/ctrl.c | ||
79 | +++ b/hw/nvme/ctrl.c | ||
80 | @@ -XXX,XX +XXX,XX @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) | ||
81 | Error *err = NULL; | ||
82 | int ret; | ||
83 | |||
84 | - ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, | ||
85 | - PCI_PM_SIZEOF, &err); | ||
86 | + ret = pci_pm_init(pci_dev, offset, &err); | ||
87 | if (err) { | ||
88 | error_report_err(err); | ||
89 | return ret; | ||
90 | diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/hw/pci-bridge/pcie_pci_bridge.c | ||
93 | +++ b/hw/pci-bridge/pcie_pci_bridge.c | ||
94 | @@ -XXX,XX +XXX,XX @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp) | ||
95 | goto cap_error; | ||
96 | } | 61 | } |
97 | 62 | ||
98 | - pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp); | 63 | + /* Setup OpRegion access */ |
99 | + pos = pci_pm_init(d, 0, errp); | 64 | + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && |
100 | if (pos < 0) { | 65 | + !vfio_pci_igd_setup_opregion(vdev, errp)) { |
101 | goto pm_error; | 66 | + goto error; |
102 | } | 67 | + } |
68 | + | ||
69 | /* | ||
70 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
71 | * 32MiB. This option should only be used when the desired size cannot be | ||
103 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | 72 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
104 | index XXXXXXX..XXXXXXX 100644 | 73 | index XXXXXXX..XXXXXXX 100644 |
105 | --- a/hw/vfio/pci.c | 74 | --- a/hw/vfio/pci.c |
106 | +++ b/hw/vfio/pci.c | 75 | +++ b/hw/vfio/pci.c |
107 | @@ -XXX,XX +XXX,XX @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) | 76 | @@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp) |
108 | case PCI_CAP_ID_PM: | 77 | vfio_bar_quirk_setup(vdev, i); |
109 | vfio_check_pm_reset(vdev, pos); | 78 | } |
110 | vdev->pm_cap = pos; | 79 | |
111 | - ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0; | 80 | -#ifdef CONFIG_VFIO_IGD |
112 | + ret = pci_pm_init(pdev, pos, errp) >= 0; | 81 | - if (!vdev->igd_opregion && |
113 | + /* | 82 | - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { |
114 | + * PCI-core config space emulation needs write access to the power | 83 | - if (!vfio_pci_igd_setup_opregion(vdev, errp)) { |
115 | + * state enabled for tracking BAR mapping relative to PM state. | 84 | - goto out_unset_idev; |
116 | + */ | 85 | - } |
117 | + pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK); | 86 | - } |
118 | break; | 87 | -#endif |
119 | case PCI_CAP_ID_AF: | 88 | - |
120 | vfio_check_af_flr(vdev, pos); | 89 | /* QEMU emulates all of MSI & MSIX */ |
121 | diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c | 90 | if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { |
122 | index XXXXXXX..XXXXXXX 100644 | 91 | memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, |
123 | --- a/hw/virtio/virtio-pci.c | ||
124 | +++ b/hw/virtio/virtio-pci.c | ||
125 | @@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) | ||
126 | pos = pcie_endpoint_cap_init(pci_dev, 0); | ||
127 | assert(pos > 0); | ||
128 | |||
129 | - pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, | ||
130 | - PCI_PM_SIZEOF, errp); | ||
131 | + pos = pci_pm_init(pci_dev, 0, errp); | ||
132 | if (pos < 0) { | ||
133 | return; | ||
134 | } | ||
135 | -- | 92 | -- |
136 | 2.48.1 | 93 | 2.48.1 |
137 | 94 | ||
138 | 95 | diff view generated by jsdifflib |
1 | From: Alex Williamson <alex.williamson@redhat.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | This is now redundant to PCIDevice.pm_cap. | 3 | The LPC bridge/Host bridge IDs quirk is also not dependent on legacy |
4 | 4 | mode. Recent Windows drivers no longer depend on these IDs, nor does the | |
5 | Linux i915 driver, while UEFI GOP seems still needs them. Make it an | ||
6 | option to allow users enabling and disabling it as needed. | ||
4 | 7 | ||
5 | Cc: Cédric Le Goater <clg@redhat.com> | 8 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
6 | Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com> | 9 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
7 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 10 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
8 | Signed-off-by: Alex Williamson <alex.williamson@redhat.com> | 11 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
9 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | 12 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-10-tomitamoeko@gmail.com |
10 | Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-4-alex.williamson@redhat.com | 13 | [ clg: - Fixed spelling in vfio_probe_igd_config_quirk() ] |
11 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
12 | --- | 15 | --- |
13 | hw/vfio/pci.h | 1 - | 16 | hw/vfio/pci.h | 3 +++ |
14 | hw/vfio/pci.c | 9 ++++----- | 17 | hw/vfio/igd.c | 14 ++++++++------ |
15 | 2 files changed, 4 insertions(+), 6 deletions(-) | 18 | hw/vfio/pci.c | 2 ++ |
19 | 3 files changed, 13 insertions(+), 6 deletions(-) | ||
16 | 20 | ||
17 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h | 21 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
18 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/vfio/pci.h | 23 | --- a/hw/vfio/pci.h |
20 | +++ b/hw/vfio/pci.h | 24 | +++ b/hw/vfio/pci.h |
21 | @@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice { | 25 | @@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice { |
22 | int32_t bootindex; | 26 | #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2 |
23 | uint32_t igd_gms; | 27 | #define VFIO_FEATURE_ENABLE_IGD_OPREGION \ |
24 | OffAutoPCIBAR msix_relo; | 28 | (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT) |
25 | - uint8_t pm_cap; | 29 | +#define VFIO_FEATURE_ENABLE_IGD_LPC_BIT 3 |
26 | uint8_t nv_gpudirect_clique; | 30 | +#define VFIO_FEATURE_ENABLE_IGD_LPC \ |
27 | bool pci_aer; | 31 | + (1 << VFIO_FEATURE_ENABLE_IGD_LPC_BIT) |
28 | bool req_enabled; | 32 | OnOffAuto display; |
33 | uint32_t display_xres; | ||
34 | uint32_t display_yres; | ||
35 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/hw/vfio/igd.c | ||
38 | +++ b/hw/vfio/igd.c | ||
39 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
40 | goto error; | ||
41 | } | ||
42 | |||
43 | - /* Enable OpRegion quirk */ | ||
44 | + /* Enable OpRegion and LPC bridge quirk */ | ||
45 | vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION; | ||
46 | - | ||
47 | - /* Setup LPC bridge / Host bridge PCI IDs */ | ||
48 | - if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
49 | - goto error; | ||
50 | - } | ||
51 | + vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC; | ||
52 | } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { | ||
53 | error_setg(&err, | ||
54 | "Machine is not i440fx or assigned BDF is not 00:02.0"); | ||
55 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
56 | goto error; | ||
57 | } | ||
58 | |||
59 | + /* Setup LPC bridge / Host bridge PCI IDs */ | ||
60 | + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) && | ||
61 | + !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) { | ||
62 | + goto error; | ||
63 | + } | ||
64 | + | ||
65 | /* | ||
66 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
67 | * 32MiB. This option should only be used when the desired size cannot be | ||
29 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | 68 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
30 | index XXXXXXX..XXXXXXX 100644 | 69 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/hw/vfio/pci.c | 70 | --- a/hw/vfio/pci.c |
32 | +++ b/hw/vfio/pci.c | 71 | +++ b/hw/vfio/pci.c |
33 | @@ -XXX,XX +XXX,XX @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) | 72 | @@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = { |
34 | break; | 73 | VFIO_FEATURE_ENABLE_REQ_BIT, true), |
35 | case PCI_CAP_ID_PM: | 74 | DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, |
36 | vfio_check_pm_reset(vdev, pos); | 75 | VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), |
37 | - vdev->pm_cap = pos; | 76 | + DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features, |
38 | ret = pci_pm_init(pdev, pos, errp) >= 0; | 77 | + VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false), |
39 | /* | 78 | DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, |
40 | * PCI-core config space emulation needs write access to the power | 79 | igd_legacy_mode, ON_OFF_AUTO_AUTO), |
41 | @@ -XXX,XX +XXX,XX @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) | 80 | DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, |
42 | vfio_disable_interrupts(vdev); | ||
43 | |||
44 | /* Make sure the device is in D0 */ | ||
45 | - if (vdev->pm_cap) { | ||
46 | + if (pdev->pm_cap) { | ||
47 | uint16_t pmcsr; | ||
48 | uint8_t state; | ||
49 | |||
50 | - pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2); | ||
51 | + pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); | ||
52 | state = pmcsr & PCI_PM_CTRL_STATE_MASK; | ||
53 | if (state) { | ||
54 | pmcsr &= ~PCI_PM_CTRL_STATE_MASK; | ||
55 | - vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); | ||
56 | + vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); | ||
57 | /* vfio handles the necessary delay here */ | ||
58 | - pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2); | ||
59 | + pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); | ||
60 | state = pmcsr & PCI_PM_CTRL_STATE_MASK; | ||
61 | if (state) { | ||
62 | error_report("vfio: Unable to power on device, stuck in D%d", | ||
63 | -- | 81 | -- |
64 | 2.48.1 | 82 | 2.48.1 |
65 | 83 | ||
66 | 84 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Williamson <alex.williamson@redhat.com> | ||
2 | 1 | ||
3 | The pm_cap on the PCIExpressDevice object can be distilled down | ||
4 | to the new instance on the PCIDevice object. | ||
5 | |||
6 | Cc: Michael S. Tsirkin <mst@redhat.com> | ||
7 | Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> | ||
8 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
9 | Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com> | ||
10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Alex Williamson <alex.williamson@redhat.com> | ||
12 | Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-5-alex.williamson@redhat.com | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
14 | --- | ||
15 | include/hw/pci/pcie.h | 2 -- | ||
16 | hw/pci-bridge/pcie_pci_bridge.c | 1 - | ||
17 | hw/virtio/virtio-pci.c | 8 +++----- | ||
18 | 3 files changed, 3 insertions(+), 8 deletions(-) | ||
19 | |||
20 | diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/include/hw/pci/pcie.h | ||
23 | +++ b/include/hw/pci/pcie.h | ||
24 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
25 | struct PCIExpressDevice { | ||
26 | /* Offset of express capability in config space */ | ||
27 | uint8_t exp_cap; | ||
28 | - /* Offset of Power Management capability in config space */ | ||
29 | - uint8_t pm_cap; | ||
30 | |||
31 | /* SLOT */ | ||
32 | bool hpev_notified; /* Logical AND of conditions for hot plug event. | ||
33 | diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/hw/pci-bridge/pcie_pci_bridge.c | ||
36 | +++ b/hw/pci-bridge/pcie_pci_bridge.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp) | ||
38 | if (pos < 0) { | ||
39 | goto pm_error; | ||
40 | } | ||
41 | - d->exp.pm_cap = pos; | ||
42 | pci_set_word(d->config + pos + PCI_PM_PMC, 0x3); | ||
43 | |||
44 | pcie_cap_arifwd_init(d); | ||
45 | diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/hw/virtio/virtio-pci.c | ||
48 | +++ b/hw/virtio/virtio-pci.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) | ||
50 | return; | ||
51 | } | ||
52 | |||
53 | - pci_dev->exp.pm_cap = pos; | ||
54 | - | ||
55 | /* | ||
56 | * Indicates that this function complies with revision 1.2 of the | ||
57 | * PCI Power Management Interface Specification. | ||
58 | @@ -XXX,XX +XXX,XX @@ static bool virtio_pci_no_soft_reset(PCIDevice *dev) | ||
59 | { | ||
60 | uint16_t pmcsr; | ||
61 | |||
62 | - if (!pci_is_express(dev) || !dev->exp.pm_cap) { | ||
63 | + if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) { | ||
64 | return false; | ||
65 | } | ||
66 | |||
67 | - pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL); | ||
68 | + pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL); | ||
69 | |||
70 | /* | ||
71 | * When No_Soft_Reset bit is set and the device | ||
72 | @@ -XXX,XX +XXX,XX @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) | ||
73 | |||
74 | if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { | ||
75 | pci_word_test_and_clear_mask( | ||
76 | - dev->config + dev->exp.pm_cap + PCI_PM_CTRL, | ||
77 | + dev->config + dev->pm_cap + PCI_PM_CTRL, | ||
78 | PCI_PM_CTRL_STATE_MASK); | ||
79 | } | ||
80 | } | ||
81 | -- | ||
82 | 2.48.1 | ||
83 | |||
84 | diff view generated by jsdifflib |
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Migration code wants to manage device data sending threads in one place. | 3 | The KVMGT/GVT-g vGPU also exposes OpRegion. But unlike IGD passthrough, |
4 | 4 | it only needs the OpRegion quirk. A previous change that moved x-igd-opregion |
5 | QEMU has an existing thread pool implementation; however, it is limited | 5 | handling into the config quirk breaks KVMGT functionality, as it brings extra |
6 | to queuing AIO operations only and essentially has a 1:1 mapping between | 6 | checks and applies other quirks. Here we check whether the device is an mdev |
7 | the current AioContext and the AIO ThreadPool in use. | 7 | (KVMGT) or not (passthrough), and then apply the corresponding quirks. |
4 | 8 | ||
5 | QEMU has an existing thread pool implementation, however it is limited | 9 | As before, users must manually specify x-igd-opregion=on to enable it |
6 | to queuing AIO operations only and essentially has a 1:1 mapping between | 10 | on KVMGT devices. In the future, we may check the VID/DID and enable |
7 | the current AioContext and the AIO ThreadPool in use. | 11 | OpRegion automatically. |
8 | 12 | ||
9 | Implement generic (non-AIO) ThreadPool by essentially wrapping Glib's | 13 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
10 | GThreadPool. | 14 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
11 | 15 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | |
12 | This brings a few new operations on a pool: | 16 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
13 | * thread_pool_wait() operation waits until all the submitted work requests | 17 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-11-tomitamoeko@gmail.com |
14 | have finished. | ||
15 | |||
16 | * thread_pool_set_max_threads() explicitly sets the maximum thread count | ||
17 | in the pool. | ||
18 | |||
19 | * thread_pool_adjust_max_threads_to_work() adjusts the maximum thread count | ||
20 | in the pool to equal the number of still waiting in queue or unfinished work. | ||
21 | |||
22 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
23 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
24 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
25 | Link: https://lore.kernel.org/qemu-devel/b1efaebdbea7cb7068b8fb74148777012383e12b.1741124640.git.maciej.szmigiero@oracle.com | ||
26 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 18 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
27 | --- | 19 | --- |
28 | include/block/thread-pool.h | 51 ++++++++++++++++ | 20 | hw/vfio/igd.c | 27 ++++++++++++++++++++++++++- |
29 | util/thread-pool.c | 119 ++++++++++++++++++++++++++++++++++++ | 21 | 1 file changed, 26 insertions(+), 1 deletion(-) |
30 | 2 files changed, 170 insertions(+) | ||
31 | 22 | ||
32 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h | 23 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
33 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/include/block/thread-pool.h | 25 | --- a/hw/vfio/igd.c |
35 | +++ b/include/block/thread-pool.h | 26 | +++ b/hw/vfio/igd.c |
36 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | 27 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) |
37 | int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); | 28 | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); |
38 | void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx); | 29 | } |
39 | 30 | ||
40 | +/* ------------------------------------------- */ | 31 | -bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
41 | +/* Generic thread pool types and methods below */ | 32 | +static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
42 | +typedef struct ThreadPool ThreadPool; | 33 | { |
43 | + | 34 | int ret, gen; |
44 | +/* Create a new thread pool. Never returns NULL. */ | 35 | uint64_t gms_size; |
45 | +ThreadPool *thread_pool_new(void); | 36 | @@ -XXX,XX +XXX,XX @@ error: |
37 | error_propagate(errp, err); | ||
38 | return false; | ||
39 | } | ||
46 | + | 40 | + |
47 | +/* | 41 | +/* |
48 | + * Free the thread pool. | 42 | + * KVMGT/GVT-g vGPU exposes an emulated OpRegion. So far, users have to specify |
49 | + * Waits for all the previously submitted work to complete before performing | 43 | + * x-igd-opregion=on to enable the access. |
50 | + * the actual freeing operation. | 44 | + * TODO: Check VID/DID and enable opregion access automatically |
51 | + */ | 45 | + */ |
52 | +void thread_pool_free(ThreadPool *pool); | 46 | +static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
53 | + | ||
54 | +/* | ||
55 | + * Submit a new work (task) for the pool. | ||
56 | + * | ||
57 | + * @opaque_destroy is an optional GDestroyNotify for the @opaque argument | ||
58 | + * to the work function at @func. | ||
59 | + */ | ||
60 | +void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, | ||
61 | + void *opaque, GDestroyNotify opaque_destroy); | ||
62 | + | ||
63 | +/* | ||
64 | + * Submit a new work (task) for the pool, making sure it starts getting | ||
65 | + * processed immediately, launching a new thread for it if necessary. | ||
66 | + * | ||
67 | + * @opaque_destroy is an optional GDestroyNotify for the @opaque argument | ||
68 | + * to the work function at @func. | ||
69 | + */ | ||
70 | +void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func, | ||
71 | + void *opaque, GDestroyNotify opaque_destroy); | ||
72 | + | ||
73 | +/* | ||
74 | + * Wait for all previously submitted work to complete before returning. | ||
75 | + * | ||
76 | + * Can be used as a barrier between two sets of tasks executed on a thread | ||
77 | + * pool without destroying it or in a performance sensitive path where the | ||
78 | + * caller just wants to wait for all tasks to complete while deferring the | ||
79 | + * pool free operation for later, less performance sensitive time. | ||
80 | + */ | ||
81 | +void thread_pool_wait(ThreadPool *pool); | ||
82 | + | ||
83 | +/* Set the maximum number of threads in the pool. */ | ||
84 | +bool thread_pool_set_max_threads(ThreadPool *pool, int max_threads); | ||
85 | + | ||
86 | +/* | ||
87 | + * Adjust the maximum number of threads in the pool to give each task its | ||
88 | + * own thread (exactly one thread per task). | ||
89 | + */ | ||
90 | +bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool); | ||
91 | |||
92 | #endif | ||
93 | diff --git a/util/thread-pool.c b/util/thread-pool.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/util/thread-pool.c | ||
96 | +++ b/util/thread-pool.c | ||
97 | @@ -XXX,XX +XXX,XX @@ void thread_pool_free_aio(ThreadPoolAio *pool) | ||
98 | qemu_mutex_destroy(&pool->lock); | ||
99 | g_free(pool); | ||
100 | } | ||
101 | + | ||
102 | +struct ThreadPool { | ||
103 | + GThreadPool *t; | ||
104 | + size_t cur_work; | ||
105 | + QemuMutex cur_work_lock; | ||
106 | + QemuCond all_finished_cond; | ||
107 | +}; | ||
108 | + | ||
109 | +typedef struct { | ||
110 | + ThreadPoolFunc *func; | ||
111 | + void *opaque; | ||
112 | + GDestroyNotify opaque_destroy; | ||
113 | +} ThreadPoolElement; | ||
114 | + | ||
115 | +static void thread_pool_func(gpointer data, gpointer user_data) | ||
116 | +{ | 47 | +{ |
117 | + ThreadPool *pool = user_data; | 48 | + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && |
118 | + g_autofree ThreadPoolElement *el = data; | 49 | + !vfio_pci_igd_setup_opregion(vdev, errp)) { |
119 | + | 50 | + return false; |
120 | + el->func(el->opaque); | ||
121 | + | ||
122 | + if (el->opaque_destroy) { | ||
123 | + el->opaque_destroy(el->opaque); | ||
124 | + } | 51 | + } |
125 | + | 52 | + |
126 | + QEMU_LOCK_GUARD(&pool->cur_work_lock); | 53 | + return true; |
127 | + | ||
128 | + assert(pool->cur_work > 0); | ||
129 | + pool->cur_work--; | ||
130 | + | ||
131 | + if (pool->cur_work == 0) { | ||
132 | + qemu_cond_signal(&pool->all_finished_cond); | ||
133 | + } | ||
134 | +} | 54 | +} |
135 | + | 55 | + |
136 | +ThreadPool *thread_pool_new(void) | 56 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
137 | +{ | 57 | +{ |
138 | + ThreadPool *pool = g_new(ThreadPool, 1); | 58 | + /* KVMGT/GVT-g vGPU is exposed as mdev */ |
139 | + | 59 | + if (vdev->vbasedev.mdev) { |
140 | + pool->cur_work = 0; | 60 | + return vfio_pci_kvmgt_config_quirk(vdev, errp); |
141 | + qemu_mutex_init(&pool->cur_work_lock); | ||
142 | + qemu_cond_init(&pool->all_finished_cond); | ||
143 | + | ||
144 | + pool->t = g_thread_pool_new(thread_pool_func, pool, 0, TRUE, NULL); | ||
145 | + /* | ||
146 | + * g_thread_pool_new() can only return errors if initial thread(s) | ||
147 | + * creation fails but we ask for 0 initial threads above. | ||
148 | + */ | ||
149 | + assert(pool->t); | ||
150 | + | ||
151 | + return pool; | ||
152 | +} | ||
153 | + | ||
154 | +void thread_pool_free(ThreadPool *pool) | ||
155 | +{ | ||
156 | + /* | ||
157 | + * With _wait = TRUE this effectively waits for all | ||
158 | + * previously submitted work to complete first. | ||
159 | + */ | ||
160 | + g_thread_pool_free(pool->t, FALSE, TRUE); | ||
161 | + | ||
162 | + qemu_cond_destroy(&pool->all_finished_cond); | ||
163 | + qemu_mutex_destroy(&pool->cur_work_lock); | ||
164 | + | ||
165 | + g_free(pool); | ||
166 | +} | ||
167 | + | ||
168 | +void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, | ||
169 | + void *opaque, GDestroyNotify opaque_destroy) | ||
170 | +{ | ||
171 | + ThreadPoolElement *el = g_new(ThreadPoolElement, 1); | ||
172 | + | ||
173 | + el->func = func; | ||
174 | + el->opaque = opaque; | ||
175 | + el->opaque_destroy = opaque_destroy; | ||
176 | + | ||
177 | + WITH_QEMU_LOCK_GUARD(&pool->cur_work_lock) { | ||
178 | + pool->cur_work++; | ||
179 | + } | 61 | + } |
180 | + | 62 | + |
181 | + /* | 63 | + return vfio_pci_igd_config_quirk(vdev, errp); |
182 | + * Ignore the return value since this function can only return errors | ||
183 | + * if creation of an additional thread fails but even in this case the | ||
184 | + * provided work is still getting queued (just for the existing threads). | ||
185 | + */ | ||
186 | + g_thread_pool_push(pool->t, el, NULL); | ||
187 | +} | ||
188 | + | ||
189 | +void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func, | ||
190 | + void *opaque, GDestroyNotify opaque_destroy) | ||
191 | +{ | ||
192 | + thread_pool_submit(pool, func, opaque, opaque_destroy); | ||
193 | + thread_pool_adjust_max_threads_to_work(pool); | ||
194 | +} | ||
195 | + | ||
196 | +void thread_pool_wait(ThreadPool *pool) | ||
197 | +{ | ||
198 | + QEMU_LOCK_GUARD(&pool->cur_work_lock); | ||
199 | + | ||
200 | + while (pool->cur_work > 0) { | ||
201 | + qemu_cond_wait(&pool->all_finished_cond, | ||
202 | + &pool->cur_work_lock); | ||
203 | + } | ||
204 | +} | ||
205 | + | ||
206 | +bool thread_pool_set_max_threads(ThreadPool *pool, | ||
207 | + int max_threads) | ||
208 | +{ | ||
209 | + assert(max_threads > 0); | ||
210 | + | ||
211 | + return g_thread_pool_set_max_threads(pool->t, max_threads, NULL); | ||
212 | +} | ||
213 | + | ||
214 | +bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool) | ||
215 | +{ | ||
216 | + QEMU_LOCK_GUARD(&pool->cur_work_lock); | ||
217 | + | ||
218 | + return thread_pool_set_max_threads(pool, pool->cur_work); | ||
219 | +} | 64 | +} |
220 | -- | 65 | -- |
221 | 2.48.1 | 66 | 2.48.1 |
222 | 67 | ||
223 | 68 | diff view generated by jsdifflib |
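To illustrate how the generic ThreadPool introduced by the thread-pool patch above is meant to be used, here is a minimal, hypothetical usage sketch. It assumes the existing int-returning ThreadPoolFunc typedef from include/block/thread-pool.h; compress_chunk(), compress_all_chunks() and NR_CHUNKS are made-up names for the example and are not part of the patch.

    #include "qemu/osdep.h"
    #include "block/thread-pool.h"

    #define NR_CHUNKS 16

    /* One CPU-bound task; the generic pool ignores the return value */
    static int compress_chunk(void *opaque)
    {
        /* ... work on one chunk ... */
        return 0;
    }

    static void compress_all_chunks(void *chunks[NR_CHUNKS])
    {
        ThreadPool *pool = thread_pool_new();
        int i;

        /* Queue each chunk and give it its own thread right away */
        for (i = 0; i < NR_CHUNKS; i++) {
            thread_pool_submit_immediate(pool, compress_chunk, chunks[i], NULL);
        }

        /* Barrier: all submitted work has finished once this returns */
        thread_pool_wait(pool);

        /* Freeing also waits for any remaining work */
        thread_pool_free(pool);
    }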
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | Load device config received via multifd using the existing machinery | 3 | Wire data commonly use BE byte order (including in the existing migration |
4 | behind vfio_load_device_config_state(). | 4 | protocol), so use it also for VFIO device state packets. |
5 | 5 | ||
6 | Also, make sure to process the relevant main migration channel flags. | 6 | This will allow VFIO multifd device state transfer between hosts with |
7 | different endianness. | ||
8 | Although currently there is no such use case, it's good to have it now | ||
9 | for completeness. | ||
7 | 10 | ||
11 | Reviewed-by: Avihai Horon <avihaih@nvidia.com> | ||
8 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 12 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> |
9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 13 | Link: https://lore.kernel.org/qemu-devel/dcfc04cc1a50655650dbac8398e2742ada84ee39.1741611079.git.maciej.szmigiero@oracle.com |
10 | Link: https://lore.kernel.org/qemu-devel/5dbd3f3703ec1097da2cf82a7262233452146fee.1741124640.git.maciej.szmigiero@oracle.com | ||
11 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
12 | --- | 15 | --- |
13 | include/hw/vfio/vfio-common.h | 2 ++ | 16 | hw/vfio/migration-multifd.c | 15 ++++++++++----- |
14 | hw/vfio/migration-multifd.c | 49 +++++++++++++++++++++++++++++++++-- | 17 | 1 file changed, 10 insertions(+), 5 deletions(-) |
15 | hw/vfio/migration.c | 9 ++++++- | ||
16 | 3 files changed, 57 insertions(+), 3 deletions(-) | ||
17 | 18 | ||
18 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/hw/vfio/vfio-common.h | ||
21 | +++ b/include/hw/vfio/vfio-common.h | ||
22 | @@ -XXX,XX +XXX,XX @@ void vfio_mig_add_bytes_transferred(unsigned long val); | ||
23 | bool vfio_device_state_is_running(VFIODevice *vbasedev); | ||
24 | bool vfio_device_state_is_precopy(VFIODevice *vbasedev); | ||
25 | |||
26 | +int vfio_load_device_config_state(QEMUFile *f, void *opaque); | ||
27 | + | ||
28 | #ifdef CONFIG_LINUX | ||
29 | int vfio_get_region_info(VFIODevice *vbasedev, int index, | ||
30 | struct vfio_region_info **info); | ||
31 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | 19 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c |
32 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/hw/vfio/migration-multifd.c | 21 | --- a/hw/vfio/migration-multifd.c |
34 | +++ b/hw/vfio/migration-multifd.c | 22 | +++ b/hw/vfio/migration-multifd.c |
35 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ |
24 | #include "hw/vfio/vfio-common.h" | ||
25 | #include "migration/misc.h" | ||
26 | #include "qapi/error.h" | ||
27 | +#include "qemu/bswap.h" | ||
28 | #include "qemu/error-report.h" | ||
36 | #include "qemu/lockable.h" | 29 | #include "qemu/lockable.h" |
37 | #include "qemu/main-loop.h" | 30 | #include "qemu/main-loop.h" |
38 | #include "qemu/thread.h" | ||
39 | +#include "io/channel-buffer.h" | ||
40 | #include "migration/qemu-file.h" | ||
41 | #include "migration-multifd.h" | ||
42 | #include "trace.h" | ||
43 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, | 31 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, |
44 | static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev, | 32 | return false; |
45 | Error **errp) | 33 | } |
46 | { | 34 | |
47 | - error_setg(errp, "not yet there"); | 35 | + packet->version = be32_to_cpu(packet->version); |
48 | - return false; | 36 | if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { |
49 | + VFIOMigration *migration = vbasedev->migration; | 37 | error_setg(errp, "%s: packet has unknown version %" PRIu32, |
50 | + VFIOMultifd *multifd = migration->multifd; | 38 | vbasedev->name, packet->version); |
51 | + VFIOStateBuffer *lb; | 39 | return false; |
52 | + g_autoptr(QIOChannelBuffer) bioc = NULL; | 40 | } |
53 | + g_autoptr(QEMUFile) f_out = NULL, f_in = NULL; | 41 | |
54 | + uint64_t mig_header; | 42 | + packet->idx = be32_to_cpu(packet->idx); |
55 | + int ret; | 43 | + packet->flags = be32_to_cpu(packet->flags); |
56 | + | 44 | + |
57 | + assert(multifd->load_buf_idx == multifd->load_buf_idx_last); | 45 | if (packet->idx == UINT32_MAX) { |
58 | + lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx); | 46 | error_setg(errp, "%s: packet index is invalid", vbasedev->name); |
59 | + assert(lb->is_present); | 47 | return false; |
60 | + | 48 | @@ -XXX,XX +XXX,XX @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, |
61 | + bioc = qio_channel_buffer_new(lb->len); | 49 | |
62 | + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load"); | 50 | packet_len = sizeof(*packet) + bioc->usage; |
63 | + | 51 | packet = g_malloc0(packet_len); |
64 | + f_out = qemu_file_new_output(QIO_CHANNEL(bioc)); | 52 | - packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; |
65 | + qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len); | 53 | - packet->idx = idx; |
66 | + | 54 | - packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; |
67 | + ret = qemu_fflush(f_out); | 55 | + packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT); |
68 | + if (ret) { | 56 | + packet->idx = cpu_to_be32(idx); |
69 | + error_setg(errp, "%s: load config state flush failed: %d", | 57 | + packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE); |
70 | + vbasedev->name, ret); | 58 | memcpy(&packet->data, bioc->data, bioc->usage); |
71 | + return false; | 59 | |
72 | + } | 60 | if (!multifd_queue_device_state(idstr, instance_id, |
73 | + | 61 | @@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, |
74 | + qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); | 62 | } |
75 | + f_in = qemu_file_new_input(QIO_CHANNEL(bioc)); | 63 | |
76 | + | 64 | packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); |
77 | + mig_header = qemu_get_be64(f_in); | 65 | - packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; |
78 | + if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) { | 66 | + packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT); |
79 | + error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64, | 67 | |
80 | + vbasedev->name, mig_header); | 68 | for (idx = 0; ; idx++) { |
81 | + return false; | 69 | ssize_t data_size; |
82 | + } | 70 | @@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, |
83 | + | 71 | break; |
84 | + bql_lock(); | ||
85 | + ret = vfio_load_device_config_state(f_in, vbasedev); | ||
86 | + bql_unlock(); | ||
87 | + | ||
88 | + if (ret < 0) { | ||
89 | + error_setg(errp, "%s: vfio_load_device_config_state() failed: %d", | ||
90 | + vbasedev->name, ret); | ||
91 | + return false; | ||
92 | + } | ||
93 | + | ||
94 | + return true; | ||
95 | } | ||
96 | |||
97 | static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd) | ||
98 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/hw/vfio/migration.c | ||
101 | +++ b/hw/vfio/migration.c | ||
102 | @@ -XXX,XX +XXX,XX @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque, | ||
103 | return ret; | ||
104 | } | ||
105 | |||
106 | -static int vfio_load_device_config_state(QEMUFile *f, void *opaque) | ||
107 | +int vfio_load_device_config_state(QEMUFile *f, void *opaque) | ||
108 | { | ||
109 | VFIODevice *vbasedev = opaque; | ||
110 | uint64_t data; | ||
111 | @@ -XXX,XX +XXX,XX @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) | ||
112 | switch (data) { | ||
113 | case VFIO_MIG_FLAG_DEV_CONFIG_STATE: | ||
114 | { | ||
115 | + if (vfio_multifd_transfer_enabled(vbasedev)) { | ||
116 | + error_report("%s: got DEV_CONFIG_STATE in main migration " | ||
117 | + "channel but doing multifd transfer", | ||
118 | + vbasedev->name); | ||
119 | + return -EINVAL; | ||
120 | + } | ||
121 | + | ||
122 | return vfio_load_device_config_state(f, opaque); | ||
123 | } | 72 | } |
124 | case VFIO_MIG_FLAG_DEV_SETUP_STATE: | 73 | |
74 | - packet->idx = idx; | ||
75 | + packet->idx = cpu_to_be32(idx); | ||
76 | packet_size = sizeof(*packet) + data_size; | ||
77 | |||
78 | if (!multifd_queue_device_state(d->idstr, d->instance_id, | ||
125 | -- | 79 | -- |
126 | 2.48.1 | 80 | 2.48.1 |
127 | 81 | ||
128 | 82 | diff view generated by jsdifflib |
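A compact sketch of the wire-endianness rule applied by the device state packet patch above: every multi-byte header field is stored big-endian on the wire, converted with cpu_to_be32() before it is queued and with be32_to_cpu() right after it is received. The ExamplePacket struct below only mirrors the version/idx/flags fields referenced in the diff and is not the real packet definition.

    #include "qemu/osdep.h"
    #include "qemu/bswap.h"

    typedef struct QEMU_PACKED ExamplePacket {
        uint32_t version;
        uint32_t idx;
        uint32_t flags;
        uint8_t data[];
    } ExamplePacket;

    /* Sender side: host byte order -> wire (big endian) */
    static void example_pack_header(ExamplePacket *p, uint32_t version,
                                    uint32_t idx, uint32_t flags)
    {
        p->version = cpu_to_be32(version);
        p->idx = cpu_to_be32(idx);
        p->flags = cpu_to_be32(flags);
    }

    /* Receiver side: wire (big endian) -> host byte order, before any checks */
    static void example_unpack_header(ExamplePacket *p)
    {
        p->version = be32_to_cpu(p->version);
        p->idx = be32_to_cpu(p->idx);
        p->flags = be32_to_cpu(p->flags);
    }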
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add a thread which loads the VFIO device state buffers that were received | 3 | Both qemu_minrampagesize() and qemu_maxrampagesize() are |
4 | via multifd. | 4 | related to host memory backends, having the following call |
5 | stack: | ||
5 | 6 | ||
6 | Each VFIO device that has multifd device state transfer enabled has one | 7 | qemu_minrampagesize() |
7 | such thread, which is created using the migration core API | 12 | -> find_max_backend_pagesize() |
8 | qemu_loadvm_start_load_thread(). | 9 | -> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND) |
9 | 10 | ||
10 | Since it's important to finish loading device state transferred via the | 11 | qemu_maxrampagesize() |
11 | main migration channel (via save_live_iterate SaveVMHandler) before | 12 | -> find_max_backend_pagesize() |
12 | starting to load the data asynchronously transferred via multifd, the thread | 17 | include/system/hostmem.h:23:#define TYPE_MEMORY_BACKEND "memory-backend" |
13 | doing the actual loading of the multifd transferred data is only started | ||
14 | from switchover_start SaveVMHandler. | ||
15 | 14 | ||
16 | switchover_start handler is called when MIG_CMD_SWITCHOVER_START | 15 | Having TYPE_MEMORY_BACKEND defined in "system/hostmem.h": |
17 | sub-command of QEMU_VM_COMMAND is received via the main migration channel. | ||
18 | 16 | ||
19 | This sub-command is only sent after all save_live_iterate data have already | 17 | include/system/hostmem.h:23:#define TYPE_MEMORY_BACKEND "memory-backend" |
20 | been posted so it is safe to commence loading of the multifd-transferred | ||
21 | device state upon receiving it - loading of save_live_iterate data happens | ||
22 | synchronously in the main migration thread (much like the processing of | ||
23 | MIG_CMD_SWITCHOVER_START) so by the time MIG_CMD_SWITCHOVER_START is | ||
24 | processed all the proceeding data must have already been loaded. | ||
25 | 18 | ||
26 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 19 | Move their prototype declaration to "system/hostmem.h". |
27 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 20 | |
28 | Link: https://lore.kernel.org/qemu-devel/9abe612d775aaf42e31646796acd2363c723a57a.1741124640.git.maciej.szmigiero@oracle.com | 21 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
29 | [ clg: - Reordered savevm_vfio_handlers | 22 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
30 | - Added switchover_start documentation ] | 23 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
24 | Message-Id: <20250308230917.18907-7-philmd@linaro.org> | ||
25 | Acked-by: David Hildenbrand <david@redhat.com> | ||
26 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-2-philmd@linaro.org | ||
31 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 27 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
32 | --- | 28 | --- |
33 | docs/devel/migration/vfio.rst | 4 + | 29 | include/exec/ram_addr.h | 3 --- |
34 | hw/vfio/migration-multifd.h | 2 + | 30 | include/system/hostmem.h | 3 +++ |
35 | hw/vfio/migration-multifd.c | 226 ++++++++++++++++++++++++++++++++++ | 31 | hw/ppc/spapr_caps.c | 1 + |
36 | hw/vfio/migration.c | 12 ++ | 32 | hw/s390x/s390-virtio-ccw.c | 1 + |
37 | hw/vfio/trace-events | 7 ++ | 33 | hw/vfio/spapr.c | 1 + |
38 | 5 files changed, 251 insertions(+) | 34 | 5 files changed, 6 insertions(+), 3 deletions(-) |
39 | 35 | ||
40 | diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst | 36 | diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h |
41 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/docs/devel/migration/vfio.rst | 38 | --- a/include/exec/ram_addr.h |
43 | +++ b/docs/devel/migration/vfio.rst | 39 | +++ b/include/exec/ram_addr.h |
44 | @@ -XXX,XX +XXX,XX @@ VFIO implements the device hooks for the iterative approach as follows: | 40 | @@ -XXX,XX +XXX,XX @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, |
45 | * A ``switchover_ack_needed`` function that checks if the VFIO device uses | 41 | |
46 | "switchover-ack" migration capability when this capability is enabled. | 42 | bool ramblock_is_pmem(RAMBlock *rb); |
47 | 43 | ||
48 | +* A ``switchover_start`` function that in the multifd mode starts a thread that | 44 | -long qemu_minrampagesize(void); |
49 | + reassembles the multifd received data and loads it in-order into the device. | 45 | -long qemu_maxrampagesize(void); |
50 | + In the non-multifd mode this function is a NOP. | 46 | - |
51 | + | 47 | /** |
52 | * A ``save_state`` function to save the device config space if it is present. | 48 | * qemu_ram_alloc_from_file, |
53 | 49 | * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing | |
54 | * A ``save_live_complete_precopy`` function that sets the VFIO device in | 50 | diff --git a/include/system/hostmem.h b/include/system/hostmem.h |
55 | diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | 51 | index XXXXXXX..XXXXXXX 100644 |
57 | --- a/hw/vfio/migration-multifd.h | 52 | --- a/include/system/hostmem.h |
58 | +++ b/hw/vfio/migration-multifd.h | 53 | +++ b/include/system/hostmem.h |
59 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); | 54 | @@ -XXX,XX +XXX,XX @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend); |
60 | bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, | 55 | size_t host_memory_backend_pagesize(HostMemoryBackend *memdev); |
61 | Error **errp); | 56 | char *host_memory_backend_get_name(HostMemoryBackend *backend); |
62 | 57 | ||
63 | +int vfio_multifd_switchover_start(VFIODevice *vbasedev); | 58 | +long qemu_minrampagesize(void); |
59 | +long qemu_maxrampagesize(void); | ||
64 | + | 60 | + |
65 | #endif | 61 | #endif |
66 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | 62 | diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c |
67 | index XXXXXXX..XXXXXXX 100644 | 63 | index XXXXXXX..XXXXXXX 100644 |
68 | --- a/hw/vfio/migration-multifd.c | 64 | --- a/hw/ppc/spapr_caps.c |
69 | +++ b/hw/vfio/migration-multifd.c | 65 | +++ b/hw/ppc/spapr_caps.c |
70 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIOStateBuffer { | 66 | @@ -XXX,XX +XXX,XX @@ |
71 | } VFIOStateBuffer; | 67 | #include "kvm_ppc.h" |
72 | 68 | #include "migration/vmstate.h" | |
73 | typedef struct VFIOMultifd { | 69 | #include "system/tcg.h" |
74 | + bool load_bufs_thread_running; | 70 | +#include "system/hostmem.h" |
75 | + bool load_bufs_thread_want_exit; | 71 | |
76 | + | 72 | #include "hw/ppc/spapr.h" |
77 | VFIOStateBuffers load_bufs; | 73 | |
78 | QemuCond load_bufs_buffer_ready_cond; | 74 | diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c |
79 | + QemuCond load_bufs_thread_finished_cond; | ||
80 | QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ | ||
81 | uint32_t load_buf_idx; | ||
82 | uint32_t load_buf_idx_last; | ||
83 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, | ||
84 | return true; | ||
85 | } | ||
86 | |||
87 | +static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev, | ||
88 | + Error **errp) | ||
89 | +{ | ||
90 | + error_setg(errp, "not yet there"); | ||
91 | + return false; | ||
92 | +} | ||
93 | + | ||
94 | +static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd) | ||
95 | +{ | ||
96 | + VFIOStateBuffer *lb; | ||
97 | + unsigned int bufs_len; | ||
98 | + | ||
99 | + bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs); | ||
100 | + if (multifd->load_buf_idx >= bufs_len) { | ||
101 | + assert(multifd->load_buf_idx == bufs_len); | ||
102 | + return NULL; | ||
103 | + } | ||
104 | + | ||
105 | + lb = vfio_state_buffers_at(&multifd->load_bufs, | ||
106 | + multifd->load_buf_idx); | ||
107 | + if (!lb->is_present) { | ||
108 | + return NULL; | ||
109 | + } | ||
110 | + | ||
111 | + return lb; | ||
112 | +} | ||
113 | + | ||
114 | +static bool vfio_load_state_buffer_write(VFIODevice *vbasedev, | ||
115 | + VFIOStateBuffer *lb, | ||
116 | + Error **errp) | ||
117 | +{ | ||
118 | + VFIOMigration *migration = vbasedev->migration; | ||
119 | + VFIOMultifd *multifd = migration->multifd; | ||
120 | + g_autofree char *buf = NULL; | ||
121 | + char *buf_cur; | ||
122 | + size_t buf_len; | ||
123 | + | ||
124 | + if (!lb->len) { | ||
125 | + return true; | ||
126 | + } | ||
127 | + | ||
128 | + trace_vfio_load_state_device_buffer_load_start(vbasedev->name, | ||
129 | + multifd->load_buf_idx); | ||
130 | + | ||
131 | + /* lb might become re-allocated when we drop the lock */ | ||
132 | + buf = g_steal_pointer(&lb->data); | ||
133 | + buf_cur = buf; | ||
134 | + buf_len = lb->len; | ||
135 | + while (buf_len > 0) { | ||
136 | + ssize_t wr_ret; | ||
137 | + int errno_save; | ||
138 | + | ||
139 | + /* | ||
140 | + * Loading data to the device takes a while, | ||
141 | + * drop the lock during this process. | ||
142 | + */ | ||
143 | + qemu_mutex_unlock(&multifd->load_bufs_mutex); | ||
144 | + wr_ret = write(migration->data_fd, buf_cur, buf_len); | ||
145 | + errno_save = errno; | ||
146 | + qemu_mutex_lock(&multifd->load_bufs_mutex); | ||
147 | + | ||
148 | + if (wr_ret < 0) { | ||
149 | + error_setg(errp, | ||
150 | + "%s: writing state buffer %" PRIu32 " failed: %d", | ||
151 | + vbasedev->name, multifd->load_buf_idx, errno_save); | ||
152 | + return false; | ||
153 | + } | ||
154 | + | ||
155 | + assert(wr_ret <= buf_len); | ||
156 | + buf_len -= wr_ret; | ||
157 | + buf_cur += wr_ret; | ||
158 | + } | ||
159 | + | ||
160 | + trace_vfio_load_state_device_buffer_load_end(vbasedev->name, | ||
161 | + multifd->load_buf_idx); | ||
162 | + | ||
163 | + return true; | ||
164 | +} | ||
165 | + | ||
166 | +static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd, | ||
167 | + bool *should_quit) | ||
168 | +{ | ||
169 | + return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit); | ||
170 | +} | ||
171 | + | ||
172 | +/* | ||
173 | + * This thread is spawned by vfio_multifd_switchover_start() which gets | ||
174 | + * called upon encountering the switchover point marker in main migration | ||
175 | + * stream. | ||
176 | + * | ||
177 | + * It exits after either: | ||
178 | + * * completing loading the remaining device state and device config, OR: | ||
179 | + * * encountering some error while doing the above, OR: | ||
180 | + * * being forcefully aborted by the migration core by it setting should_quit | ||
181 | + * or by vfio_load_cleanup_load_bufs_thread() setting | ||
182 | + * multifd->load_bufs_thread_want_exit. | ||
183 | + */ | ||
184 | +static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp) | ||
185 | +{ | ||
186 | + VFIODevice *vbasedev = opaque; | ||
187 | + VFIOMigration *migration = vbasedev->migration; | ||
188 | + VFIOMultifd *multifd = migration->multifd; | ||
189 | + bool ret = false; | ||
190 | + | ||
191 | + trace_vfio_load_bufs_thread_start(vbasedev->name); | ||
192 | + | ||
193 | + assert(multifd); | ||
194 | + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); | ||
195 | + | ||
196 | + assert(multifd->load_bufs_thread_running); | ||
197 | + | ||
198 | + while (true) { | ||
199 | + VFIOStateBuffer *lb; | ||
200 | + | ||
201 | + /* | ||
202 | + * Always check cancellation first after the buffer_ready wait below in | ||
203 | + * case that cond was signalled by vfio_load_cleanup_load_bufs_thread(). | ||
204 | + */ | ||
205 | + if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) { | ||
206 | + error_setg(errp, "operation cancelled"); | ||
207 | + goto thread_exit; | ||
208 | + } | ||
209 | + | ||
210 | + assert(multifd->load_buf_idx <= multifd->load_buf_idx_last); | ||
211 | + | ||
212 | + lb = vfio_load_state_buffer_get(multifd); | ||
213 | + if (!lb) { | ||
214 | + trace_vfio_load_state_device_buffer_starved(vbasedev->name, | ||
215 | + multifd->load_buf_idx); | ||
216 | + qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond, | ||
217 | + &multifd->load_bufs_mutex); | ||
218 | + continue; | ||
219 | + } | ||
220 | + | ||
221 | + if (multifd->load_buf_idx == multifd->load_buf_idx_last) { | ||
222 | + break; | ||
223 | + } | ||
224 | + | ||
225 | + if (multifd->load_buf_idx == 0) { | ||
226 | + trace_vfio_load_state_device_buffer_start(vbasedev->name); | ||
227 | + } | ||
228 | + | ||
229 | + if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) { | ||
230 | + goto thread_exit; | ||
231 | + } | ||
232 | + | ||
233 | + if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) { | ||
234 | + trace_vfio_load_state_device_buffer_end(vbasedev->name); | ||
235 | + } | ||
236 | + | ||
237 | + multifd->load_buf_idx++; | ||
238 | + } | ||
239 | + | ||
240 | + if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) { | ||
241 | + goto thread_exit; | ||
242 | + } | ||
243 | + | ||
244 | + ret = true; | ||
245 | + | ||
246 | +thread_exit: | ||
247 | + /* | ||
248 | + * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that | ||
249 | + * this thread is exiting. | ||
250 | + */ | ||
251 | + multifd->load_bufs_thread_running = false; | ||
252 | + qemu_cond_signal(&multifd->load_bufs_thread_finished_cond); | ||
253 | + | ||
254 | + trace_vfio_load_bufs_thread_end(vbasedev->name); | ||
255 | + | ||
256 | + return ret; | ||
257 | +} | ||
258 | + | ||
259 | static VFIOMultifd *vfio_multifd_new(void) | ||
260 | { | ||
261 | VFIOMultifd *multifd = g_new(VFIOMultifd, 1); | ||
262 | @@ -XXX,XX +XXX,XX @@ static VFIOMultifd *vfio_multifd_new(void) | ||
263 | multifd->load_buf_idx_last = UINT32_MAX; | ||
264 | qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); | ||
265 | |||
266 | + multifd->load_bufs_thread_running = false; | ||
267 | + multifd->load_bufs_thread_want_exit = false; | ||
268 | + qemu_cond_init(&multifd->load_bufs_thread_finished_cond); | ||
269 | + | ||
270 | return multifd; | ||
271 | } | ||
272 | |||
273 | +/* | ||
274 | + * Terminates vfio_load_bufs_thread by setting | ||
275 | + * multifd->load_bufs_thread_want_exit and signalling all the conditions | ||
276 | + * the thread could be blocked on. | ||
277 | + * | ||
278 | + * Waits for the thread to signal that it had finished. | ||
279 | + */ | ||
280 | +static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd) | ||
281 | +{ | ||
282 | + /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */ | ||
283 | + bql_unlock(); | ||
284 | + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { | ||
285 | + while (multifd->load_bufs_thread_running) { | ||
286 | + multifd->load_bufs_thread_want_exit = true; | ||
287 | + | ||
288 | + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); | ||
289 | + qemu_cond_wait(&multifd->load_bufs_thread_finished_cond, | ||
290 | + &multifd->load_bufs_mutex); | ||
291 | + } | ||
292 | + } | ||
293 | + bql_lock(); | ||
294 | +} | ||
295 | + | ||
296 | static void vfio_multifd_free(VFIOMultifd *multifd) | ||
297 | { | ||
298 | + vfio_load_cleanup_load_bufs_thread(multifd); | ||
299 | + | ||
300 | + qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond); | ||
301 | vfio_state_buffers_destroy(&multifd->load_bufs); | ||
302 | qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); | ||
303 | qemu_mutex_destroy(&multifd->load_bufs_mutex); | ||
304 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) | ||
305 | |||
306 | return true; | ||
307 | } | ||
308 | + | ||
309 | +int vfio_multifd_switchover_start(VFIODevice *vbasedev) | ||
310 | +{ | ||
311 | + VFIOMigration *migration = vbasedev->migration; | ||
312 | + VFIOMultifd *multifd = migration->multifd; | ||
313 | + | ||
314 | + assert(multifd); | ||
315 | + | ||
316 | + /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */ | ||
317 | + bql_unlock(); | ||
318 | + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { | ||
319 | + assert(!multifd->load_bufs_thread_running); | ||
320 | + multifd->load_bufs_thread_running = true; | ||
321 | + } | ||
322 | + bql_lock(); | ||
323 | + | ||
324 | + qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev); | ||
325 | + | ||
326 | + return 0; | ||
327 | +} | ||
328 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
329 | index XXXXXXX..XXXXXXX 100644 | 75 | index XXXXXXX..XXXXXXX 100644 |
330 | --- a/hw/vfio/migration.c | 76 | --- a/hw/s390x/s390-virtio-ccw.c |
331 | +++ b/hw/vfio/migration.c | 77 | +++ b/hw/s390x/s390-virtio-ccw.c |
332 | @@ -XXX,XX +XXX,XX @@ static bool vfio_switchover_ack_needed(void *opaque) | 78 | @@ -XXX,XX +XXX,XX @@ |
333 | return vfio_precopy_supported(vbasedev); | 79 | #include "hw/s390x/tod.h" |
334 | } | 80 | #include "system/system.h" |
335 | 81 | #include "system/cpus.h" | |
336 | +static int vfio_switchover_start(void *opaque) | 82 | +#include "system/hostmem.h" |
337 | +{ | 83 | #include "target/s390x/kvm/pv.h" |
338 | + VFIODevice *vbasedev = opaque; | 84 | #include "migration/blocker.h" |
339 | + | 85 | #include "qapi/visitor.h" |
340 | + if (vfio_multifd_transfer_enabled(vbasedev)) { | 86 | diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c |
341 | + return vfio_multifd_switchover_start(vbasedev); | ||
342 | + } | ||
343 | + | ||
344 | + return 0; | ||
345 | +} | ||
346 | + | ||
347 | static const SaveVMHandlers savevm_vfio_handlers = { | ||
348 | .save_prepare = vfio_save_prepare, | ||
349 | .save_setup = vfio_save_setup, | ||
350 | @@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = { | ||
351 | * Multifd support | ||
352 | */ | ||
353 | .load_state_buffer = vfio_multifd_load_state_buffer, | ||
354 | + .switchover_start = vfio_switchover_start, | ||
355 | }; | ||
356 | |||
357 | /* ---------------------------------------------------------------------- */ | ||
358 | diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events | ||
359 | index XXXXXXX..XXXXXXX 100644 | 87 | index XXXXXXX..XXXXXXX 100644 |
360 | --- a/hw/vfio/trace-events | 88 | --- a/hw/vfio/spapr.c |
361 | +++ b/hw/vfio/trace-events | 89 | +++ b/hw/vfio/spapr.c |
362 | @@ -XXX,XX +XXX,XX @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" | 90 | @@ -XXX,XX +XXX,XX @@ |
363 | vfio_display_edid_write_error(void) "" | 91 | #include <linux/kvm.h> |
364 | 92 | #endif | |
365 | # migration.c | 93 | #include "system/kvm.h" |
366 | +vfio_load_bufs_thread_start(const char *name) " (%s)" | 94 | +#include "system/hostmem.h" |
367 | +vfio_load_bufs_thread_end(const char *name) " (%s)" | 95 | #include "exec/address-spaces.h" |
368 | vfio_load_cleanup(const char *name) " (%s)" | 96 | |
369 | vfio_load_device_config_state_start(const char *name) " (%s)" | 97 | #include "hw/vfio/vfio-common.h" |
370 | vfio_load_device_config_state_end(const char *name) " (%s)" | ||
371 | vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 | ||
372 | vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" | ||
373 | vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 | ||
374 | +vfio_load_state_device_buffer_start(const char *name) " (%s)" | ||
375 | +vfio_load_state_device_buffer_starved(const char *name, uint32_t idx) " (%s) idx %"PRIu32 | ||
376 | +vfio_load_state_device_buffer_load_start(const char *name, uint32_t idx) " (%s) idx %"PRIu32 | ||
377 | +vfio_load_state_device_buffer_load_end(const char *name, uint32_t idx) " (%s) idx %"PRIu32 | ||
378 | +vfio_load_state_device_buffer_end(const char *name) " (%s)" | ||
379 | vfio_migration_realize(const char *name) " (%s)" | ||
380 | vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" | ||
381 | vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s" | ||
382 | -- | 98 | -- |
383 | 2.48.1 | 99 | 2.48.1 |
384 | 100 | ||
385 | 101 | diff view generated by jsdifflib |
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Automatic memory management helps avoid memory safety issues. | 3 | <linux/kvm.h> is already included by "system/kvm.h" in the next line. |
4 | 4 | ||
5 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
6 | Reviewed-by: Peter Xu <peterx@redhat.com> | 6 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
7 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Link: https://lore.kernel.org/qemu-devel/2fd01d773a783d572dcf538a064a98cc09e75c12.1741124640.git.maciej.szmigiero@oracle.com | 8 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
9 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
10 | Message-Id: <20250307180337.14811-3-philmd@linaro.org> | ||
11 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-3-philmd@linaro.org | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 13 | --- |
11 | migration/qemu-file.h | 2 ++ | 14 | hw/vfio/spapr.c | 3 --- |
12 | 1 file changed, 2 insertions(+) | 15 | 1 file changed, 3 deletions(-) |
13 | 16 | ||
14 | diff --git a/migration/qemu-file.h b/migration/qemu-file.h | 17 | diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c |
15 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/migration/qemu-file.h | 19 | --- a/hw/vfio/spapr.c |
17 | +++ b/migration/qemu-file.h | 20 | +++ b/hw/vfio/spapr.c |
18 | @@ -XXX,XX +XXX,XX @@ QEMUFile *qemu_file_new_input(QIOChannel *ioc); | 21 | @@ -XXX,XX +XXX,XX @@ |
19 | QEMUFile *qemu_file_new_output(QIOChannel *ioc); | 22 | #include "qemu/osdep.h" |
20 | int qemu_fclose(QEMUFile *f); | 23 | #include <sys/ioctl.h> |
21 | 24 | #include <linux/vfio.h> | |
22 | +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QEMUFile, qemu_fclose) | 25 | -#ifdef CONFIG_KVM |
23 | + | 26 | -#include <linux/kvm.h> |
24 | /* | 27 | -#endif |
25 | * qemu_file_transferred: | 28 | #include "system/kvm.h" |
26 | * | 29 | #include "system/hostmem.h" |
30 | #include "exec/address-spaces.h" | ||
27 | -- | 31 | -- |
28 | 2.48.1 | 32 | 2.48.1 |
29 | 33 | ||
30 | 34 | diff view generated by jsdifflib |
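The G_DEFINE_AUTOPTR_CLEANUP_FUNC(QEMUFile, qemu_fclose) declaration added above is what lets a QEMUFile be bound to g_autoptr(), as the multifd config-load patch earlier in this series already does. A minimal sketch of the resulting pattern, with an illustrative helper name and no error handling beyond the flush:

    #include "qemu/osdep.h"
    #include "io/channel-buffer.h"
    #include "migration/qemu-file.h"

    /* Serialize a buffer through a QEMUFile backed by an in-memory channel */
    static int example_write_blob(const uint8_t *buf, size_t len)
    {
        g_autoptr(QIOChannelBuffer) bioc = qio_channel_buffer_new(len);
        /* qemu_fclose() now runs automatically on every return path */
        g_autoptr(QEMUFile) f = qemu_file_new_output(QIO_CHANNEL(bioc));

        qemu_put_buffer(f, buf, len);
        return qemu_fflush(f);
    }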
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add a hw_compat entry for the recently added x-migration-multifd-transfer VFIO | 3 | Always include necessary headers explicitly, to avoid breakage |
4 | property. | 4 | when refactoring unrelated ones: |
5 | 5 | ||
6 | hw/vfio/common.c:1176:45: error: implicit declaration of function ‘tcg_enabled’; | ||
7 | 1176 | tcg_enabled() ? DIRTY_CLIENTS_ALL : | ||
8 | | ^~~~~~~~~~~ | ||
9 | |||
10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 13 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
7 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 14 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
8 | Link: https://lore.kernel.org/qemu-devel/92c354f0457c152d1f267cc258c6967fff551cb1.1741124640.git.maciej.szmigiero@oracle.com | 15 | Message-Id: <20250307180337.14811-2-philmd@linaro.org> |
16 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-4-philmd@linaro.org | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 17 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 18 | --- |
11 | hw/core/machine.c | 1 + | 19 | hw/vfio/common.c | 1 + |
12 | 1 file changed, 1 insertion(+) | 20 | 1 file changed, 1 insertion(+) |
13 | 21 | ||
14 | diff --git a/hw/core/machine.c b/hw/core/machine.c | 22 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c |
15 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/core/machine.c | 24 | --- a/hw/vfio/common.c |
17 | +++ b/hw/core/machine.c | 25 | +++ b/hw/vfio/common.c |
18 | @@ -XXX,XX +XXX,XX @@ GlobalProperty hw_compat_9_2[] = { | 26 | @@ -XXX,XX +XXX,XX @@ |
19 | { "virtio-mem-pci", "vectors", "0" }, | 27 | #include "migration/misc.h" |
20 | { "migration", "multifd-clean-tls-termination", "false" }, | 28 | #include "migration/blocker.h" |
21 | { "migration", "send-switchover-start", "off"}, | 29 | #include "migration/qemu-file.h" |
22 | + { "vfio-pci", "x-migration-multifd-transfer", "off" }, | 30 | +#include "system/tcg.h" |
23 | }; | 31 | #include "system/tpm.h" |
24 | const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); | 32 | |
25 | 33 | VFIODeviceList vfio_device_list = | |
26 | -- | 34 | -- |
27 | 2.48.1 | 35 | 2.48.1 |
28 | 36 | ||
29 | 37 | diff view generated by jsdifflib |
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Wire VFIO multifd transfer specific setup and cleanup functions into | 3 | Prefer runtime helpers to get target page size. |
4 | general VFIO load/save setup and cleanup methods. | ||
5 | 4 | ||
6 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Link: https://lore.kernel.org/qemu-devel/b1f864a65fafd4fdab1f89230df52e46ae41f2ac.1741124640.git.maciej.szmigiero@oracle.com | 7 | Message-Id: <20250305153929.43687-3-philmd@linaro.org> |
8 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-5-philmd@linaro.org | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 10 | --- |
11 | hw/vfio/migration.c | 24 ++++++++++++++++++++++-- | 11 | hw/vfio/common.c | 8 +++++--- |
12 | 1 file changed, 22 insertions(+), 2 deletions(-) | 12 | 1 file changed, 5 insertions(+), 3 deletions(-) |
13 | 13 | ||
14 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 14 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c |
15 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/hw/vfio/migration.c | 16 | --- a/hw/vfio/common.c |
17 | +++ b/hw/vfio/migration.c | 17 | +++ b/hw/vfio/common.c |
18 | @@ -XXX,XX +XXX,XX @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) | 18 | @@ -XXX,XX +XXX,XX @@ |
19 | uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; | 19 | #include "exec/address-spaces.h" |
20 | int ret; | 20 | #include "exec/memory.h" |
21 | 21 | #include "exec/ram_addr.h" | |
22 | + if (!vfio_multifd_setup(vbasedev, false, errp)) { | 22 | +#include "exec/target_page.h" |
23 | + return -EINVAL; | 23 | #include "hw/hw.h" |
24 | + } | 24 | #include "qemu/error-report.h" |
25 | + | 25 | #include "qemu/main-loop.h" |
26 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); | 26 | @@ -XXX,XX +XXX,XX @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, |
27 | 27 | MemoryRegionSection *section) | |
28 | vfio_query_stop_copy_size(vbasedev, &stop_copy_size); | ||
29 | @@ -XXX,XX +XXX,XX @@ static void vfio_save_cleanup(void *opaque) | ||
30 | Error *local_err = NULL; | ||
31 | int ret; | ||
32 | |||
33 | + /* Currently a NOP, done for symmetry with load_cleanup() */ | ||
34 | + vfio_multifd_cleanup(vbasedev); | ||
35 | + | ||
36 | /* | ||
37 | * Changing device state from STOP_COPY to STOP can take time. Do it here, | ||
38 | * after migration has completed, so it won't increase downtime. | ||
39 | @@ -XXX,XX +XXX,XX @@ static void vfio_save_state(QEMUFile *f, void *opaque) | ||
40 | static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp) | ||
41 | { | 28 | { |
42 | VFIODevice *vbasedev = opaque; | 29 | RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); |
43 | + VFIOMigration *migration = vbasedev->migration; | 30 | + int target_page_size = qemu_target_page_size(); |
44 | + int ret; | 31 | VFIORamDiscardListener *vrdl; |
45 | 32 | ||
46 | - return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, | 33 | /* Ignore some corner cases not relevant in practice. */ |
47 | - vbasedev->migration->device_state, errp); | 34 | - g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE)); |
48 | + if (!vfio_multifd_setup(vbasedev, true, errp)) { | 35 | + g_assert(QEMU_IS_ALIGNED(section->offset_within_region, target_page_size)); |
49 | + return -EINVAL; | 36 | g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space, |
50 | + } | 37 | - TARGET_PAGE_SIZE)); |
51 | + | 38 | - g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); |
52 | + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, | 39 | + target_page_size)); |
53 | + migration->device_state, errp); | 40 | + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), target_page_size)); |
54 | + if (ret) { | 41 | |
55 | + return ret; | 42 | vrdl = g_new0(VFIORamDiscardListener, 1); |
56 | + } | 43 | vrdl->bcontainer = bcontainer; |
57 | + | ||
58 | + return 0; | ||
59 | } | ||
60 | |||
61 | static int vfio_load_cleanup(void *opaque) | ||
62 | { | ||
63 | VFIODevice *vbasedev = opaque; | ||
64 | |||
65 | + vfio_multifd_cleanup(vbasedev); | ||
66 | + | ||
67 | vfio_migration_cleanup(vbasedev); | ||
68 | trace_vfio_load_cleanup(vbasedev->name); | ||
69 | |||
70 | -- | 44 | -- |
71 | 2.48.1 | 45 | 2.48.1 |
72 | 46 | ||
73 | 47 | diff view generated by jsdifflib |
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | This way they can also be referenced in translation | 3 | Some files don't rely on any target-specific knowledge |
4 | units other than migration.c. | 4 | and can be compiled once: |
5 | 5 | ||
6 | - helpers.c | ||
7 | - container-base.c | ||
8 | - migration.c (removing unnecessary "exec/ram_addr.h") | ||
9 | - migration-multifd.c | ||
10 | - cpr.c | ||
11 | |||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
13 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 15 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
7 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 16 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
8 | Link: https://lore.kernel.org/qemu-devel/26a940f6b22c1b685818251b7a3ddbbca601b1d6.1741124640.git.maciej.szmigiero@oracle.com | 17 | Message-Id: <20250308230917.18907-4-philmd@linaro.org> |
18 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-6-philmd@linaro.org | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 20 | --- |
11 | include/hw/vfio/vfio-common.h | 17 +++++++++++++++++ | 21 | hw/vfio/migration.c | 1 - |
12 | hw/vfio/migration.c | 17 ----------------- | 22 | hw/vfio/meson.build | 13 ++++++++----- |
13 | 2 files changed, 17 insertions(+), 17 deletions(-) | 23 | 2 files changed, 8 insertions(+), 6 deletions(-) |
14 | 24 | ||
15 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/hw/vfio/vfio-common.h | ||
18 | +++ b/include/hw/vfio/vfio-common.h | ||
19 | @@ -XXX,XX +XXX,XX @@ | ||
20 | |||
21 | #define VFIO_MSG_PREFIX "vfio %s: " | ||
22 | |||
23 | +/* | ||
24 | + * Flags to be used as unique delimiters for VFIO devices in the migration | ||
25 | + * stream. These flags are composed as: | ||
26 | + * 0xffffffff => MSB 32-bit all 1s | ||
27 | + * 0xef10 => Magic ID, represents emulated (virtual) function IO | ||
28 | + * 0x0000 => 16-bits reserved for flags | ||
29 | + * | ||
30 | + * The beginning of state information is marked by _DEV_CONFIG_STATE, | ||
31 | + * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a | ||
32 | + * certain state information is marked by _END_OF_STATE. | ||
33 | + */ | ||
34 | +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) | ||
35 | +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) | ||
36 | +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) | ||
37 | +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) | ||
38 | +#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) | ||
39 | + | ||
40 | enum { | ||
41 | VFIO_DEVICE_TYPE_PCI = 0, | ||
42 | VFIO_DEVICE_TYPE_PLATFORM = 1, | ||
43 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 25 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c |
44 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/hw/vfio/migration.c | 27 | --- a/hw/vfio/migration.c |
46 | +++ b/hw/vfio/migration.c | 28 | +++ b/hw/vfio/migration.c |
47 | @@ -XXX,XX +XXX,XX @@ | 29 | @@ -XXX,XX +XXX,XX @@ |
30 | #include "qapi/error.h" | ||
31 | #include "qapi/qapi-events-vfio.h" | ||
32 | #include "exec/ramlist.h" | ||
33 | -#include "exec/ram_addr.h" | ||
34 | #include "pci.h" | ||
48 | #include "trace.h" | 35 | #include "trace.h" |
49 | #include "hw/hw.h" | 36 | #include "hw/hw.h" |
50 | 37 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build | |
51 | -/* | 38 | index XXXXXXX..XXXXXXX 100644 |
52 | - * Flags to be used as unique delimiters for VFIO devices in the migration | 39 | --- a/hw/vfio/meson.build |
53 | - * stream. These flags are composed as: | 40 | +++ b/hw/vfio/meson.build |
54 | - * 0xffffffff => MSB 32-bit all 1s | 41 | @@ -XXX,XX +XXX,XX @@ |
55 | - * 0xef10 => Magic ID, represents emulated (virtual) function IO | 42 | vfio_ss = ss.source_set() |
56 | - * 0x0000 => 16-bits reserved for flags | 43 | vfio_ss.add(files( |
57 | - * | 44 | - 'helpers.c', |
58 | - * The beginning of state information is marked by _DEV_CONFIG_STATE, | 45 | 'common.c', |
59 | - * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a | 46 | - 'container-base.c', |
60 | - * certain state information is marked by _END_OF_STATE. | 47 | 'container.c', |
61 | - */ | 48 | - 'migration.c', |
62 | -#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) | 49 | - 'migration-multifd.c', |
63 | -#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) | 50 | - 'cpr.c', |
64 | -#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) | 51 | )) |
65 | -#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) | 52 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) |
66 | -#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) | 53 | vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( |
67 | - | 54 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) |
68 | /* | 55 | vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) |
69 | * This is an arbitrary size based on migration of mlx5 devices, where typically | 56 | |
70 | * total device migration size is on the order of 100s of MB. Testing with | 57 | specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) |
58 | + | ||
59 | +system_ss.add(when: 'CONFIG_VFIO', if_true: files( | ||
60 | + 'helpers.c', | ||
61 | + 'container-base.c', | ||
62 | + 'migration.c', | ||
63 | + 'migration-multifd.c', | ||
64 | + 'cpr.c', | ||
65 | +)) | ||
71 | -- | 66 | -- |
72 | 2.48.1 | 67 | 2.48.1 |
73 | 68 | ||
74 | 69 | diff view generated by jsdifflib |
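The left-hand patch only relocates the VFIO_MIG_FLAG_* delimiters into vfio-common.h, but it is worth seeing how such markers frame a section of the migration stream. A hedged standalone sketch follows: write_be64() is an assumption standing in for QEMU's qemu_put_be64(), and the payload between the markers is omitted.

    #include <stdint.h>
    #include <stdio.h>

    #define MIG_FLAG_DEV_SETUP_STATE  (0xffffffffef100003ULL)
    #define MIG_FLAG_END_OF_STATE     (0xffffffffef100001ULL)

    /* Stand-in for qemu_put_be64(): emit the value big-endian, byte by byte. */
    static void write_be64(FILE *f, uint64_t v)
    {
        for (int shift = 56; shift >= 0; shift -= 8) {
            fputc((int)((v >> shift) & 0xff), f);
        }
    }

    /* Frame a (here empty) setup-state section with begin/end delimiters. */
    static void put_setup_section(FILE *f)
    {
        write_be64(f, MIG_FLAG_DEV_SETUP_STATE);
        /* ... device-specific setup payload would be written here ... */
        write_be64(f, MIG_FLAG_END_OF_STATE);
    }

Sharing the definitions through the header lets migration-multifd.c emit and parse exactly the same delimiters as migration.c.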
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add multifd setup/cleanup functions and an associated VFIOMultifd data | 3 | These files depend on the VFIO symbol in their Kconfig |
4 | structure that will contain most of the receive-side data together | 4 | definition. They don't rely on target-specific definitions, so |
5 | with its init/cleanup methods. | 5 | move them to system_ss[] to build them once. |
6 | 6 | ||
7 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
9 | Link: https://lore.kernel.org/qemu-devel/c0520523053b1087787152ddf2163257d3030be0.1741124640.git.maciej.szmigiero@oracle.com | 11 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
12 | Message-Id: <20250308230917.18907-5-philmd@linaro.org> | ||
13 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-7-philmd@linaro.org | ||
10 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
11 | --- | 15 | --- |
12 | hw/vfio/migration-multifd.h | 4 ++++ | 16 | hw/vfio/meson.build | 4 ++-- |
13 | include/hw/vfio/vfio-common.h | 3 +++ | 17 | 1 file changed, 2 insertions(+), 2 deletions(-) |
14 | hw/vfio/migration-multifd.c | 44 +++++++++++++++++++++++++++++++++++ | ||
15 | 3 files changed, 51 insertions(+) | ||
16 | 18 | ||
17 | diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h | 19 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build |
18 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/vfio/migration-multifd.h | 21 | --- a/hw/vfio/meson.build |
20 | +++ b/hw/vfio/migration-multifd.h | 22 | +++ b/hw/vfio/meson.build |
21 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( |
22 | 24 | )) | |
23 | #include "hw/vfio/vfio-common.h" | 25 | vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c')) |
24 | 26 | vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c')) | |
25 | +bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp); | 27 | -vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) |
26 | +void vfio_multifd_cleanup(VFIODevice *vbasedev); | 28 | -vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) |
27 | + | 29 | vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) |
28 | bool vfio_multifd_transfer_supported(void); | 30 | vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) |
29 | +bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); | 31 | |
30 | 32 | specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) | |
31 | #endif | 33 | |
32 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | 34 | +system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) |
33 | index XXXXXXX..XXXXXXX 100644 | 35 | +system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) |
34 | --- a/include/hw/vfio/vfio-common.h | 36 | system_ss.add(when: 'CONFIG_VFIO', if_true: files( |
35 | +++ b/include/hw/vfio/vfio-common.h | 37 | 'helpers.c', |
36 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIORegion { | 38 | 'container-base.c', |
37 | uint8_t nr; /* cache the region number for debug */ | ||
38 | } VFIORegion; | ||
39 | |||
40 | +typedef struct VFIOMultifd VFIOMultifd; | ||
41 | + | ||
42 | typedef struct VFIOMigration { | ||
43 | struct VFIODevice *vbasedev; | ||
44 | VMChangeStateEntry *vm_state; | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIOMigration { | ||
46 | uint64_t mig_flags; | ||
47 | uint64_t precopy_init_size; | ||
48 | uint64_t precopy_dirty_size; | ||
49 | + VFIOMultifd *multifd; | ||
50 | bool initial_data_sent; | ||
51 | |||
52 | bool event_save_iterate_started; | ||
53 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/hw/vfio/migration-multifd.c | ||
56 | +++ b/hw/vfio/migration-multifd.c | ||
57 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIODeviceStatePacket { | ||
58 | uint8_t data[0]; | ||
59 | } QEMU_PACKED VFIODeviceStatePacket; | ||
60 | |||
61 | +typedef struct VFIOMultifd { | ||
62 | +} VFIOMultifd; | ||
63 | + | ||
64 | +static VFIOMultifd *vfio_multifd_new(void) | ||
65 | +{ | ||
66 | + VFIOMultifd *multifd = g_new(VFIOMultifd, 1); | ||
67 | + | ||
68 | + return multifd; | ||
69 | +} | ||
70 | + | ||
71 | +static void vfio_multifd_free(VFIOMultifd *multifd) | ||
72 | +{ | ||
73 | + g_free(multifd); | ||
74 | +} | ||
75 | + | ||
76 | +void vfio_multifd_cleanup(VFIODevice *vbasedev) | ||
77 | +{ | ||
78 | + VFIOMigration *migration = vbasedev->migration; | ||
79 | + | ||
80 | + g_clear_pointer(&migration->multifd, vfio_multifd_free); | ||
81 | +} | ||
82 | + | ||
83 | bool vfio_multifd_transfer_supported(void) | ||
84 | { | ||
85 | return multifd_device_state_supported() && | ||
86 | migrate_send_switchover_start(); | ||
87 | } | ||
88 | + | ||
89 | +bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev) | ||
90 | +{ | ||
91 | + return false; | ||
92 | +} | ||
93 | + | ||
94 | +bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) | ||
95 | +{ | ||
96 | + VFIOMigration *migration = vbasedev->migration; | ||
97 | + | ||
98 | + if (!vfio_multifd_transfer_enabled(vbasedev)) { | ||
99 | + /* Nothing further to check or do */ | ||
100 | + return true; | ||
101 | + } | ||
102 | + | ||
103 | + if (alloc_multifd) { | ||
104 | + assert(!migration->multifd); | ||
105 | + migration->multifd = vfio_multifd_new(); | ||
106 | + } | ||
107 | + | ||
108 | + return true; | ||
109 | +} | ||
110 | -- | 39 | -- |
111 | 2.48.1 | 40 | 2.48.1 |
112 | 41 | ||
113 | 42 | diff view generated by jsdifflib |
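The left-hand patch pairs vfio_multifd_setup() with a vfio_multifd_cleanup() built on g_clear_pointer(), so cleanup stays safe even when nothing was ever allocated. A GLib-only sketch of that idiom, with a placeholder struct in place of the (still empty) VFIOMultifd:

    #include <glib.h>

    typedef struct Multifd {
        int placeholder;            /* the real struct grows receive-side state later */
    } Multifd;

    typedef struct Migration {
        Multifd *multifd;           /* NULL until setup allocates it */
    } Migration;

    static gboolean multifd_setup(Migration *migration, gboolean alloc)
    {
        if (alloc) {
            g_assert(!migration->multifd);
            migration->multifd = g_new0(Multifd, 1);
        }
        return TRUE;
    }

    static void multifd_cleanup(Migration *migration)
    {
        /* Frees and NULLs in one step, so calling cleanup twice, or without
         * a prior setup, is harmless. */
        g_clear_pointer(&migration->multifd, g_free);
    }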
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add vfio_multifd_transfer_supported() function that tells whether the | 3 | Removing the unused "exec/ram_addr.h" header allows compiling |
4 | multifd device state transfer is supported. | 4 | iommufd.c once for all targets. |
5 | 5 | ||
6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
7 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | 10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
8 | Link: https://lore.kernel.org/qemu-devel/8ce50256f341b3d47342bb217cb5fbb2deb14639.1741124640.git.maciej.szmigiero@oracle.com | 11 | Message-Id: <20250308230917.18907-6-philmd@linaro.org> |
12 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-8-philmd@linaro.org | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
10 | --- | 14 | --- |
11 | hw/vfio/migration-multifd.h | 2 ++ | 15 | hw/vfio/iommufd.c | 1 - |
12 | hw/vfio/migration-multifd.c | 6 ++++++ | 16 | hw/vfio/meson.build | 6 +++--- |
13 | 2 files changed, 8 insertions(+) | 17 | 2 files changed, 3 insertions(+), 4 deletions(-) |
14 | 18 | ||
15 | diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h | 19 | diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c |
16 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/vfio/migration-multifd.h | 21 | --- a/hw/vfio/iommufd.c |
18 | +++ b/hw/vfio/migration-multifd.h | 22 | +++ b/hw/vfio/iommufd.c |
19 | @@ -XXX,XX +XXX,XX @@ | 23 | @@ -XXX,XX +XXX,XX @@ |
20 | 24 | #include "qemu/cutils.h" | |
21 | #include "hw/vfio/vfio-common.h" | 25 | #include "qemu/chardev_open.h" |
22 | 26 | #include "pci.h" | |
23 | +bool vfio_multifd_transfer_supported(void); | 27 | -#include "exec/ram_addr.h" |
24 | + | 28 | |
25 | #endif | 29 | static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, |
26 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | 30 | ram_addr_t size, void *vaddr, bool readonly) |
31 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build | ||
27 | index XXXXXXX..XXXXXXX 100644 | 32 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/hw/vfio/migration-multifd.c | 33 | --- a/hw/vfio/meson.build |
29 | +++ b/hw/vfio/migration-multifd.c | 34 | +++ b/hw/vfio/meson.build |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIODeviceStatePacket { | 35 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(files( |
31 | uint32_t flags; | 36 | 'container.c', |
32 | uint8_t data[0]; | 37 | )) |
33 | } QEMU_PACKED VFIODeviceStatePacket; | 38 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) |
34 | + | 39 | -vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( |
35 | +bool vfio_multifd_transfer_supported(void) | 40 | - 'iommufd.c', |
36 | +{ | 41 | -)) |
37 | + return multifd_device_state_supported() && | 42 | vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( |
38 | + migrate_send_switchover_start(); | 43 | 'display.c', |
39 | +} | 44 | 'pci-quirks.c', |
45 | @@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( | ||
46 | 'migration-multifd.c', | ||
47 | 'cpr.c', | ||
48 | )) | ||
49 | +system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( | ||
50 | + 'iommufd.c', | ||
51 | +)) | ||
40 | -- | 52 | -- |
41 | 2.48.1 | 53 | 2.48.1 |
42 | 54 | ||
43 | 55 | diff view generated by jsdifflib |
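vfio_multifd_transfer_supported() on the left is a pure capability gate over two migration-core predicates. The sketch below shows the shape of such a check with stubbed predicates; the stub names and the main() caller are assumptions used only so the example compiles and runs.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stubs for the two migration-core checks the real function consults. */
    static bool device_state_supported(void)   { return true; }
    static bool switchover_start_enabled(void) { return true; }

    static bool multifd_transfer_supported(void)
    {
        /* Both features must be present before device state may go over
         * multifd channels. */
        return device_state_supported() && switchover_start_enabled();
    }

    int main(void)
    {
        if (!multifd_transfer_supported()) {
            fprintf(stderr, "multifd device state transfer not available\n");
            return 1;
        }
        printf("multifd device state transfer can be negotiated\n");
        return 0;
    }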
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Add basic types and flags used by VFIO multifd device state transfer | 3 | display.c doesn't rely on target-specific definitions, so |
4 | support. | 4 | move it to system_ss[] to build it once. |
5 | 5 | ||
6 | Since we'll be introducing a lot of multifd transfer specific code, | 6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | add a new file migration-multifd.c to home it, wired into main VFIO | 7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
8 | migration code (migration.c) via migration-multifd.h header file. | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | |||
10 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
11 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
12 | Link: https://lore.kernel.org/qemu-devel/4eedd529e6617f80f3d6a66d7268a0db2bc173fa.1741124640.git.maciej.szmigiero@oracle.com | 10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> |
11 | Message-Id: <20250308230917.18907-8-philmd@linaro.org> | ||
12 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-9-philmd@linaro.org | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
14 | --- | 14 | --- |
15 | hw/vfio/migration-multifd.h | 17 +++++++++++++++++ | 15 | hw/vfio/meson.build | 4 +++- |
16 | hw/vfio/migration-multifd.c | 33 +++++++++++++++++++++++++++++++++ | 16 | 1 file changed, 3 insertions(+), 1 deletion(-) |
17 | hw/vfio/migration.c | 1 + | ||
18 | hw/vfio/meson.build | 1 + | ||
19 | 4 files changed, 52 insertions(+) | ||
20 | create mode 100644 hw/vfio/migration-multifd.h | ||
21 | create mode 100644 hw/vfio/migration-multifd.c | ||
22 | 17 | ||
23 | diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h | ||
24 | new file mode 100644 | ||
25 | index XXXXXXX..XXXXXXX | ||
26 | --- /dev/null | ||
27 | +++ b/hw/vfio/migration-multifd.h | ||
28 | @@ -XXX,XX +XXX,XX @@ | ||
29 | +/* | ||
30 | + * Multifd VFIO migration | ||
31 | + * | ||
32 | + * Copyright (C) 2024,2025 Oracle and/or its affiliates. | ||
33 | + * | ||
34 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
35 | + * See the COPYING file in the top-level directory. | ||
36 | + * | ||
37 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
38 | + */ | ||
39 | + | ||
40 | +#ifndef HW_VFIO_MIGRATION_MULTIFD_H | ||
41 | +#define HW_VFIO_MIGRATION_MULTIFD_H | ||
42 | + | ||
43 | +#include "hw/vfio/vfio-common.h" | ||
44 | + | ||
45 | +#endif | ||
46 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c | ||
47 | new file mode 100644 | ||
48 | index XXXXXXX..XXXXXXX | ||
49 | --- /dev/null | ||
50 | +++ b/hw/vfio/migration-multifd.c | ||
51 | @@ -XXX,XX +XXX,XX @@ | ||
52 | +/* | ||
53 | + * Multifd VFIO migration | ||
54 | + * | ||
55 | + * Copyright (C) 2024,2025 Oracle and/or its affiliates. | ||
56 | + * | ||
57 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
58 | + * See the COPYING file in the top-level directory. | ||
59 | + * | ||
60 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
61 | + */ | ||
62 | + | ||
63 | +#include "qemu/osdep.h" | ||
64 | +#include "hw/vfio/vfio-common.h" | ||
65 | +#include "migration/misc.h" | ||
66 | +#include "qapi/error.h" | ||
67 | +#include "qemu/error-report.h" | ||
68 | +#include "qemu/lockable.h" | ||
69 | +#include "qemu/main-loop.h" | ||
70 | +#include "qemu/thread.h" | ||
71 | +#include "migration/qemu-file.h" | ||
72 | +#include "migration-multifd.h" | ||
73 | +#include "trace.h" | ||
74 | + | ||
75 | +#define VFIO_DEVICE_STATE_CONFIG_STATE (1) | ||
76 | + | ||
77 | +#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0) | ||
78 | + | ||
79 | +typedef struct VFIODeviceStatePacket { | ||
80 | + uint32_t version; | ||
81 | + uint32_t idx; | ||
82 | + uint32_t flags; | ||
83 | + uint8_t data[0]; | ||
84 | +} QEMU_PACKED VFIODeviceStatePacket; | ||
85 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/hw/vfio/migration.c | ||
88 | +++ b/hw/vfio/migration.c | ||
89 | @@ -XXX,XX +XXX,XX @@ | ||
90 | #include "migration/qemu-file.h" | ||
91 | #include "migration/register.h" | ||
92 | #include "migration/blocker.h" | ||
93 | +#include "migration-multifd.h" | ||
94 | #include "qapi/error.h" | ||
95 | #include "qapi/qapi-events-vfio.h" | ||
96 | #include "exec/ramlist.h" | ||
97 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build | 18 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build |
98 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
99 | --- a/hw/vfio/meson.build | 20 | --- a/hw/vfio/meson.build |
100 | +++ b/hw/vfio/meson.build | 21 | +++ b/hw/vfio/meson.build |
101 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(files( | 22 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(files( |
102 | 'container-base.c', | ||
103 | 'container.c', | ||
104 | 'migration.c', | ||
105 | + 'migration-multifd.c', | ||
106 | 'cpr.c', | ||
107 | )) | 23 | )) |
108 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) | 24 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) |
25 | vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( | ||
26 | - 'display.c', | ||
27 | 'pci-quirks.c', | ||
28 | 'pci.c', | ||
29 | )) | ||
30 | @@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( | ||
31 | system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( | ||
32 | 'iommufd.c', | ||
33 | )) | ||
34 | +system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( | ||
35 | + 'display.c', | ||
36 | +)) | ||
109 | -- | 37 | -- |
110 | 2.48.1 | 38 | 2.48.1 |
111 | 39 | ||
112 | 40 | diff view generated by jsdifflib |
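The VFIODeviceStatePacket introduced on the left is a packed 12-byte header followed by a variable-length payload. Below is a standalone sketch of building such a packet; the packed attribute mirrors QEMU_PACKED, the field meanings follow the declaration above, and a later patch in this series additionally stores the header fields in big-endian order before they hit the wire.

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct __attribute__((packed)) StatePacket {
        uint32_t version;   /* packet format version */
        uint32_t idx;       /* sequence index of this chunk */
        uint32_t flags;     /* e.g. a "this chunk is config state" marker */
        uint8_t  data[];    /* payload follows the 12-byte header */
    } StatePacket;

    static StatePacket *packet_new(uint32_t idx, const void *buf, size_t len)
    {
        StatePacket *p = malloc(sizeof(*p) + len);

        if (!p) {
            return NULL;
        }
        p->version = 0;
        p->idx = idx;
        p->flags = 0;
        memcpy(p->data, buf, len);
        return p;
    }

Packing matters because both ends of the migration stream must agree on the exact 12-byte header layout with no compiler-inserted padding.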
1 | From: Peter Xu <peterx@redhat.com> | 1 | From: Vasilis Liaskovitis <vliaskovitis@suse.com> |
---|---|---|---|
2 | 2 | ||
3 | The newly introduced device state buffer can be used either for storing | 3 | The ATI BAR4 quirk is targeting an ioport BAR. Older devices may |
4 | VFIO's read() raw data or for storing generic device | 4 | have a BAR4 which is not an ioport, causing a segfault here. Test |
5 | states. After noticing that device states may not easily provide a max | 5 | the BAR type to skip these devices. |
6 | buffer size (also the fact that RAM MultiFDPages_t after all also want to | ||
7 | have flexibility on managing offset[] array), it may not be a good idea to | ||
8 | stick with union on MultiFDSendData.. as it won't play well with such | ||
9 | flexibility. | ||
10 | 6 | ||
11 | Switch MultiFDSendData to a struct. | 7 | Similar to |
8 | "8f419c5b: vfio/pci-quirks: Exclude non-ioport BAR from NVIDIA quirk" | ||
12 | 9 | ||
13 | It won't consume a lot more space in reality, after all the real buffers | 10 | Untested, as I don't have the card to test. |
14 | were already dynamically allocated, so it's so far only about the two | ||
15 | structs (pages, device_state) that will be duplicated, but they're small. | ||
16 | 11 | ||
17 | With this, we can remove the pretty hard to understand alloc size logic. | 12 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2856 |
18 | Because now we can allocate offset[] together with the SendData, and | 13 | Signed-off-by: Vasilis Liaskovitis <vliaskovitis@suse.com> |
19 | properly free it when the SendData is freed. | 14 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
20 | 15 | Link: https://lore.kernel.org/qemu-devel/20250310235833.41026-1-vliaskovitis@suse.com | |
21 | [MSS: Make sure to clear possible device state payload before freeing | ||
22 | MultiFDSendData, remove placeholders for other patches not included] | ||
23 | |||
24 | Signed-off-by: Peter Xu <peterx@redhat.com> | ||
25 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
26 | Acked-by: Fabiano Rosas <farosas@suse.de> | ||
27 | Link: https://lore.kernel.org/qemu-devel/7b02baba8e6ddb23ef7c349d312b9b631db09d7e.1741124640.git.maciej.szmigiero@oracle.com | ||
28 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 16 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
29 | --- | 17 | --- |
30 | migration/multifd.h | 15 +++++++++------ | 18 | hw/vfio/pci-quirks.c | 2 +- |
31 | migration/multifd-device-state.c | 5 ----- | 19 | 1 file changed, 1 insertion(+), 1 deletion(-) |
32 | migration/multifd-nocomp.c | 13 ++++++------- | ||
33 | migration/multifd.c | 25 +++++++------------------ | ||
34 | 4 files changed, 22 insertions(+), 36 deletions(-) | ||
35 | 20 | ||
36 | diff --git a/migration/multifd.h b/migration/multifd.h | 21 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c |
37 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/migration/multifd.h | 23 | --- a/hw/vfio/pci-quirks.c |
39 | +++ b/migration/multifd.h | 24 | +++ b/hw/vfio/pci-quirks.c |
40 | @@ -XXX,XX +XXX,XX @@ typedef struct { | 25 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
41 | uint32_t num; | 26 | |
42 | /* number of normal pages */ | 27 | /* This windows doesn't seem to be used except by legacy VGA code */ |
43 | uint32_t normal_num; | 28 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || |
44 | + /* | 29 | - !vdev->vga || nr != 4) { |
45 | + * Pointer to the ramblock. NOTE: it's caller's responsibility to make | 30 | + !vdev->vga || nr != 4 || !vdev->bars[4].ioport) { |
46 | + * sure the pointer is always valid! | ||
47 | + */ | ||
48 | RAMBlock *block; | ||
49 | - /* offset of each page */ | ||
50 | - ram_addr_t offset[]; | ||
51 | + /* offset array of each page, managed by multifd */ | ||
52 | + ram_addr_t *offset; | ||
53 | } MultiFDPages_t; | ||
54 | |||
55 | struct MultiFDRecvData { | ||
56 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
57 | MULTIFD_PAYLOAD_DEVICE_STATE, | ||
58 | } MultiFDPayloadType; | ||
59 | |||
60 | -typedef union MultiFDPayload { | ||
61 | +typedef struct MultiFDPayload { | ||
62 | MultiFDPages_t ram; | ||
63 | MultiFDDeviceState_t device_state; | ||
64 | } MultiFDPayload; | ||
65 | @@ -XXX,XX +XXX,XX @@ void multifd_ram_save_cleanup(void); | ||
66 | int multifd_ram_flush_and_sync(QEMUFile *f); | ||
67 | bool multifd_ram_sync_per_round(void); | ||
68 | bool multifd_ram_sync_per_section(void); | ||
69 | -size_t multifd_ram_payload_size(void); | ||
70 | +void multifd_ram_payload_alloc(MultiFDPages_t *pages); | ||
71 | +void multifd_ram_payload_free(MultiFDPages_t *pages); | ||
72 | void multifd_ram_fill_packet(MultiFDSendParams *p); | ||
73 | int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp); | ||
74 | |||
75 | -size_t multifd_device_state_payload_size(void); | ||
76 | - | ||
77 | void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state); | ||
78 | |||
79 | void multifd_device_state_send_setup(void); | ||
80 | diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c | ||
81 | index XXXXXXX..XXXXXXX 100644 | ||
82 | --- a/migration/multifd-device-state.c | ||
83 | +++ b/migration/multifd-device-state.c | ||
84 | @@ -XXX,XX +XXX,XX @@ static struct { | ||
85 | MultiFDSendData *send_data; | ||
86 | } *multifd_send_device_state; | ||
87 | |||
88 | -size_t multifd_device_state_payload_size(void) | ||
89 | -{ | ||
90 | - return sizeof(MultiFDDeviceState_t); | ||
91 | -} | ||
92 | - | ||
93 | void multifd_device_state_send_setup(void) | ||
94 | { | ||
95 | assert(!multifd_send_device_state); | ||
96 | diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c | ||
97 | index XXXXXXX..XXXXXXX 100644 | ||
98 | --- a/migration/multifd-nocomp.c | ||
99 | +++ b/migration/multifd-nocomp.c | ||
100 | @@ -XXX,XX +XXX,XX @@ | ||
101 | |||
102 | static MultiFDSendData *multifd_ram_send; | ||
103 | |||
104 | -size_t multifd_ram_payload_size(void) | ||
105 | +void multifd_ram_payload_alloc(MultiFDPages_t *pages) | ||
106 | { | ||
107 | - uint32_t n = multifd_ram_page_count(); | ||
108 | + pages->offset = g_new0(ram_addr_t, multifd_ram_page_count()); | ||
109 | +} | ||
110 | |||
111 | - /* | ||
112 | - * We keep an array of page offsets at the end of MultiFDPages_t, | ||
113 | - * add space for it in the allocation. | ||
114 | - */ | ||
115 | - return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t); | ||
116 | +void multifd_ram_payload_free(MultiFDPages_t *pages) | ||
117 | +{ | ||
118 | + g_clear_pointer(&pages->offset, g_free); | ||
119 | } | ||
120 | |||
121 | void multifd_ram_save_setup(void) | ||
122 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
123 | index XXXXXXX..XXXXXXX 100644 | ||
124 | --- a/migration/multifd.c | ||
125 | +++ b/migration/multifd.c | ||
126 | @@ -XXX,XX +XXX,XX @@ struct { | ||
127 | |||
128 | MultiFDSendData *multifd_send_data_alloc(void) | ||
129 | { | ||
130 | - size_t max_payload_size, size_minus_payload; | ||
131 | + MultiFDSendData *new = g_new0(MultiFDSendData, 1); | ||
132 | |||
133 | - /* | ||
134 | - * MultiFDPages_t has a flexible array at the end, account for it | ||
135 | - * when allocating MultiFDSendData. Use max() in case other types | ||
136 | - * added to the union in the future are larger than | ||
137 | - * (MultiFDPages_t + flex array). | ||
138 | - */ | ||
139 | - max_payload_size = MAX(multifd_ram_payload_size(), | ||
140 | - multifd_device_state_payload_size()); | ||
141 | - max_payload_size = MAX(max_payload_size, sizeof(MultiFDPayload)); | ||
142 | - | ||
143 | - /* | ||
144 | - * Account for any holes the compiler might insert. We can't pack | ||
145 | - * the structure because that misaligns the members and triggers | ||
146 | - * Waddress-of-packed-member. | ||
147 | - */ | ||
148 | - size_minus_payload = sizeof(MultiFDSendData) - sizeof(MultiFDPayload); | ||
149 | + multifd_ram_payload_alloc(&new->u.ram); | ||
150 | + /* Device state allocates its payload on-demand */ | ||
151 | |||
152 | - return g_malloc0(size_minus_payload + max_payload_size); | ||
153 | + return new; | ||
154 | } | ||
155 | |||
156 | void multifd_send_data_clear(MultiFDSendData *data) | ||
157 | @@ -XXX,XX +XXX,XX @@ void multifd_send_data_free(MultiFDSendData *data) | ||
158 | return; | 31 | return; |
159 | } | 32 | } |
160 | |||
161 | + /* This also free's device state payload */ | ||
162 | multifd_send_data_clear(data); | ||
163 | |||
164 | + multifd_ram_payload_free(&data->u.ram); | ||
165 | + | ||
166 | g_free(data); | ||
167 | } | ||
168 | 33 | ||
169 | -- | 34 | -- |
170 | 2.48.1 | 35 | 2.48.1 |
171 | 36 | ||
172 | 37 | diff view generated by jsdifflib |
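The point of the left-hand patch is that a flexible array member at the end of a union forces hand-computed allocation sizes, whereas a struct whose RAM payload owns a separately allocated offset[] array can use a plain g_new0(). A reduced GLib-only sketch, with placeholder member types rather than the real MultiFD ones:

    #include <glib.h>

    typedef struct Pages {
        guint32  num;
        guint64 *offset;          /* allocated separately, not a flexible array */
    } Pages;

    typedef struct DeviceState {
        char *idstr;
        void *buf;
    } DeviceState;

    typedef struct SendData {
        struct {                  /* struct, not union: both payloads coexist */
            Pages ram;
            DeviceState device_state;
        } u;
    } SendData;

    static SendData *send_data_alloc(guint32 page_count)
    {
        SendData *d = g_new0(SendData, 1);

        d->u.ram.offset = g_new0(guint64, page_count);
        return d;
    }

    static void send_data_free(SendData *d)
    {
        if (!d) {
            return;
        }
        g_clear_pointer(&d->u.ram.offset, g_free);
        g_free(d);
    }

The small cost is that both payload structs are always embedded, but as the commit message notes they are tiny compared to the dynamically allocated buffers they point to.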
1 | From: Alex Williamson <alex.williamson@redhat.com> | 1 | From: Joao Martins <joao.m.martins@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | We want the device in the D0 power state going into reset, but the | 3 | The intent behind the x-device-dirty-page-tracking option is twofold: |
4 | config write can enable the BARs in the address space, which are | ||
5 | then removed from the address space once we clear the memory enable | ||
6 | bit in the command register. Re-order to clear the command bit | ||
7 | first, so the power state change doesn't enable the BARs. | ||
8 | 4 | ||
9 | Cc: Cédric Le Goater <clg@redhat.com> | 5 | 1) development/testing in the presence of VFs with VF dirty page tracking |
10 | Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com> | 6 | |
11 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 7 | 2) deliberately choosing platform dirty tracker over the VF one. |
12 | Signed-off-by: Alex Williamson <alex.williamson@redhat.com> | 8 | |
13 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | 9 | Item 2) scenario is useful when VF dirty tracker is not as fast as |
14 | Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-6-alex.williamson@redhat.com | 10 | IOMMU, or there's some limitations around it (e.g. number of them is |
11 | limited; aggregated address space under tracking is limited), | ||
12 | efficiency/scalability (e.g. 1 pagetable in IOMMU dirty tracker to scan | ||
13 | vs N VFs) or just troubleshooting. Given item 2 it is not restricted to | ||
14 | debugging, hence drop the debug parenthesis from the option description. | ||
15 | |||
16 | Signed-off-by: Joao Martins <joao.m.martins@oracle.com> | ||
17 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
18 | Link: https://lore.kernel.org/qemu-devel/20250311174807.79825-1-joao.m.martins@oracle.com | ||
19 | [ clg: Fixed subject spelling ] | ||
15 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 20 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
16 | --- | 21 | --- |
17 | hw/vfio/pci.c | 18 +++++++++--------- | 22 | hw/vfio/pci.c | 2 +- |
18 | 1 file changed, 9 insertions(+), 9 deletions(-) | 23 | 1 file changed, 1 insertion(+), 1 deletion(-) |
19 | 24 | ||
20 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | 25 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
21 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/vfio/pci.c | 27 | --- a/hw/vfio/pci.c |
23 | +++ b/hw/vfio/pci.c | 28 | +++ b/hw/vfio/pci.c |
24 | @@ -XXX,XX +XXX,XX @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) | 29 | @@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) |
25 | 30 | object_class_property_set_description(klass, /* 9.1 */ | |
26 | vfio_disable_interrupts(vdev); | 31 | "x-device-dirty-page-tracking", |
27 | 32 | "Disable device dirty page tracking and use " | |
28 | + /* | 33 | - "container-based dirty page tracking (DEBUG)"); |
29 | + * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. | 34 | + "container-based dirty page tracking"); |
30 | + * Also put INTx Disable in known state. | 35 | object_class_property_set_description(klass, /* 9.1 */ |
31 | + */ | 36 | "migration-events", |
32 | + cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); | 37 | "Emit VFIO migration QAPI event when a VFIO device " |
33 | + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | | ||
34 | + PCI_COMMAND_INTX_DISABLE); | ||
35 | + vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); | ||
36 | + | ||
37 | /* Make sure the device is in D0 */ | ||
38 | if (pdev->pm_cap) { | ||
39 | uint16_t pmcsr; | ||
40 | @@ -XXX,XX +XXX,XX @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | - | ||
45 | - /* | ||
46 | - * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. | ||
47 | - * Also put INTx Disable in known state. | ||
48 | - */ | ||
49 | - cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); | ||
50 | - cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | | ||
51 | - PCI_COMMAND_INTX_DISABLE); | ||
52 | - vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); | ||
53 | } | ||
54 | |||
55 | void vfio_pci_post_reset(VFIOPCIDevice *vdev) | ||
56 | -- | 38 | -- |
57 | 2.48.1 | 39 | 2.48.1 |
58 | 40 | ||
59 | 41 | diff view generated by jsdifflib |
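The left-hand reordering clears the PCI command register before touching the power state, so the D3hot-to-D0 config write cannot momentarily re-enable the BARs. A standalone sketch of that masking step, using the standard command-register bit values; the fake in-memory config space and the read/write helpers are assumptions standing in for vfio_pci_read_config()/vfio_pci_write_config().

    #include <stdint.h>

    #define PCI_COMMAND              0x04
    #define PCI_COMMAND_IO           0x001
    #define PCI_COMMAND_MEMORY       0x002
    #define PCI_COMMAND_MASTER       0x004
    #define PCI_COMMAND_INTX_DISABLE 0x400

    static uint8_t config_space[256];     /* fake config space for the sketch */

    static uint16_t read_config16(uint8_t addr)
    {
        return (uint16_t)(config_space[addr] | (config_space[addr + 1] << 8));
    }

    static void write_config16(uint8_t addr, uint16_t val)
    {
        config_space[addr]     = (uint8_t)(val & 0xff);
        config_space[addr + 1] = (uint8_t)(val >> 8);
    }

    static void quiesce_before_reset(void)
    {
        uint16_t cmd = read_config16(PCI_COMMAND);

        /* Disable I/O and MMIO decode, bus mastering and INTx first; only
         * then is it safe to force the device back to D0. */
        cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
                 PCI_COMMAND_INTX_DISABLE);
        write_config16(PCI_COMMAND, cmd);
    }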
Deleted patch | |||
---|---|---|---|
1 | From: Eric Auger <eric.auger@redhat.com> | ||
2 | 1 | ||
3 | As an outcome of KVM forum 2024 "vfio-platform: live and let die?" | ||
4 | talk, let's deprecate vfio-platform devices. | ||
5 | |||
6 | Signed-off-by: Eric Auger <eric.auger@redhat.com> | ||
7 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
8 | Link: https://lore.kernel.org/qemu-devel/20250305124225.952791-1-eric.auger@redhat.com | ||
9 | [ clg: Fixed spelling in vfio-amd-xgbe section ] | ||
10 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
11 | --- | ||
12 | docs/about/deprecated.rst | 25 +++++++++++++++++++++++++ | ||
13 | hw/vfio/amd-xgbe.c | 2 ++ | ||
14 | hw/vfio/calxeda-xgmac.c | 2 ++ | ||
15 | hw/vfio/platform.c | 1 + | ||
16 | 4 files changed, 30 insertions(+) | ||
17 | |||
18 | diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/docs/about/deprecated.rst | ||
21 | +++ b/docs/about/deprecated.rst | ||
22 | @@ -XXX,XX +XXX,XX @@ Stream ``reconnect`` (since 9.2) | ||
23 | The ``reconnect`` option only allows specifiying second granularity timeouts, | ||
24 | which is not enough for all types of use cases, use ``reconnect-ms`` instead. | ||
25 | |||
26 | +VFIO device options | ||
27 | +''''''''''''''''''' | ||
28 | + | ||
29 | +``-device vfio-calxeda-xgmac`` (since 10.0) | ||
30 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
31 | +The vfio-calxeda-xgmac device allows to assign a host Calxeda Highbank | ||
32 | +10Gb XGMAC Ethernet controller device ("calxeda,hb-xgmac" compatibility | ||
33 | +string) to a guest. Calxeda HW has been ewasted now and there is no point | ||
34 | +keeping that device. | ||
35 | + | ||
36 | +``-device vfio-amd-xgbe`` (since 10.0) | ||
37 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
38 | +The vfio-amd-xgbe device allows to assign a host AMD 10GbE controller | ||
39 | +to a guest ("amd,xgbe-seattle-v1a" compatibility string). AMD "Seattle" | ||
40 | +is not supported anymore and there is no point keeping that device. | ||
41 | + | ||
42 | +``-device vfio-platform`` (since 10.0) | ||
43 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
44 | +The vfio-platform device allows to assign a host platform device | ||
45 | +to a guest in a generic manner. Integrating a new device into | ||
46 | +the vfio-platform infrastructure requires some adaptation at | ||
47 | +both kernel and qemu level. No such attempt has been done for years | ||
48 | +and the conclusion is that vfio-platform has not got any traction. | ||
49 | +PCIe passthrough shall be the mainline solution. | ||
50 | + | ||
51 | CPU device properties | ||
52 | ''''''''''''''''''''' | ||
53 | |||
54 | diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/hw/vfio/amd-xgbe.c | ||
57 | +++ b/hw/vfio/amd-xgbe.c | ||
58 | @@ -XXX,XX +XXX,XX @@ | ||
59 | #include "hw/vfio/vfio-amd-xgbe.h" | ||
60 | #include "migration/vmstate.h" | ||
61 | #include "qemu/module.h" | ||
62 | +#include "qemu/error-report.h" | ||
63 | |||
64 | static void amd_xgbe_realize(DeviceState *dev, Error **errp) | ||
65 | { | ||
66 | VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); | ||
67 | VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev); | ||
68 | |||
69 | + warn_report("-device vfio-amd-xgbe is deprecated"); | ||
70 | vdev->compat = g_strdup("amd,xgbe-seattle-v1a"); | ||
71 | vdev->num_compat = 1; | ||
72 | |||
73 | diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/hw/vfio/calxeda-xgmac.c | ||
76 | +++ b/hw/vfio/calxeda-xgmac.c | ||
77 | @@ -XXX,XX +XXX,XX @@ | ||
78 | #include "hw/vfio/vfio-calxeda-xgmac.h" | ||
79 | #include "migration/vmstate.h" | ||
80 | #include "qemu/module.h" | ||
81 | +#include "qemu/error-report.h" | ||
82 | |||
83 | static void calxeda_xgmac_realize(DeviceState *dev, Error **errp) | ||
84 | { | ||
85 | VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); | ||
86 | VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev); | ||
87 | |||
88 | + warn_report("-device vfio-calxeda-xgmac is deprecated"); | ||
89 | vdev->compat = g_strdup("calxeda,hb-xgmac"); | ||
90 | vdev->num_compat = 1; | ||
91 | |||
92 | diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/hw/vfio/platform.c | ||
95 | +++ b/hw/vfio/platform.c | ||
96 | @@ -XXX,XX +XXX,XX @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) | ||
97 | VFIODevice *vbasedev = &vdev->vbasedev; | ||
98 | int i; | ||
99 | |||
100 | + warn_report("-device vfio-platform is deprecated"); | ||
101 | qemu_mutex_init(&vdev->intp_mutex); | ||
102 | |||
103 | trace_vfio_platform_realize(vbasedev->sysfsdev ? | ||
104 | -- | ||
105 | 2.48.1 | ||
106 | |||
107 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | It's possible for {load,save}_cleanup SaveVMHandlers to get called without | ||
4 | the corresponding {load,save}_setup handler being called first. | ||
5 | |||
6 | One such example is if the {load,save}_setup handler of a preceding device | ||
7 | returns an error. | ||
8 | In this case the migration core cleanup code will call all corresponding | ||
9 | cleanup handlers, even for those devices which haven't had their setup | ||
10 | handler called. | ||
11 | |||
12 | Since this behavior can generate some surprises, let's clearly document it | ||
13 | in these SaveVMHandlers' descriptions. | ||
14 | |||
15 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
16 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
17 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
18 | Link: https://lore.kernel.org/qemu-devel/991636623fb780350f493b5f045cb17e13ce4c0f.1741124640.git.maciej.szmigiero@oracle.com | ||
19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
20 | --- | ||
21 | include/migration/register.h | 6 +++++- | ||
22 | 1 file changed, 5 insertions(+), 1 deletion(-) | ||
23 | |||
24 | diff --git a/include/migration/register.h b/include/migration/register.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/include/migration/register.h | ||
27 | +++ b/include/migration/register.h | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers { | ||
29 | /** | ||
30 | * @save_cleanup | ||
31 | * | ||
32 | - * Uninitializes the data structures on the source | ||
33 | + * Uninitializes the data structures on the source. | ||
34 | + * Note that this handler can be called even if save_setup | ||
35 | + * wasn't called earlier. | ||
36 | * | ||
37 | * @opaque: data pointer passed to register_savevm_live() | ||
38 | */ | ||
39 | @@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers { | ||
40 | * @load_cleanup | ||
41 | * | ||
42 | * Uninitializes the data structures on the destination. | ||
43 | + * Note that this handler can be called even if load_setup | ||
44 | + * wasn't called earlier. | ||
45 | * | ||
46 | * @opaque: data pointer passed to register_savevm_live() | ||
47 | * | ||
48 | -- | ||
49 | 2.48.1 | ||
50 | |||
51 | diff view generated by jsdifflib |
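Given the documented behaviour above, cleanup handlers have to tolerate running without their setup counterpart. A GLib-only sketch of a load_cleanup written that way; the LoadState layout is illustrative, not QEMU's.

    #include <glib.h>

    typedef struct LoadState {
        void    *buffer;      /* NULL until load_setup() allocates it */
        gboolean active;
    } LoadState;

    static int load_setup(LoadState *s, gsize len)
    {
        s->buffer = g_malloc0(len);
        s->active = TRUE;
        return 0;
    }

    static int load_cleanup(LoadState *s)
    {
        /* May run even though load_setup() never did, e.g. when an earlier
         * device's setup failed, so every step must cope with unset state. */
        g_clear_pointer(&s->buffer, g_free);
        s->active = FALSE;
        return 0;
    }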
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | This function name conflicts with one used by a future generic thread pool | ||
4 | function, and it was only used by one test anyway. | ||
5 | |||
6 | Update the trace event name in thread_pool_submit_aio() accordingly. | ||
7 | |||
8 | Acked-by: Fabiano Rosas <farosas@suse.de> | ||
9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
10 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
11 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
12 | Link: https://lore.kernel.org/qemu-devel/6830f07777f939edaf0a2d301c39adcaaf3817f0.1741124640.git.maciej.szmigiero@oracle.com | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
14 | --- | ||
15 | include/block/thread-pool.h | 3 +-- | ||
16 | tests/unit/test-thread-pool.c | 6 +++--- | ||
17 | util/thread-pool.c | 7 +------ | ||
18 | util/trace-events | 2 +- | ||
19 | 4 files changed, 6 insertions(+), 12 deletions(-) | ||
20 | |||
21 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/include/block/thread-pool.h | ||
24 | +++ b/include/block/thread-pool.h | ||
25 | @@ -XXX,XX +XXX,XX @@ ThreadPool *thread_pool_new(struct AioContext *ctx); | ||
26 | void thread_pool_free(ThreadPool *pool); | ||
27 | |||
28 | /* | ||
29 | - * thread_pool_submit* API: submit I/O requests in the thread's | ||
30 | + * thread_pool_submit_{aio,co} API: submit I/O requests in the thread's | ||
31 | * current AioContext. | ||
32 | */ | ||
33 | BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | ||
34 | BlockCompletionFunc *cb, void *opaque); | ||
35 | int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); | ||
36 | -void thread_pool_submit(ThreadPoolFunc *func, void *arg); | ||
37 | |||
38 | void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); | ||
39 | |||
40 | diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tests/unit/test-thread-pool.c | ||
43 | +++ b/tests/unit/test-thread-pool.c | ||
44 | @@ -XXX,XX +XXX,XX @@ static void done_cb(void *opaque, int ret) | ||
45 | active--; | ||
46 | } | ||
47 | |||
48 | -static void test_submit(void) | ||
49 | +static void test_submit_no_complete(void) | ||
50 | { | ||
51 | WorkerTestData data = { .n = 0 }; | ||
52 | - thread_pool_submit(worker_cb, &data); | ||
53 | + thread_pool_submit_aio(worker_cb, &data, NULL, NULL); | ||
54 | while (data.n == 0) { | ||
55 | aio_poll(ctx, true); | ||
56 | } | ||
57 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
58 | ctx = qemu_get_current_aio_context(); | ||
59 | |||
60 | g_test_init(&argc, &argv, NULL); | ||
61 | - g_test_add_func("/thread-pool/submit", test_submit); | ||
62 | + g_test_add_func("/thread-pool/submit-no-complete", test_submit_no_complete); | ||
63 | g_test_add_func("/thread-pool/submit-aio", test_submit_aio); | ||
64 | g_test_add_func("/thread-pool/submit-co", test_submit_co); | ||
65 | g_test_add_func("/thread-pool/submit-many", test_submit_many); | ||
66 | diff --git a/util/thread-pool.c b/util/thread-pool.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/util/thread-pool.c | ||
69 | +++ b/util/thread-pool.c | ||
70 | @@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | ||
71 | |||
72 | QLIST_INSERT_HEAD(&pool->head, req, all); | ||
73 | |||
74 | - trace_thread_pool_submit(pool, req, arg); | ||
75 | + trace_thread_pool_submit_aio(pool, req, arg); | ||
76 | |||
77 | qemu_mutex_lock(&pool->lock); | ||
78 | if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) { | ||
79 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) | ||
80 | return tpc.ret; | ||
81 | } | ||
82 | |||
83 | -void thread_pool_submit(ThreadPoolFunc *func, void *arg) | ||
84 | -{ | ||
85 | - thread_pool_submit_aio(func, arg, NULL, NULL); | ||
86 | -} | ||
87 | - | ||
88 | void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) | ||
89 | { | ||
90 | qemu_mutex_lock(&pool->lock); | ||
91 | diff --git a/util/trace-events b/util/trace-events | ||
92 | index XXXXXXX..XXXXXXX 100644 | ||
93 | --- a/util/trace-events | ||
94 | +++ b/util/trace-events | ||
95 | @@ -XXX,XX +XXX,XX @@ aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" | ||
96 | reentrant_aio(void *ctx, const char *name) "ctx %p name %s" | ||
97 | |||
98 | # thread-pool.c | ||
99 | -thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" | ||
100 | +thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" | ||
101 | thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d" | ||
102 | thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" | ||
103 | |||
104 | -- | ||
105 | 2.48.1 | ||
106 | |||
107 | diff view generated by jsdifflib |
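After the removal above, a fire-and-forget submission simply passes a NULL completion callback and opaque to the _aio variant. The call-site sketch below is standalone: submit_aio_stub() is an assumption that only mimics the shape of thread_pool_submit_aio() so the example compiles.

    #include <stddef.h>

    typedef int  ThreadPoolFunc(void *opaque);
    typedef void CompletionFunc(void *opaque, int ret);

    /* Mimics the signature of thread_pool_submit_aio(); a real pool would
     * queue the work and return a handle usable for cancellation. */
    static void *submit_aio_stub(ThreadPoolFunc *func, void *arg,
                                 CompletionFunc *cb, void *cb_opaque)
    {
        (void)cb;
        (void)cb_opaque;
        func(arg);            /* run inline here just to keep the stub honest */
        return NULL;
    }

    static int worker(void *opaque)
    {
        (void)opaque;
        return 0;
    }

    static void fire_and_forget(void *data)
    {
        /* Before: thread_pool_submit(worker, data);
         * After:  no completion callback is wanted, so pass NULL, NULL. */
        submit_aio_stub(worker, data, NULL, NULL);
    }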
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | These names conflict with ones used by future generic thread pool | ||
4 | equivalents. | ||
5 | Generic names should belong to the generic pool type, not to the specific (AIO) | ||
6 | type. | ||
7 | |||
8 | Acked-by: Fabiano Rosas <farosas@suse.de> | ||
9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
10 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
11 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
12 | Link: https://lore.kernel.org/qemu-devel/70f9e0fb4b01042258a1a57996c64d19779dc7f0.1741124640.git.maciej.szmigiero@oracle.com | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
14 | --- | ||
15 | include/block/aio.h | 8 ++--- | ||
16 | include/block/thread-pool.h | 8 ++--- | ||
17 | util/async.c | 6 ++-- | ||
18 | util/thread-pool.c | 58 ++++++++++++++++++------------------- | ||
19 | util/trace-events | 4 +-- | ||
20 | 5 files changed, 42 insertions(+), 42 deletions(-) | ||
21 | |||
22 | diff --git a/include/block/aio.h b/include/block/aio.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/block/aio.h | ||
25 | +++ b/include/block/aio.h | ||
26 | @@ -XXX,XX +XXX,XX @@ typedef void QEMUBHFunc(void *opaque); | ||
27 | typedef bool AioPollFn(void *opaque); | ||
28 | typedef void IOHandler(void *opaque); | ||
29 | |||
30 | -struct ThreadPool; | ||
31 | +struct ThreadPoolAio; | ||
32 | struct LinuxAioState; | ||
33 | typedef struct LuringState LuringState; | ||
34 | |||
35 | @@ -XXX,XX +XXX,XX @@ struct AioContext { | ||
36 | /* Thread pool for performing work and receiving completion callbacks. | ||
37 | * Has its own locking. | ||
38 | */ | ||
39 | - struct ThreadPool *thread_pool; | ||
40 | + struct ThreadPoolAio *thread_pool; | ||
41 | |||
42 | #ifdef CONFIG_LINUX_AIO | ||
43 | struct LinuxAioState *linux_aio; | ||
44 | @@ -XXX,XX +XXX,XX @@ void aio_set_event_notifier_poll(AioContext *ctx, | ||
45 | */ | ||
46 | GSource *aio_get_g_source(AioContext *ctx); | ||
47 | |||
48 | -/* Return the ThreadPool bound to this AioContext */ | ||
49 | -struct ThreadPool *aio_get_thread_pool(AioContext *ctx); | ||
50 | +/* Return the ThreadPoolAio bound to this AioContext */ | ||
51 | +struct ThreadPoolAio *aio_get_thread_pool(AioContext *ctx); | ||
52 | |||
53 | /* Setup the LinuxAioState bound to this AioContext */ | ||
54 | struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); | ||
55 | diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/include/block/thread-pool.h | ||
58 | +++ b/include/block/thread-pool.h | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | |||
61 | typedef int ThreadPoolFunc(void *opaque); | ||
62 | |||
63 | -typedef struct ThreadPool ThreadPool; | ||
64 | +typedef struct ThreadPoolAio ThreadPoolAio; | ||
65 | |||
66 | -ThreadPool *thread_pool_new(struct AioContext *ctx); | ||
67 | -void thread_pool_free(ThreadPool *pool); | ||
68 | +ThreadPoolAio *thread_pool_new_aio(struct AioContext *ctx); | ||
69 | +void thread_pool_free_aio(ThreadPoolAio *pool); | ||
70 | |||
71 | /* | ||
72 | * thread_pool_submit_{aio,co} API: submit I/O requests in the thread's | ||
73 | @@ -XXX,XX +XXX,XX @@ void thread_pool_free(ThreadPool *pool); | ||
74 | BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | ||
75 | BlockCompletionFunc *cb, void *opaque); | ||
76 | int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); | ||
77 | +void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx); | ||
78 | |||
79 | -void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); | ||
80 | |||
81 | #endif | ||
82 | diff --git a/util/async.c b/util/async.c | ||
83 | index XXXXXXX..XXXXXXX 100644 | ||
84 | --- a/util/async.c | ||
85 | +++ b/util/async.c | ||
86 | @@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source) | ||
87 | QEMUBH *bh; | ||
88 | unsigned flags; | ||
89 | |||
90 | - thread_pool_free(ctx->thread_pool); | ||
91 | + thread_pool_free_aio(ctx->thread_pool); | ||
92 | |||
93 | #ifdef CONFIG_LINUX_AIO | ||
94 | if (ctx->linux_aio) { | ||
95 | @@ -XXX,XX +XXX,XX @@ GSource *aio_get_g_source(AioContext *ctx) | ||
96 | return &ctx->source; | ||
97 | } | ||
98 | |||
99 | -ThreadPool *aio_get_thread_pool(AioContext *ctx) | ||
100 | +ThreadPoolAio *aio_get_thread_pool(AioContext *ctx) | ||
101 | { | ||
102 | if (!ctx->thread_pool) { | ||
103 | - ctx->thread_pool = thread_pool_new(ctx); | ||
104 | + ctx->thread_pool = thread_pool_new_aio(ctx); | ||
105 | } | ||
106 | return ctx->thread_pool; | ||
107 | } | ||
108 | diff --git a/util/thread-pool.c b/util/thread-pool.c | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/util/thread-pool.c | ||
111 | +++ b/util/thread-pool.c | ||
112 | @@ -XXX,XX +XXX,XX @@ | ||
113 | #include "block/thread-pool.h" | ||
114 | #include "qemu/main-loop.h" | ||
115 | |||
116 | -static void do_spawn_thread(ThreadPool *pool); | ||
117 | +static void do_spawn_thread(ThreadPoolAio *pool); | ||
118 | |||
119 | -typedef struct ThreadPoolElement ThreadPoolElement; | ||
120 | +typedef struct ThreadPoolElementAio ThreadPoolElementAio; | ||
121 | |||
122 | enum ThreadState { | ||
123 | THREAD_QUEUED, | ||
124 | @@ -XXX,XX +XXX,XX @@ enum ThreadState { | ||
125 | THREAD_DONE, | ||
126 | }; | ||
127 | |||
128 | -struct ThreadPoolElement { | ||
129 | +struct ThreadPoolElementAio { | ||
130 | BlockAIOCB common; | ||
131 | - ThreadPool *pool; | ||
132 | + ThreadPoolAio *pool; | ||
133 | ThreadPoolFunc *func; | ||
134 | void *arg; | ||
135 | |||
136 | @@ -XXX,XX +XXX,XX @@ struct ThreadPoolElement { | ||
137 | int ret; | ||
138 | |||
139 | /* Access to this list is protected by lock. */ | ||
140 | - QTAILQ_ENTRY(ThreadPoolElement) reqs; | ||
141 | + QTAILQ_ENTRY(ThreadPoolElementAio) reqs; | ||
142 | |||
143 | /* This list is only written by the thread pool's mother thread. */ | ||
144 | - QLIST_ENTRY(ThreadPoolElement) all; | ||
145 | + QLIST_ENTRY(ThreadPoolElementAio) all; | ||
146 | }; | ||
147 | |||
148 | -struct ThreadPool { | ||
149 | +struct ThreadPoolAio { | ||
150 | AioContext *ctx; | ||
151 | QEMUBH *completion_bh; | ||
152 | QemuMutex lock; | ||
153 | @@ -XXX,XX +XXX,XX @@ struct ThreadPool { | ||
154 | QEMUBH *new_thread_bh; | ||
155 | |||
156 | /* The following variables are only accessed from one AioContext. */ | ||
157 | - QLIST_HEAD(, ThreadPoolElement) head; | ||
158 | + QLIST_HEAD(, ThreadPoolElementAio) head; | ||
159 | |||
160 | /* The following variables are protected by lock. */ | ||
161 | - QTAILQ_HEAD(, ThreadPoolElement) request_list; | ||
162 | + QTAILQ_HEAD(, ThreadPoolElementAio) request_list; | ||
163 | int cur_threads; | ||
164 | int idle_threads; | ||
165 | int new_threads; /* backlog of threads we need to create */ | ||
166 | @@ -XXX,XX +XXX,XX @@ struct ThreadPool { | ||
167 | |||
168 | static void *worker_thread(void *opaque) | ||
169 | { | ||
170 | - ThreadPool *pool = opaque; | ||
171 | + ThreadPoolAio *pool = opaque; | ||
172 | |||
173 | qemu_mutex_lock(&pool->lock); | ||
174 | pool->pending_threads--; | ||
175 | do_spawn_thread(pool); | ||
176 | |||
177 | while (pool->cur_threads <= pool->max_threads) { | ||
178 | - ThreadPoolElement *req; | ||
179 | + ThreadPoolElementAio *req; | ||
180 | int ret; | ||
181 | |||
182 | if (QTAILQ_EMPTY(&pool->request_list)) { | ||
183 | @@ -XXX,XX +XXX,XX @@ static void *worker_thread(void *opaque) | ||
184 | return NULL; | ||
185 | } | ||
186 | |||
187 | -static void do_spawn_thread(ThreadPool *pool) | ||
188 | +static void do_spawn_thread(ThreadPoolAio *pool) | ||
189 | { | ||
190 | QemuThread t; | ||
191 | |||
192 | @@ -XXX,XX +XXX,XX @@ static void do_spawn_thread(ThreadPool *pool) | ||
193 | |||
194 | static void spawn_thread_bh_fn(void *opaque) | ||
195 | { | ||
196 | - ThreadPool *pool = opaque; | ||
197 | + ThreadPoolAio *pool = opaque; | ||
198 | |||
199 | qemu_mutex_lock(&pool->lock); | ||
200 | do_spawn_thread(pool); | ||
201 | qemu_mutex_unlock(&pool->lock); | ||
202 | } | ||
203 | |||
204 | -static void spawn_thread(ThreadPool *pool) | ||
205 | +static void spawn_thread(ThreadPoolAio *pool) | ||
206 | { | ||
207 | pool->cur_threads++; | ||
208 | pool->new_threads++; | ||
209 | @@ -XXX,XX +XXX,XX @@ static void spawn_thread(ThreadPool *pool) | ||
210 | |||
211 | static void thread_pool_completion_bh(void *opaque) | ||
212 | { | ||
213 | - ThreadPool *pool = opaque; | ||
214 | - ThreadPoolElement *elem, *next; | ||
215 | + ThreadPoolAio *pool = opaque; | ||
216 | + ThreadPoolElementAio *elem, *next; | ||
217 | |||
218 | defer_call_begin(); /* cb() may use defer_call() to coalesce work */ | ||
219 | |||
220 | @@ -XXX,XX +XXX,XX @@ restart: | ||
221 | continue; | ||
222 | } | ||
223 | |||
224 | - trace_thread_pool_complete(pool, elem, elem->common.opaque, | ||
225 | - elem->ret); | ||
226 | + trace_thread_pool_complete_aio(pool, elem, elem->common.opaque, | ||
227 | + elem->ret); | ||
228 | QLIST_REMOVE(elem, all); | ||
229 | |||
230 | if (elem->common.cb) { | ||
231 | @@ -XXX,XX +XXX,XX @@ restart: | ||
232 | |||
233 | static void thread_pool_cancel(BlockAIOCB *acb) | ||
234 | { | ||
235 | - ThreadPoolElement *elem = (ThreadPoolElement *)acb; | ||
236 | - ThreadPool *pool = elem->pool; | ||
237 | + ThreadPoolElementAio *elem = (ThreadPoolElementAio *)acb; | ||
238 | + ThreadPoolAio *pool = elem->pool; | ||
239 | |||
240 | - trace_thread_pool_cancel(elem, elem->common.opaque); | ||
241 | + trace_thread_pool_cancel_aio(elem, elem->common.opaque); | ||
242 | |||
243 | QEMU_LOCK_GUARD(&pool->lock); | ||
244 | if (elem->state == THREAD_QUEUED) { | ||
245 | @@ -XXX,XX +XXX,XX @@ static void thread_pool_cancel(BlockAIOCB *acb) | ||
246 | } | ||
247 | |||
248 | static const AIOCBInfo thread_pool_aiocb_info = { | ||
249 | - .aiocb_size = sizeof(ThreadPoolElement), | ||
250 | + .aiocb_size = sizeof(ThreadPoolElementAio), | ||
251 | .cancel_async = thread_pool_cancel, | ||
252 | }; | ||
253 | |||
254 | BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, | ||
255 | BlockCompletionFunc *cb, void *opaque) | ||
256 | { | ||
257 | - ThreadPoolElement *req; | ||
258 | + ThreadPoolElementAio *req; | ||
259 | AioContext *ctx = qemu_get_current_aio_context(); | ||
260 | - ThreadPool *pool = aio_get_thread_pool(ctx); | ||
261 | + ThreadPoolAio *pool = aio_get_thread_pool(ctx); | ||
262 | |||
263 | /* Assert that the thread submitting work is the same running the pool */ | ||
264 | assert(pool->ctx == qemu_get_current_aio_context()); | ||
265 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) | ||
266 | return tpc.ret; | ||
267 | } | ||
268 | |||
269 | -void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) | ||
270 | +void thread_pool_update_params(ThreadPoolAio *pool, AioContext *ctx) | ||
271 | { | ||
272 | qemu_mutex_lock(&pool->lock); | ||
273 | |||
274 | @@ -XXX,XX +XXX,XX @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) | ||
275 | qemu_mutex_unlock(&pool->lock); | ||
276 | } | ||
277 | |||
278 | -static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) | ||
279 | +static void thread_pool_init_one(ThreadPoolAio *pool, AioContext *ctx) | ||
280 | { | ||
281 | if (!ctx) { | ||
282 | ctx = qemu_get_aio_context(); | ||
283 | @@ -XXX,XX +XXX,XX @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) | ||
284 | thread_pool_update_params(pool, ctx); | ||
285 | } | ||
286 | |||
287 | -ThreadPool *thread_pool_new(AioContext *ctx) | ||
288 | +ThreadPoolAio *thread_pool_new_aio(AioContext *ctx) | ||
289 | { | ||
290 | - ThreadPool *pool = g_new(ThreadPool, 1); | ||
291 | + ThreadPoolAio *pool = g_new(ThreadPoolAio, 1); | ||
292 | thread_pool_init_one(pool, ctx); | ||
293 | return pool; | ||
294 | } | ||
295 | |||
296 | -void thread_pool_free(ThreadPool *pool) | ||
297 | +void thread_pool_free_aio(ThreadPoolAio *pool) | ||
298 | { | ||
299 | if (!pool) { | ||
300 | return; | ||
301 | diff --git a/util/trace-events b/util/trace-events | ||
302 | index XXXXXXX..XXXXXXX 100644 | ||
303 | --- a/util/trace-events | ||
304 | +++ b/util/trace-events | ||
305 | @@ -XXX,XX +XXX,XX @@ reentrant_aio(void *ctx, const char *name) "ctx %p name %s" | ||
306 | |||
307 | # thread-pool.c | ||
308 | thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" | ||
309 | -thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d" | ||
310 | -thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" | ||
311 | +thread_pool_complete_aio(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d" | ||
312 | +thread_pool_cancel_aio(void *req, void *opaque) "req %p opaque %p" | ||
313 | |||
314 | # buffer.c | ||
315 | buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd" | ||
316 | -- | ||
317 | 2.48.1 | ||
318 | |||
319 | diff view generated by jsdifflib |
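
From a caller's perspective the AIO pool keeps its existing behaviour; the patch only renames the types and the new/free pair. A minimal caller-side sketch of how the renamed types and the existing thread_pool_submit_aio() fit together (vu_blocking_io(), vu_io_done() and dev are hypothetical, not part of the patch):

    /* Hypothetical worker, runs in a pool thread (ThreadPoolFunc). */
    static int vu_blocking_io(void *opaque)
    {
        /* ... perform the blocking I/O on opaque ... */
        return 0;
    }

    /* Hypothetical completion callback, runs back in the pool's AioContext. */
    static void vu_io_done(void *opaque, int ret)
    {
        /* ... consume the result ... */
    }

    /* Submission is unchanged; per-AioContext pools are now created and
     * destroyed via the renamed thread_pool_new_aio()/thread_pool_free_aio(). */
    BlockAIOCB *acb = thread_pool_submit_aio(vu_blocking_io, dev,
                                             vu_io_done, dev);
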
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | qemu_loadvm_load_state_buffer() and its load_state_buffer | ||
4 | SaveVMHandler allow providing a device state buffer to an explicitly | ||
5 | specified device via its idstr and instance id. | ||
6 | |||
7 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
8 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
9 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
10 | Link: https://lore.kernel.org/qemu-devel/71ca753286b87831ced4afd422e2e2bed071af25.1741124640.git.maciej.szmigiero@oracle.com | ||
11 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
12 | --- | ||
13 | include/migration/register.h | 15 +++++++++++++++ | ||
14 | migration/savevm.h | 3 +++ | ||
15 | migration/savevm.c | 23 +++++++++++++++++++++++ | ||
16 | 3 files changed, 41 insertions(+) | ||
17 | |||
18 | diff --git a/include/migration/register.h b/include/migration/register.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/migration/register.h | ||
21 | +++ b/include/migration/register.h | ||
22 | @@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers { | ||
23 | */ | ||
24 | int (*load_state)(QEMUFile *f, void *opaque, int version_id); | ||
25 | |||
26 | + /** | ||
27 | + * @load_state_buffer (invoked outside the BQL) | ||
28 | + * | ||
29 | + * Load device state buffer provided to qemu_loadvm_load_state_buffer(). | ||
30 | + * | ||
31 | + * @opaque: data pointer passed to register_savevm_live() | ||
32 | + * @buf: the data buffer to load | ||
33 | + * @len: the data length in buffer | ||
34 | + * @errp: pointer to Error*, to store an error if it happens. | ||
35 | + * | ||
36 | + * Returns true to indicate success and false for errors. | ||
37 | + */ | ||
38 | + bool (*load_state_buffer)(void *opaque, char *buf, size_t len, | ||
39 | + Error **errp); | ||
40 | + | ||
41 | /** | ||
42 | * @load_setup | ||
43 | * | ||
44 | diff --git a/migration/savevm.h b/migration/savevm.h | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/migration/savevm.h | ||
47 | +++ b/migration/savevm.h | ||
48 | @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_approve_switchover(void); | ||
49 | int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, | ||
50 | bool in_postcopy); | ||
51 | |||
52 | +bool qemu_loadvm_load_state_buffer(const char *idstr, uint32_t instance_id, | ||
53 | + char *buf, size_t len, Error **errp); | ||
54 | + | ||
55 | #endif | ||
56 | diff --git a/migration/savevm.c b/migration/savevm.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/migration/savevm.c | ||
59 | +++ b/migration/savevm.c | ||
60 | @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_approve_switchover(void) | ||
61 | return migrate_send_rp_switchover_ack(mis); | ||
62 | } | ||
63 | |||
64 | +bool qemu_loadvm_load_state_buffer(const char *idstr, uint32_t instance_id, | ||
65 | + char *buf, size_t len, Error **errp) | ||
66 | +{ | ||
67 | + SaveStateEntry *se; | ||
68 | + | ||
69 | + se = find_se(idstr, instance_id); | ||
70 | + if (!se) { | ||
71 | + error_setg(errp, | ||
72 | + "Unknown idstr %s or instance id %u for load state buffer", | ||
73 | + idstr, instance_id); | ||
74 | + return false; | ||
75 | + } | ||
76 | + | ||
77 | + if (!se->ops || !se->ops->load_state_buffer) { | ||
78 | + error_setg(errp, | ||
79 | + "idstr %s / instance %u has no load state buffer operation", | ||
80 | + idstr, instance_id); | ||
81 | + return false; | ||
82 | + } | ||
83 | + | ||
84 | + return se->ops->load_state_buffer(se->opaque, buf, len, errp); | ||
85 | +} | ||
86 | + | ||
87 | bool save_snapshot(const char *name, bool overwrite, const char *vmstate, | ||
88 | bool has_devices, strList *devices, Error **errp) | ||
89 | { | ||
90 | -- | ||
91 | 2.48.1 | ||
92 | |||
93 | diff view generated by jsdifflib |
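
As a usage illustration, a device could wire the new handler up roughly as below; the mydev_* names and MyDevState fields are hypothetical, only the handler signature and the routing through qemu_loadvm_load_state_buffer(idstr, instance_id, buf, len, errp) come from the patch above:

    /* Hypothetical receiver side, invoked outside the BQL with a buffer
     * delivered through qemu_loadvm_load_state_buffer(). */
    static bool mydev_load_state_buffer(void *opaque, char *buf, size_t len,
                                        Error **errp)
    {
        MyDevState *dev = opaque;               /* assumed device state type */

        if (len > sizeof(dev->mig_buf)) {
            error_setg(errp, "mydev: state buffer too large (%zu bytes)", len);
            return false;
        }
        memcpy(dev->mig_buf, buf, len);
        dev->mig_buf_len = len;
        return true;
    }

    static const SaveVMHandlers mydev_savevm_handlers = {
        .load_state_buffer = mydev_load_state_buffer,
        /* remaining handlers elided */
    };
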
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | All callers of migration_incoming_state_destroy() other than | ||
4 | postcopy_ram_listen_thread() make this call with the BQL held. | ||
5 | |||
6 | Since migration_incoming_state_destroy() ultimately calls "load_cleanup" | ||
7 | SaveVMHandlers, and it will soon call BQL-sensitive code, it makes sense | ||
8 | to always call that function under the BQL rather than have it deal with | ||
9 | both cases (with and without the BQL). | ||
10 | Add the necessary bql_lock() and bql_unlock() to | ||
11 | postcopy_ram_listen_thread(). | ||
12 | |||
13 | qemu_loadvm_state_main() in postcopy_ram_listen_thread() could call | ||
14 | "load_state" SaveVMHandlers that are expecting BQL to be held. | ||
15 | |||
16 | In principle, the only devices that should be arriving on migration | ||
17 | channel serviced by postcopy_ram_listen_thread() are those that are | ||
18 | postcopiable and whose load handlers are safe to be called without BQL | ||
19 | being held. | ||
20 | |||
21 | But nothing currently prevents the source from sending data for "unsafe" | ||
22 | devices, which would cause trouble there. | ||
23 | Add a TODO comment there so it's clear that it would be good to improve | ||
24 | handling of such an (erroneous) case in the future. | ||
25 | |||
26 | Acked-by: Peter Xu <peterx@redhat.com> | ||
27 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
28 | Link: https://lore.kernel.org/qemu-devel/21bb5ca337b1d5a802e697f553f37faf296b5ff4.1741193259.git.maciej.szmigiero@oracle.com | ||
29 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
30 | --- | ||
31 | migration/migration.c | 13 +++++++++++++ | ||
32 | migration/savevm.c | 4 ++++ | ||
33 | 2 files changed, 17 insertions(+) | ||
34 | |||
35 | diff --git a/migration/migration.c b/migration/migration.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/migration/migration.c | ||
38 | +++ b/migration/migration.c | ||
39 | @@ -XXX,XX +XXX,XX @@ void migration_incoming_state_destroy(void) | ||
40 | struct MigrationIncomingState *mis = migration_incoming_get_current(); | ||
41 | |||
42 | multifd_recv_cleanup(); | ||
43 | + | ||
44 | /* | ||
45 | * RAM state cleanup needs to happen after multifd cleanup, because | ||
46 | * multifd threads can use some of its states (receivedmap). | ||
47 | + * The VFIO load_cleanup() implementation is BQL-sensitive. It requires | ||
48 | + * BQL must NOT be taken when recycling load threads, so that it won't | ||
49 | + * block the load threads from making progress on address space | ||
50 | + * modification operations. | ||
51 | + * | ||
52 | + * To make it work, we could try to not take BQL for all load_cleanup(), | ||
53 | + * or conditionally unlock BQL only if bql_locked() in VFIO. | ||
54 | + * | ||
55 | + * Since most existing call sites take BQL for load_cleanup(), make | ||
56 | + * it simple by taking BQL always as the rule, so that VFIO can unlock | ||
57 | + * BQL and retake unconditionally. | ||
58 | */ | ||
59 | + assert(bql_locked()); | ||
60 | qemu_loadvm_state_cleanup(); | ||
61 | |||
62 | if (mis->to_src_file) { | ||
63 | diff --git a/migration/savevm.c b/migration/savevm.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/migration/savevm.c | ||
66 | +++ b/migration/savevm.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static void *postcopy_ram_listen_thread(void *opaque) | ||
68 | * in qemu_file, and thus we must be blocking now. | ||
69 | */ | ||
70 | qemu_file_set_blocking(f, true); | ||
71 | + | ||
72 | + /* TODO: sanity check that only postcopiable data will be loaded here */ | ||
73 | load_res = qemu_loadvm_state_main(f, mis); | ||
74 | |||
75 | /* | ||
76 | @@ -XXX,XX +XXX,XX @@ static void *postcopy_ram_listen_thread(void *opaque) | ||
77 | * (If something broke then qemu will have to exit anyway since it's | ||
78 | * got a bad migration state). | ||
79 | */ | ||
80 | + bql_lock(); | ||
81 | migration_incoming_state_destroy(); | ||
82 | + bql_unlock(); | ||
83 | |||
84 | rcu_unregister_thread(); | ||
85 | mis->have_listen_thread = false; | ||
86 | -- | ||
87 | 2.48.1 | ||
88 | |||
89 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | Automatic memory management helps avoid memory safety issues. | ||
4 | |||
5 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
6 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
7 | Link: https://lore.kernel.org/qemu-devel/a5843c5fa64d7e5239a4316092ec0ef0d10c2320.1741124640.git.maciej.szmigiero@oracle.com | ||
8 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
9 | --- | ||
10 | include/qapi/error.h | 2 ++ | ||
11 | 1 file changed, 2 insertions(+) | ||
12 | |||
13 | diff --git a/include/qapi/error.h b/include/qapi/error.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/include/qapi/error.h | ||
16 | +++ b/include/qapi/error.h | ||
17 | @@ -XXX,XX +XXX,XX @@ Error *error_copy(const Error *err); | ||
18 | */ | ||
19 | void error_free(Error *err); | ||
20 | |||
21 | +G_DEFINE_AUTOPTR_CLEANUP_FUNC(Error, error_free) | ||
22 | + | ||
23 | /* | ||
24 | * Convenience function to assert that *@errp is set, then silently free it. | ||
25 | */ | ||
26 | -- | ||
27 | 2.48.1 | ||
28 | |||
29 | diff view generated by jsdifflib |
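
The practical effect is that a function-local Error no longer needs an explicit error_free() on every exit path; the pattern the later load-thread patches in this series rely on looks roughly like this (run_step() and try_step() are placeholders, not existing functions):

    static void run_step(MigrationState *s)
    {
        g_autoptr(Error) local_err = NULL;   /* error_free() runs at scope exit */

        if (!try_step(&local_err)) {
            /* migrate_set_error() keeps its own copy, so no manual free here */
            migrate_set_error(s, local_err);
            return;
        }
    }
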
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | Some drivers might want to make use of auxiliary helper threads during VM | ||
4 | state loading, for example to make sure that their blocking (sync) I/O | ||
5 | operations don't block the rest of the migration process. | ||
6 | |||
7 | Add a migration core managed thread pool to facilitate this use case. | ||
8 | |||
9 | The migration core will wait for these threads to finish before | ||
10 | (re)starting the VM at destination. | ||
11 | |||
12 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
13 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
14 | Link: https://lore.kernel.org/qemu-devel/b09fd70369b6159c75847e69f235cb908b02570c.1741124640.git.maciej.szmigiero@oracle.com | ||
15 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
16 | --- | ||
17 | include/migration/misc.h | 3 ++ | ||
18 | include/qemu/typedefs.h | 2 + | ||
19 | migration/migration.h | 5 +++ | ||
20 | migration/savevm.h | 2 +- | ||
21 | migration/migration.c | 2 +- | ||
22 | migration/savevm.c | 95 +++++++++++++++++++++++++++++++++++++++- | ||
23 | 6 files changed, 105 insertions(+), 4 deletions(-) | ||
24 | |||
25 | diff --git a/include/migration/misc.h b/include/migration/misc.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/include/migration/misc.h | ||
28 | +++ b/include/migration/misc.h | ||
29 | @@ -XXX,XX +XXX,XX @@ bool migrate_ram_is_ignored(RAMBlock *block); | ||
30 | /* migration/block.c */ | ||
31 | |||
32 | AnnounceParameters *migrate_announce_params(void); | ||
33 | + | ||
34 | /* migration/savevm.c */ | ||
35 | |||
36 | void dump_vmstate_json_to_file(FILE *out_fp); | ||
37 | +void qemu_loadvm_start_load_thread(MigrationLoadThread function, | ||
38 | + void *opaque); | ||
39 | |||
40 | /* migration/migration.c */ | ||
41 | void migration_object_init(void); | ||
42 | diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/include/qemu/typedefs.h | ||
45 | +++ b/include/qemu/typedefs.h | ||
46 | @@ -XXX,XX +XXX,XX @@ typedef struct IRQState *qemu_irq; | ||
47 | * Function types | ||
48 | */ | ||
49 | typedef void (*qemu_irq_handler)(void *opaque, int n, int level); | ||
50 | +typedef bool (*MigrationLoadThread)(void *opaque, bool *should_quit, | ||
51 | + Error **errp); | ||
52 | |||
53 | #endif /* QEMU_TYPEDEFS_H */ | ||
54 | diff --git a/migration/migration.h b/migration/migration.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/migration/migration.h | ||
57 | +++ b/migration/migration.h | ||
58 | @@ -XXX,XX +XXX,XX @@ | ||
59 | #define MIGRATION_THREAD_DST_PREEMPT "mig/dst/preempt" | ||
60 | |||
61 | struct PostcopyBlocktimeContext; | ||
62 | +typedef struct ThreadPool ThreadPool; | ||
63 | |||
64 | #define MIGRATION_RESUME_ACK_VALUE (1) | ||
65 | |||
66 | @@ -XXX,XX +XXX,XX @@ struct MigrationIncomingState { | ||
67 | Coroutine *colo_incoming_co; | ||
68 | QemuSemaphore colo_incoming_sem; | ||
69 | |||
70 | + /* Optional load threads pool and its thread exit request flag */ | ||
71 | + ThreadPool *load_threads; | ||
72 | + bool load_threads_abort; | ||
73 | + | ||
74 | /* | ||
75 | * PostcopyBlocktimeContext to keep information for postcopy | ||
76 | * live migration, to calculate vCPU block time | ||
77 | diff --git a/migration/savevm.h b/migration/savevm.h | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/migration/savevm.h | ||
80 | +++ b/migration/savevm.h | ||
81 | @@ -XXX,XX +XXX,XX @@ void qemu_savevm_live_state(QEMUFile *f); | ||
82 | int qemu_save_device_state(QEMUFile *f); | ||
83 | |||
84 | int qemu_loadvm_state(QEMUFile *f); | ||
85 | -void qemu_loadvm_state_cleanup(void); | ||
86 | +void qemu_loadvm_state_cleanup(MigrationIncomingState *mis); | ||
87 | int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); | ||
88 | int qemu_load_device_state(QEMUFile *f); | ||
89 | int qemu_loadvm_approve_switchover(void); | ||
90 | diff --git a/migration/migration.c b/migration/migration.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/migration/migration.c | ||
93 | +++ b/migration/migration.c | ||
94 | @@ -XXX,XX +XXX,XX @@ void migration_incoming_state_destroy(void) | ||
95 | * BQL and retake unconditionally. | ||
96 | */ | ||
97 | assert(bql_locked()); | ||
98 | - qemu_loadvm_state_cleanup(); | ||
99 | + qemu_loadvm_state_cleanup(mis); | ||
100 | |||
101 | if (mis->to_src_file) { | ||
102 | /* Tell source that we are done */ | ||
103 | diff --git a/migration/savevm.c b/migration/savevm.c | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/migration/savevm.c | ||
106 | +++ b/migration/savevm.c | ||
107 | @@ -XXX,XX +XXX,XX @@ | ||
108 | #include "qemu/job.h" | ||
109 | #include "qemu/main-loop.h" | ||
110 | #include "block/snapshot.h" | ||
111 | +#include "block/thread-pool.h" | ||
112 | #include "qemu/cutils.h" | ||
113 | #include "io/channel-buffer.h" | ||
114 | #include "io/channel-file.h" | ||
115 | @@ -XXX,XX +XXX,XX @@ static struct mig_cmd_args { | ||
116 | * generic extendable format with an exception for two old entities. | ||
117 | */ | ||
118 | |||
119 | +/***********************************************************/ | ||
120 | +/* Optional load threads pool support */ | ||
121 | + | ||
122 | +static void qemu_loadvm_thread_pool_create(MigrationIncomingState *mis) | ||
123 | +{ | ||
124 | + assert(!mis->load_threads); | ||
125 | + mis->load_threads = thread_pool_new(); | ||
126 | + mis->load_threads_abort = false; | ||
127 | +} | ||
128 | + | ||
129 | +static void qemu_loadvm_thread_pool_destroy(MigrationIncomingState *mis) | ||
130 | +{ | ||
131 | + qatomic_set(&mis->load_threads_abort, true); | ||
132 | + | ||
133 | + bql_unlock(); /* Load threads might be waiting for BQL */ | ||
134 | + g_clear_pointer(&mis->load_threads, thread_pool_free); | ||
135 | + bql_lock(); | ||
136 | +} | ||
137 | + | ||
138 | +static bool qemu_loadvm_thread_pool_wait(MigrationState *s, | ||
139 | + MigrationIncomingState *mis) | ||
140 | +{ | ||
141 | + bql_unlock(); /* Let load threads do work requiring BQL */ | ||
142 | + thread_pool_wait(mis->load_threads); | ||
143 | + bql_lock(); | ||
144 | + | ||
145 | + return !migrate_has_error(s); | ||
146 | +} | ||
147 | + | ||
148 | /***********************************************************/ | ||
149 | /* savevm/loadvm support */ | ||
150 | |||
151 | @@ -XXX,XX +XXX,XX @@ static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp) | ||
152 | return 0; | ||
153 | } | ||
154 | |||
155 | -void qemu_loadvm_state_cleanup(void) | ||
156 | +struct LoadThreadData { | ||
157 | + MigrationLoadThread function; | ||
158 | + void *opaque; | ||
159 | +}; | ||
160 | + | ||
161 | +static int qemu_loadvm_load_thread(void *thread_opaque) | ||
162 | +{ | ||
163 | + struct LoadThreadData *data = thread_opaque; | ||
164 | + MigrationIncomingState *mis = migration_incoming_get_current(); | ||
165 | + g_autoptr(Error) local_err = NULL; | ||
166 | + | ||
167 | + if (!data->function(data->opaque, &mis->load_threads_abort, &local_err)) { | ||
168 | + MigrationState *s = migrate_get_current(); | ||
169 | + | ||
170 | + /* | ||
171 | + * Can't set load_threads_abort here since processing of main migration | ||
172 | + * channel data could still be happening, resulting in launching of new | ||
173 | + * load threads. | ||
174 | + */ | ||
175 | + | ||
176 | + assert(local_err); | ||
177 | + | ||
178 | + /* | ||
179 | + * In case of multiple load threads failing, which thread's error | ||
180 | + * we end up setting is purely arbitrary. | ||
181 | + */ | ||
182 | + migrate_set_error(s, local_err); | ||
183 | + } | ||
184 | + | ||
185 | + return 0; | ||
186 | +} | ||
187 | + | ||
188 | +void qemu_loadvm_start_load_thread(MigrationLoadThread function, | ||
189 | + void *opaque) | ||
190 | +{ | ||
191 | + MigrationIncomingState *mis = migration_incoming_get_current(); | ||
192 | + struct LoadThreadData *data; | ||
193 | + | ||
194 | + /* We only set it from this thread so it's okay to read it directly */ | ||
195 | + assert(!mis->load_threads_abort); | ||
196 | + | ||
197 | + data = g_new(struct LoadThreadData, 1); | ||
198 | + data->function = function; | ||
199 | + data->opaque = opaque; | ||
200 | + | ||
201 | + thread_pool_submit_immediate(mis->load_threads, qemu_loadvm_load_thread, | ||
202 | + data, g_free); | ||
203 | +} | ||
204 | + | ||
205 | +void qemu_loadvm_state_cleanup(MigrationIncomingState *mis) | ||
206 | { | ||
207 | SaveStateEntry *se; | ||
208 | |||
209 | trace_loadvm_state_cleanup(); | ||
210 | + | ||
211 | QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { | ||
212 | if (se->ops && se->ops->load_cleanup) { | ||
213 | se->ops->load_cleanup(se->opaque); | ||
214 | } | ||
215 | } | ||
216 | + | ||
217 | + qemu_loadvm_thread_pool_destroy(mis); | ||
218 | } | ||
219 | |||
220 | /* Return true if we should continue the migration, or false. */ | ||
221 | @@ -XXX,XX +XXX,XX @@ out: | ||
222 | |||
223 | int qemu_loadvm_state(QEMUFile *f) | ||
224 | { | ||
225 | + MigrationState *s = migrate_get_current(); | ||
226 | MigrationIncomingState *mis = migration_incoming_get_current(); | ||
227 | Error *local_err = NULL; | ||
228 | int ret; | ||
229 | @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f) | ||
230 | return -EINVAL; | ||
231 | } | ||
232 | |||
233 | + qemu_loadvm_thread_pool_create(mis); | ||
234 | + | ||
235 | ret = qemu_loadvm_state_header(f); | ||
236 | if (ret) { | ||
237 | return ret; | ||
238 | @@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f) | ||
239 | |||
240 | /* When reaching here, it must be precopy */ | ||
241 | if (ret == 0) { | ||
242 | - if (migrate_has_error(migrate_get_current())) { | ||
243 | + if (migrate_has_error(migrate_get_current()) || | ||
244 | + !qemu_loadvm_thread_pool_wait(s, mis)) { | ||
245 | ret = -EINVAL; | ||
246 | } else { | ||
247 | ret = qemu_file_get_error(f); | ||
248 | } | ||
249 | } | ||
250 | + /* | ||
251 | + * Set this flag unconditionally so we'll catch further attempts to | ||
252 | + * start additional threads via an appropriate assert() | ||
253 | + */ | ||
254 | + qatomic_set(&mis->load_threads_abort, true); | ||
255 | |||
256 | /* | ||
257 | * Try to read in the VMDESC section as well, so that dumping tools that | ||
258 | -- | ||
259 | 2.48.1 | ||
260 | |||
261 | diff view generated by jsdifflib |
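
A sketch of the intended device-side usage: the device starts a worker from one of its load handlers, and the worker polls the should_quit flag that qemu_loadvm_thread_pool_destroy() sets on cleanup. All mydev_* names are hypothetical; only qemu_loadvm_start_load_thread() and the MigrationLoadThread signature come from the patch above.

    static bool mydev_load_worker(void *opaque, bool *should_quit, Error **errp)
    {
        MyDevState *dev = opaque;

        while (!qatomic_read(should_quit)) {
            if (!mydev_apply_next_buffer(dev, errp)) {   /* placeholder */
                return false;     /* error ends up in migrate_set_error() */
            }
            if (mydev_all_buffers_applied(dev)) {        /* placeholder */
                return true;
            }
        }
        return true;              /* asked to quit, not an error of this thread */
    }

    /* started from e.g. the device's load_setup handler: */
    qemu_loadvm_start_load_thread(mydev_load_worker, dev);
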
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | Read the packet header first so that in the future we will be able to | ||
4 | differentiate between a RAM multifd packet and a device state multifd | ||
5 | packet. | ||
6 | |||
7 | Since these two are of different sizes, we can't read the packet body until | ||
8 | we know which packet type it is. | ||
9 | |||
10 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
11 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
12 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
13 | Link: https://lore.kernel.org/qemu-devel/832ad055fe447561ac1ad565d61658660cb3f63f.1741124640.git.maciej.szmigiero@oracle.com | ||
14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
15 | --- | ||
16 | migration/multifd.h | 5 +++++ | ||
17 | migration/multifd.c | 55 ++++++++++++++++++++++++++++++++++++--------- | ||
18 | 2 files changed, 49 insertions(+), 11 deletions(-) | ||
19 | |||
20 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/migration/multifd.h | ||
23 | +++ b/migration/multifd.h | ||
24 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
25 | uint32_t magic; | ||
26 | uint32_t version; | ||
27 | uint32_t flags; | ||
28 | +} __attribute__((packed)) MultiFDPacketHdr_t; | ||
29 | + | ||
30 | +typedef struct { | ||
31 | + MultiFDPacketHdr_t hdr; | ||
32 | + | ||
33 | /* maximum number of allocated pages */ | ||
34 | uint32_t pages_alloc; | ||
35 | /* non zero pages */ | ||
36 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/migration/multifd.c | ||
39 | +++ b/migration/multifd.c | ||
40 | @@ -XXX,XX +XXX,XX @@ void multifd_send_fill_packet(MultiFDSendParams *p) | ||
41 | |||
42 | memset(packet, 0, p->packet_len); | ||
43 | |||
44 | - packet->magic = cpu_to_be32(MULTIFD_MAGIC); | ||
45 | - packet->version = cpu_to_be32(MULTIFD_VERSION); | ||
46 | + packet->hdr.magic = cpu_to_be32(MULTIFD_MAGIC); | ||
47 | + packet->hdr.version = cpu_to_be32(MULTIFD_VERSION); | ||
48 | |||
49 | - packet->flags = cpu_to_be32(p->flags); | ||
50 | + packet->hdr.flags = cpu_to_be32(p->flags); | ||
51 | packet->next_packet_size = cpu_to_be32(p->next_packet_size); | ||
52 | |||
53 | packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); | ||
54 | @@ -XXX,XX +XXX,XX @@ void multifd_send_fill_packet(MultiFDSendParams *p) | ||
55 | p->flags, p->next_packet_size); | ||
56 | } | ||
57 | |||
58 | -static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
59 | +static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p, | ||
60 | + const MultiFDPacketHdr_t *hdr, | ||
61 | + Error **errp) | ||
62 | { | ||
63 | - const MultiFDPacket_t *packet = p->packet; | ||
64 | - uint32_t magic = be32_to_cpu(packet->magic); | ||
65 | - uint32_t version = be32_to_cpu(packet->version); | ||
66 | - int ret = 0; | ||
67 | + uint32_t magic = be32_to_cpu(hdr->magic); | ||
68 | + uint32_t version = be32_to_cpu(hdr->version); | ||
69 | |||
70 | if (magic != MULTIFD_MAGIC) { | ||
71 | error_setg(errp, "multifd: received packet magic %x, expected %x", | ||
72 | @@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
73 | return -1; | ||
74 | } | ||
75 | |||
76 | - p->flags = be32_to_cpu(packet->flags); | ||
77 | + p->flags = be32_to_cpu(hdr->flags); | ||
78 | + | ||
79 | + return 0; | ||
80 | +} | ||
81 | + | ||
82 | +static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
83 | +{ | ||
84 | + const MultiFDPacket_t *packet = p->packet; | ||
85 | + int ret = 0; | ||
86 | + | ||
87 | p->next_packet_size = be32_to_cpu(packet->next_packet_size); | ||
88 | p->packet_num = be64_to_cpu(packet->packet_num); | ||
89 | p->packets_recved++; | ||
90 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
91 | } | ||
92 | |||
93 | while (true) { | ||
94 | + MultiFDPacketHdr_t hdr; | ||
95 | uint32_t flags = 0; | ||
96 | bool has_data = false; | ||
97 | + uint8_t *pkt_buf; | ||
98 | + size_t pkt_len; | ||
99 | + | ||
100 | p->normal_num = 0; | ||
101 | |||
102 | if (use_packets) { | ||
103 | struct iovec iov = { | ||
104 | - .iov_base = (void *)p->packet, | ||
105 | - .iov_len = p->packet_len | ||
106 | + .iov_base = (void *)&hdr, | ||
107 | + .iov_len = sizeof(hdr) | ||
108 | }; | ||
109 | |||
110 | if (multifd_recv_should_exit()) { | ||
111 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
112 | break; | ||
113 | } | ||
114 | |||
115 | + ret = multifd_recv_unfill_packet_header(p, &hdr, &local_err); | ||
116 | + if (ret) { | ||
117 | + break; | ||
118 | + } | ||
119 | + | ||
120 | + pkt_buf = (uint8_t *)p->packet + sizeof(hdr); | ||
121 | + pkt_len = p->packet_len - sizeof(hdr); | ||
122 | + | ||
123 | + ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len, | ||
124 | + &local_err); | ||
125 | + if (!ret) { | ||
126 | + /* EOF */ | ||
127 | + error_setg(&local_err, "multifd: unexpected EOF after packet header"); | ||
128 | + break; | ||
129 | + } | ||
130 | + | ||
131 | + if (ret == -1) { | ||
132 | + break; | ||
133 | + } | ||
134 | + | ||
135 | qemu_mutex_lock(&p->mutex); | ||
136 | ret = multifd_recv_unfill_packet(p, &local_err); | ||
137 | if (ret) { | ||
138 | -- | ||
139 | 2.48.1 | ||
140 | |||
141 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | Add basic support for receiving device state via multifd channels - | ||
4 | channels that are shared with RAM transfers. | ||
5 | |||
6 | Depending on whether the MULTIFD_FLAG_DEVICE_STATE flag is present in the | ||
7 | packet header, either device state (MultiFDPacketDeviceState_t) or RAM | ||
8 | data (existing MultiFDPacket_t) is read. | ||
9 | |||
10 | The received device state data is provided to | ||
11 | the qemu_loadvm_load_state_buffer() function for processing in the | ||
12 | device's load_state_buffer handler. | ||
13 | |||
14 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
15 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
16 | Link: https://lore.kernel.org/qemu-devel/9b86f806c134e7815ecce0eee84f0e0e34aa0146.1741124640.git.maciej.szmigiero@oracle.com | ||
17 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
18 | --- | ||
19 | migration/multifd.h | 19 ++++++++- | ||
20 | migration/multifd.c | 101 +++++++++++++++++++++++++++++++++++++++----- | ||
21 | 2 files changed, 108 insertions(+), 12 deletions(-) | ||
22 | |||
23 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/migration/multifd.h | ||
26 | +++ b/migration/multifd.h | ||
27 | @@ -XXX,XX +XXX,XX @@ MultiFDRecvData *multifd_get_recv_data(void); | ||
28 | #define MULTIFD_FLAG_UADK (8 << 1) | ||
29 | #define MULTIFD_FLAG_QATZIP (16 << 1) | ||
30 | |||
31 | +/* | ||
32 | + * If set it means that this packet contains device state | ||
33 | + * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t). | ||
34 | + */ | ||
35 | +#define MULTIFD_FLAG_DEVICE_STATE (32 << 1) | ||
36 | + | ||
37 | /* This value needs to be a multiple of qemu_target_page_size() */ | ||
38 | #define MULTIFD_PACKET_SIZE (512 * 1024) | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
41 | uint64_t offset[]; | ||
42 | } __attribute__((packed)) MultiFDPacket_t; | ||
43 | |||
44 | +typedef struct { | ||
45 | + MultiFDPacketHdr_t hdr; | ||
46 | + | ||
47 | + char idstr[256]; | ||
48 | + uint32_t instance_id; | ||
49 | + | ||
50 | + /* size of the next packet that contains the actual data */ | ||
51 | + uint32_t next_packet_size; | ||
52 | +} __attribute__((packed)) MultiFDPacketDeviceState_t; | ||
53 | + | ||
54 | typedef struct { | ||
55 | /* number of used pages */ | ||
56 | uint32_t num; | ||
57 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
58 | |||
59 | /* thread local variables. No locking required */ | ||
60 | |||
61 | - /* pointer to the packet */ | ||
62 | + /* pointers to the possible packet types */ | ||
63 | MultiFDPacket_t *packet; | ||
64 | + MultiFDPacketDeviceState_t *packet_dev_state; | ||
65 | /* size of the next packet that contains pages */ | ||
66 | uint32_t next_packet_size; | ||
67 | /* packets received through this channel */ | ||
68 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/migration/multifd.c | ||
71 | +++ b/migration/multifd.c | ||
72 | @@ -XXX,XX +XXX,XX @@ | ||
73 | #include "file.h" | ||
74 | #include "migration.h" | ||
75 | #include "migration-stats.h" | ||
76 | +#include "savevm.h" | ||
77 | #include "socket.h" | ||
78 | #include "tls.h" | ||
79 | #include "qemu-file.h" | ||
80 | @@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p, | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | -static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
85 | +static int multifd_recv_unfill_packet_device_state(MultiFDRecvParams *p, | ||
86 | + Error **errp) | ||
87 | +{ | ||
88 | + MultiFDPacketDeviceState_t *packet = p->packet_dev_state; | ||
89 | + | ||
90 | + packet->instance_id = be32_to_cpu(packet->instance_id); | ||
91 | + p->next_packet_size = be32_to_cpu(packet->next_packet_size); | ||
92 | + | ||
93 | + return 0; | ||
94 | +} | ||
95 | + | ||
96 | +static int multifd_recv_unfill_packet_ram(MultiFDRecvParams *p, Error **errp) | ||
97 | { | ||
98 | const MultiFDPacket_t *packet = p->packet; | ||
99 | int ret = 0; | ||
100 | |||
101 | p->next_packet_size = be32_to_cpu(packet->next_packet_size); | ||
102 | p->packet_num = be64_to_cpu(packet->packet_num); | ||
103 | - p->packets_recved++; | ||
104 | |||
105 | /* Always unfill, old QEMUs (<9.0) send data along with SYNC */ | ||
106 | ret = multifd_ram_unfill_packet(p, errp); | ||
107 | @@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | +static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) | ||
112 | +{ | ||
113 | + p->packets_recved++; | ||
114 | + | ||
115 | + if (p->flags & MULTIFD_FLAG_DEVICE_STATE) { | ||
116 | + return multifd_recv_unfill_packet_device_state(p, errp); | ||
117 | + } | ||
118 | + | ||
119 | + return multifd_recv_unfill_packet_ram(p, errp); | ||
120 | +} | ||
121 | + | ||
122 | static bool multifd_send_should_exit(void) | ||
123 | { | ||
124 | return qatomic_read(&multifd_send_state->exiting); | ||
125 | @@ -XXX,XX +XXX,XX @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) | ||
126 | p->packet_len = 0; | ||
127 | g_free(p->packet); | ||
128 | p->packet = NULL; | ||
129 | + g_clear_pointer(&p->packet_dev_state, g_free); | ||
130 | g_free(p->normal); | ||
131 | p->normal = NULL; | ||
132 | g_free(p->zero); | ||
133 | @@ -XXX,XX +XXX,XX @@ void multifd_recv_sync_main(void) | ||
134 | trace_multifd_recv_sync_main(multifd_recv_state->packet_num); | ||
135 | } | ||
136 | |||
137 | +static int multifd_device_state_recv(MultiFDRecvParams *p, Error **errp) | ||
138 | +{ | ||
139 | + g_autofree char *dev_state_buf = NULL; | ||
140 | + int ret; | ||
141 | + | ||
142 | + dev_state_buf = g_malloc(p->next_packet_size); | ||
143 | + | ||
144 | + ret = qio_channel_read_all(p->c, dev_state_buf, p->next_packet_size, errp); | ||
145 | + if (ret != 0) { | ||
146 | + return ret; | ||
147 | + } | ||
148 | + | ||
149 | + if (p->packet_dev_state->idstr[sizeof(p->packet_dev_state->idstr) - 1] | ||
150 | + != 0) { | ||
151 | + error_setg(errp, "unterminated multifd device state idstr"); | ||
152 | + return -1; | ||
153 | + } | ||
154 | + | ||
155 | + if (!qemu_loadvm_load_state_buffer(p->packet_dev_state->idstr, | ||
156 | + p->packet_dev_state->instance_id, | ||
157 | + dev_state_buf, p->next_packet_size, | ||
158 | + errp)) { | ||
159 | + ret = -1; | ||
160 | + } | ||
161 | + | ||
162 | + return ret; | ||
163 | +} | ||
164 | + | ||
165 | static void *multifd_recv_thread(void *opaque) | ||
166 | { | ||
167 | MigrationState *s = migrate_get_current(); | ||
168 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
169 | while (true) { | ||
170 | MultiFDPacketHdr_t hdr; | ||
171 | uint32_t flags = 0; | ||
172 | + bool is_device_state = false; | ||
173 | bool has_data = false; | ||
174 | uint8_t *pkt_buf; | ||
175 | size_t pkt_len; | ||
176 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
177 | break; | ||
178 | } | ||
179 | |||
180 | - pkt_buf = (uint8_t *)p->packet + sizeof(hdr); | ||
181 | - pkt_len = p->packet_len - sizeof(hdr); | ||
182 | + is_device_state = p->flags & MULTIFD_FLAG_DEVICE_STATE; | ||
183 | + if (is_device_state) { | ||
184 | + pkt_buf = (uint8_t *)p->packet_dev_state + sizeof(hdr); | ||
185 | + pkt_len = sizeof(*p->packet_dev_state) - sizeof(hdr); | ||
186 | + } else { | ||
187 | + pkt_buf = (uint8_t *)p->packet + sizeof(hdr); | ||
188 | + pkt_len = p->packet_len - sizeof(hdr); | ||
189 | + } | ||
190 | |||
191 | ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len, | ||
192 | &local_err); | ||
193 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
194 | /* recv methods don't know how to handle the SYNC flag */ | ||
195 | p->flags &= ~MULTIFD_FLAG_SYNC; | ||
196 | |||
197 | - /* | ||
198 | - * Even if it's a SYNC packet, this needs to be set | ||
199 | - * because older QEMUs (<9.0) still send data along with | ||
200 | - * the SYNC packet. | ||
201 | - */ | ||
202 | - has_data = p->normal_num || p->zero_num; | ||
203 | + if (is_device_state) { | ||
204 | + has_data = p->next_packet_size > 0; | ||
205 | + } else { | ||
206 | + /* | ||
207 | + * Even if it's a SYNC packet, this needs to be set | ||
208 | + * because older QEMUs (<9.0) still send data along with | ||
209 | + * the SYNC packet. | ||
210 | + */ | ||
211 | + has_data = p->normal_num || p->zero_num; | ||
212 | + } | ||
213 | + | ||
214 | qemu_mutex_unlock(&p->mutex); | ||
215 | } else { | ||
216 | /* | ||
217 | @@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque) | ||
218 | } | ||
219 | |||
220 | if (has_data) { | ||
221 | - ret = multifd_recv_state->ops->recv(p, &local_err); | ||
222 | + if (is_device_state) { | ||
223 | + assert(use_packets); | ||
224 | + ret = multifd_device_state_recv(p, &local_err); | ||
225 | + } else { | ||
226 | + ret = multifd_recv_state->ops->recv(p, &local_err); | ||
227 | + } | ||
228 | if (ret != 0) { | ||
229 | break; | ||
230 | } | ||
231 | + } else if (is_device_state) { | ||
232 | + error_setg(&local_err, | ||
233 | + "multifd: received empty device state packet"); | ||
234 | + break; | ||
235 | } | ||
236 | |||
237 | if (use_packets) { | ||
238 | if (flags & MULTIFD_FLAG_SYNC) { | ||
239 | + if (is_device_state) { | ||
240 | + error_setg(&local_err, | ||
241 | + "multifd: received SYNC device state packet"); | ||
242 | + break; | ||
243 | + } | ||
244 | + | ||
245 | qemu_sem_post(&multifd_recv_state->sem_sync); | ||
246 | qemu_sem_wait(&p->sem_sync); | ||
247 | } | ||
248 | @@ -XXX,XX +XXX,XX @@ int multifd_recv_setup(Error **errp) | ||
249 | p->packet_len = sizeof(MultiFDPacket_t) | ||
250 | + sizeof(uint64_t) * page_count; | ||
251 | p->packet = g_malloc0(p->packet_len); | ||
252 | + p->packet_dev_state = g_malloc0(sizeof(*p->packet_dev_state)); | ||
253 | } | ||
254 | p->name = g_strdup_printf(MIGRATION_THREAD_DST_MULTIFD, i); | ||
255 | p->normal = g_new0(ram_addr_t, page_count); | ||
256 | -- | ||
257 | 2.48.1 | ||
258 | |||
259 | diff view generated by jsdifflib |
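
Putting the pieces above together, the byte sequence a receive thread sees for one device state transfer on a multifd channel is the following (recap of the structs above, integer fields big-endian on the wire):

    /*
     *   MultiFDPacketHdr_t              magic, version, flags
     *                                   (MULTIFD_FLAG_DEVICE_STATE set)
     *   rest of
     *   MultiFDPacketDeviceState_t      idstr[256] (must be NUL-terminated),
     *                                   instance_id, next_packet_size
     *   next_packet_size bytes          opaque device state, handed to
     *                                   qemu_loadvm_load_state_buffer()
     */
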
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | The multifd_send() function is currently not thread safe; make it thread safe | ||
4 | by holding a lock during its execution. | ||
5 | |||
6 | This way it will be possible to safely call it concurrently from multiple | ||
7 | threads. | ||
8 | |||
9 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
10 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
11 | Link: https://lore.kernel.org/qemu-devel/dd0f3bcc02ca96a7d523ca58ea69e495a33b453b.1741124640.git.maciej.szmigiero@oracle.com | ||
12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
13 | --- | ||
14 | migration/multifd.c | 8 ++++++++ | ||
15 | 1 file changed, 8 insertions(+) | ||
16 | |||
17 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/migration/multifd.c | ||
20 | +++ b/migration/multifd.c | ||
21 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
22 | |||
23 | struct { | ||
24 | MultiFDSendParams *params; | ||
25 | + | ||
26 | + /* multifd_send() body is not thread safe, needs serialization */ | ||
27 | + QemuMutex multifd_send_mutex; | ||
28 | + | ||
29 | /* | ||
30 | * Global number of generated multifd packets. | ||
31 | * | ||
32 | @@ -XXX,XX +XXX,XX @@ bool multifd_send(MultiFDSendData **send_data) | ||
33 | return false; | ||
34 | } | ||
35 | |||
36 | + QEMU_LOCK_GUARD(&multifd_send_state->multifd_send_mutex); | ||
37 | + | ||
38 | /* We wait here, until at least one channel is ready */ | ||
39 | qemu_sem_wait(&multifd_send_state->channels_ready); | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ static void multifd_send_cleanup_state(void) | ||
42 | socket_cleanup_outgoing_migration(); | ||
43 | qemu_sem_destroy(&multifd_send_state->channels_created); | ||
44 | qemu_sem_destroy(&multifd_send_state->channels_ready); | ||
45 | + qemu_mutex_destroy(&multifd_send_state->multifd_send_mutex); | ||
46 | g_free(multifd_send_state->params); | ||
47 | multifd_send_state->params = NULL; | ||
48 | g_free(multifd_send_state); | ||
49 | @@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void) | ||
50 | thread_count = migrate_multifd_channels(); | ||
51 | multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); | ||
52 | multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); | ||
53 | + qemu_mutex_init(&multifd_send_state->multifd_send_mutex); | ||
54 | qemu_sem_init(&multifd_send_state->channels_created, 0); | ||
55 | qemu_sem_init(&multifd_send_state->channels_ready, 0); | ||
56 | qatomic_set(&multifd_send_state->exiting, 0); | ||
57 | -- | ||
58 | 2.48.1 | ||
59 | |||
60 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | This way, if there are fields there that need explicit disposal (like, for | ||
4 | example, some attached buffers) they will be handled appropriately. | ||
5 | |||
6 | Add a related assert to multifd_set_payload_type() in order to make sure | ||
7 | that this function is only used to fill a previously empty MultiFDSendData | ||
8 | with some payload, not the other way around. | ||
9 | |||
10 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
11 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
12 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
13 | Link: https://lore.kernel.org/qemu-devel/6755205f2b95abbed251f87061feee1c0e410836.1741124640.git.maciej.szmigiero@oracle.com | ||
14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
15 | --- | ||
16 | migration/multifd.h | 5 +++++ | ||
17 | migration/multifd-nocomp.c | 3 +-- | ||
18 | migration/multifd.c | 31 ++++++++++++++++++++++++++++--- | ||
19 | 3 files changed, 34 insertions(+), 5 deletions(-) | ||
20 | |||
21 | diff --git a/migration/multifd.h b/migration/multifd.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/migration/multifd.h | ||
24 | +++ b/migration/multifd.h | ||
25 | @@ -XXX,XX +XXX,XX @@ static inline bool multifd_payload_empty(MultiFDSendData *data) | ||
26 | static inline void multifd_set_payload_type(MultiFDSendData *data, | ||
27 | MultiFDPayloadType type) | ||
28 | { | ||
29 | + assert(multifd_payload_empty(data)); | ||
30 | + assert(type != MULTIFD_PAYLOAD_NONE); | ||
31 | + | ||
32 | data->type = type; | ||
33 | } | ||
34 | |||
35 | @@ -XXX,XX +XXX,XX @@ static inline void multifd_send_prepare_header(MultiFDSendParams *p) | ||
36 | void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc); | ||
37 | bool multifd_send(MultiFDSendData **send_data); | ||
38 | MultiFDSendData *multifd_send_data_alloc(void); | ||
39 | +void multifd_send_data_clear(MultiFDSendData *data); | ||
40 | +void multifd_send_data_free(MultiFDSendData *data); | ||
41 | |||
42 | static inline uint32_t multifd_ram_page_size(void) | ||
43 | { | ||
44 | diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/migration/multifd-nocomp.c | ||
47 | +++ b/migration/multifd-nocomp.c | ||
48 | @@ -XXX,XX +XXX,XX @@ void multifd_ram_save_setup(void) | ||
49 | |||
50 | void multifd_ram_save_cleanup(void) | ||
51 | { | ||
52 | - g_free(multifd_ram_send); | ||
53 | - multifd_ram_send = NULL; | ||
54 | + g_clear_pointer(&multifd_ram_send, multifd_send_data_free); | ||
55 | } | ||
56 | |||
57 | static void multifd_set_file_bitmap(MultiFDSendParams *p) | ||
58 | diff --git a/migration/multifd.c b/migration/multifd.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/migration/multifd.c | ||
61 | +++ b/migration/multifd.c | ||
62 | @@ -XXX,XX +XXX,XX @@ MultiFDSendData *multifd_send_data_alloc(void) | ||
63 | return g_malloc0(size_minus_payload + max_payload_size); | ||
64 | } | ||
65 | |||
66 | +void multifd_send_data_clear(MultiFDSendData *data) | ||
67 | +{ | ||
68 | + if (multifd_payload_empty(data)) { | ||
69 | + return; | ||
70 | + } | ||
71 | + | ||
72 | + switch (data->type) { | ||
73 | + default: | ||
74 | + /* Nothing to do */ | ||
75 | + break; | ||
76 | + } | ||
77 | + | ||
78 | + data->type = MULTIFD_PAYLOAD_NONE; | ||
79 | +} | ||
80 | + | ||
81 | +void multifd_send_data_free(MultiFDSendData *data) | ||
82 | +{ | ||
83 | + if (!data) { | ||
84 | + return; | ||
85 | + } | ||
86 | + | ||
87 | + multifd_send_data_clear(data); | ||
88 | + | ||
89 | + g_free(data); | ||
90 | +} | ||
91 | + | ||
92 | static bool multifd_use_packets(void) | ||
93 | { | ||
94 | return !migrate_mapped_ram(); | ||
95 | @@ -XXX,XX +XXX,XX @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) | ||
96 | qemu_sem_destroy(&p->sem_sync); | ||
97 | g_free(p->name); | ||
98 | p->name = NULL; | ||
99 | - g_free(p->data); | ||
100 | - p->data = NULL; | ||
101 | + g_clear_pointer(&p->data, multifd_send_data_free); | ||
102 | p->packet_len = 0; | ||
103 | g_free(p->packet); | ||
104 | p->packet = NULL; | ||
105 | @@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque) | ||
106 | (uint64_t)p->next_packet_size + p->packet_len); | ||
107 | |||
108 | p->next_packet_size = 0; | ||
109 | - multifd_set_payload_type(p->data, MULTIFD_PAYLOAD_NONE); | ||
110 | + multifd_send_data_clear(p->data); | ||
111 | |||
112 | /* | ||
113 | * Making sure p->data is published before saying "we're | ||
114 | -- | ||
115 | 2.48.1 | ||
116 | |||
117 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | Since device state transfer via multifd channels requires multifd | ||
4 | channels with packets and is currently not compatible with multifd | ||
5 | compression, add an appropriate query function so a device can learn | ||
6 | whether it can actually make use of it. | ||
7 | |||
8 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
9 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
10 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
11 | Link: https://lore.kernel.org/qemu-devel/1ff0d98b85f470e5a33687406e877583b8fab74e.1741124640.git.maciej.szmigiero@oracle.com | ||
12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
13 | --- | ||
14 | include/migration/misc.h | 1 + | ||
15 | migration/multifd-device-state.c | 7 +++++++ | ||
16 | 2 files changed, 8 insertions(+) | ||
17 | |||
18 | diff --git a/include/migration/misc.h b/include/migration/misc.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/migration/misc.h | ||
21 | +++ b/include/migration/misc.h | ||
22 | @@ -XXX,XX +XXX,XX @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel, | ||
23 | /* migration/multifd-device-state.c */ | ||
24 | bool multifd_queue_device_state(char *idstr, uint32_t instance_id, | ||
25 | char *data, size_t len); | ||
26 | +bool multifd_device_state_supported(void); | ||
27 | |||
28 | #endif | ||
29 | diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/migration/multifd-device-state.c | ||
32 | +++ b/migration/multifd-device-state.c | ||
33 | @@ -XXX,XX +XXX,XX @@ | ||
34 | #include "qemu/lockable.h" | ||
35 | #include "migration/misc.h" | ||
36 | #include "multifd.h" | ||
37 | +#include "options.h" | ||
38 | |||
39 | static struct { | ||
40 | QemuMutex queue_job_mutex; | ||
41 | @@ -XXX,XX +XXX,XX @@ bool multifd_queue_device_state(char *idstr, uint32_t instance_id, | ||
42 | |||
43 | return true; | ||
44 | } | ||
45 | + | ||
46 | +bool multifd_device_state_supported(void) | ||
47 | +{ | ||
48 | + return migrate_multifd() && !migrate_mapped_ram() && | ||
49 | + migrate_multifd_compression() == MULTIFD_COMPRESSION_NONE; | ||
50 | +} | ||
51 | -- | ||
52 | 2.48.1 | ||
53 | |||
54 | diff view generated by jsdifflib |
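
On the send side the query is meant to gate use of multifd_queue_device_state(); a hypothetical device helper could look like the sketch below (the mydev_* names and the dev fields are assumptions, the two multifd_* calls are the interfaces from this series):

    static bool mydev_queue_state_chunk(MyDevState *dev, char *buf, size_t len,
                                        Error **errp)
    {
        if (!multifd_device_state_supported()) {
            error_setg(errp, "multifd device state transfer not available");
            return false;
        }

        if (!multifd_queue_device_state(dev->idstr, dev->instance_id, buf, len)) {
            error_setg(errp, "%s: queueing device state buffer failed",
                       dev->idstr);
            return false;
        }
        return true;
    }
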
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | This SaveVMHandler helps a device provide its own asynchronous transmission | ||
4 | of the remaining data at the end of a precopy phase via multifd channels, | ||
5 | in parallel with the transfer done by save_live_complete_precopy handlers. | ||
6 | |||
7 | These threads are launched only when multifd device state transfer is | ||
8 | supported. | ||
9 | |||
10 | Management of these threads is done in the multifd migration code, | ||
11 | wrapping them in the generic thread pool. | ||
12 | |||
13 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
14 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
15 | Link: https://lore.kernel.org/qemu-devel/eac74a4ca7edd8968bbf72aa07b9041c76364a16.1741124640.git.maciej.szmigiero@oracle.com | ||
16 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
17 | --- | ||
18 | include/migration/misc.h | 17 ++++++ | ||
19 | include/migration/register.h | 19 +++++++ | ||
20 | include/qemu/typedefs.h | 3 ++ | ||
21 | migration/multifd-device-state.c | 92 ++++++++++++++++++++++++++++++++ | ||
22 | migration/savevm.c | 40 +++++++++++++- | ||
23 | 5 files changed, 170 insertions(+), 1 deletion(-) | ||
24 | |||
25 | diff --git a/include/migration/misc.h b/include/migration/misc.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/include/migration/misc.h | ||
28 | +++ b/include/migration/misc.h | ||
29 | @@ -XXX,XX +XXX,XX @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel, | ||
30 | Error **errp); | ||
31 | |||
32 | /* migration/multifd-device-state.c */ | ||
33 | +typedef struct SaveLiveCompletePrecopyThreadData { | ||
34 | + SaveLiveCompletePrecopyThreadHandler hdlr; | ||
35 | + char *idstr; | ||
36 | + uint32_t instance_id; | ||
37 | + void *handler_opaque; | ||
38 | +} SaveLiveCompletePrecopyThreadData; | ||
39 | + | ||
40 | bool multifd_queue_device_state(char *idstr, uint32_t instance_id, | ||
41 | char *data, size_t len); | ||
42 | bool multifd_device_state_supported(void); | ||
43 | |||
44 | +void | ||
45 | +multifd_spawn_device_state_save_thread(SaveLiveCompletePrecopyThreadHandler hdlr, | ||
46 | + char *idstr, uint32_t instance_id, | ||
47 | + void *opaque); | ||
48 | + | ||
49 | +bool multifd_device_state_save_thread_should_exit(void); | ||
50 | + | ||
51 | +void multifd_abort_device_state_save_threads(void); | ||
52 | +bool multifd_join_device_state_save_threads(void); | ||
53 | + | ||
54 | #endif | ||
55 | diff --git a/include/migration/register.h b/include/migration/register.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/include/migration/register.h | ||
58 | +++ b/include/migration/register.h | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers { | ||
60 | */ | ||
61 | int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); | ||
62 | |||
63 | + /** | ||
64 | + * @save_live_complete_precopy_thread (invoked in a separate thread) | ||
65 | + * | ||
66 | + * Called at the end of a precopy phase from a separate worker thread | ||
67 | + * in configurations where multifd device state transfer is supported | ||
68 | + * in order to perform asynchronous transmission of the remaining data in | ||
69 | + * parallel with @save_live_complete_precopy handlers. | ||
70 | + * When postcopy is enabled, devices that support postcopy will skip this | ||
71 | + * step. | ||
72 | + * | ||
73 | + * @d: a #SaveLiveCompletePrecopyThreadData containing parameters that the | ||
74 | + * handler may need, including this device section idstr and instance_id, | ||
75 | + * and opaque data pointer passed to register_savevm_live(). | ||
76 | + * @errp: pointer to Error*, to store an error if it happens. | ||
77 | + * | ||
78 | + * Returns true to indicate success and false for errors. | ||
79 | + */ | ||
80 | + SaveLiveCompletePrecopyThreadHandler save_live_complete_precopy_thread; | ||
81 | + | ||
82 | /* This runs both outside and inside the BQL. */ | ||
83 | |||
84 | /** | ||
85 | diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/include/qemu/typedefs.h | ||
88 | +++ b/include/qemu/typedefs.h | ||
89 | @@ -XXX,XX +XXX,XX @@ typedef struct QString QString; | ||
90 | typedef struct RAMBlock RAMBlock; | ||
91 | typedef struct Range Range; | ||
92 | typedef struct ReservedRegion ReservedRegion; | ||
93 | +typedef struct SaveLiveCompletePrecopyThreadData SaveLiveCompletePrecopyThreadData; | ||
94 | typedef struct SHPCDevice SHPCDevice; | ||
95 | typedef struct SSIBus SSIBus; | ||
96 | typedef struct TCGCPUOps TCGCPUOps; | ||
97 | @@ -XXX,XX +XXX,XX @@ typedef struct IRQState *qemu_irq; | ||
98 | typedef void (*qemu_irq_handler)(void *opaque, int n, int level); | ||
99 | typedef bool (*MigrationLoadThread)(void *opaque, bool *should_quit, | ||
100 | Error **errp); | ||
101 | +typedef bool (*SaveLiveCompletePrecopyThreadHandler)(SaveLiveCompletePrecopyThreadData *d, | ||
102 | + Error **errp); | ||
103 | |||
104 | #endif /* QEMU_TYPEDEFS_H */ | ||
105 | diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/migration/multifd-device-state.c | ||
108 | +++ b/migration/multifd-device-state.c | ||
109 | @@ -XXX,XX +XXX,XX @@ | ||
110 | */ | ||
111 | |||
112 | #include "qemu/osdep.h" | ||
113 | +#include "qapi/error.h" | ||
114 | #include "qemu/lockable.h" | ||
115 | +#include "block/thread-pool.h" | ||
116 | +#include "migration.h" | ||
117 | #include "migration/misc.h" | ||
118 | #include "multifd.h" | ||
119 | #include "options.h" | ||
120 | @@ -XXX,XX +XXX,XX @@ static struct { | ||
121 | QemuMutex queue_job_mutex; | ||
122 | |||
123 | MultiFDSendData *send_data; | ||
124 | + | ||
125 | + ThreadPool *threads; | ||
126 | + bool threads_abort; | ||
127 | } *multifd_send_device_state; | ||
128 | |||
129 | void multifd_device_state_send_setup(void) | ||
130 | @@ -XXX,XX +XXX,XX @@ void multifd_device_state_send_setup(void) | ||
131 | qemu_mutex_init(&multifd_send_device_state->queue_job_mutex); | ||
132 | |||
133 | multifd_send_device_state->send_data = multifd_send_data_alloc(); | ||
134 | + | ||
135 | + multifd_send_device_state->threads = thread_pool_new(); | ||
136 | + multifd_send_device_state->threads_abort = false; | ||
137 | } | ||
138 | |||
139 | void multifd_device_state_send_cleanup(void) | ||
140 | { | ||
141 | + g_clear_pointer(&multifd_send_device_state->threads, thread_pool_free); | ||
142 | g_clear_pointer(&multifd_send_device_state->send_data, | ||
143 | multifd_send_data_free); | ||
144 | |||
145 | @@ -XXX,XX +XXX,XX @@ bool multifd_device_state_supported(void) | ||
146 | return migrate_multifd() && !migrate_mapped_ram() && | ||
147 | migrate_multifd_compression() == MULTIFD_COMPRESSION_NONE; | ||
148 | } | ||
149 | + | ||
150 | +static void multifd_device_state_save_thread_data_free(void *opaque) | ||
151 | +{ | ||
152 | + SaveLiveCompletePrecopyThreadData *data = opaque; | ||
153 | + | ||
154 | + g_clear_pointer(&data->idstr, g_free); | ||
155 | + g_free(data); | ||
156 | +} | ||
157 | + | ||
158 | +static int multifd_device_state_save_thread(void *opaque) | ||
159 | +{ | ||
160 | + SaveLiveCompletePrecopyThreadData *data = opaque; | ||
161 | + g_autoptr(Error) local_err = NULL; | ||
162 | + | ||
163 | + if (!data->hdlr(data, &local_err)) { | ||
164 | + MigrationState *s = migrate_get_current(); | ||
165 | + | ||
166 | + /* | ||
167 | + * Can't call abort_device_state_save_threads() here since new | ||
168 | + * save threads could still be in process of being launched | ||
169 | + * (if, for example, the very first save thread launched exited | ||
170 | + * with an error very quickly). | ||
171 | + */ | ||
172 | + | ||
173 | + assert(local_err); | ||
174 | + | ||
175 | + /* | ||
176 | + * In case of multiple save threads failing, which thread's error | ||
177 | + * we end up setting is purely arbitrary. | ||
178 | + */ | ||
179 | + migrate_set_error(s, local_err); | ||
180 | + } | ||
181 | + | ||
182 | + return 0; | ||
183 | +} | ||
184 | + | ||
185 | +bool multifd_device_state_save_thread_should_exit(void) | ||
186 | +{ | ||
187 | + return qatomic_read(&multifd_send_device_state->threads_abort); | ||
188 | +} | ||
189 | + | ||
190 | +void | ||
191 | +multifd_spawn_device_state_save_thread(SaveLiveCompletePrecopyThreadHandler hdlr, | ||
192 | + char *idstr, uint32_t instance_id, | ||
193 | + void *opaque) | ||
194 | +{ | ||
195 | + SaveLiveCompletePrecopyThreadData *data; | ||
196 | + | ||
197 | + assert(multifd_device_state_supported()); | ||
198 | + assert(multifd_send_device_state); | ||
199 | + | ||
200 | + assert(!qatomic_read(&multifd_send_device_state->threads_abort)); | ||
201 | + | ||
202 | + data = g_new(SaveLiveCompletePrecopyThreadData, 1); | ||
203 | + data->hdlr = hdlr; | ||
204 | + data->idstr = g_strdup(idstr); | ||
205 | + data->instance_id = instance_id; | ||
206 | + data->handler_opaque = opaque; | ||
207 | + | ||
208 | + thread_pool_submit_immediate(multifd_send_device_state->threads, | ||
209 | + multifd_device_state_save_thread, | ||
210 | + data, | ||
211 | + multifd_device_state_save_thread_data_free); | ||
212 | +} | ||
213 | + | ||
214 | +void multifd_abort_device_state_save_threads(void) | ||
215 | +{ | ||
216 | + assert(multifd_device_state_supported()); | ||
217 | + | ||
218 | + qatomic_set(&multifd_send_device_state->threads_abort, true); | ||
219 | +} | ||
220 | + | ||
221 | +bool multifd_join_device_state_save_threads(void) | ||
222 | +{ | ||
223 | + MigrationState *s = migrate_get_current(); | ||
224 | + | ||
225 | + assert(multifd_device_state_supported()); | ||
226 | + | ||
227 | + thread_pool_wait(multifd_send_device_state->threads); | ||
228 | + | ||
229 | + return !migrate_has_error(s); | ||
230 | +} | ||
231 | diff --git a/migration/savevm.c b/migration/savevm.c | ||
232 | index XXXXXXX..XXXXXXX 100644 | ||
233 | --- a/migration/savevm.c | ||
234 | +++ b/migration/savevm.c | ||
235 | @@ -XXX,XX +XXX,XX @@ | ||
236 | #include "migration/register.h" | ||
237 | #include "migration/global_state.h" | ||
238 | #include "migration/channel-block.h" | ||
239 | +#include "multifd.h" | ||
240 | #include "ram.h" | ||
241 | #include "qemu-file.h" | ||
242 | #include "savevm.h" | ||
243 | @@ -XXX,XX +XXX,XX @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) | ||
244 | int64_t start_ts_each, end_ts_each; | ||
245 | SaveStateEntry *se; | ||
246 | int ret; | ||
247 | + bool multifd_device_state = multifd_device_state_supported(); | ||
248 | + | ||
249 | + if (multifd_device_state) { | ||
250 | + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { | ||
251 | + SaveLiveCompletePrecopyThreadHandler hdlr; | ||
252 | + | ||
253 | + if (!se->ops || (in_postcopy && se->ops->has_postcopy && | ||
254 | + se->ops->has_postcopy(se->opaque)) || | ||
255 | + !se->ops->save_live_complete_precopy_thread) { | ||
256 | + continue; | ||
257 | + } | ||
258 | + | ||
259 | + hdlr = se->ops->save_live_complete_precopy_thread; | ||
260 | + multifd_spawn_device_state_save_thread(hdlr, | ||
261 | + se->idstr, se->instance_id, | ||
262 | + se->opaque); | ||
263 | + } | ||
264 | + } | ||
265 | |||
266 | QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { | ||
267 | if (!se->ops || | ||
268 | @@ -XXX,XX +XXX,XX @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) | ||
269 | save_section_footer(f, se); | ||
270 | if (ret < 0) { | ||
271 | qemu_file_set_error(f, ret); | ||
272 | - return -1; | ||
273 | + goto ret_fail_abort_threads; | ||
274 | } | ||
275 | end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME); | ||
276 | trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id, | ||
277 | end_ts_each - start_ts_each); | ||
278 | } | ||
279 | |||
280 | + if (multifd_device_state) { | ||
281 | + if (migrate_has_error(migrate_get_current())) { | ||
282 | + multifd_abort_device_state_save_threads(); | ||
283 | + } | ||
284 | + | ||
285 | + if (!multifd_join_device_state_save_threads()) { | ||
286 | + qemu_file_set_error(f, -EINVAL); | ||
287 | + return -1; | ||
288 | + } | ||
289 | + } | ||
290 | + | ||
291 | trace_vmstate_downtime_checkpoint("src-iterable-saved"); | ||
292 | |||
293 | return 0; | ||
294 | + | ||
295 | +ret_fail_abort_threads: | ||
296 | + if (multifd_device_state) { | ||
297 | + multifd_abort_device_state_save_threads(); | ||
298 | + multifd_join_device_state_save_threads(); | ||
299 | + } | ||
300 | + | ||
301 | + return -1; | ||
302 | } | ||
303 | |||
304 | int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, | ||
305 | -- | ||
306 | 2.48.1 | ||
307 | |||
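For illustration, a minimal sketch of how a device's save_live_complete_precopy_thread handler could drive the API added above. The should-exit check, the SaveLiveCompletePrecopyThreadData fields and the error-propagation behaviour come from the patch; the header choice, device_read_chunk() and the queueing step are placeholders, not real QEMU symbols.

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "migration/misc.h"   /* assumed location of the thread helper declarations */

    /* Hypothetical per-chunk reader; returns bytes read, 0 on EOF, -1 on error. */
    static ssize_t device_read_chunk(void *dev, void **buf, size_t *len);

    static bool my_device_save_thread(SaveLiveCompletePrecopyThreadData *d,
                                      Error **errp)
    {
        void *dev = d->handler_opaque;

        for (;;) {
            void *buf;
            size_t len;
            ssize_t ret;

            /* Bail out promptly once an abort has been requested. */
            if (multifd_device_state_save_thread_should_exit()) {
                return true;
            }

            ret = device_read_chunk(dev, &buf, &len);
            if (ret < 0) {
                /* The pool thread forwards this error via migrate_set_error(). */
                error_setg(errp, "%s: reading device state failed", d->idstr);
                return false;
            }
            if (ret == 0) {
                return true;    /* all device state submitted */
            }

            /* ... queue (buf, len) on a multifd channel for d->idstr here ... */
        }
    }

qemu_savevm_state_complete_precopy_iterable() spawns one such handler per device through multifd_spawn_device_state_save_thread() and later blocks in multifd_join_device_state_save_threads(), aborting the remaining threads first if any error was recorded.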
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | And rename the existing load_device_config_state trace event to | ||
4 | load_device_config_state_end for consistency, since it is triggered at the | ||
5 | end of loading the VFIO device config state. | ||
6 | |||
7 | This way both the start and end points of a particular device config | ||
8 | loading operation (a long, BQL-serialized operation) are known. | ||
9 | |||
10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
11 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
12 | Link: https://lore.kernel.org/qemu-devel/1b6c5a2097e64c272eb7e53f9e4cca4b79581b38.1741124640.git.maciej.szmigiero@oracle.com | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
14 | --- | ||
15 | hw/vfio/migration.c | 4 +++- | ||
16 | hw/vfio/trace-events | 3 ++- | ||
17 | 2 files changed, 5 insertions(+), 2 deletions(-) | ||
18 | |||
19 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/hw/vfio/migration.c | ||
22 | +++ b/hw/vfio/migration.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) | ||
24 | VFIODevice *vbasedev = opaque; | ||
25 | uint64_t data; | ||
26 | |||
27 | + trace_vfio_load_device_config_state_start(vbasedev->name); | ||
28 | + | ||
29 | if (vbasedev->ops && vbasedev->ops->vfio_load_config) { | ||
30 | int ret; | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) | ||
33 | return -EINVAL; | ||
34 | } | ||
35 | |||
36 | - trace_vfio_load_device_config_state(vbasedev->name); | ||
37 | + trace_vfio_load_device_config_state_end(vbasedev->name); | ||
38 | return qemu_file_get_error(f); | ||
39 | } | ||
40 | |||
41 | diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/hw/vfio/trace-events | ||
44 | +++ b/hw/vfio/trace-events | ||
45 | @@ -XXX,XX +XXX,XX @@ vfio_display_edid_write_error(void) "" | ||
46 | |||
47 | # migration.c | ||
48 | vfio_load_cleanup(const char *name) " (%s)" | ||
49 | -vfio_load_device_config_state(const char *name) " (%s)" | ||
50 | +vfio_load_device_config_state_start(const char *name) " (%s)" | ||
51 | +vfio_load_device_config_state_end(const char *name) " (%s)" | ||
52 | vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 | ||
53 | vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" | ||
54 | vfio_migration_realize(const char *name) " (%s)" | ||
55 | -- | ||
56 | 2.48.1 | ||
57 | |||
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | So it can be safely accessed from multiple threads. | ||
4 | |||
5 | The variable's type needs to be changed to unsigned long, since | ||
6 | 32-bit host platforms lack the necessary atomic addition operations | ||
7 | on 64-bit variables. | ||
8 | |||
9 | Using 32-bit counters on 32-bit host platforms should not be a problem | ||
10 | in practice since such hosts can't realistically address more memory anyway. | ||
11 | |||
12 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
13 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
14 | Link: https://lore.kernel.org/qemu-devel/dc391771d2d9ad0f311994f0cb9e666da564aeaf.1741124640.git.maciej.szmigiero@oracle.com | ||
15 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
16 | --- | ||
17 | hw/vfio/migration.c | 8 ++++---- | ||
18 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
19 | |||
20 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/hw/vfio/migration.c | ||
23 | +++ b/hw/vfio/migration.c | ||
24 | @@ -XXX,XX +XXX,XX @@ | ||
25 | */ | ||
26 | #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB) | ||
27 | |||
28 | -static int64_t bytes_transferred; | ||
29 | +static unsigned long bytes_transferred; | ||
30 | |||
31 | static const char *mig_state_to_str(enum vfio_device_mig_state state) | ||
32 | { | ||
33 | @@ -XXX,XX +XXX,XX @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) | ||
34 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); | ||
35 | qemu_put_be64(f, data_size); | ||
36 | qemu_put_buffer(f, migration->data_buffer, data_size); | ||
37 | - bytes_transferred += data_size; | ||
38 | + qatomic_add(&bytes_transferred, data_size); | ||
39 | |||
40 | trace_vfio_save_block(migration->vbasedev->name, data_size); | ||
41 | |||
42 | @@ -XXX,XX +XXX,XX @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) | ||
43 | |||
44 | int64_t vfio_mig_bytes_transferred(void) | ||
45 | { | ||
46 | - return bytes_transferred; | ||
47 | + return MIN(qatomic_read(&bytes_transferred), INT64_MAX); | ||
48 | } | ||
49 | |||
50 | void vfio_reset_bytes_transferred(void) | ||
51 | { | ||
52 | - bytes_transferred = 0; | ||
53 | + qatomic_set(&bytes_transferred, 0); | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | -- | ||
58 | 2.48.1 | ||
59 | |||
Deleted patch | |||
---|---|---|---|
1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> | ||
2 | 1 | ||
3 | This way bytes_transferred can also be incremented in translation units | ||
4 | other than migration.c. | ||
5 | |||
6 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> | ||
7 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
8 | Link: https://lore.kernel.org/qemu-devel/d1fbc27ac2417b49892f354ba20f6c6b3f7209f8.1741124640.git.maciej.szmigiero@oracle.com | ||
9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
10 | --- | ||
11 | include/hw/vfio/vfio-common.h | 1 + | ||
12 | hw/vfio/migration.c | 7 ++++++- | ||
13 | 2 files changed, 7 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/hw/vfio/vfio-common.h | ||
18 | +++ b/include/hw/vfio/vfio-common.h | ||
19 | @@ -XXX,XX +XXX,XX @@ void vfio_unblock_multiple_devices_migration(void); | ||
20 | bool vfio_viommu_preset(VFIODevice *vbasedev); | ||
21 | int64_t vfio_mig_bytes_transferred(void); | ||
22 | void vfio_reset_bytes_transferred(void); | ||
23 | +void vfio_mig_add_bytes_transferred(unsigned long val); | ||
24 | bool vfio_device_state_is_running(VFIODevice *vbasedev); | ||
25 | bool vfio_device_state_is_precopy(VFIODevice *vbasedev); | ||
26 | |||
27 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/hw/vfio/migration.c | ||
30 | +++ b/hw/vfio/migration.c | ||
31 | @@ -XXX,XX +XXX,XX @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) | ||
32 | qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); | ||
33 | qemu_put_be64(f, data_size); | ||
34 | qemu_put_buffer(f, migration->data_buffer, data_size); | ||
35 | - qatomic_add(&bytes_transferred, data_size); | ||
36 | + vfio_mig_add_bytes_transferred(data_size); | ||
37 | |||
38 | trace_vfio_save_block(migration->vbasedev->name, data_size); | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ void vfio_reset_bytes_transferred(void) | ||
41 | qatomic_set(&bytes_transferred, 0); | ||
42 | } | ||
43 | |||
44 | +void vfio_mig_add_bytes_transferred(unsigned long val) | ||
45 | +{ | ||
46 | + qatomic_add(&bytes_transferred, val); | ||
47 | +} | ||
48 | + | ||
49 | /* | ||
50 | * Return true when either migration initialized or blocker registered. | ||
51 | * Currently only return false when adding blocker fails which will | ||
52 | -- | ||
53 | 2.48.1 | ||
54 | |||
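For illustration, a minimal sketch of what callers in another translation unit might look like once the helper is exported. The two accessors and the header come from the patch; the surrounding function names are made up.

    #include "qemu/osdep.h"
    #include "hw/vfio/vfio-common.h"

    /* Hypothetical: called from a multifd save thread after queuing a packet. */
    static void vfio_multifd_account_packet(size_t packet_len)
    {
        /* Atomic add, so any thread may update the counter concurrently. */
        vfio_mig_add_bytes_transferred(packet_len);
    }

    /* Hypothetical: the migration stats path may read the counter at the same time. */
    static int64_t vfio_stats_bytes(void)
    {
        /* Atomic read; the accessor clamps the value to INT64_MAX. */
        return vfio_mig_bytes_transferred();
    }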