The following changes since commit e8a01102936286e012ed0f00bd7f3b7474d415c9:

  Merge tag 'ui-pull-request' of https://gitlab.com/marcandre.lureau/qemu into staging (2025-03-05 21:58:23 +0800)

are available in the Git repository at:

  https://github.com/legoater/qemu/ tags/pull-vfio-20250306

for you to fetch changes up to 59a67e70950bcc2002d3a8d22a17743e0f70da96:

  hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property (2025-03-06 06:47:34 +0100)

----------------------------------------------------------------
vfio queue:

 * Added property documentation
 * Added minor fixes
 * Implemented basic PCI PM capability backing
 * Promoted new IGD maintainer
 * Deprecated vfio-platform
 * Extended VFIO migration with multifd support

----------------------------------------------------------------
Alex Williamson (5):
      hw/pci: Basic support for PCI power management
      pci: Use PCI PM capability initializer
      vfio/pci: Delete local pm_cap
      pcie, virtio: Remove redundant pm_cap
      hw/vfio/pci: Re-order pre-reset

Cédric Le Goater (2):
      vfio: Add property documentation
      vfio/ccw: Replace warn_once_pfch() with warn_report_once()

Eric Auger (1):
      vfio-platform: Deprecate all forms of vfio-platform devices

Maciej S. Szmigiero (32):
      migration: Clarify that {load, save}_cleanup handlers can run without setup
      thread-pool: Remove thread_pool_submit() function
      thread-pool: Rename AIO pool functions to *_aio() and data types to *Aio
      thread-pool: Implement generic (non-AIO) pool support
      migration: Add MIG_CMD_SWITCHOVER_START and its load handler
      migration: Add qemu_loadvm_load_state_buffer() and its handler
      migration: Always take BQL for migration_incoming_state_destroy()
      error: define g_autoptr() cleanup function for the Error type
      migration: Add thread pool of optional load threads
      migration/multifd: Split packet into header and RAM data
      migration/multifd: Device state transfer support - receive side
      migration/multifd: Make multifd_send() thread safe
      migration/multifd: Add an explicit MultiFDSendData destructor
      migration/multifd: Device state transfer support - send side
      migration/multifd: Add multifd_device_state_supported()
      migration: Add save_live_complete_precopy_thread handler
      vfio/migration: Add load_device_config_state_start trace event
      vfio/migration: Convert bytes_transferred counter to atomic
      vfio/migration: Add vfio_add_bytes_transferred()
      vfio/migration: Move migration channel flags to vfio-common.h header file
      vfio/migration: Multifd device state transfer support - basic types
      vfio/migration: Multifd device state transfer - add support checking function
      vfio/migration: Multifd setup/cleanup functions and associated VFIOMultifd
      vfio/migration: Setup and cleanup multifd transfer in these general methods
      vfio/migration: Multifd device state transfer support - received buffers queuing
      vfio/migration: Multifd device state transfer support - load thread
      migration/qemu-file: Define g_autoptr() cleanup function for QEMUFile
      vfio/migration: Multifd device state transfer support - config loading support
      vfio/migration: Multifd device state transfer support - send side
      vfio/migration: Add x-migration-multifd-transfer VFIO property
      vfio/migration: Make x-migration-multifd-transfer VFIO property mutable
      hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property

Peter Xu (1):
      migration/multifd: Make MultiFDSendData a struct

Tomita Moeko (1):
      MAINTAINERS: Add myself as vfio-igd maintainer

 MAINTAINERS | 9 +-
 docs/about/deprecated.rst | 25 ++
 docs/devel/migration/vfio.rst | 45 ++-
 hw/vfio/migration-multifd.h | 34 ++
 hw/vfio/pci.h | 1 -
 include/block/aio.h | 8 +-
 include/block/thread-pool.h | 62 +++-
 include/hw/pci/pci.h | 3 +
 include/hw/pci/pci_device.h | 3 +
 include/hw/pci/pcie.h | 2 -
 include/hw/vfio/vfio-common.h | 31 ++
 include/migration/client-options.h | 4 +
 include/migration/misc.h | 25 ++
 include/migration/register.h | 52 ++-
 include/qapi/error.h | 2 +
 include/qemu/typedefs.h | 5 +
 migration/migration.h | 7 +
 migration/multifd.h | 74 +++-
 migration/qemu-file.h | 2 +
 migration/savevm.h | 6 +-
 hw/core/machine.c | 2 +
 hw/net/e1000e.c | 3 +-
 hw/net/eepro100.c | 4 +-
 hw/net/igb.c | 3 +-
 hw/nvme/ctrl.c | 3 +-
 hw/pci-bridge/pcie_pci_bridge.c | 3 +-
 hw/pci/pci.c | 93 ++++-
 hw/vfio/amd-xgbe.c | 2 +
 hw/vfio/ap.c | 9 +
 hw/vfio/calxeda-xgmac.c | 2 +
 hw/vfio/ccw.c | 27 +-
 hw/vfio/migration-multifd.c | 679 +++++++++++++++++++++++++++++++++++++
 hw/vfio/migration.c | 106 ++++--
 hw/vfio/pci.c | 180 +++++++++-
 hw/vfio/platform.c | 25 ++
 hw/virtio/virtio-pci.c | 11 +-
 migration/colo.c | 3 +
 migration/migration-hmp-cmds.c | 2 +
 migration/migration.c | 17 +-
 migration/multifd-device-state.c | 212 ++++++++++++
 migration/multifd-nocomp.c | 30 +-
 migration/multifd.c | 248 +++++++++---
 migration/options.c | 9 +
 migration/savevm.c | 201 ++++++++++-
 tests/unit/test-thread-pool.c | 6 +-
 util/async.c | 6 +-
 util/thread-pool.c | 184 ++++++++--
 hw/pci/trace-events | 2 +
 hw/vfio/meson.build | 1 +
 hw/vfio/trace-events | 13 +-
 migration/meson.build | 1 +
 migration/trace-events | 1 +
 scripts/analyze-migration.py | 11 +
 util/trace-events | 6 +-
 54 files changed, 2296 insertions(+), 209 deletions(-)
 create mode 100644 hw/vfio/migration-multifd.h
 create mode 100644 hw/vfio/migration-multifd.c
 create mode 100644 migration/multifd-device-state.c


The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32:

  Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800)

are available in the Git repository at:

  https://github.com/legoater/qemu/ tags/pull-vfio-20250311

for you to fetch changes up to 4d9607481560e6c8e1508a0aafe94f86a0503c8c:

  vfio/pci: Drop debug commentary from x-device-dirty-page-tracking (2025-03-11 19:04:58 +0100)

----------------------------------------------------------------
vfio queue:

 * Fixed endianness of VFIO device state packets
 * Improved IGD passthrough support with legacy mode
 * Improved build
 * Added support for old AMD GPUs (x550)
 * Updated property documentation

----------------------------------------------------------------
Joao Martins (1):
      vfio/pci: Drop debug commentary from x-device-dirty-page-tracking

Maciej S. Szmigiero (1):
      vfio/migration: Use BE byte order for device state wire packets

Philippe Mathieu-Daudé (8):
      system: Declare qemu_[min/max]rampagesize() in 'system/hostmem.h'
      hw/vfio/spapr: Do not include <linux/kvm.h>
      hw/vfio/common: Include missing 'system/tcg.h' header
      hw/vfio/common: Get target page size using runtime helpers
      hw/vfio: Compile some common objects once
      hw/vfio: Compile more objects once
      hw/vfio: Compile iommufd.c once
      hw/vfio: Compile display.c once

Tomita Moeko (10):
      vfio/igd: Remove GTT write quirk in IO BAR 4
      vfio/igd: Do not include GTT stolen size in etc/igd-bdsm-size
      vfio/igd: Consolidate OpRegion initialization into a single function
      vfio/igd: Move LPC bridge initialization to a separate function
      vfio/pci: Add placeholder for device-specific config space quirks
      vfio/igd: Refactor vfio_probe_igd_bar4_quirk into pci config quirk
      vfio/igd: Decouple common quirks from legacy mode
      vfio/igd: Handle x-igd-opregion option in config quirk
      vfio/igd: Introduce x-igd-lpc option for LPC bridge ID quirk
      vfio/igd: Fix broken KVMGT OpRegion support

Vasilis Liaskovitis (1):
      vfio/pci-quirks: Exclude non-ioport BAR from ATI quirk

 hw/vfio/pci.h | 11 +-
 include/exec/ram_addr.h | 3 -
 include/system/hostmem.h | 3 +
 hw/ppc/spapr_caps.c | 1 +
 hw/s390x/s390-virtio-ccw.c | 1 +
 hw/vfio/common.c | 9 +-
 hw/vfio/igd.c | 529 +++++++++++++++++++-------------------
 hw/vfio/iommufd.c | 1 -
 hw/vfio/migration-multifd.c | 15 +-
 hw/vfio/migration.c | 1 -
 hw/vfio/pci-quirks.c | 53 +----
 hw/vfio/pci.c | 35 +--
 hw/vfio/spapr.c | 4 +-
 hw/vfio/meson.build | 27 ++-
 14 files changed, 288 insertions(+), 405 deletions(-)
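As an illustration of the headline feature of the 20250306 pull request (this
example is not part of either cover letter, and the PCI address below is a
placeholder), a vfio-pci device opting in to the multifd device state transfer
could be configured roughly as:

  -device vfio-pci,host=0000:01:00.0,x-migration-multifd-transfer=on

with the migration 'multifd' capability enabled on both the source and the
destination.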
Deleted patch
1
Investigate the git history to uncover when and why the VFIO
2
properties were introduced and update the models. This is mostly
3
targeting the vfio-pci device, since vfio-platform, vfio-ap and vfio-ccw
4
devices are simpler.
5
1
6
Sort the properties based on the QEMU version in which they were
7
introduced.
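
Once set, these descriptions show up in the usual property help output; an
illustrative, heavily truncated example of what a user would see:

  $ qemu-system-x86_64 -device vfio-pci,help
  vfio-pci options:
    host=<str>             - Host PCI address [domain:]<bus:slot.function> of assigned device
    sysfsdev=<str>         - Host sysfs path of assigned device
    ...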
8
9
Cc: Tony Krowiak <akrowiak@linux.ibm.com>
10
Cc: Eric Farman <farman@linux.ibm.com>
11
Cc: Eric Auger <eric.auger@redhat.com>
12
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
13
Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com>
14
Reviewed-by: Eric Farman <farman@linux.ibm.com> # vfio-ccw
15
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
16
Reviewed-by: Eric Auger <eric.auger@redhat.com>
17
Link: https://lore.kernel.org/qemu-devel/20250217173455.449983-1-clg@redhat.com
18
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
---
20
hw/vfio/ap.c | 9 ++++
21
hw/vfio/ccw.c | 15 ++++++
22
hw/vfio/pci.c | 125 +++++++++++++++++++++++++++++++++++++++++++++
23
hw/vfio/platform.c | 24 +++++++++
24
4 files changed, 173 insertions(+)
25
26
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/vfio/ap.c
29
+++ b/hw/vfio/ap.c
30
@@ -XXX,XX +XXX,XX @@ static void vfio_ap_class_init(ObjectClass *klass, void *data)
31
dc->hotpluggable = true;
32
device_class_set_legacy_reset(dc, vfio_ap_reset);
33
dc->bus_type = TYPE_AP_BUS;
34
+
35
+ object_class_property_set_description(klass, /* 3.1 */
36
+ "sysfsdev",
37
+ "Host sysfs path of assigned device");
38
+#ifdef CONFIG_IOMMUFD
39
+ object_class_property_set_description(klass, /* 9.0 */
40
+ "iommufd",
41
+ "Set host IOMMUFD backend device");
42
+#endif
43
}
44
45
static const TypeInfo vfio_ap_info = {
46
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/hw/vfio/ccw.c
49
+++ b/hw/vfio/ccw.c
50
@@ -XXX,XX +XXX,XX @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data)
51
cdc->handle_halt = vfio_ccw_handle_halt;
52
cdc->handle_clear = vfio_ccw_handle_clear;
53
cdc->handle_store = vfio_ccw_handle_store;
54
+
55
+ object_class_property_set_description(klass, /* 2.10 */
56
+ "sysfsdev",
57
+ "Host sysfs path of assigned device");
58
+ object_class_property_set_description(klass, /* 3.0 */
59
+ "force-orb-pfch",
60
+ "Force unlimited prefetch");
61
+#ifdef CONFIG_IOMMUFD
62
+ object_class_property_set_description(klass, /* 9.0 */
63
+ "iommufd",
64
+ "Set host IOMMUFD backend device");
65
+#endif
66
+ object_class_property_set_description(klass, /* 9.2 */
67
+ "loadparm",
68
+ "Define which devices that can be used for booting");
69
}
70
71
static const TypeInfo vfio_ccw_info = {
72
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/hw/vfio/pci.c
75
+++ b/hw/vfio/pci.c
76
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
77
pdc->exit = vfio_exitfn;
78
pdc->config_read = vfio_pci_read_config;
79
pdc->config_write = vfio_pci_write_config;
80
+
81
+ object_class_property_set_description(klass, /* 1.3 */
82
+ "host",
83
+ "Host PCI address [domain:]<bus:slot.function> of assigned device");
84
+ object_class_property_set_description(klass, /* 1.3 */
85
+ "x-intx-mmap-timeout-ms",
86
+ "When EOI is not provided by KVM/QEMU, wait time "
87
+ "(milliseconds) to re-enable device direct access "
88
+ "after INTx (DEBUG)");
89
+ object_class_property_set_description(klass, /* 1.5 */
90
+ "x-vga",
91
+ "Expose VGA address spaces for device");
92
+ object_class_property_set_description(klass, /* 2.3 */
93
+ "x-req",
94
+ "Disable device request notification support (DEBUG)");
95
+ object_class_property_set_description(klass, /* 2.4 and 2.5 */
96
+ "x-no-mmap",
97
+ "Disable MMAP for device. Allows to trace MMIO "
98
+ "accesses (DEBUG)");
99
+ object_class_property_set_description(klass, /* 2.5 */
100
+ "x-no-kvm-intx",
101
+ "Disable direct VFIO->KVM INTx injection. Allows to "
102
+ "trace INTx interrupts (DEBUG)");
103
+ object_class_property_set_description(klass, /* 2.5 */
104
+ "x-no-kvm-msi",
105
+ "Disable direct VFIO->KVM MSI injection. Allows to "
106
+ "trace MSI interrupts (DEBUG)");
107
+ object_class_property_set_description(klass, /* 2.5 */
108
+ "x-no-kvm-msix",
109
+ "Disable direct VFIO->KVM MSIx injection. Allows to "
110
+ "trace MSIx interrupts (DEBUG)");
111
+ object_class_property_set_description(klass, /* 2.5 */
112
+ "x-pci-vendor-id",
113
+ "Override PCI Vendor ID with provided value (DEBUG)");
114
+ object_class_property_set_description(klass, /* 2.5 */
115
+ "x-pci-device-id",
116
+ "Override PCI device ID with provided value (DEBUG)");
117
+ object_class_property_set_description(klass, /* 2.5 */
118
+ "x-pci-sub-vendor-id",
119
+ "Override PCI Subsystem Vendor ID with provided value "
120
+ "(DEBUG)");
121
+ object_class_property_set_description(klass, /* 2.5 */
122
+ "x-pci-sub-device-id",
123
+ "Override PCI Subsystem Device ID with provided value "
124
+ "(DEBUG)");
125
+ object_class_property_set_description(klass, /* 2.6 */
126
+ "sysfsdev",
127
+ "Host sysfs path of assigned device");
128
+ object_class_property_set_description(klass, /* 2.7 */
129
+ "x-igd-opregion",
130
+ "Expose host IGD OpRegion to guest");
131
+ object_class_property_set_description(klass, /* 2.7 (See c4c45e943e51) */
132
+ "x-igd-gms",
133
+ "Override IGD data stolen memory size (32MiB units)");
134
+ object_class_property_set_description(klass, /* 2.11 */
135
+ "x-nv-gpudirect-clique",
136
+ "Add NVIDIA GPUDirect capability indicating P2P DMA "
137
+ "clique for device [0-15]");
138
+ object_class_property_set_description(klass, /* 2.12 */
139
+ "x-no-geforce-quirks",
140
+ "Disable GeForce quirks (for NVIDIA Quadro/GRID/Tesla). "
141
+ "Improves performance");
142
+ object_class_property_set_description(klass, /* 2.12 */
143
+ "display",
144
+ "Enable display support for device, ex. vGPU");
145
+ object_class_property_set_description(klass, /* 2.12 */
146
+ "x-msix-relocation",
147
+ "Specify MSI-X MMIO relocation to the end of specified "
148
+ "existing BAR or new BAR to avoid virtualization overhead "
149
+ "due to adjacent device registers");
150
+ object_class_property_set_description(klass, /* 3.0 */
151
+ "x-no-kvm-ioeventfd",
152
+ "Disable registration of ioeventfds with KVM (DEBUG)");
153
+ object_class_property_set_description(klass, /* 3.0 */
154
+ "x-no-vfio-ioeventfd",
155
+ "Disable linking of KVM ioeventfds to VFIO ioeventfds "
156
+ "(DEBUG)");
157
+ object_class_property_set_description(klass, /* 3.1 */
158
+ "x-balloon-allowed",
159
+ "Override allowing ballooning with device (DEBUG, DANGER)");
160
+ object_class_property_set_description(klass, /* 3.2 */
161
+ "xres",
162
+ "Set X display resolution the vGPU should use");
163
+ object_class_property_set_description(klass, /* 3.2 */
164
+ "yres",
165
+ "Set Y display resolution the vGPU should use");
166
+ object_class_property_set_description(klass, /* 5.2 */
167
+ "x-pre-copy-dirty-page-tracking",
168
+ "Disable dirty pages tracking during iterative phase "
169
+ "(DEBUG)");
170
+ object_class_property_set_description(klass, /* 5.2, 8.0 non-experimental */
171
+ "enable-migration",
172
+ "Enale device migration. Also requires a host VFIO PCI "
173
+ "variant or mdev driver with migration support enabled");
174
+ object_class_property_set_description(klass, /* 8.1 */
175
+ "vf-token",
176
+ "Specify UUID VF token. Required for VF when PF is owned "
177
+ "by another VFIO driver");
178
+#ifdef CONFIG_IOMMUFD
179
+ object_class_property_set_description(klass, /* 9.0 */
180
+ "iommufd",
181
+ "Set host IOMMUFD backend device");
182
+#endif
183
+ object_class_property_set_description(klass, /* 9.1 */
184
+ "x-device-dirty-page-tracking",
185
+ "Disable device dirty page tracking and use "
186
+ "container-based dirty page tracking (DEBUG)");
187
+ object_class_property_set_description(klass, /* 9.1 */
188
+ "migration-events",
189
+ "Emit VFIO migration QAPI event when a VFIO device "
190
+ "changes its migration state. For management applications");
191
+ object_class_property_set_description(klass, /* 9.1 */
192
+ "skip-vsc-check",
193
+ "Skip config space check for Vendor Specific Capability. "
194
+ "Setting to false will enforce strict checking of VSC content "
195
+ "(DEBUG)");
196
}
197
198
static const TypeInfo vfio_pci_dev_info = {
199
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data)
200
201
device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
202
dc->hotpluggable = false;
203
+
204
+ object_class_property_set_description(klass, /* 3.1 */
205
+ "ramfb",
206
+ "Enable ramfb to provide pre-boot graphics for devices "
207
+ "enabling display option");
208
+ object_class_property_set_description(klass, /* 8.2 */
209
+ "x-ramfb-migrate",
210
+ "Override default migration support for ramfb support "
211
+ "(DEBUG)");
212
}
213
214
static const TypeInfo vfio_pci_nohotplug_dev_info = {
215
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
216
index XXXXXXX..XXXXXXX 100644
217
--- a/hw/vfio/platform.c
218
+++ b/hw/vfio/platform.c
219
@@ -XXX,XX +XXX,XX @@ static void vfio_platform_class_init(ObjectClass *klass, void *data)
220
dc->desc = "VFIO-based platform device assignment";
221
sbc->connect_irq_notifier = vfio_start_irqfd_injection;
222
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
223
+
224
+ object_class_property_set_description(klass, /* 2.4 */
225
+ "host",
226
+ "Host device name of assigned device");
227
+ object_class_property_set_description(klass, /* 2.4 and 2.5 */
228
+ "x-no-mmap",
229
+ "Disable MMAP for device. Allows to trace MMIO "
230
+ "accesses (DEBUG)");
231
+ object_class_property_set_description(klass, /* 2.4 */
232
+ "mmap-timeout-ms",
233
+ "When EOI is not provided by KVM/QEMU, wait time "
234
+ "(milliseconds) to re-enable device direct access "
235
+ "after level interrupt (DEBUG)");
236
+ object_class_property_set_description(klass, /* 2.4 */
237
+ "x-irqfd",
238
+ "Allow disabling irqfd support (DEBUG)");
239
+ object_class_property_set_description(klass, /* 2.6 */
240
+ "sysfsdev",
241
+ "Host sysfs path of assigned device");
242
+#ifdef CONFIG_IOMMUFD
243
+ object_class_property_set_description(klass, /* 9.0 */
244
+ "iommufd",
245
+ "Set host IOMMUFD backend device");
246
+#endif
247
}
248
249
static const TypeInfo vfio_platform_dev_info = {
250
--
251
2.48.1
252
253
Deleted patch
1
Use the common helper warn_report_once() instead of implementing its
2
own.
3
1
4
Cc: Eric Farman <farman@linux.ibm.com>
5
Reviewed-by: Eric Farman <farman@linux.ibm.com>
6
Link: https://lore.kernel.org/qemu-devel/20250214161936.1720039-1-clg@redhat.com
7
Signed-off-by: Cédric Le Goater <clg@redhat.com>
8
---
9
hw/vfio/ccw.c | 12 ++----------
10
1 file changed, 2 insertions(+), 10 deletions(-)
11
12
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/vfio/ccw.c
15
+++ b/hw/vfio/ccw.c
16
@@ -XXX,XX +XXX,XX @@ struct VFIOCCWDevice {
17
EventNotifier crw_notifier;
18
EventNotifier req_notifier;
19
bool force_orb_pfch;
20
- bool warned_orb_pfch;
21
};
22
23
-static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
24
- const char *msg)
25
-{
26
- warn_report_once_cond(&vcdev->warned_orb_pfch,
27
- "vfio-ccw (devno %x.%x.%04x): %s",
28
- sch->cssid, sch->ssid, sch->devno, msg);
29
-}
30
-
31
static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
32
{
33
vdev->needs_reset = false;
34
@@ -XXX,XX +XXX,XX @@ static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
35
36
if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) {
37
sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
38
- warn_once_pfch(vcdev, sch, "PFCH flag forced");
39
+ warn_report_once("vfio-ccw (devno %x.%x.%04x): PFCH flag forced",
40
+ sch->cssid, sch->ssid, sch->devno);
41
}
42
43
QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
44
--
45
2.48.1
46
47
Deleted patch
1
From: Alex Williamson <alex.williamson@redhat.com>
2
1
3
The memory and IO BARs for devices are only accessible in the D0 power
4
state. In other power states the PCI spec defines that the device
5
responds to TLPs and messages with an Unsupported Request response.
6
7
To approximate this behavior, consider the BARs as unmapped when the
8
device is not in the D0 power state. This makes the BARs inaccessible
9
and has the additional bonus for vfio-pci that we don't attempt to DMA
10
map BARs for devices in a non-D0 power state.
11
12
To support this, an interface is added for devices to register the PM
13
capability, which allows central tracking to enforce valid transitions
14
and unmap BARs in non-D0 states.
15
16
NB. We currently have device models (eepro100 and pcie_pci_bridge)
17
that register a PM capability but do not set wmask to enable writes to
18
the power state field. In order to maintain migration compatibility,
19
this new helper does not manage the wmask to enable guest writes to
20
initiate a power state change. The contents and write access of the
21
PM capability are still managed by the caller.
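
As a usage sketch only (not taken from this series), a device model that wants
guest-controlled power states would register the capability and then open up
the PMCSR PowerState field itself, since the helper deliberately leaves wmask
untouched; the device name and error handling below are illustrative:

    static void mydev_realize(PCIDevice *pdev, Error **errp)
    {
        int cap = pci_pm_init(pdev, 0 /* 0 = let the core pick an offset */, errp);

        if (cap < 0) {
            return;
        }

        /* Allow the guest to change the PowerState field (D0..D3hot). */
        pci_set_word(pdev->wmask + cap + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
    }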
22
23
Cc: Michael S. Tsirkin <mst@redhat.com>
24
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
25
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
26
Reviewed-by: Eric Auger <eric.auger@redhat.com>
27
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
28
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-2-alex.williamson@redhat.com
29
Signed-off-by: Cédric Le Goater <clg@redhat.com>
30
---
31
include/hw/pci/pci.h | 3 ++
32
include/hw/pci/pci_device.h | 3 ++
33
hw/pci/pci.c | 93 ++++++++++++++++++++++++++++++++++++-
34
hw/pci/trace-events | 2 +
35
4 files changed, 99 insertions(+), 2 deletions(-)
36
37
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/hw/pci/pci.h
40
+++ b/include/hw/pci/pci.h
41
@@ -XXX,XX +XXX,XX @@ enum {
42
QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
43
#define QEMU_PCIE_EXT_TAG_BITNR 13
44
QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR),
45
+#define QEMU_PCI_CAP_PM_BITNR 14
46
+ QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
47
};
48
49
typedef struct PCIINTxRoute {
50
@@ -XXX,XX +XXX,XX @@ static inline void pci_irq_deassert(PCIDevice *pci_dev)
51
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
52
void pci_set_enabled(PCIDevice *pci_dev, bool state);
53
void pci_set_power(PCIDevice *pci_dev, bool state);
54
+int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp);
55
56
#endif
57
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/include/hw/pci/pci_device.h
60
+++ b/include/hw/pci/pci_device.h
61
@@ -XXX,XX +XXX,XX @@ struct PCIDevice {
62
/* Capability bits */
63
uint32_t cap_present;
64
65
+ /* Offset of PM capability in config space */
66
+ uint8_t pm_cap;
67
+
68
/* Offset of MSI-X capability in config space */
69
uint8_t msix_cap;
70
71
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/hw/pci/pci.c
74
+++ b/hw/pci/pci.c
75
@@ -XXX,XX +XXX,XX @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
76
attrs, NULL);
77
}
78
79
+/*
80
+ * Register and track a PM capability. If wmask is also enabled for the power
81
+ * state field of the pmcsr register, guest writes may change the device PM
82
+ * state. BAR access is only enabled while the device is in the D0 state.
83
+ * Return the capability offset or negative error code.
84
+ */
85
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
86
+{
87
+ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
88
+
89
+ if (cap < 0) {
90
+ return cap;
91
+ }
92
+
93
+ d->pm_cap = cap;
94
+ d->cap_present |= QEMU_PCI_CAP_PM;
95
+
96
+ return cap;
97
+}
98
+
99
+static uint8_t pci_pm_state(PCIDevice *d)
100
+{
101
+ uint16_t pmcsr;
102
+
103
+ if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
104
+ return 0;
105
+ }
106
+
107
+ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
108
+
109
+ return pmcsr & PCI_PM_CTRL_STATE_MASK;
110
+}
111
+
112
+/*
113
+ * Update the PM capability state based on the new value stored in config
114
+ * space respective to the old, pre-write state provided. If the new value
115
+ * is rejected (unsupported or invalid transition) restore the old value.
116
+ * Return the resulting PM state.
117
+ */
118
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
119
+{
120
+ uint16_t pmc;
121
+ uint8_t new;
122
+
123
+ if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
124
+ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
125
+ return old;
126
+ }
127
+
128
+ new = pci_pm_state(d);
129
+ if (new == old) {
130
+ return old;
131
+ }
132
+
133
+ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
134
+
135
+ /*
136
+ * Transitions to D1 & D2 are only allowed if supported. Devices may
137
+ * only transition to higher D-states or to D0.
138
+ */
139
+ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
140
+ (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
141
+ (old && new && new < old)) {
142
+ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
143
+ PCI_PM_CTRL_STATE_MASK);
144
+ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
145
+ old);
146
+ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
147
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
148
+ old, new);
149
+ return old;
150
+ }
151
+
152
+ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
153
+ PCI_FUNC(d->devfn), old, new);
154
+ return new;
155
+}
156
+
157
static void pci_reset_regions(PCIDevice *dev)
158
{
159
int r;
160
@@ -XXX,XX +XXX,XX @@ static void pci_do_device_reset(PCIDevice *dev)
161
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
162
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
163
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
164
+ /* Default PM state is D0 */
165
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
166
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
167
+ PCI_PM_CTRL_STATE_MASK);
168
+ }
169
pci_reset_regions(dev);
170
pci_update_mappings(dev);
171
172
@@ -XXX,XX +XXX,XX @@ static void pci_update_mappings(PCIDevice *d)
173
continue;
174
175
new_addr = pci_bar_address(d, i, r->type, r->size);
176
- if (!d->enabled) {
177
+ if (!d->enabled || pci_pm_state(d)) {
178
new_addr = PCI_BAR_UNMAPPED;
179
}
180
181
@@ -XXX,XX +XXX,XX @@ uint32_t pci_default_read_config(PCIDevice *d,
182
183
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
184
{
185
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
186
int i, was_irq_disabled = pci_irq_disabled(d);
187
uint32_t val = val_in;
188
189
@@ -XXX,XX +XXX,XX @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
190
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
191
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
192
}
193
+
194
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
195
+
196
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
197
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
198
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
199
- range_covers_byte(addr, l, PCI_COMMAND))
200
+ range_covers_byte(addr, l, PCI_COMMAND) ||
201
+ !!new_pm_state != !!old_pm_state) {
202
pci_update_mappings(d);
203
+ }
204
205
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
206
pci_update_irq_disabled(d, was_irq_disabled);
207
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
208
index XXXXXXX..XXXXXXX 100644
209
--- a/hw/pci/trace-events
210
+++ b/hw/pci/trace-events
211
@@ -XXX,XX +XXX,XX @@
212
# See docs/devel/tracing.rst for syntax documentation.
213
214
# pci.c
215
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d"
216
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
217
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
218
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
219
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
220
--
221
2.48.1
222
223
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
A new function multifd_queue_device_state() is provided for a device to queue
3
The IO BAR4 of IGD devices contains a pair of 32-bit address/data
4
its state for transmission via a multifd channel.
4
registers, MMIO_Index (0x0) and MMIO_Data (0x4), which provide access
5
5
to the MMIO BAR0 (GTTMMADR) from IO space. These registers are probably
6
Reviewed-by: Peter Xu <peterx@redhat.com>
6
only used by the VBIOS, and are not documented by intel. The observed
7
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
7
layout of MMIO_Index register is:
8
Link: https://lore.kernel.org/qemu-devel/ebd55768d3e5fecb5eb3f197bad9c0c07e5bc084.1741124640.git.maciej.szmigiero@oracle.com
8
31 2 1 0
9
+-------------------------------------------------------------------+
10
| Offset | Rsvd | Sel |
11
+-------------------------------------------------------------------+
12
- Offset: Byte offset in specified region, 4-byte aligned.
13
- Sel: Region selector
14
0: MMIO register region (first half of MMIO BAR0)
15
1: GTT region (second half of MMIO BAR0). Pre Gen11 only.
16
17
Currently, QEMU implements a quirk that adjusts the guest Data Stolen
18
Memory (DSM) region address to be (addr - host BDSM + guest BDSM) when
19
programming GTT entries via IO BAR4, assuming guest still programs GTT
20
with host DSM address, which is not the case. Guest's BDSM register is
21
emulated and initialized to 0 at startup by QEMU, then SeaBIOS programs
22
its value[1]. As a result, the addresses programmed into GTT entries by the
VBIOS running in the guest are valid GPAs, and this unnecessary adjustment brings
24
inconsistency.
25
26
[1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332
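
To make the register pair described above concrete, a rough sketch (offsets and
values here are made up, not taken from the patch) of reading one 4-byte GTT
entry at byte offset 0x1000 of the GTT region through IO BAR 4, using the same
helpers this quirk uses:

    uint32_t index = 0x1000 | 1;  /* byte offset in bits 31:2, Sel = 1 (GTT) */
    uint64_t val;

    vfio_region_write(&vdev->bars[4].region, 0, index, 4);  /* MMIO_Index */
    val = vfio_region_read(&vdev->bars[4].region, 4, 4);    /* MMIO_Data  */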
27
28
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
29
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
30
Tested-by: Alex Williamson <alex.williamson@redhat.com>
31
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
32
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-2-tomitamoeko@gmail.com
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
33
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
34
---
11
include/migration/misc.h | 4 ++
35
hw/vfio/igd.c | 191 +-------------------------------------------------
12
migration/multifd.h | 34 ++++++---
36
1 file changed, 1 insertion(+), 190 deletions(-)
13
migration/multifd-device-state.c | 118 +++++++++++++++++++++++++++++++
37
14
migration/multifd-nocomp.c | 14 +++-
38
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
15
migration/multifd.c | 42 +++++++++--
16
migration/meson.build | 1 +
17
6 files changed, 197 insertions(+), 16 deletions(-)
18
create mode 100644 migration/multifd-device-state.c
19
20
diff --git a/include/migration/misc.h b/include/migration/misc.h
21
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
22
--- a/include/migration/misc.h
40
--- a/hw/vfio/igd.c
23
+++ b/include/migration/misc.h
41
+++ b/hw/vfio/igd.c
24
@@ -XXX,XX +XXX,XX @@ bool migrate_is_uri(const char *uri);
42
@@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev)
25
bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
43
return -1;
26
Error **errp);
27
28
+/* migration/multifd-device-state.c */
29
+bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
30
+ char *data, size_t len);
31
+
32
#endif
33
diff --git a/migration/multifd.h b/migration/multifd.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/migration/multifd.h
36
+++ b/migration/multifd.h
37
@@ -XXX,XX +XXX,XX @@ struct MultiFDRecvData {
38
off_t file_offset;
39
};
40
41
+typedef struct {
42
+ char *idstr;
43
+ uint32_t instance_id;
44
+ char *buf;
45
+ size_t buf_len;
46
+} MultiFDDeviceState_t;
47
+
48
typedef enum {
49
MULTIFD_PAYLOAD_NONE,
50
MULTIFD_PAYLOAD_RAM,
51
+ MULTIFD_PAYLOAD_DEVICE_STATE,
52
} MultiFDPayloadType;
53
54
typedef union MultiFDPayload {
55
MultiFDPages_t ram;
56
+ MultiFDDeviceState_t device_state;
57
} MultiFDPayload;
58
59
struct MultiFDSendData {
60
@@ -XXX,XX +XXX,XX @@ static inline bool multifd_payload_empty(MultiFDSendData *data)
61
return data->type == MULTIFD_PAYLOAD_NONE;
62
}
44
}
63
45
64
+static inline bool multifd_payload_device_state(MultiFDSendData *data)
46
-typedef struct VFIOIGDQuirk {
65
+{
47
- struct VFIOPCIDevice *vdev;
66
+ return data->type == MULTIFD_PAYLOAD_DEVICE_STATE;
48
- uint32_t index;
67
+}
49
- uint64_t bdsm;
68
+
50
-} VFIOIGDQuirk;
69
static inline void multifd_set_payload_type(MultiFDSendData *data,
51
-
70
MultiFDPayloadType type)
52
#define IGD_GMCH 0x50 /* Graphics Control Register */
71
{
53
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
72
@@ -XXX,XX +XXX,XX @@ typedef struct {
54
#define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */
73
55
@@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
74
/* thread local variables. No locking required */
56
return ret;
75
76
- /* pointer to the packet */
77
+ /* pointers to the possible packet types */
78
MultiFDPacket_t *packet;
79
+ MultiFDPacketDeviceState_t *packet_device_state;
80
/* size of the next packet that contains pages */
81
uint32_t next_packet_size;
82
/* packets sent through this channel */
83
@@ -XXX,XX +XXX,XX @@ bool multifd_send_prepare_common(MultiFDSendParams *p);
84
void multifd_send_zero_page_detect(MultiFDSendParams *p);
85
void multifd_recv_zero_page_process(MultiFDRecvParams *p);
86
87
-static inline void multifd_send_prepare_header(MultiFDSendParams *p)
88
-{
89
- p->iov[0].iov_len = p->packet_len;
90
- p->iov[0].iov_base = p->packet;
91
- p->iovs_num++;
92
-}
93
-
94
void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
95
bool multifd_send(MultiFDSendData **send_data);
96
MultiFDSendData *multifd_send_data_alloc(void);
97
@@ -XXX,XX +XXX,XX @@ bool multifd_ram_sync_per_section(void);
98
size_t multifd_ram_payload_size(void);
99
void multifd_ram_fill_packet(MultiFDSendParams *p);
100
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
101
+
102
+size_t multifd_device_state_payload_size(void);
103
+
104
+void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);
105
+
106
+void multifd_device_state_send_setup(void);
107
+void multifd_device_state_send_cleanup(void);
108
+
109
+void multifd_device_state_send_prepare(MultiFDSendParams *p);
110
+
111
#endif
112
diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c
113
new file mode 100644
114
index XXXXXXX..XXXXXXX
115
--- /dev/null
116
+++ b/migration/multifd-device-state.c
117
@@ -XXX,XX +XXX,XX @@
118
+/*
119
+ * Multifd device state migration
120
+ *
121
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
122
+ *
123
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
124
+ * See the COPYING file in the top-level directory.
125
+ *
126
+ * SPDX-License-Identifier: GPL-2.0-or-later
127
+ */
128
+
129
+#include "qemu/osdep.h"
130
+#include "qemu/lockable.h"
131
+#include "migration/misc.h"
132
+#include "multifd.h"
133
+
134
+static struct {
135
+ QemuMutex queue_job_mutex;
136
+
137
+ MultiFDSendData *send_data;
138
+} *multifd_send_device_state;
139
+
140
+size_t multifd_device_state_payload_size(void)
141
+{
142
+ return sizeof(MultiFDDeviceState_t);
143
+}
144
+
145
+void multifd_device_state_send_setup(void)
146
+{
147
+ assert(!multifd_send_device_state);
148
+ multifd_send_device_state = g_malloc(sizeof(*multifd_send_device_state));
149
+
150
+ qemu_mutex_init(&multifd_send_device_state->queue_job_mutex);
151
+
152
+ multifd_send_device_state->send_data = multifd_send_data_alloc();
153
+}
154
+
155
+void multifd_device_state_send_cleanup(void)
156
+{
157
+ g_clear_pointer(&multifd_send_device_state->send_data,
158
+ multifd_send_data_free);
159
+
160
+ qemu_mutex_destroy(&multifd_send_device_state->queue_job_mutex);
161
+
162
+ g_clear_pointer(&multifd_send_device_state, g_free);
163
+}
164
+
165
+void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state)
166
+{
167
+ g_clear_pointer(&device_state->idstr, g_free);
168
+ g_clear_pointer(&device_state->buf, g_free);
169
+}
170
+
171
+static void multifd_device_state_fill_packet(MultiFDSendParams *p)
172
+{
173
+ MultiFDDeviceState_t *device_state = &p->data->u.device_state;
174
+ MultiFDPacketDeviceState_t *packet = p->packet_device_state;
175
+
176
+ packet->hdr.flags = cpu_to_be32(p->flags);
177
+ strncpy(packet->idstr, device_state->idstr, sizeof(packet->idstr) - 1);
178
+ packet->idstr[sizeof(packet->idstr) - 1] = 0;
179
+ packet->instance_id = cpu_to_be32(device_state->instance_id);
180
+ packet->next_packet_size = cpu_to_be32(p->next_packet_size);
181
+}
182
+
183
+static void multifd_prepare_header_device_state(MultiFDSendParams *p)
184
+{
185
+ p->iov[0].iov_len = sizeof(*p->packet_device_state);
186
+ p->iov[0].iov_base = p->packet_device_state;
187
+ p->iovs_num++;
188
+}
189
+
190
+void multifd_device_state_send_prepare(MultiFDSendParams *p)
191
+{
192
+ MultiFDDeviceState_t *device_state = &p->data->u.device_state;
193
+
194
+ assert(multifd_payload_device_state(p->data));
195
+
196
+ multifd_prepare_header_device_state(p);
197
+
198
+ assert(!(p->flags & MULTIFD_FLAG_SYNC));
199
+
200
+ p->next_packet_size = device_state->buf_len;
201
+ if (p->next_packet_size > 0) {
202
+ p->iov[p->iovs_num].iov_base = device_state->buf;
203
+ p->iov[p->iovs_num].iov_len = p->next_packet_size;
204
+ p->iovs_num++;
205
+ }
206
+
207
+ p->flags |= MULTIFD_FLAG_NOCOMP | MULTIFD_FLAG_DEVICE_STATE;
208
+
209
+ multifd_device_state_fill_packet(p);
210
+}
211
+
212
+bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
213
+ char *data, size_t len)
214
+{
215
+ /* Device state submissions can come from multiple threads */
216
+ QEMU_LOCK_GUARD(&multifd_send_device_state->queue_job_mutex);
217
+ MultiFDDeviceState_t *device_state;
218
+
219
+ assert(multifd_payload_empty(multifd_send_device_state->send_data));
220
+
221
+ multifd_set_payload_type(multifd_send_device_state->send_data,
222
+ MULTIFD_PAYLOAD_DEVICE_STATE);
223
+ device_state = &multifd_send_device_state->send_data->u.device_state;
224
+ device_state->idstr = g_strdup(idstr);
225
+ device_state->instance_id = instance_id;
226
+ device_state->buf = g_memdup2(data, len);
227
+ device_state->buf_len = len;
228
+
229
+ if (!multifd_send(&multifd_send_device_state->send_data)) {
230
+ multifd_send_data_clear(multifd_send_device_state->send_data);
231
+ return false;
232
+ }
233
+
234
+ return true;
235
+}
236
diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c
237
index XXXXXXX..XXXXXXX 100644
238
--- a/migration/multifd-nocomp.c
239
+++ b/migration/multifd-nocomp.c
240
@@ -XXX,XX +XXX,XX @@
241
#include "exec/ramblock.h"
242
#include "exec/target_page.h"
243
#include "file.h"
244
+#include "migration-stats.h"
245
#include "multifd.h"
246
#include "options.h"
247
#include "qapi/error.h"
248
@@ -XXX,XX +XXX,XX @@ static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
249
return;
250
}
57
}
251
58
252
+static void multifd_ram_prepare_header(MultiFDSendParams *p)
59
-/*
253
+{
60
- * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE
254
+ p->iov[0].iov_len = p->packet_len;
61
- * entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore
255
+ p->iov[0].iov_base = p->packet;
62
- * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index
256
+ p->iovs_num++;
63
- * for programming the GTT.
257
+}
64
- *
258
+
65
- * See linux:include/drm/i915_drm.h for shift and mask values.
259
static void multifd_send_prepare_iovs(MultiFDSendParams *p)
66
- */
260
{
67
-static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
261
MultiFDPages_t *pages = &p->data->u.ram;
68
-{
262
@@ -XXX,XX +XXX,XX @@ static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
69
- uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
263
* Only !zerocopy needs the header in IOV; zerocopy will
70
- int gen = igd_gen(vdev);
264
* send it separately.
71
- uint64_t ggms_size = igd_gtt_memory_size(gen, gmch);
265
*/
72
-
266
- multifd_send_prepare_header(p);
73
- return (ggms_size / (4 * KiB)) * (gen < 8 ? 4 : 8);
267
+ multifd_ram_prepare_header(p);
74
-}
75
-
76
-/*
77
- * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes.
78
- * Somehow the host stolen memory range is used for this, but how the ROM gets
79
- * it is a mystery, perhaps it's hardcoded into the ROM. Thankfully though, it
80
- * reprograms the GTT through the IOBAR where we can trap it and transpose the
81
- * programming to the VM allocated buffer. That buffer gets reserved by the VM
82
- * firmware via the fw_cfg entry added below. Here we're just monitoring the
83
- * IOBAR address and data registers to detect a write sequence targeting the
84
- * GTTADR. This code is developed by observed behavior and doesn't have a
85
- * direct spec reference, unfortunately.
86
- */
87
-static uint64_t vfio_igd_quirk_data_read(void *opaque,
88
- hwaddr addr, unsigned size)
89
-{
90
- VFIOIGDQuirk *igd = opaque;
91
- VFIOPCIDevice *vdev = igd->vdev;
92
-
93
- igd->index = ~0;
94
-
95
- return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
96
-}
97
-
98
-static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
99
- uint64_t data, unsigned size)
100
-{
101
- VFIOIGDQuirk *igd = opaque;
102
- VFIOPCIDevice *vdev = igd->vdev;
103
- uint64_t val = data;
104
- int gen = igd_gen(vdev);
105
-
106
- /*
107
- * Programming the GGMS starts at index 0x1 and uses every 4th index (ie.
108
- * 0x1, 0x5, 0x9, 0xd,...). For pre-Gen8 each 4-byte write is a whole PTE
109
- * entry, with 0th bit enable set. For Gen8 and up, PTEs are 64bit, so
110
- * entries 0x5 & 0xd are the high dword, in our case zero. Each PTE points
111
- * to a 4k page, which we translate to a page from the VM allocated region,
112
- * pointed to by the BDSM register. If this is not set, we fail.
113
- *
114
- * We trap writes to the full configured GTT size, but we typically only
115
- * see the vBIOS writing up to (nearly) the 1MB barrier. In fact it often
116
- * seems to miss the last entry for an even 1MB GTT. Doing a gratuitous
117
- * write of that last entry does work, but is hopefully unnecessary since
118
- * we clear the previous GTT on initialization.
119
- */
120
- if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
121
- if (gen < 8 || (igd->index % 8 == 1)) {
122
- uint64_t base;
123
-
124
- if (gen < 11) {
125
- base = pci_get_long(vdev->pdev.config + IGD_BDSM);
126
- } else {
127
- base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11);
128
- }
129
- if (!base) {
130
- hw_error("vfio-igd: Guest attempted to program IGD GTT before "
131
- "BIOS reserved stolen memory. Unsupported BIOS?");
132
- }
133
-
134
- val = data - igd->bdsm + base;
135
- } else {
136
- val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */
137
- }
138
-
139
- trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
140
- igd->index, data, val);
141
- }
142
-
143
- vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);
144
-
145
- igd->index = ~0;
146
-}
147
-
148
-static const MemoryRegionOps vfio_igd_data_quirk = {
149
- .read = vfio_igd_quirk_data_read,
150
- .write = vfio_igd_quirk_data_write,
151
- .endianness = DEVICE_LITTLE_ENDIAN,
152
-};
153
-
154
-static uint64_t vfio_igd_quirk_index_read(void *opaque,
155
- hwaddr addr, unsigned size)
156
-{
157
- VFIOIGDQuirk *igd = opaque;
158
- VFIOPCIDevice *vdev = igd->vdev;
159
-
160
- igd->index = ~0;
161
-
162
- return vfio_region_read(&vdev->bars[4].region, addr, size);
163
-}
164
-
165
-static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
166
- uint64_t data, unsigned size)
167
-{
168
- VFIOIGDQuirk *igd = opaque;
169
- VFIOPCIDevice *vdev = igd->vdev;
170
-
171
- igd->index = data;
172
-
173
- vfio_region_write(&vdev->bars[4].region, addr, data, size);
174
-}
175
-
176
-static const MemoryRegionOps vfio_igd_index_quirk = {
177
- .read = vfio_igd_quirk_index_read,
178
- .write = vfio_igd_quirk_index_write,
179
- .endianness = DEVICE_LITTLE_ENDIAN,
180
-};
181
-
182
#define IGD_GGC_MMIO_OFFSET 0x108040
183
#define IGD_BDSM_MMIO_OFFSET 0x1080C0
184
185
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
186
g_autofree struct vfio_region_info *opregion = NULL;
187
g_autofree struct vfio_region_info *host = NULL;
188
g_autofree struct vfio_region_info *lpc = NULL;
189
- VFIOQuirk *quirk;
190
- VFIOIGDQuirk *igd;
191
PCIDevice *lpc_bridge;
192
- int i, ret, gen;
193
+ int ret, gen;
194
uint64_t ggms_size, gms_size;
195
uint64_t *bdsm_size;
196
uint32_t gmch;
197
- uint16_t cmd_orig, cmd;
198
Error *err = NULL;
199
200
/*
201
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
202
return;
268
}
203
}
269
204
270
multifd_send_prepare_iovs(p);
205
- /* Setup our quirk to munge GTT addresses to the VM allocated buffer */
271
@@ -XXX,XX +XXX,XX @@ static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
206
- quirk = vfio_quirk_alloc(2);
272
if (ret != 0) {
207
- igd = quirk->data = g_malloc0(sizeof(*igd));
273
return -1;
208
- igd->vdev = vdev;
274
}
209
- igd->index = ~0;
275
+
210
- if (gen < 11) {
276
+ stat64_add(&mig_stats.multifd_bytes, p->packet_len);
211
- igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
212
- } else {
213
- igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4);
214
- igd->bdsm |=
215
- (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32;
216
- }
217
- igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */
218
-
219
- memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
220
- igd, "vfio-igd-index-quirk", 4);
221
- memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
222
- 0, &quirk->mem[0], 1);
223
-
224
- memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
225
- igd, "vfio-igd-data-quirk", 4);
226
- memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
227
- 4, &quirk->mem[1], 1);
228
-
229
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
230
-
231
/*
232
* Allow user to override dsm size using x-igd-gms option, in multiples of
233
* 32MiB. This option should only be used when the desired size cannot be
234
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
235
pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0);
277
}
236
}
278
237
279
return 0;
238
- /*
280
@@ -XXX,XX +XXX,XX @@ int multifd_ram_flush_and_sync(QEMUFile *f)
239
- * This IOBAR gives us access to GTTADR, which allows us to write to
281
bool multifd_send_prepare_common(MultiFDSendParams *p)
240
- * the GTT itself. So let's go ahead and write zero to all the GTT
282
{
241
- * entries to avoid spurious DMA faults. Be sure I/O access is enabled
283
MultiFDPages_t *pages = &p->data->u.ram;
242
- * before talking to the device.
284
- multifd_send_prepare_header(p);
243
- */
285
+ multifd_ram_prepare_header(p);
244
- if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
286
multifd_send_zero_page_detect(p);
245
- vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
287
246
- error_report("IGD device %s - failed to read PCI command register",
288
if (!pages->normal_num) {
247
- vdev->vbasedev.name);
289
diff --git a/migration/multifd.c b/migration/multifd.c
248
- }
290
index XXXXXXX..XXXXXXX 100644
249
-
291
--- a/migration/multifd.c
250
- cmd = cmd_orig | PCI_COMMAND_IO;
292
+++ b/migration/multifd.c
251
-
293
@@ -XXX,XX +XXX,XX @@
252
- if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
294
253
- vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
295
#include "qemu/osdep.h"
254
- error_report("IGD device %s - failed to write PCI command register",
296
#include "qemu/cutils.h"
255
- vdev->vbasedev.name);
297
+#include "qemu/iov.h"
256
- }
298
#include "qemu/rcu.h"
257
-
299
#include "exec/target_page.h"
258
- for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
300
#include "system/system.h"
259
- vfio_region_write(&vdev->bars[4].region, 0, i, 4);
301
@@ -XXX,XX +XXX,XX @@
260
- vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
302
#include "qemu/error-report.h"
261
- }
303
#include "qapi/error.h"
262
-
304
#include "file.h"
263
- if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
305
+#include "migration/misc.h"
264
- vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
306
#include "migration.h"
265
- error_report("IGD device %s - failed to restore PCI command register",
307
#include "migration-stats.h"
266
- vdev->vbasedev.name);
308
#include "savevm.h"
267
- }
309
@@ -XXX,XX +XXX,XX @@ MultiFDSendData *multifd_send_data_alloc(void)
268
-
310
* added to the union in the future are larger than
269
trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name,
311
* (MultiFDPages_t + flex array).
270
(ggms_size + gms_size) / MiB);
312
*/
313
- max_payload_size = MAX(multifd_ram_payload_size(), sizeof(MultiFDPayload));
314
+ max_payload_size = MAX(multifd_ram_payload_size(),
315
+ multifd_device_state_payload_size());
316
+ max_payload_size = MAX(max_payload_size, sizeof(MultiFDPayload));
317
318
/*
319
* Account for any holes the compiler might insert. We can't pack
320
@@ -XXX,XX +XXX,XX @@ void multifd_send_data_clear(MultiFDSendData *data)
321
}
322
323
switch (data->type) {
324
+ case MULTIFD_PAYLOAD_DEVICE_STATE:
325
+ multifd_send_data_clear_device_state(&data->u.device_state);
326
+ break;
327
default:
328
/* Nothing to do */
329
break;
330
@@ -XXX,XX +XXX,XX @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
331
return msg.id;
332
}
271
}
333
334
+/* Fills a RAM multifd packet */
335
void multifd_send_fill_packet(MultiFDSendParams *p)
336
{
337
MultiFDPacket_t *packet = p->packet;
338
@@ -XXX,XX +XXX,XX @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
339
p->name = NULL;
340
g_clear_pointer(&p->data, multifd_send_data_free);
341
p->packet_len = 0;
342
+ g_clear_pointer(&p->packet_device_state, g_free);
343
g_free(p->packet);
344
p->packet = NULL;
345
multifd_send_state->ops->send_cleanup(p, errp);
346
@@ -XXX,XX +XXX,XX @@ static void multifd_send_cleanup_state(void)
347
{
348
file_cleanup_outgoing_migration();
349
socket_cleanup_outgoing_migration();
350
+ multifd_device_state_send_cleanup();
351
qemu_sem_destroy(&multifd_send_state->channels_created);
352
qemu_sem_destroy(&multifd_send_state->channels_ready);
353
qemu_mutex_destroy(&multifd_send_state->multifd_send_mutex);
354
@@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque)
355
* qatomic_store_release() in multifd_send().
356
*/
357
if (qatomic_load_acquire(&p->pending_job)) {
358
+ bool is_device_state = multifd_payload_device_state(p->data);
359
+ size_t total_size;
360
+
361
p->flags = 0;
362
p->iovs_num = 0;
363
assert(!multifd_payload_empty(p->data));
364
365
- ret = multifd_send_state->ops->send_prepare(p, &local_err);
366
- if (ret != 0) {
367
- break;
368
+ if (is_device_state) {
369
+ multifd_device_state_send_prepare(p);
370
+ } else {
371
+ ret = multifd_send_state->ops->send_prepare(p, &local_err);
372
+ if (ret != 0) {
373
+ break;
374
+ }
375
}
376
377
+ /*
378
+ * The packet header in the zerocopy RAM case is accounted for
379
+ * in multifd_nocomp_send_prepare() - where it is actually
380
+ * being sent.
381
+ */
382
+ total_size = iov_size(p->iov, p->iovs_num);
383
+
384
if (migrate_mapped_ram()) {
385
+ assert(!is_device_state);
386
+
387
ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num,
388
&p->data->u.ram, &local_err);
389
} else {
390
@@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque)
391
break;
392
}
393
394
- stat64_add(&mig_stats.multifd_bytes,
395
- (uint64_t)p->next_packet_size + p->packet_len);
396
+ stat64_add(&mig_stats.multifd_bytes, total_size);
397
398
p->next_packet_size = 0;
399
multifd_send_data_clear(p->data);
400
@@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void)
401
p->packet_len = sizeof(MultiFDPacket_t)
402
+ sizeof(uint64_t) * page_count;
403
p->packet = g_malloc0(p->packet_len);
404
+ p->packet_device_state = g_malloc0(sizeof(*p->packet_device_state));
405
+ p->packet_device_state->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
406
+ p->packet_device_state->hdr.version = cpu_to_be32(MULTIFD_VERSION);
407
}
408
p->name = g_strdup_printf(MIGRATION_THREAD_SRC_MULTIFD, i);
409
p->write_flags = 0;
410
@@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void)
411
assert(p->iov);
412
}
413
414
+ multifd_device_state_send_setup();
415
+
416
return true;
417
418
err:
419
diff --git a/migration/meson.build b/migration/meson.build
420
index XXXXXXX..XXXXXXX 100644
421
--- a/migration/meson.build
422
+++ b/migration/meson.build
423
@@ -XXX,XX +XXX,XX @@ system_ss.add(files(
424
'migration-hmp-cmds.c',
425
'migration.c',
426
'multifd.c',
427
+ 'multifd-device-state.c',
428
'multifd-nocomp.c',
429
'multifd-zlib.c',
430
'multifd-zero-page.c',
431
--
272
--
432
2.48.1
273
2.48.1
433
274
434
275
1
From: Tomita Moeko <tomitamoeko@gmail.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
As suggested by Cédric, I'm glad to be a maintainer of vfio-igd.
3
Though GTT Stolen Memory (GSM) is right below Data Stolen Memory (DSM)
4
in host address space, direct access to GSM is prohibited, and it is
5
not mapped to the guest address space. Both host and guest access GSM
indirectly through the second half of MMIO BAR0 (GTTMMADR).

Guest firmware only needs to reserve a memory region for DSM and program
the BDSM register with the base address of that region, which is actually
what both SeaBIOS[1] and IgdAssignmentDxe do now.
12
[1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332
4
13
5
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
14
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
6
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
15
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
7
Reviewed-by: Cédric Le Goater <clg@redhat.com>
16
Tested-by: Alex Williamson <alex.williamson@redhat.com>
8
Link: https://lore.kernel.org/qemu-devel/20250227162741.9860-1-tomitamoeko@gmail.com
17
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
18
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-3-tomitamoeko@gmail.com
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
20
---
11
MAINTAINERS | 9 ++++++++-
21
hw/vfio/igd.c | 28 +++-------------------------
12
1 file changed, 8 insertions(+), 1 deletion(-)
22
1 file changed, 3 insertions(+), 25 deletions(-)
13
23
14
diff --git a/MAINTAINERS b/MAINTAINERS
24
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
15
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
16
--- a/MAINTAINERS
26
--- a/hw/vfio/igd.c
17
+++ b/MAINTAINERS
27
+++ b/hw/vfio/igd.c
18
@@ -XXX,XX +XXX,XX @@ M: Cédric Le Goater <clg@redhat.com>
28
@@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev)
19
S: Supported
29
20
F: hw/vfio/*
30
#define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */
21
F: include/hw/vfio/
31
#define IGD_GMCH_GEN6_GMS_MASK 0x1f
22
-F: docs/igd-assign.txt
32
-#define IGD_GMCH_GEN6_GGMS_SHIFT 8
23
F: docs/devel/migration/vfio.rst
33
-#define IGD_GMCH_GEN6_GGMS_MASK 0x3
24
F: qapi/vfio.json
34
#define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */
25
35
#define IGD_GMCH_GEN8_GMS_MASK 0xff
26
+vfio-igd
36
-#define IGD_GMCH_GEN8_GGMS_SHIFT 6
27
+M: Alex Williamson <alex.williamson@redhat.com>
37
-#define IGD_GMCH_GEN8_GGMS_MASK 0x3
28
+M: Cédric Le Goater <clg@redhat.com>
38
-
29
+M: Tomita Moeko <tomitamoeko@gmail.com>
39
-static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch)
30
+S: Supported
40
-{
31
+F: hw/vfio/igd.c
41
- uint64_t ggms;
32
+F: docs/igd-assign.txt
42
-
33
+
43
- if (gen < 8) {
34
vfio-ccw
44
- ggms = (gmch >> IGD_GMCH_GEN6_GGMS_SHIFT) & IGD_GMCH_GEN6_GGMS_MASK;
35
M: Eric Farman <farman@linux.ibm.com>
45
- } else {
36
M: Matthew Rosato <mjrosato@linux.ibm.com>
46
- ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK;
47
- if (ggms != 0) {
48
- ggms = 1ULL << ggms;
49
- }
50
- }
51
-
52
- return ggms * MiB;
53
-}
54
55
static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch)
56
{
57
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
58
g_autofree struct vfio_region_info *lpc = NULL;
59
PCIDevice *lpc_bridge;
60
int ret, gen;
61
- uint64_t ggms_size, gms_size;
62
+ uint64_t gms_size;
63
uint64_t *bdsm_size;
64
uint32_t gmch;
65
Error *err = NULL;
66
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
67
}
68
}
69
70
- ggms_size = igd_gtt_memory_size(gen, gmch);
71
gms_size = igd_stolen_memory_size(gen, gmch);
72
73
/*
74
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
75
* config offset 0x5C.
76
*/
77
bdsm_size = g_malloc(sizeof(*bdsm_size));
78
- *bdsm_size = cpu_to_le64(ggms_size + gms_size);
79
+ *bdsm_size = cpu_to_le64(gms_size);
80
fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
81
bdsm_size, sizeof(*bdsm_size));
82
83
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
84
pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0);
85
}
86
87
- trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name,
88
- (ggms_size + gms_size) / MiB);
89
+ trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
90
}
37
--
91
--
38
2.48.1
92
2.48.1
39
93
40
94
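A minimal sketch of the resulting etc/igd-bdsm-size export after the "Do not include GTT stolen size" change above: only the data stolen memory (DSM) size is reported to guest firmware, while GSM stays reachable solely through GTTMMADR. It reuses igd_stolen_memory_size() and the fw_cfg calls shown in the diff; the wrapper function itself is made up for illustration.

static void igd_expose_dsm_size(int gen, uint32_t gmch)
{
    uint64_t *bdsm_size = g_malloc(sizeof(*bdsm_size));

    /* GTT stolen memory (GSM) is deliberately no longer included */
    *bdsm_size = cpu_to_le64(igd_stolen_memory_size(gen, gmch));
    fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
                    bdsm_size, sizeof(*bdsm_size));
}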
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Implement the multifd device state transfer via additional per-device
3
Both the x-igd-opregion option and legacy mode require identical steps to
4
thread inside the save_live_complete_precopy_thread handler.
4
set up OpRegion for IGD devices. Consolidate these steps into a single
5
5
vfio_pci_igd_setup_opregion function.
6
Switch between doing the data transfer in the new handler and doing it
6
7
in the old save_state handler depending if VFIO multifd transfer is enabled
7
The function call in pci.c is wrapped with ifdef temporarily to prevent
8
or not.
8
a build error for non-x86 archs; it will be removed after we decouple it
9
9
from legacy mode.
10
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
10
11
Reviewed-by: Cédric Le Goater <clg@redhat.com>
11
Additionally, move vfio_pci_igd_opregion_init to igd.c to prevent it
12
Link: https://lore.kernel.org/qemu-devel/4d727e2e0435e0022d50004e474077632830e08d.1741124640.git.maciej.szmigiero@oracle.com
12
from being compiled in non-x86 builds.
13
[ clg: - Reordered savevm_vfio_handlers
13
14
- Updated save_live_complete_precopy* documentation ]
14
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
15
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
16
Tested-by: Alex Williamson <alex.williamson@redhat.com>
17
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
18
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-4-tomitamoeko@gmail.com
19
[ clg: Fixed spelling in vfio_pci_igd_setup_opregion() ]
15
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
Signed-off-by: Cédric Le Goater <clg@redhat.com>
16
---
21
---
17
docs/devel/migration/vfio.rst | 19 ++++-
22
hw/vfio/pci.h | 4 +-
18
hw/vfio/migration-multifd.h | 6 ++
23
hw/vfio/igd.c | 101 +++++++++++++++++++++++++++++++++++--------
19
include/hw/vfio/vfio-common.h | 6 ++
24
hw/vfio/pci-quirks.c | 50 ---------------------
20
hw/vfio/migration-multifd.c | 142 ++++++++++++++++++++++++++++++++++
25
hw/vfio/pci.c | 22 ++--------
21
hw/vfio/migration.c | 22 ++++--
26
4 files changed, 88 insertions(+), 89 deletions(-)
22
hw/vfio/trace-events | 2 +
27
23
6 files changed, 189 insertions(+), 8 deletions(-)
28
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
24
29
index XXXXXXX..XXXXXXX 100644
25
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
30
--- a/hw/vfio/pci.h
26
index XXXXXXX..XXXXXXX 100644
31
+++ b/hw/vfio/pci.h
27
--- a/docs/devel/migration/vfio.rst
32
@@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
28
+++ b/docs/devel/migration/vfio.rst
33
29
@@ -XXX,XX +XXX,XX @@ VFIO implements the device hooks for the iterative approach as follows:
34
bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
30
reassembles the multifd received data and loads it in-order into the device.
35
31
In the non-multifd mode this function is a NOP.
36
-bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
32
37
- struct vfio_region_info *info,
33
-* A ``save_state`` function to save the device config space if it is present.
38
- Error **errp);
34
+* A ``save_state`` function to save the device config space if it is present
39
+bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp);
35
+ in the non-multifd mode.
40
36
+ In the multifd mode it just emits either a dummy EOS marker.
41
void vfio_display_reset(VFIOPCIDevice *vdev);
37
42
bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
38
* A ``save_live_complete_precopy`` function that sets the VFIO device in
43
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
39
_STOP_COPY state and iteratively copies the data for the VFIO device until
44
index XXXXXXX..XXXXXXX 100644
40
the vendor driver indicates that no data remains.
45
--- a/hw/vfio/igd.c
41
+ In the multifd mode it just emits a dummy EOS marker.
46
+++ b/hw/vfio/igd.c
42
+
47
@@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev)
43
+* A ``save_live_complete_precopy_thread`` function that in the multifd mode
48
return -1;
44
+ provides a thread handler performing the multifd device state transfer.
45
+ It sets the VFIO device to _STOP_COPY state, iteratively reads the data
46
+ from the VFIO device and queues it for multifd transmission until the vendor
47
+ driver indicates that no data remains.
48
+ After that, it saves the device config space and queues it for multifd
49
+ transfer too.
50
+ In the non-multifd mode this thread is a NOP.
51
52
* A ``load_state`` function that loads the config section and the data
53
sections that are generated by the save functions above.
54
@@ -XXX,XX +XXX,XX @@ Live migration save path
55
Then the VFIO device is put in _STOP_COPY state
56
(FINISH_MIGRATE, _ACTIVE, _STOP_COPY)
57
.save_live_complete_precopy() is called for each active device
58
- For the VFIO device, iterate in .save_live_complete_precopy() until
59
+ For the VFIO device: in the non-multifd mode iterate in
60
+ .save_live_complete_precopy() until
61
pending data is 0
62
+     In the multifd mode this iteration is done in
63
+     .save_live_complete_precopy_thread() instead.
64
|
65
(POSTMIGRATE, _COMPLETED, _STOP_COPY)
66
Migration thread schedules cleanup bottom half and exits
67
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
68
index XXXXXXX..XXXXXXX 100644
69
--- a/hw/vfio/migration-multifd.h
70
+++ b/hw/vfio/migration-multifd.h
71
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
72
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
73
Error **errp);
74
75
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
76
+
77
+bool
78
+vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
79
+ Error **errp);
80
+
81
int vfio_multifd_switchover_start(VFIODevice *vbasedev);
82
83
#endif
84
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
85
index XXXXXXX..XXXXXXX 100644
86
--- a/include/hw/vfio/vfio-common.h
87
+++ b/include/hw/vfio/vfio-common.h
88
@@ -XXX,XX +XXX,XX @@ void vfio_mig_add_bytes_transferred(unsigned long val);
89
bool vfio_device_state_is_running(VFIODevice *vbasedev);
90
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
91
92
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
93
int vfio_load_device_config_state(QEMUFile *f, void *opaque);
94
95
#ifdef CONFIG_LINUX
96
@@ -XXX,XX +XXX,XX @@ struct vfio_info_cap_header *
97
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
98
struct vfio_info_cap_header *
99
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
100
+
101
+int vfio_migration_set_state(VFIODevice *vbasedev,
102
+ enum vfio_device_mig_state new_state,
103
+ enum vfio_device_mig_state recover_state,
104
+ Error **errp);
105
#endif
106
107
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
108
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
109
index XXXXXXX..XXXXXXX 100644
110
--- a/hw/vfio/migration-multifd.c
111
+++ b/hw/vfio/migration-multifd.c
112
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
113
return true;
114
}
49
}
115
50
116
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
51
+#define IGD_ASLS 0xfc /* ASL Storage Register */
52
#define IGD_GMCH 0x50 /* Graphics Control Register */
53
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
54
#define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */
55
@@ -XXX,XX +XXX,XX @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch)
56
return 0;
57
}
58
59
+/*
60
+ * The OpRegion includes the Video BIOS Table, which seems important for
61
+ * telling the driver what sort of outputs it has. Without this, the device
62
+ * may work in the guest, but we may not get output. This also requires BIOS
63
+ * support to reserve and populate a section of guest memory sufficient for
64
+ * the table and to write the base address of that memory to the ASLS register
65
+ * of the IGD device.
66
+ */
67
+static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
68
+ struct vfio_region_info *info,
69
+ Error **errp)
117
+{
70
+{
118
+ assert(vfio_multifd_transfer_enabled(vbasedev));
71
+ int ret;
72
+
73
+ vdev->igd_opregion = g_malloc0(info->size);
74
+ ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
75
+ info->size, info->offset);
76
+ if (ret != info->size) {
77
+ error_setg(errp, "failed to read IGD OpRegion");
78
+ g_free(vdev->igd_opregion);
79
+ vdev->igd_opregion = NULL;
80
+ return false;
81
+ }
119
+
82
+
120
+ /*
83
+ /*
121
+ * Emit dummy NOP data on the main migration channel since the actual
84
+ * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
122
+ * device state transfer is done via multifd channels.
85
+ * allocate 32bit reserved memory for, copy these contents into, and write
86
+ * the reserved memory base address to the device ASLS register at 0xFC.
87
+ * Alignment of this reserved region seems flexible, but using a 4k page
88
+ * alignment seems to work well. This interface assumes a single IGD
89
+ * device, which may be at VM address 00:02.0 in legacy mode or another
90
+ * address in UPT mode.
91
+ *
92
+ * NB, there may be future use cases discovered where the VM should have
93
+ * direct interaction with the host OpRegion, in which case the write to
94
+ * the ASLS register would trigger MemoryRegion setup to enable that.
123
+ */
95
+ */
124
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
96
+ fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
125
+}
97
+ vdev->igd_opregion, info->size);
126
+
98
+
127
+static bool
99
+ trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
128
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
100
+
129
+ char *idstr,
101
+ pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
130
+ uint32_t instance_id,
102
+ pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
131
+ uint32_t idx,
103
+ pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
132
+ Error **errp)
133
+{
134
+ g_autoptr(QIOChannelBuffer) bioc = NULL;
135
+ g_autoptr(QEMUFile) f = NULL;
136
+ int ret;
137
+ g_autofree VFIODeviceStatePacket *packet = NULL;
138
+ size_t packet_len;
139
+
140
+ bioc = qio_channel_buffer_new(0);
141
+ qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
142
+
143
+ f = qemu_file_new_output(QIO_CHANNEL(bioc));
144
+
145
+ if (vfio_save_device_config_state(f, vbasedev, errp)) {
146
+ return false;
147
+ }
148
+
149
+ ret = qemu_fflush(f);
150
+ if (ret) {
151
+ error_setg(errp, "%s: save config state flush failed: %d",
152
+ vbasedev->name, ret);
153
+ return false;
154
+ }
155
+
156
+ packet_len = sizeof(*packet) + bioc->usage;
157
+ packet = g_malloc0(packet_len);
158
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
159
+ packet->idx = idx;
160
+ packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
161
+ memcpy(&packet->data, bioc->data, bioc->usage);
162
+
163
+ if (!multifd_queue_device_state(idstr, instance_id,
164
+ (char *)packet, packet_len)) {
165
+ error_setg(errp, "%s: multifd config data queuing failed",
166
+ vbasedev->name);
167
+ return false;
168
+ }
169
+
170
+ vfio_mig_add_bytes_transferred(packet_len);
171
+
104
+
172
+ return true;
105
+ return true;
173
+}
106
+}
174
+
107
+
175
+/*
108
+bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
176
+ * This thread is spawned by the migration core directly via
177
+ * .save_live_complete_precopy_thread SaveVMHandler.
178
+ *
179
+ * It exits after either:
180
+ * * completing saving the remaining device state and device config, OR:
181
+ * * encountering some error while doing the above, OR:
182
+ * * being forcefully aborted by the migration core by
183
+ * multifd_device_state_save_thread_should_exit() returning true.
184
+ */
185
+bool
186
+vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
187
+ Error **errp)
188
+{
109
+{
189
+ VFIODevice *vbasedev = d->handler_opaque;
110
+ g_autofree struct vfio_region_info *opregion = NULL;
190
+ VFIOMigration *migration = vbasedev->migration;
111
+ int ret;
191
+ bool ret = false;
112
+
192
+ g_autofree VFIODeviceStatePacket *packet = NULL;
113
+ /* Hotplugging is not supported for opregion access */
193
+ uint32_t idx;
114
+ if (vdev->pdev.qdev.hotplugged) {
194
+
115
+ error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
195
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
116
+ return false;
196
+ /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
117
+ }
197
+ return true;
118
+
198
+ }
119
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
199
+
120
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
200
+ trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
121
+ VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
201
+ d->idstr, d->instance_id);
122
+ if (ret) {
202
+
123
+ error_setg_errno(errp, -ret,
203
+ /* We reach here with device state STOP or STOP_COPY only */
124
+ "Device does not supports IGD OpRegion feature");
204
+ if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
125
+ return false;
205
+ VFIO_DEVICE_STATE_STOP, errp)) {
126
+ }
206
+ goto thread_exit;
127
+
207
+ }
128
+ if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
208
+
129
+ return false;
209
+ packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
130
+ }
210
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
131
+
211
+
132
+ return true;
212
+ for (idx = 0; ; idx++) {
213
+ ssize_t data_size;
214
+ size_t packet_size;
215
+
216
+ if (multifd_device_state_save_thread_should_exit()) {
217
+ error_setg(errp, "operation cancelled");
218
+ goto thread_exit;
219
+ }
220
+
221
+ data_size = read(migration->data_fd, &packet->data,
222
+ migration->data_buffer_size);
223
+ if (data_size < 0) {
224
+ error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
225
+ vbasedev->name, idx, errno);
226
+ goto thread_exit;
227
+ } else if (data_size == 0) {
228
+ break;
229
+ }
230
+
231
+ packet->idx = idx;
232
+ packet_size = sizeof(*packet) + data_size;
233
+
234
+ if (!multifd_queue_device_state(d->idstr, d->instance_id,
235
+ (char *)packet, packet_size)) {
236
+ error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
237
+ goto thread_exit;
238
+ }
239
+
240
+ vfio_mig_add_bytes_transferred(packet_size);
241
+ }
242
+
243
+ if (!vfio_save_complete_precopy_thread_config_state(vbasedev,
244
+ d->idstr,
245
+ d->instance_id,
246
+ idx, errp)) {
247
+ goto thread_exit;
248
+ }
249
+
250
+ ret = true;
251
+
252
+thread_exit:
253
+ trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
254
+
255
+ return ret;
256
+}
133
+}
257
+
134
+
258
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
135
/*
136
* The rather short list of registers that we copy from the host devices.
137
* The LPC/ISA bridge values are definitely needed to support the vBIOS, the
138
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
139
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
259
{
140
{
260
VFIOMigration *migration = vbasedev->migration;
141
g_autofree struct vfio_region_info *rom = NULL;
261
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
142
- g_autofree struct vfio_region_info *opregion = NULL;
262
index XXXXXXX..XXXXXXX 100644
143
g_autofree struct vfio_region_info *host = NULL;
263
--- a/hw/vfio/migration.c
144
g_autofree struct vfio_region_info *lpc = NULL;
264
+++ b/hw/vfio/migration.c
145
PCIDevice *lpc_bridge;
265
@@ -XXX,XX +XXX,XX @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
146
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
266
vfio_migration_send_event(vbasedev);
147
* Check whether we have all the vfio device specific regions to
148
* support legacy mode (added in Linux v4.6). If not, bail.
149
*/
150
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
151
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
152
- VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
153
- if (ret) {
154
- error_report("IGD device %s does not support OpRegion access,"
155
- "legacy mode disabled", vdev->vbasedev.name);
156
- return;
157
- }
158
-
159
ret = vfio_get_dev_region_info(&vdev->vbasedev,
160
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
161
VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
162
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
163
return;
164
}
165
166
+ /* Setup OpRegion access */
167
+ if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
168
+ error_append_hint(&err, "IGD legacy mode disabled\n");
169
+ error_report_err(err);
170
+ return;
171
+ }
172
+
173
/* Create our LPC/ISA bridge */
174
ret = vfio_pci_igd_lpc_init(vdev, lpc);
175
if (ret) {
176
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
177
return;
178
}
179
180
- /* Setup OpRegion access */
181
- if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) {
182
- error_append_hint(&err, "IGD legacy mode disabled\n");
183
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
184
- return;
185
- }
186
-
187
/*
188
* Allow user to override dsm size using x-igd-gms option, in multiples of
189
* 32MiB. This option should only be used when the desired size cannot be
190
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
191
index XXXXXXX..XXXXXXX 100644
192
--- a/hw/vfio/pci-quirks.c
193
+++ b/hw/vfio/pci-quirks.c
194
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
195
trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
267
}
196
}
268
197
269
-static int vfio_migration_set_state(VFIODevice *vbasedev,
198
-#define IGD_ASLS 0xfc /* ASL Storage Register */
270
- enum vfio_device_mig_state new_state,
199
-
271
- enum vfio_device_mig_state recover_state,
200
-/*
272
- Error **errp)
201
- * The OpRegion includes the Video BIOS Table, which seems important for
273
+int vfio_migration_set_state(VFIODevice *vbasedev,
202
- * telling the driver what sort of outputs it has. Without this, the device
274
+ enum vfio_device_mig_state new_state,
203
- * may work in the guest, but we may not get output. This also requires BIOS
275
+ enum vfio_device_mig_state recover_state,
204
- * support to reserve and populate a section of guest memory sufficient for
276
+ Error **errp)
205
- * the table and to write the base address of that memory to the ASLS register
277
{
206
- * of the IGD device.
278
VFIOMigration *migration = vbasedev->migration;
207
- */
279
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
208
-bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
280
@@ -XXX,XX +XXX,XX @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
209
- struct vfio_region_info *info, Error **errp)
281
return ret;
210
-{
282
}
211
- int ret;
283
212
-
284
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
213
- vdev->igd_opregion = g_malloc0(info->size);
285
- Error **errp)
214
- ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
286
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
215
- info->size, info->offset);
287
{
216
- if (ret != info->size) {
288
VFIODevice *vbasedev = opaque;
217
- error_setg(errp, "failed to read IGD OpRegion");
289
int ret;
218
- g_free(vdev->igd_opregion);
290
@@ -XXX,XX +XXX,XX @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
219
- vdev->igd_opregion = NULL;
291
int ret;
220
- return false;
292
Error *local_err = NULL;
221
- }
293
222
-
294
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
223
- /*
295
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
224
- * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
296
+ return 0;
225
- * allocate 32bit reserved memory for, copy these contents into, and write
297
+ }
226
- * the reserved memory base address to the device ASLS register at 0xFC.
298
+
227
- * Alignment of this reserved region seems flexible, but using a 4k page
299
trace_vfio_save_complete_precopy_start(vbasedev->name);
228
- * alignment seems to work well. This interface assumes a single IGD
300
229
- * device, which may be at VM address 00:02.0 in legacy mode or another
301
/* We reach here with device state STOP or STOP_COPY only */
230
- * address in UPT mode.
302
@@ -XXX,XX +XXX,XX @@ static void vfio_save_state(QEMUFile *f, void *opaque)
231
- *
303
Error *local_err = NULL;
232
- * NB, there may be future use cases discovered where the VM should have
304
int ret;
233
- * direct interaction with the host OpRegion, in which case the write to
305
234
- * the ASLS register would trigger MemoryRegion setup to enable that.
306
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
235
- */
307
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
236
- fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
308
+ return;
237
- vdev->igd_opregion, info->size);
309
+ }
238
-
310
+
239
- trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
311
ret = vfio_save_device_config_state(f, opaque, &local_err);
240
-
312
if (ret) {
241
- pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
313
error_prepend(&local_err,
242
- pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
314
@@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = {
243
- pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
315
*/
244
-
316
.load_state_buffer = vfio_multifd_load_state_buffer,
245
- return true;
317
.switchover_start = vfio_switchover_start,
246
-}
318
+ .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
247
-
319
};
248
/*
320
249
* Common quirk probe entry points.
321
/* ---------------------------------------------------------------------- */
250
*/
322
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
251
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
323
index XXXXXXX..XXXXXXX 100644
252
index XXXXXXX..XXXXXXX 100644
324
--- a/hw/vfio/trace-events
253
--- a/hw/vfio/pci.c
325
+++ b/hw/vfio/trace-events
254
+++ b/hw/vfio/pci.c
326
@@ -XXX,XX +XXX,XX @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
255
@@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
327
vfio_save_cleanup(const char *name) " (%s)"
256
vfio_bar_quirk_setup(vdev, i);
328
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
257
}
329
vfio_save_complete_precopy_start(const char *name) " (%s)"
258
330
+vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
259
+#ifdef CONFIG_VFIO_IGD
331
+vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
260
if (!vdev->igd_opregion &&
332
vfio_save_device_config_state(const char *name) " (%s)"
261
vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
333
vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
262
- g_autofree struct vfio_region_info *opregion = NULL;
334
vfio_save_iterate_start(const char *name) " (%s)"
263
-
264
- if (vdev->pdev.qdev.hotplugged) {
265
- error_setg(errp,
266
- "cannot support IGD OpRegion feature on hotplugged "
267
- "device");
268
- goto out_unset_idev;
269
- }
270
-
271
- ret = vfio_get_dev_region_info(vbasedev,
272
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
273
- VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
274
- if (ret) {
275
- error_setg_errno(errp, -ret,
276
- "does not support requested IGD OpRegion feature");
277
- goto out_unset_idev;
278
- }
279
-
280
- if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
281
+ if (!vfio_pci_igd_setup_opregion(vdev, errp)) {
282
goto out_unset_idev;
283
}
284
}
285
+#endif
286
287
/* QEMU emulates all of MSI & MSIX */
288
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
335
--
289
--
336
2.48.1
290
2.48.1
337
291
338
292
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
The multifd received data needs to be reassembled since device state
3
A new option will soon be introduced to decouple the LPC bridge/Host
4
packets sent via different multifd channels can arrive out-of-order.
4
bridge ID quirk from legacy mode. To prepare for this, move the LPC
5
bridge initialization into a separate function.
5
6
6
Therefore, each VFIO device state packet carries a header indicating its
7
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
7
position in the stream.
8
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
8
The raw device state data is saved into a VFIOStateBuffer for later
9
Tested-by: Alex Williamson <alex.williamson@redhat.com>
9
in-order loading into the device.
10
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
10
11
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-5-tomitamoeko@gmail.com
11
The last such VFIO device state packet should have
12
VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state.
13
14
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
15
Reviewed-by: Cédric Le Goater <clg@redhat.com>
16
Link: https://lore.kernel.org/qemu-devel/e3bff515a8d61c582b94b409eb12a45b1a143a69.1741124640.git.maciej.szmigiero@oracle.com
17
[ clg: - Reordered savevm_vfio_handlers
18
- Added load_state_buffer documentation ]
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
---
13
---
21
docs/devel/migration/vfio.rst | 7 ++
14
hw/vfio/igd.c | 122 +++++++++++++++++++++++++++++---------------------
22
hw/vfio/migration-multifd.h | 3 +
15
1 file changed, 70 insertions(+), 52 deletions(-)
23
hw/vfio/migration-multifd.c | 163 ++++++++++++++++++++++++++++++++++
24
hw/vfio/migration.c | 4 +
25
hw/vfio/trace-events | 1 +
26
5 files changed, 178 insertions(+)
27
16
28
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
17
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
29
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
30
--- a/docs/devel/migration/vfio.rst
19
--- a/hw/vfio/igd.c
31
+++ b/docs/devel/migration/vfio.rst
20
+++ b/hw/vfio/igd.c
32
@@ -XXX,XX +XXX,XX @@ VFIO implements the device hooks for the iterative approach as follows:
21
@@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
33
* A ``load_state`` function that loads the config section and the data
22
return ret;
34
sections that are generated by the save functions above.
23
}
35
24
36
+* A ``load_state_buffer`` function that loads the device state and the device
25
+static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
37
+ config that arrived via multifd channels.
26
+{
38
+ It's used only in the multifd mode.
27
+ g_autofree struct vfio_region_info *host = NULL;
28
+ g_autofree struct vfio_region_info *lpc = NULL;
29
+ PCIDevice *lpc_bridge;
30
+ int ret;
39
+
31
+
40
* ``cleanup`` functions for both save and load that perform any migration
32
+ /*
41
related cleanup.
33
+ * Copying IDs or creating new devices are not supported on hotplug
42
34
+ */
43
@@ -XXX,XX +XXX,XX @@ Live migration resume path
35
+ if (vdev->pdev.qdev.hotplugged) {
44
(RESTORE_VM, _ACTIVE, _STOP)
36
+ error_setg(errp, "IGD LPC is not supported on hotplugged device");
45
|
46
For each device, .load_state() is called for that device section data
47
+ transmitted via the main migration channel.
48
+ For data transmitted via multifd channels .load_state_buffer() is called
49
+ instead.
50
(RESTORE_VM, _ACTIVE, _RESUMING)
51
|
52
At the end, .load_cleanup() is called for each device and vCPUs are started
53
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
54
index XXXXXXX..XXXXXXX 100644
55
--- a/hw/vfio/migration-multifd.h
56
+++ b/hw/vfio/migration-multifd.h
57
@@ -XXX,XX +XXX,XX @@ void vfio_multifd_cleanup(VFIODevice *vbasedev);
58
bool vfio_multifd_transfer_supported(void);
59
bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
60
61
+bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
62
+ Error **errp);
63
+
64
#endif
65
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/hw/vfio/migration-multifd.c
68
+++ b/hw/vfio/migration-multifd.c
69
@@ -XXX,XX +XXX,XX @@ typedef struct VFIODeviceStatePacket {
70
uint8_t data[0];
71
} QEMU_PACKED VFIODeviceStatePacket;
72
73
+/* type safety */
74
+typedef struct VFIOStateBuffers {
75
+ GArray *array;
76
+} VFIOStateBuffers;
77
+
78
+typedef struct VFIOStateBuffer {
79
+ bool is_present;
80
+ char *data;
81
+ size_t len;
82
+} VFIOStateBuffer;
83
+
84
typedef struct VFIOMultifd {
85
+ VFIOStateBuffers load_bufs;
86
+ QemuCond load_bufs_buffer_ready_cond;
87
+ QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
88
+ uint32_t load_buf_idx;
89
+ uint32_t load_buf_idx_last;
90
} VFIOMultifd;
91
92
+static void vfio_state_buffer_clear(gpointer data)
93
+{
94
+ VFIOStateBuffer *lb = data;
95
+
96
+ if (!lb->is_present) {
97
+ return;
98
+ }
99
+
100
+ g_clear_pointer(&lb->data, g_free);
101
+ lb->is_present = false;
102
+}
103
+
104
+static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
105
+{
106
+ bufs->array = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));
107
+ g_array_set_clear_func(bufs->array, vfio_state_buffer_clear);
108
+}
109
+
110
+static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
111
+{
112
+ g_clear_pointer(&bufs->array, g_array_unref);
113
+}
114
+
115
+static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
116
+{
117
+ assert(bufs->array);
118
+}
119
+
120
+static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
121
+{
122
+ return bufs->array->len;
123
+}
124
+
125
+static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
126
+ unsigned int size)
127
+{
128
+ g_array_set_size(bufs->array, size);
129
+}
130
+
131
+static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
132
+ unsigned int idx)
133
+{
134
+ return &g_array_index(bufs->array, VFIOStateBuffer, idx);
135
+}
136
+
137
+/* called with load_bufs_mutex locked */
138
+static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
139
+ VFIODeviceStatePacket *packet,
140
+ size_t packet_total_size,
141
+ Error **errp)
142
+{
143
+ VFIOMigration *migration = vbasedev->migration;
144
+ VFIOMultifd *multifd = migration->multifd;
145
+ VFIOStateBuffer *lb;
146
+
147
+ vfio_state_buffers_assert_init(&multifd->load_bufs);
148
+ if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
149
+ vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
150
+ }
151
+
152
+ lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
153
+ if (lb->is_present) {
154
+ error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
155
+ vbasedev->name, packet->idx);
156
+ return false;
37
+ return false;
157
+ }
38
+ }
158
+
39
+
159
+ assert(packet->idx >= multifd->load_buf_idx);
40
+ /*
160
+
41
+ * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
161
+ lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
42
+ * can stuff host values into, so if there's already one there and it's not
162
+ lb->len = packet_total_size - sizeof(*packet);
43
+ * one we can hack on, this quirk is no-go. Sorry Q35.
163
+ lb->is_present = true;
44
+ */
164
+
45
+ lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
165
+ return true;
46
+ 0, PCI_DEVFN(0x1f, 0));
166
+}
47
+ if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
167
+
48
+ "vfio-pci-igd-lpc-bridge")) {
168
+bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
169
+ Error **errp)
170
+{
171
+ VFIODevice *vbasedev = opaque;
172
+ VFIOMigration *migration = vbasedev->migration;
173
+ VFIOMultifd *multifd = migration->multifd;
174
+ VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
175
+
176
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
177
+ error_setg(errp,
49
+ error_setg(errp,
178
+ "%s: got device state packet but not doing multifd transfer",
50
+ "Cannot create LPC bridge due to existing device at 1f.0");
179
+ vbasedev->name);
180
+ return false;
51
+ return false;
181
+ }
52
+ }
182
+
53
+
183
+ assert(multifd);
54
+ /*
184
+
55
+ * Check whether we have all the vfio device specific regions to
185
+ if (data_size < sizeof(*packet)) {
56
+ * support LPC quirk (added in Linux v4.6).
186
+ error_setg(errp, "%s: packet too short at %zu (min is %zu)",
57
+ */
187
+ vbasedev->name, data_size, sizeof(*packet));
58
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
59
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
60
+ VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
61
+ if (ret) {
62
+ error_setg(errp, "IGD LPC bridge access is not supported by kernel");
188
+ return false;
63
+ return false;
189
+ }
64
+ }
190
+
65
+
191
+ if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
66
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
192
+ error_setg(errp, "%s: packet has unknown version %" PRIu32,
67
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
193
+ vbasedev->name, packet->version);
68
+ VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
69
+ if (ret) {
70
+ error_setg(errp, "IGD host bridge access is not supported by kernel");
194
+ return false;
71
+ return false;
195
+ }
72
+ }
196
+
73
+
197
+ if (packet->idx == UINT32_MAX) {
74
+ /* Create/modify LPC bridge */
198
+ error_setg(errp, "%s: packet index is invalid", vbasedev->name);
75
+ ret = vfio_pci_igd_lpc_init(vdev, lpc);
76
+ if (ret) {
77
+ error_setg(errp, "Failed to create/modify LPC bridge for IGD");
199
+ return false;
78
+ return false;
200
+ }
79
+ }
201
+
80
+
202
+ trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);
81
+ /* Stuff some host values into the VM PCI host bridge */
203
+
82
+ ret = vfio_pci_igd_host_init(vdev, host);
204
+ /*
83
+ if (ret) {
205
+ * Holding BQL here would violate the lock order and can cause
84
+ error_setg(errp, "Failed to modify host bridge for IGD");
206
+ * a deadlock once we attempt to lock load_bufs_mutex below.
85
+ return false;
207
+ */
208
+ assert(!bql_locked());
209
+
210
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
211
+ /* config state packet should be the last one in the stream */
212
+ if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
213
+ multifd->load_buf_idx_last = packet->idx;
214
+ }
215
+
216
+ if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
217
+ errp)) {
218
+ return false;
219
+ }
220
+
221
+ qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
222
+ }
86
+ }
223
+
87
+
224
+ return true;
88
+ return true;
225
+}
89
+}
226
+
90
+
227
static VFIOMultifd *vfio_multifd_new(void)
91
#define IGD_GGC_MMIO_OFFSET 0x108040
92
#define IGD_BDSM_MMIO_OFFSET 0x1080C0
93
94
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
95
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
228
{
96
{
229
VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
97
g_autofree struct vfio_region_info *rom = NULL;
230
98
- g_autofree struct vfio_region_info *host = NULL;
231
+ vfio_state_buffers_init(&multifd->load_bufs);
99
- g_autofree struct vfio_region_info *lpc = NULL;
232
+
100
- PCIDevice *lpc_bridge;
233
+ qemu_mutex_init(&multifd->load_bufs_mutex);
101
int ret, gen;
234
+
102
uint64_t gms_size;
235
+ multifd->load_buf_idx = 0;
103
uint64_t *bdsm_size;
236
+ multifd->load_buf_idx_last = UINT32_MAX;
104
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
237
+ qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
105
return;
238
+
106
}
239
return multifd;
107
240
}
108
- /*
241
109
- * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
242
static void vfio_multifd_free(VFIOMultifd *multifd)
110
- * can stuff host values into, so if there's already one there and it's not
243
{
111
- * one we can hack on, legacy mode is no-go. Sorry Q35.
244
+ vfio_state_buffers_destroy(&multifd->load_bufs);
112
- */
245
+ qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
113
- lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
246
+ qemu_mutex_destroy(&multifd->load_bufs_mutex);
114
- 0, PCI_DEVFN(0x1f, 0));
247
+
115
- if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
248
g_free(multifd);
116
- "vfio-pci-igd-lpc-bridge")) {
249
}
117
- error_report("IGD device %s cannot support legacy mode due to existing "
250
118
- "devices at address 1f.0", vdev->vbasedev.name);
251
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
119
- return;
252
index XXXXXXX..XXXXXXX 100644
120
- }
253
--- a/hw/vfio/migration.c
121
-
254
+++ b/hw/vfio/migration.c
122
/*
255
@@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = {
123
* IGD is not a standard, they like to change their specs often. We
256
.load_cleanup = vfio_load_cleanup,
124
* only attempt to support back to SandBridge and we hope that newer
257
.load_state = vfio_load_state,
125
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
258
.switchover_ack_needed = vfio_switchover_ack_needed,
126
return;
259
+ /*
127
}
260
+ * Multifd support
128
261
+ */
129
- /*
262
+ .load_state_buffer = vfio_multifd_load_state_buffer,
130
- * Check whether we have all the vfio device specific regions to
263
};
131
- * support legacy mode (added in Linux v4.6). If not, bail.
264
132
- */
265
/* ---------------------------------------------------------------------- */
133
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
266
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
134
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
267
index XXXXXXX..XXXXXXX 100644
135
- VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
268
--- a/hw/vfio/trace-events
136
- if (ret) {
269
+++ b/hw/vfio/trace-events
137
- error_report("IGD device %s does not support host bridge access,"
270
@@ -XXX,XX +XXX,XX @@ vfio_load_device_config_state_start(const char *name) " (%s)"
138
- "legacy mode disabled", vdev->vbasedev.name);
271
vfio_load_device_config_state_end(const char *name) " (%s)"
139
- return;
272
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
140
- }
273
vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
141
-
274
+vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32
142
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
275
vfio_migration_realize(const char *name) " (%s)"
143
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
276
vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
144
- VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
277
vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
145
- if (ret) {
146
- error_report("IGD device %s does not support LPC bridge access,"
147
- "legacy mode disabled", vdev->vbasedev.name);
148
- return;
149
- }
150
-
151
gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
152
153
/*
154
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
155
return;
156
}
157
158
- /* Create our LPC/ISA bridge */
159
- ret = vfio_pci_igd_lpc_init(vdev, lpc);
160
- if (ret) {
161
- error_report("IGD device %s failed to create LPC bridge, "
162
- "legacy mode disabled", vdev->vbasedev.name);
163
- return;
164
- }
165
-
166
- /* Stuff some host values into the VM PCI host bridge */
167
- ret = vfio_pci_igd_host_init(vdev, host);
168
- if (ret) {
169
- error_report("IGD device %s failed to modify host bridge, "
170
- "legacy mode disabled", vdev->vbasedev.name);
171
+ /* Setup LPC bridge / Host bridge PCI IDs */
172
+ if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
173
+ error_append_hint(&err, "IGD legacy mode disabled\n");
174
+ error_report_err(err);
175
return;
176
}
177
278
--
178
--
279
2.48.1
179
2.48.1
280
180
281
181
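The code above only parks incoming buffers; a consumer on the load side then has to drain them strictly in idx order, waiting whenever the next expected buffer has not arrived yet. A rough sketch of that ordering logic, reusing the names introduced by this patch (this is illustrative, not the code the patch adds):

/* Called with load_bufs_mutex held; returns the next in-order buffer. */
static VFIOStateBuffer *vfio_next_ready_buffer(VFIOMultifd *multifd)
{
    while (true) {
        if (multifd->load_buf_idx <
            vfio_state_buffers_size_get(&multifd->load_bufs)) {
            VFIOStateBuffer *lb =
                vfio_state_buffers_at(&multifd->load_bufs,
                                      multifd->load_buf_idx);
            if (lb->is_present) {
                return lb;
            }
        }
        /* Wait for vfio_multifd_load_state_buffer() to signal a new buffer. */
        qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
                       &multifd->load_bufs_mutex);
    }
}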
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
A DEFINE_PROP_ON_OFF_AUTO() property isn't runtime-mutable, so using it
3
IGD devices require a device-specific quirk to be applied to their PCI
4
would mean that the source VM would need to decide upfront at startup
4
config space. Currently, it is put in the BAR4 quirk that does nothing
5
time whether it wants to do a multifd device state transfer at some
5
to BAR4 itself. Add a placeholder for PCI config space quirks to hold
6
point.
6
that quirk later.
7
7
8
The source VM can run for a long time before being migrated, so it is
8
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
9
desirable to have a fallback mechanism to the old way of transferring
9
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
10
VFIO device state if that turns out to be necessary.
10
Tested-by: Alex Williamson <alex.williamson@redhat.com>
11
11
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
12
This brings this property to the same mutability level as ordinary
12
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-6-tomitamoeko@gmail.com
13
migration parameters, which can also be adjusted at run time.
14
15
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
16
Reviewed-by: Cédric Le Goater <clg@redhat.com>
17
Link: https://lore.kernel.org/qemu-devel/f2f2d66bda477da3e6cb8c0311006cff36e8651d.1741124640.git.maciej.szmigiero@oracle.com
18
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
---
14
---
20
hw/vfio/migration-multifd.c | 4 ++++
15
hw/vfio/pci.h | 1 +
21
hw/vfio/pci.c | 20 +++++++++++++++++---
16
hw/vfio/pci-quirks.c | 5 +++++
22
2 files changed, 21 insertions(+), 3 deletions(-)
17
hw/vfio/pci.c | 4 ++++
18
3 files changed, 10 insertions(+)
23
19
24
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
20
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
25
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/vfio/migration-multifd.c
22
--- a/hw/vfio/pci.h
27
+++ b/hw/vfio/migration-multifd.c
23
+++ b/hw/vfio/pci.h
28
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
24
@@ -XXX,XX +XXX,XX @@ uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
25
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
26
27
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
28
+bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp);
29
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
30
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
31
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
32
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/vfio/pci-quirks.c
35
+++ b/hw/vfio/pci-quirks.c
36
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
37
/*
38
* Common quirk probe entry points.
39
*/
40
+bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp)
41
+{
42
+ return true;
43
+}
44
+
45
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
29
{
46
{
30
VFIOMigration *migration = vbasedev->migration;
47
vfio_vga_probe_ati_3c3_quirk(vdev);
31
32
+ /*
33
+ * Make a copy of this setting at the start in case it is changed
34
+ * mid-migration.
35
+ */
36
if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
37
migration->multifd_transfer = vfio_multifd_transfer_supported();
38
} else {
39
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
48
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
40
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
41
--- a/hw/vfio/pci.c
50
--- a/hw/vfio/pci.c
42
+++ b/hw/vfio/pci.c
51
+++ b/hw/vfio/pci.c
43
@@ -XXX,XX +XXX,XX @@ static void vfio_instance_init(Object *obj)
52
@@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
44
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
53
goto out_unset_idev;
45
}
54
}
46
55
47
+static PropertyInfo vfio_pci_migration_multifd_transfer_prop;
56
+ if (!vfio_config_quirk_setup(vdev, errp)) {
57
+ goto out_unset_idev;
58
+ }
48
+
59
+
49
static const Property vfio_pci_dev_properties[] = {
60
if (vdev->vga) {
50
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
61
vfio_vga_quirk_setup(vdev);
51
DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
62
}
52
@@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = {
53
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
54
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
55
vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
56
- DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice,
57
- vbasedev.migration_multifd_transfer,
58
- ON_OFF_AUTO_AUTO),
59
+ DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
60
+ vbasedev.migration_multifd_transfer,
61
+ vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
62
+ .set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
63
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
64
vbasedev.migration_events, false),
65
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
66
@@ -XXX,XX +XXX,XX @@ static const TypeInfo vfio_pci_nohotplug_dev_info = {
67
68
static void register_vfio_pci_dev_type(void)
69
{
70
+ /*
71
+ * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can
72
+ * run for a long time before being migrated so it is desirable to have a
73
+ * fallback mechanism to the old way of transferring VFIO device state if
74
+ * it turns out to be necessary.
75
+ * The following makes this type of property have the same mutability level
76
+ * as ordinary migration parameters.
77
+ */
78
+ vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto;
79
+ vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true;
80
+
81
type_register_static(&vfio_pci_dev_info);
82
type_register_static(&vfio_pci_nohotplug_dev_info);
83
}
84
--
63
--
85
2.48.1
64
2.48.1
86
65
87
66
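Because the property remains writable after the device is realized, management software can decide right before migration whether to use the multifd transfer, instead of committing at VM startup. A hypothetical HMP example (the device id vfio0 is made up):

  (qemu) qom-set /machine/peripheral/vfio0 x-migration-multifd-transfer on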
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
This QEMU_VM_COMMAND sub-command and its switchover_start SaveVMHandler are
3
The actual IO BAR4 write quirk in vfio_probe_igd_bar4_quirk was removed
4
used to mark the switchover point in the main migration stream.
4
in a previous change, leaving the function no longer matching its name, so move
5
it into the newly introduced vfio_config_quirk_setup. There is no
6
functional change in this commit.
5
7
6
It can be used to inform the destination that all pre-switchover main
8
For now, to align with current legacy mode behavior, it returns true and
7
migration stream data has been sent/received so it can start to process
9
proceeds on error. Later it will fail on error after decoupling the
8
post-switchover data that it might have received via other migration
10
quirks from legacy mode.
9
channels like the multifd ones.
10
11
11
Also add the relevant MigrationState bit stream compatibility property and
12
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
12
its hw_compat entry.
13
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
13
14
Tested-by: Alex Williamson <alex.williamson@redhat.com>
14
Reviewed-by: Fabiano Rosas <farosas@suse.de>
15
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
15
Reviewed-by: Zhang Chen <zhangckid@gmail.com> # for the COLO part
16
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-7-tomitamoeko@gmail.com
16
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
17
Link: https://lore.kernel.org/qemu-devel/311be6da85fc7e49a7598684d80aa631778dcbce.1741124640.git.maciej.szmigiero@oracle.com
18
Signed-off-by: Cédric Le Goater <clg@redhat.com>
17
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
---
18
---
20
include/migration/client-options.h | 4 +++
19
hw/vfio/pci.h | 2 +-
21
include/migration/register.h | 12 +++++++++
20
hw/vfio/igd.c | 21 ++++++++++++---------
22
migration/migration.h | 2 ++
21
hw/vfio/pci-quirks.c | 6 +++++-
23
migration/savevm.h | 1 +
22
3 files changed, 18 insertions(+), 11 deletions(-)
24
hw/core/machine.c | 1 +
25
migration/colo.c | 3 +++
26
migration/migration-hmp-cmds.c | 2 ++
27
migration/migration.c | 2 ++
28
migration/options.c | 9 +++++++
29
migration/savevm.c | 39 ++++++++++++++++++++++++++++++
30
migration/trace-events | 1 +
31
scripts/analyze-migration.py | 11 +++++++++
32
12 files changed, 87 insertions(+)
33
23
34
diff --git a/include/migration/client-options.h b/include/migration/client-options.h
24
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
35
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
36
--- a/include/migration/client-options.h
26
--- a/hw/vfio/pci.h
37
+++ b/include/migration/client-options.h
27
+++ b/hw/vfio/pci.h
38
@@ -XXX,XX +XXX,XX @@
28
@@ -XXX,XX +XXX,XX @@ bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
39
#ifndef QEMU_MIGRATION_CLIENT_OPTIONS_H
29
void vfio_quirk_reset(VFIOPCIDevice *vdev);
40
#define QEMU_MIGRATION_CLIENT_OPTIONS_H
30
VFIOQuirk *vfio_quirk_alloc(int nr_mem);
41
31
void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
32
-void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
33
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp);
34
35
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
36
37
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/vfio/igd.c
40
+++ b/hw/vfio/igd.c
41
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
42
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
43
}
44
45
-void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
46
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
47
+ Error **errp G_GNUC_UNUSED)
48
{
49
g_autofree struct vfio_region_info *rom = NULL;
50
int ret, gen;
51
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
52
* PCI bus address.
53
*/
54
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
55
- !vfio_is_vga(vdev) || nr != 4 ||
56
+ !vfio_is_vga(vdev) ||
57
&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
58
0, PCI_DEVFN(0x2, 0))) {
59
- return;
60
+ return true;
61
}
62
63
/*
64
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
65
if (gen == -1) {
66
error_report("IGD device %s is unsupported in legacy mode, "
67
"try SandyBridge or newer", vdev->vbasedev.name);
68
- return;
69
+ return true;
70
}
71
72
/*
73
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
74
if ((ret || !rom->size) && !vdev->pdev.romfile) {
75
error_report("IGD device %s has no ROM, legacy mode disabled",
76
vdev->vbasedev.name);
77
- return;
78
+ return true;
79
}
80
81
/*
82
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
83
error_report("IGD device %s hotplugged, ROM disabled, "
84
"legacy mode disabled", vdev->vbasedev.name);
85
vdev->rom_read_failed = true;
86
- return;
87
+ return true;
88
}
89
90
gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
91
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
92
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
93
error_report("IGD device %s failed to enable VGA access, "
94
"legacy mode disabled", vdev->vbasedev.name);
95
- return;
96
+ return true;
97
}
98
99
/* Setup OpRegion access */
100
if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
101
error_append_hint(&err, "IGD legacy mode disabled\n");
102
error_report_err(err);
103
- return;
104
+ return true;
105
}
106
107
/* Setup LPC bridge / Host bridge PCI IDs */
108
if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
109
error_append_hint(&err, "IGD legacy mode disabled\n");
110
error_report_err(err);
111
- return;
112
+ return true;
113
}
114
115
/*
116
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
117
}
118
119
trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
42
+
120
+
43
+/* properties */
121
+ return true;
44
+bool migrate_send_switchover_start(void);
122
}
45
+
123
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
46
/* capabilities */
47
48
bool migrate_background_snapshot(void);
49
diff --git a/include/migration/register.h b/include/migration/register.h
50
index XXXXXXX..XXXXXXX 100644
124
index XXXXXXX..XXXXXXX 100644
51
--- a/include/migration/register.h
125
--- a/hw/vfio/pci-quirks.c
52
+++ b/include/migration/register.h
126
+++ b/hw/vfio/pci-quirks.c
53
@@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers {
127
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
54
* otherwise
128
*/
55
*/
129
bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp)
56
bool (*switchover_ack_needed)(void *opaque);
130
{
57
+
131
+#ifdef CONFIG_VFIO_IGD
58
+ /**
132
+ if (!vfio_probe_igd_config_quirk(vdev, errp)) {
59
+ * @switchover_start
133
+ return false;
60
+ *
134
+ }
61
+ * Notifies that the switchover has started. Called only on
135
+#endif
62
+ * the destination.
63
+ *
64
+ * @opaque: data pointer passed to register_savevm_live()
65
+ *
66
+ * Returns zero to indicate success and negative for error
67
+ */
68
+ int (*switchover_start)(void *opaque);
69
} SaveVMHandlers;
70
71
/**
72
diff --git a/migration/migration.h b/migration/migration.h
73
index XXXXXXX..XXXXXXX 100644
74
--- a/migration/migration.h
75
+++ b/migration/migration.h
76
@@ -XXX,XX +XXX,XX @@ struct MigrationState {
77
bool send_configuration;
78
/* Whether we send section footer during migration */
79
bool send_section_footer;
80
+ /* Whether we send switchover start notification during migration */
81
+ bool send_switchover_start;
82
83
/* Needed by postcopy-pause state */
84
QemuSemaphore postcopy_pause_sem;
85
diff --git a/migration/savevm.h b/migration/savevm.h
86
index XXXXXXX..XXXXXXX 100644
87
--- a/migration/savevm.h
88
+++ b/migration/savevm.h
89
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_listen(QEMUFile *f);
90
void qemu_savevm_send_postcopy_run(QEMUFile *f);
91
void qemu_savevm_send_postcopy_resume(QEMUFile *f);
92
void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name);
93
+void qemu_savevm_maybe_send_switchover_start(QEMUFile *f);
94
95
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
96
uint16_t len,
97
diff --git a/hw/core/machine.c b/hw/core/machine.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/hw/core/machine.c
100
+++ b/hw/core/machine.c
101
@@ -XXX,XX +XXX,XX @@ GlobalProperty hw_compat_9_2[] = {
102
{ "virtio-balloon-pci-non-transitional", "vectors", "0" },
103
{ "virtio-mem-pci", "vectors", "0" },
104
{ "migration", "multifd-clean-tls-termination", "false" },
105
+ { "migration", "send-switchover-start", "off"},
106
};
107
const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
108
109
diff --git a/migration/colo.c b/migration/colo.c
110
index XXXXXXX..XXXXXXX 100644
111
--- a/migration/colo.c
112
+++ b/migration/colo.c
113
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
114
bql_unlock();
115
goto out;
116
}
117
+
118
+ qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
119
+
120
/* Note: device state is saved into buffer */
121
ret = qemu_save_device_state(fb);
122
123
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
124
index XXXXXXX..XXXXXXX 100644
125
--- a/migration/migration-hmp-cmds.c
126
+++ b/migration/migration-hmp-cmds.c
127
@@ -XXX,XX +XXX,XX @@ static void migration_global_dump(Monitor *mon)
128
ms->send_configuration ? "on" : "off");
129
monitor_printf(mon, "send-section-footer: %s\n",
130
ms->send_section_footer ? "on" : "off");
131
+ monitor_printf(mon, "send-switchover-start: %s\n",
132
+ ms->send_switchover_start ? "on" : "off");
133
monitor_printf(mon, "clear-bitmap-shift: %u\n",
134
ms->clear_bitmap_shift);
135
}
136
diff --git a/migration/migration.c b/migration/migration.c
137
index XXXXXXX..XXXXXXX 100644
138
--- a/migration/migration.c
139
+++ b/migration/migration.c
140
@@ -XXX,XX +XXX,XX @@ static bool migration_switchover_start(MigrationState *s, Error **errp)
141
142
precopy_notify_complete();
143
144
+ qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
145
+
146
return true;
136
return true;
147
}
137
}
148
138
149
diff --git a/migration/options.c b/migration/options.c
139
@@ -XXX,XX +XXX,XX @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
150
index XXXXXXX..XXXXXXX 100644
140
vfio_probe_rtl8168_bar2_quirk(vdev, nr);
151
--- a/migration/options.c
141
#ifdef CONFIG_VFIO_IGD
152
+++ b/migration/options.c
142
vfio_probe_igd_bar0_quirk(vdev, nr);
153
@@ -XXX,XX +XXX,XX @@ const Property migration_properties[] = {
143
- vfio_probe_igd_bar4_quirk(vdev, nr);
154
send_configuration, true),
144
#endif
155
DEFINE_PROP_BOOL("send-section-footer", MigrationState,
156
send_section_footer, true),
157
+ DEFINE_PROP_BOOL("send-switchover-start", MigrationState,
158
+ send_switchover_start, true),
159
DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState,
160
multifd_flush_after_each_section, false),
161
DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
162
@@ -XXX,XX +XXX,XX @@ bool migrate_auto_converge(void)
163
return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
164
}
145
}
165
146
166
+bool migrate_send_switchover_start(void)
167
+{
168
+ MigrationState *s = migrate_get_current();
169
+
170
+ return s->send_switchover_start;
171
+}
172
+
173
bool migrate_background_snapshot(void)
174
{
175
MigrationState *s = migrate_get_current();
176
diff --git a/migration/savevm.c b/migration/savevm.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/migration/savevm.c
179
+++ b/migration/savevm.c
180
@@ -XXX,XX +XXX,XX @@ enum qemu_vm_cmd {
181
MIG_CMD_ENABLE_COLO, /* Enable COLO */
182
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
183
MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */
184
+ MIG_CMD_SWITCHOVER_START, /* Switchover start notification */
185
MIG_CMD_MAX
186
};
187
188
@@ -XXX,XX +XXX,XX @@ static struct mig_cmd_args {
189
[MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" },
190
[MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" },
191
[MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
192
+ [MIG_CMD_SWITCHOVER_START] = { .len = 0, .name = "SWITCHOVER_START" },
193
[MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
194
};
195
196
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
197
qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
198
}
199
200
+static void qemu_savevm_send_switchover_start(QEMUFile *f)
201
+{
202
+ trace_savevm_send_switchover_start();
203
+ qemu_savevm_command_send(f, MIG_CMD_SWITCHOVER_START, 0, NULL);
204
+}
205
+
206
+void qemu_savevm_maybe_send_switchover_start(QEMUFile *f)
207
+{
208
+ if (migrate_send_switchover_start()) {
209
+ qemu_savevm_send_switchover_start(f);
210
+ }
211
+}
212
+
213
bool qemu_savevm_state_blocked(Error **errp)
214
{
215
SaveStateEntry *se;
216
@@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
217
218
ret = qemu_file_get_error(f);
219
if (ret == 0) {
220
+ qemu_savevm_maybe_send_switchover_start(f);
221
qemu_savevm_state_complete_precopy(f, false);
222
ret = qemu_file_get_error(f);
223
}
224
@@ -XXX,XX +XXX,XX @@ static int loadvm_process_enable_colo(MigrationIncomingState *mis)
225
return ret;
226
}
227
228
+static int loadvm_postcopy_handle_switchover_start(void)
229
+{
230
+ SaveStateEntry *se;
231
+
232
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
233
+ int ret;
234
+
235
+ if (!se->ops || !se->ops->switchover_start) {
236
+ continue;
237
+ }
238
+
239
+ ret = se->ops->switchover_start(se->opaque);
240
+ if (ret < 0) {
241
+ return ret;
242
+ }
243
+ }
244
+
245
+ return 0;
246
+}
247
+
248
/*
249
* Process an incoming 'QEMU_VM_COMMAND'
250
* 0 just a normal return
251
@@ -XXX,XX +XXX,XX @@ static int loadvm_process_command(QEMUFile *f)
252
253
case MIG_CMD_ENABLE_COLO:
254
return loadvm_process_enable_colo(mis);
255
+
256
+ case MIG_CMD_SWITCHOVER_START:
257
+ return loadvm_postcopy_handle_switchover_start();
258
}
259
260
return 0;
261
diff --git a/migration/trace-events b/migration/trace-events
262
index XXXXXXX..XXXXXXX 100644
263
--- a/migration/trace-events
264
+++ b/migration/trace-events
265
@@ -XXX,XX +XXX,XX @@ savevm_send_postcopy_run(void) ""
266
savevm_send_postcopy_resume(void) ""
267
savevm_send_colo_enable(void) ""
268
savevm_send_recv_bitmap(char *name) "%s"
269
+savevm_send_switchover_start(void) ""
270
savevm_state_setup(void) ""
271
savevm_state_resume_prepare(void) ""
272
savevm_state_header(void) ""
273
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
274
index XXXXXXX..XXXXXXX 100755
275
--- a/scripts/analyze-migration.py
276
+++ b/scripts/analyze-migration.py
277
@@ -XXX,XX +XXX,XX @@ class MigrationDump(object):
278
QEMU_VM_SUBSECTION = 0x05
279
QEMU_VM_VMDESCRIPTION = 0x06
280
QEMU_VM_CONFIGURATION = 0x07
281
+ QEMU_VM_COMMAND = 0x08
282
QEMU_VM_SECTION_FOOTER= 0x7e
283
+ QEMU_MIG_CMD_SWITCHOVER_START = 0x0b
284
285
def __init__(self, filename):
286
self.section_classes = {
287
@@ -XXX,XX +XXX,XX @@ def read(self, desc_only = False, dump_memory = False,
288
elif section_type == self.QEMU_VM_SECTION_PART or section_type == self.QEMU_VM_SECTION_END:
289
section_id = file.read32()
290
self.sections[section_id].read()
291
+ elif section_type == self.QEMU_VM_COMMAND:
292
+ command_type = file.read16()
293
+ command_data_len = file.read16()
294
+ if command_type != self.QEMU_MIG_CMD_SWITCHOVER_START:
295
+ raise Exception("Unknown QEMU_VM_COMMAND: %x" %
296
+ (command_type))
297
+ if command_data_len != 0:
298
+ raise Exception("Invalid SWITCHOVER_START length: %x" %
299
+ (command_data_len))
300
elif section_type == self.QEMU_VM_SECTION_FOOTER:
301
read_section_id = file.read32()
302
if read_section_id != section_id:
303
--
147
--
304
2.48.1
148
2.48.1
305
149
306
150
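For illustration only, a device wanting to act on the new switchover notification could wire up the handler described above roughly as sketched below; the device type, state struct and load-thread function are made up for the example and are not part of this series:

    /* Illustrative sketch: destination-side hook run when SWITCHOVER_START is received */
    static int mydev_switchover_start(void *opaque)
    {
        MyDevState *s = opaque;    /* hypothetical device state */

        /* start consuming device data that arrives outside the main migration stream */
        qemu_thread_create(&s->load_thread, "mydev-load",
                           mydev_load_thread_fn, s, QEMU_THREAD_JOINABLE);
        return 0;                  /* zero on success, negative on error */
    }

    static SaveVMHandlers mydev_savevm_handlers = {
        /* ... existing handlers ... */
        .switchover_start = mydev_switchover_start,
    };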
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
This property allows configuring whether to transfer the particular device
3
So far, IGD-specific quirks all require enabling legacy mode, which is
4
state via multifd channels when live migrating that device.
4
toggled by assigning IGD to 00:02.0. However, some quirks, like the BDSM
5
5
and GGC register quirks, should be applied to all supported IGD devices.
6
It defaults to AUTO, which means that VFIO device state transfer via
6
A new config option, x-igd-legacy-mode=[on|off|auto], is introduced to
7
multifd channels is attempted in configurations that otherwise support it.
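For illustration only (the host PCI address below is hypothetical), the property is set per assigned device on the command line along these lines:

    -device vfio-pci,host=0000:01:00.0,x-migration-multifd-transfer=on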
7
control the legacy-mode-only quirks. The default value is "auto", which
8
8
keeps current behavior that enables legacy mode implicitly and continues
9
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
9
on error when all of the following conditions are met.
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
* Machine type is i440fx
11
Link: https://lore.kernel.org/qemu-devel/d6dbb326e3d53c7104d62c96c9e3dd64e1c7b940.1741124640.git.maciej.szmigiero@oracle.com
11
* IGD device is at guest BDF 00:02.0
12
[ clg: Added documentation ]
12
13
If any one of the conditions above is not met, the default behavior is
14
equivalent to "off", QEMU will fail immediately if any error occurs.
15
16
Users can also use "on" to force enabling legacy mode. It checks if all
17
the conditions above are met and set up legacy mode. QEMU will also fail
18
immediately on error in this case.
19
20
Additionally, the hotplug check in legacy mode is removed as hotplugging
21
IGD device is never supported, and it will be checked when enabling the
22
OpRegion quirk.
23
24
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
25
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
26
Tested-by: Alex Williamson <alex.williamson@redhat.com>
27
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
28
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-8-tomitamoeko@gmail.com
29
[ clg: - Changed warn_report() by info_report() in
30
vfio_probe_igd_config_quirk() as suggested by Alex W.
31
- Fixed spelling in vfio_probe_igd_config_quirk () ]
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
32
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
33
---
15
docs/devel/migration/vfio.rst | 15 +++++++++++++++
34
hw/vfio/pci.h | 1 +
16
include/hw/vfio/vfio-common.h | 2 ++
35
hw/vfio/igd.c | 127 +++++++++++++++++++++++++++++---------------------
17
hw/vfio/migration-multifd.c | 18 +++++++++++++++++-
36
hw/vfio/pci.c | 2 +
18
hw/vfio/pci.c | 7 +++++++
37
3 files changed, 77 insertions(+), 53 deletions(-)
19
4 files changed, 41 insertions(+), 1 deletion(-)
38
20
39
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
21
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
22
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
23
--- a/docs/devel/migration/vfio.rst
41
--- a/hw/vfio/pci.h
24
+++ b/docs/devel/migration/vfio.rst
42
+++ b/hw/vfio/pci.h
25
@@ -XXX,XX +XXX,XX @@ Postcopy
43
@@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice {
26
========
44
uint32_t display_xres;
27
45
uint32_t display_yres;
28
Postcopy migration is currently not supported for VFIO devices.
46
int32_t bootindex;
29
+
47
+ OnOffAuto igd_legacy_mode;
30
+Multifd
48
uint32_t igd_gms;
31
+=======
49
OffAutoPCIBAR msix_relo;
32
+
50
uint8_t nv_gpudirect_clique;
33
+Starting from QEMU version 10.0 there's a possibility to transfer VFIO device
51
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
34
+_STOP_COPY state via multifd channels. This helps reduce downtime - especially
35
+with multiple VFIO devices or with devices having a large migration state.
36
+As an additional benefit, setting the VFIO device to _STOP_COPY state and
37
+saving its config space is also parallelized (run in a separate thread) in
38
+such migration mode.
39
+
40
+The multifd VFIO device state transfer is controlled by
41
+"x-migration-multifd-transfer" VFIO device property. This property defaults to
42
+AUTO, which means that VFIO device state transfer via multifd channels is
43
+attempted in configurations that otherwise support it.
44
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
45
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
46
--- a/include/hw/vfio/vfio-common.h
53
--- a/hw/vfio/igd.c
47
+++ b/include/hw/vfio/vfio-common.h
54
+++ b/hw/vfio/igd.c
48
@@ -XXX,XX +XXX,XX @@ typedef struct VFIOMigration {
55
@@ -XXX,XX +XXX,XX @@
49
uint64_t mig_flags;
56
#include "qemu/error-report.h"
50
uint64_t precopy_init_size;
57
#include "qapi/error.h"
51
uint64_t precopy_dirty_size;
58
#include "qapi/qmp/qerror.h"
52
+ bool multifd_transfer;
59
+#include "hw/boards.h"
53
VFIOMultifd *multifd;
60
#include "hw/hw.h"
54
bool initial_data_sent;
61
#include "hw/nvram/fw_cfg.h"
55
62
#include "pci.h"
56
@@ -XXX,XX +XXX,XX @@ typedef struct VFIODevice {
63
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
57
bool no_mmap;
64
* bus address.
58
bool ram_block_discard_allowed;
65
*/
59
OnOffAuto enable_migration;
66
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
60
+ OnOffAuto migration_multifd_transfer;
67
- !vfio_is_vga(vdev) || nr != 0 ||
61
bool migration_events;
68
- &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
62
VFIODeviceOps *ops;
69
- 0, PCI_DEVFN(0x2, 0))) {
63
unsigned int num_irqs;
70
+ !vfio_is_vga(vdev) || nr != 0) {
64
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
71
return;
65
index XXXXXXX..XXXXXXX 100644
72
}
66
--- a/hw/vfio/migration-multifd.c
73
67
+++ b/hw/vfio/migration-multifd.c
74
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
68
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_transfer_supported(void)
75
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
69
76
}
70
bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
77
78
-bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
79
- Error **errp G_GNUC_UNUSED)
80
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
71
{
81
{
72
- return false;
82
- g_autofree struct vfio_region_info *rom = NULL;
73
+ VFIOMigration *migration = vbasedev->migration;
83
int ret, gen;
74
+
84
uint64_t gms_size;
75
+ return migration->multifd_transfer;
85
uint64_t *bdsm_size;
86
uint32_t gmch;
87
+ bool legacy_mode_enabled = false;
88
Error *err = NULL;
89
90
/*
91
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
92
* PCI bus address.
93
*/
94
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
95
- !vfio_is_vga(vdev) ||
96
- &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
97
- 0, PCI_DEVFN(0x2, 0))) {
98
+ !vfio_is_vga(vdev)) {
99
return true;
100
}
101
102
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
103
return true;
104
}
105
106
- /*
107
- * Most of what we're doing here is to enable the ROM to run, so if
108
- * there's no ROM, there's no point in setting up this quirk.
109
- * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
110
- */
111
- ret = vfio_get_region_info(&vdev->vbasedev,
112
- VFIO_PCI_ROM_REGION_INDEX, &rom);
113
- if ((ret || !rom->size) && !vdev->pdev.romfile) {
114
- error_report("IGD device %s has no ROM, legacy mode disabled",
115
- vdev->vbasedev.name);
116
- return true;
117
- }
118
-
119
- /*
120
- * Ignore the hotplug corner case, mark the ROM failed, we can't
121
- * create the devices we need for legacy mode in the hotplug scenario.
122
- */
123
- if (vdev->pdev.qdev.hotplugged) {
124
- error_report("IGD device %s hotplugged, ROM disabled, "
125
- "legacy mode disabled", vdev->vbasedev.name);
126
- vdev->rom_read_failed = true;
127
- return true;
128
- }
129
-
130
gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
131
132
/*
133
- * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
134
- * try to enable it. Probably shouldn't be using legacy mode without VGA,
135
- * but also no point in us enabling VGA if disabled in hardware.
136
+ * For backward compatibility, enable legacy mode when
137
+ * - Machine type is i440fx (pc_piix)
138
+ * - IGD device is at guest BDF 00:02.0
139
+ * - Not manually disabled by x-igd-legacy-mode=off
140
*/
141
- if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
142
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
143
- error_report("IGD device %s failed to enable VGA access, "
144
- "legacy mode disabled", vdev->vbasedev.name);
145
- return true;
146
- }
147
+ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) &&
148
+ !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") &&
149
+ (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev),
150
+ 0, PCI_DEVFN(0x2, 0)))) {
151
+ /*
152
+ * IGD legacy mode requires:
153
+ * - VBIOS in ROM BAR or file
154
+ * - VGA IO/MMIO ranges are claimed by IGD
155
+ * - OpRegion
156
+ * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
157
+ */
158
+ g_autofree struct vfio_region_info *rom = NULL;
159
+
160
+ legacy_mode_enabled = true;
161
+ info_report("IGD legacy mode enabled, "
162
+ "use x-igd-legacy-mode=off to disable it if unwanted.");
163
+
164
+ /*
165
+ * Most of what we're doing here is to enable the ROM to run, so if
166
+ * there's no ROM, there's no point in setting up this quirk.
167
+ * NB. We only seem to get BIOS ROMs, so UEFI VM would need CSM support.
168
+ */
169
+ ret = vfio_get_region_info(&vdev->vbasedev,
170
+ VFIO_PCI_ROM_REGION_INDEX, &rom);
171
+ if ((ret || !rom->size) && !vdev->pdev.romfile) {
172
+ error_setg(&err, "Device has no ROM");
173
+ goto error;
174
+ }
175
176
- /* Setup OpRegion access */
177
- if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
178
- error_append_hint(&err, "IGD legacy mode disabled\n");
179
- error_report_err(err);
180
- return true;
181
- }
182
+ /*
183
+ * If IGD VGA Disable is clear (expected) and VGA is not already
184
+ * enabled, try to enable it. Probably shouldn't be using legacy mode
185
+ * without VGA, but also no point in us enabling VGA if disabled in
186
+ * hardware.
187
+ */
188
+ if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
189
+ error_setg(&err, "Unable to enable VGA access");
190
+ goto error;
191
+ }
192
193
- /* Setup LPC bridge / Host bridge PCI IDs */
194
- if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
195
- error_append_hint(&err, "IGD legacy mode disabled\n");
196
- error_report_err(err);
197
- return true;
198
+ /* Setup OpRegion access */
199
+ if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
200
+ goto error;
201
+ }
202
+
203
+ /* Setup LPC bridge / Host bridge PCI IDs */
204
+ if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
205
+ goto error;
206
+ }
207
+ } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) {
208
+ error_setg(&err,
209
+ "Machine is not i440fx or assigned BDF is not 00:02.0");
210
+ goto error;
211
}
212
213
/*
214
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
215
trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
216
217
return true;
218
+
219
+error:
220
+ /*
221
+ * When legacy mode is implicity enabled, continue on error,
222
+ * to keep compatibility
223
+ */
224
+ if (legacy_mode_enabled && (vdev->igd_legacy_mode == ON_OFF_AUTO_AUTO)) {
225
+ error_report_err(err);
226
+ error_report("IGD legacy mode disabled");
227
+ return true;
228
+ }
229
+
230
+ error_propagate(errp, err);
231
+ return false;
76
}
232
}
77
78
bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
79
{
80
VFIOMigration *migration = vbasedev->migration;
81
82
+ if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
83
+ migration->multifd_transfer = vfio_multifd_transfer_supported();
84
+ } else {
85
+ migration->multifd_transfer =
86
+ vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
87
+ }
88
+
89
if (!vfio_multifd_transfer_enabled(vbasedev)) {
90
/* Nothing further to check or do */
91
return true;
92
}
93
94
+ if (!vfio_multifd_transfer_supported()) {
95
+ error_setg(errp,
96
+ "%s: Multifd device transfer requested but unsupported in the current config",
97
+ vbasedev->name);
98
+ return false;
99
+ }
100
+
101
if (alloc_multifd) {
102
assert(!migration->multifd);
103
migration->multifd = vfio_multifd_new();
104
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
233
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
105
index XXXXXXX..XXXXXXX 100644
234
index XXXXXXX..XXXXXXX 100644
106
--- a/hw/vfio/pci.c
235
--- a/hw/vfio/pci.c
107
+++ b/hw/vfio/pci.c
236
+++ b/hw/vfio/pci.c
108
@@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = {
237
@@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = {
238
VFIO_FEATURE_ENABLE_REQ_BIT, true),
239
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
109
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
240
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
241
+ DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,
242
+ igd_legacy_mode, ON_OFF_AUTO_AUTO),
110
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
243
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
111
vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
244
vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
112
+ DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice,
245
DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
113
+ vbasedev.migration_multifd_transfer,
114
+ ON_OFF_AUTO_AUTO),
115
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
116
vbasedev.migration_events, false),
117
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
118
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
119
"Skip config space check for Vendor Specific Capability. "
120
"Setting to false will enforce strict checking of VSC content "
121
"(DEBUG)");
122
+ object_class_property_set_description(klass, /* 10.0 */
123
+ "x-migration-multifd-transfer",
124
+ "Transfer this device state via "
125
+ "multifd channels when live migrating it");
126
}
127
128
static const TypeInfo vfio_pci_dev_info = {
129
--
246
--
130
2.48.1
247
2.48.1
131
248
132
249
1
From: Alex Williamson <alex.williamson@redhat.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Switch callers directly initializing the PCI PM capability with
3
Both enable OpRegion option (x-igd-opregion) and legacy mode require
4
pci_add_capability() to use pci_pm_init().
4
setting up OpRegion copy for IGD devices. As the config quirk no longer
5
depends on legacy mode, we can now handle x-igd-opregion option there
6
instead of in vfio_realize.
5
7
6
Cc: Dmitry Fleytman <dmitry.fleytman@gmail.com>
8
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
7
Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
8
Cc: Jason Wang <jasowang@redhat.com>
10
Tested-by: Alex Williamson <alex.williamson@redhat.com>
9
Cc: Stefan Weil <sw@weilnetz.de>
11
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
10
Cc: Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>
12
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-9-tomitamoeko@gmail.com
11
Cc: Keith Busch <kbusch@kernel.org>
12
Cc: Klaus Jensen <its@irrelevant.dk>
13
Cc: Jesper Devantier <foss@defmacro.it>
14
Cc: Michael S. Tsirkin <mst@redhat.com>
15
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
16
Cc: Cédric Le Goater <clg@redhat.com>
17
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
18
Reviewed-by: Eric Auger <eric.auger@redhat.com>
19
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
20
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
21
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-3-alex.williamson@redhat.com
22
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
23
---
14
---
24
hw/net/e1000e.c | 3 +--
15
hw/vfio/pci.h | 2 --
25
hw/net/eepro100.c | 4 +---
16
hw/vfio/igd.c | 14 +++++++++-----
26
hw/net/igb.c | 3 +--
17
hw/vfio/pci.c | 9 ---------
27
hw/nvme/ctrl.c | 3 +--
18
3 files changed, 9 insertions(+), 16 deletions(-)
28
hw/pci-bridge/pcie_pci_bridge.c | 2 +-
29
hw/vfio/pci.c | 7 ++++++-
30
hw/virtio/virtio-pci.c | 3 +--
31
7 files changed, 12 insertions(+), 13 deletions(-)
32
19
33
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
20
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
34
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/net/e1000e.c
22
--- a/hw/vfio/pci.h
36
+++ b/hw/net/e1000e.c
23
+++ b/hw/vfio/pci.h
37
@@ -XXX,XX +XXX,XX @@ static int
24
@@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
38
e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
25
26
bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
27
28
-bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp);
29
-
30
void vfio_display_reset(VFIOPCIDevice *vdev);
31
bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
32
void vfio_display_finalize(VFIOPCIDevice *vdev);
33
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/vfio/igd.c
36
+++ b/hw/vfio/igd.c
37
@@ -XXX,XX +XXX,XX @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
38
return true;
39
}
40
41
-bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
42
+static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
39
{
43
{
40
Error *local_err = NULL;
44
g_autofree struct vfio_region_info *opregion = NULL;
41
- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
45
int ret;
42
- PCI_PM_SIZEOF, &local_err);
46
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
43
+ int ret = pci_pm_init(pdev, offset, &local_err);
47
goto error;
44
45
if (local_err) {
46
error_report_err(local_err);
47
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/net/eepro100.c
50
+++ b/hw/net/eepro100.c
51
@@ -XXX,XX +XXX,XX @@ static void e100_pci_reset(EEPRO100State *s, Error **errp)
52
if (info->power_management) {
53
/* Power Management Capabilities */
54
int cfg_offset = 0xdc;
55
- int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM,
56
- cfg_offset, PCI_PM_SIZEOF,
57
- errp);
58
+ int r = pci_pm_init(&s->dev, cfg_offset, errp);
59
if (r < 0) {
60
return;
61
}
48
}
62
diff --git a/hw/net/igb.c b/hw/net/igb.c
49
63
index XXXXXXX..XXXXXXX 100644
50
- /* Setup OpRegion access */
64
--- a/hw/net/igb.c
51
- if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
65
+++ b/hw/net/igb.c
52
- goto error;
66
@@ -XXX,XX +XXX,XX @@ static int
53
- }
67
igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
54
+ /* Enable OpRegion quirk */
68
{
55
+ vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION;
69
Error *local_err = NULL;
56
70
- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
57
/* Setup LPC bridge / Host bridge PCI IDs */
71
- PCI_PM_SIZEOF, &local_err);
58
if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
72
+ int ret = pci_pm_init(pdev, offset, &local_err);
59
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
73
60
goto error;
74
if (local_err) {
75
error_report_err(local_err);
76
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/nvme/ctrl.c
79
+++ b/hw/nvme/ctrl.c
80
@@ -XXX,XX +XXX,XX @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
81
Error *err = NULL;
82
int ret;
83
84
- ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
85
- PCI_PM_SIZEOF, &err);
86
+ ret = pci_pm_init(pci_dev, offset, &err);
87
if (err) {
88
error_report_err(err);
89
return ret;
90
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/hw/pci-bridge/pcie_pci_bridge.c
93
+++ b/hw/pci-bridge/pcie_pci_bridge.c
94
@@ -XXX,XX +XXX,XX @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp)
95
goto cap_error;
96
}
61
}
97
62
98
- pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp);
63
+ /* Setup OpRegion access */
99
+ pos = pci_pm_init(d, 0, errp);
64
+ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) &&
100
if (pos < 0) {
65
+ !vfio_pci_igd_setup_opregion(vdev, errp)) {
101
goto pm_error;
66
+ goto error;
102
}
67
+ }
68
+
69
/*
70
* Allow user to override dsm size using x-igd-gms option, in multiples of
71
* 32MiB. This option should only be used when the desired size cannot be
103
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
72
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
104
index XXXXXXX..XXXXXXX 100644
73
index XXXXXXX..XXXXXXX 100644
105
--- a/hw/vfio/pci.c
74
--- a/hw/vfio/pci.c
106
+++ b/hw/vfio/pci.c
75
+++ b/hw/vfio/pci.c
107
@@ -XXX,XX +XXX,XX @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
76
@@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
108
case PCI_CAP_ID_PM:
77
vfio_bar_quirk_setup(vdev, i);
109
vfio_check_pm_reset(vdev, pos);
78
}
110
vdev->pm_cap = pos;
79
111
- ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0;
80
-#ifdef CONFIG_VFIO_IGD
112
+ ret = pci_pm_init(pdev, pos, errp) >= 0;
81
- if (!vdev->igd_opregion &&
113
+ /*
82
- vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
114
+ * PCI-core config space emulation needs write access to the power
83
- if (!vfio_pci_igd_setup_opregion(vdev, errp)) {
115
+ * state enabled for tracking BAR mapping relative to PM state.
84
- goto out_unset_idev;
116
+ */
85
- }
117
+ pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
86
- }
118
break;
87
-#endif
119
case PCI_CAP_ID_AF:
88
-
120
vfio_check_af_flr(vdev, pos);
89
/* QEMU emulates all of MSI & MSIX */
121
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
90
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
122
index XXXXXXX..XXXXXXX 100644
91
memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
123
--- a/hw/virtio/virtio-pci.c
124
+++ b/hw/virtio/virtio-pci.c
125
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
126
pos = pcie_endpoint_cap_init(pci_dev, 0);
127
assert(pos > 0);
128
129
- pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
130
- PCI_PM_SIZEOF, errp);
131
+ pos = pci_pm_init(pci_dev, 0, errp);
132
if (pos < 0) {
133
return;
134
}
135
--
92
--
136
2.48.1
93
2.48.1
137
94
138
95
1
From: Alex Williamson <alex.williamson@redhat.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
This is now redundant to PCIDevice.pm_cap.
3
The LPC bridge/Host bridge IDs quirk is also not dependent on legacy
4
mode. Recent Windows drivers no longer depend on these IDs, and neither does the
5
Linux i915 driver, while UEFI GOP still seems to need them. Make it an
6
option so that users can enable or disable it as needed.
4
7
5
Cc: Cédric Le Goater <clg@redhat.com>
8
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
6
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
9
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
7
Reviewed-by: Eric Auger <eric.auger@redhat.com>
10
Tested-by: Alex Williamson <alex.williamson@redhat.com>
8
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
11
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
9
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-10-tomitamoeko@gmail.com
10
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-4-alex.williamson@redhat.com
13
[ clg: - Fixed spelling in vfio_probe_igd_config_quirk() ]
11
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
---
15
---
13
hw/vfio/pci.h | 1 -
16
hw/vfio/pci.h | 3 +++
14
hw/vfio/pci.c | 9 ++++-----
17
hw/vfio/igd.c | 14 ++++++++------
15
2 files changed, 4 insertions(+), 6 deletions(-)
18
hw/vfio/pci.c | 2 ++
19
3 files changed, 13 insertions(+), 6 deletions(-)
16
20
17
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
21
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
18
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/vfio/pci.h
23
--- a/hw/vfio/pci.h
20
+++ b/hw/vfio/pci.h
24
+++ b/hw/vfio/pci.h
21
@@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice {
25
@@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice {
22
int32_t bootindex;
26
#define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
23
uint32_t igd_gms;
27
#define VFIO_FEATURE_ENABLE_IGD_OPREGION \
24
OffAutoPCIBAR msix_relo;
28
(1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
25
- uint8_t pm_cap;
29
+#define VFIO_FEATURE_ENABLE_IGD_LPC_BIT 3
26
uint8_t nv_gpudirect_clique;
30
+#define VFIO_FEATURE_ENABLE_IGD_LPC \
27
bool pci_aer;
31
+ (1 << VFIO_FEATURE_ENABLE_IGD_LPC_BIT)
28
bool req_enabled;
32
OnOffAuto display;
33
uint32_t display_xres;
34
uint32_t display_yres;
35
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/hw/vfio/igd.c
38
+++ b/hw/vfio/igd.c
39
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
40
goto error;
41
}
42
43
- /* Enable OpRegion quirk */
44
+ /* Enable OpRegion and LPC bridge quirk */
45
vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION;
46
-
47
- /* Setup LPC bridge / Host bridge PCI IDs */
48
- if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
49
- goto error;
50
- }
51
+ vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC;
52
} else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) {
53
error_setg(&err,
54
"Machine is not i440fx or assigned BDF is not 00:02.0");
55
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
56
goto error;
57
}
58
59
+ /* Setup LPC bridge / Host bridge PCI IDs */
60
+ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) &&
61
+ !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) {
62
+ goto error;
63
+ }
64
+
65
/*
66
* Allow user to override dsm size using x-igd-gms option, in multiples of
67
* 32MiB. This option should only be used when the desired size cannot be
29
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
68
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
30
index XXXXXXX..XXXXXXX 100644
69
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/vfio/pci.c
70
--- a/hw/vfio/pci.c
32
+++ b/hw/vfio/pci.c
71
+++ b/hw/vfio/pci.c
33
@@ -XXX,XX +XXX,XX @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
72
@@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = {
34
break;
73
VFIO_FEATURE_ENABLE_REQ_BIT, true),
35
case PCI_CAP_ID_PM:
74
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
36
vfio_check_pm_reset(vdev, pos);
75
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
37
- vdev->pm_cap = pos;
76
+ DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features,
38
ret = pci_pm_init(pdev, pos, errp) >= 0;
77
+ VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false),
39
/*
78
DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,
40
* PCI-core config space emulation needs write access to the power
79
igd_legacy_mode, ON_OFF_AUTO_AUTO),
41
@@ -XXX,XX +XXX,XX @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
80
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
42
vfio_disable_interrupts(vdev);
43
44
/* Make sure the device is in D0 */
45
- if (vdev->pm_cap) {
46
+ if (pdev->pm_cap) {
47
uint16_t pmcsr;
48
uint8_t state;
49
50
- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
51
+ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
52
state = pmcsr & PCI_PM_CTRL_STATE_MASK;
53
if (state) {
54
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
55
- vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
56
+ vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
57
/* vfio handles the necessary delay here */
58
- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
59
+ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
60
state = pmcsr & PCI_PM_CTRL_STATE_MASK;
61
if (state) {
62
error_report("vfio: Unable to power on device, stuck in D%d",
63
--
81
--
64
2.48.1
82
2.48.1
65
83
66
84
Deleted patch
1
From: Alex Williamson <alex.williamson@redhat.com>
2
1
3
The pm_cap on the PCIExpressDevice object can be distilled down
4
to the new instance on the PCIDevice object.
5
6
Cc: Michael S. Tsirkin <mst@redhat.com>
7
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
12
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-5-alex.williamson@redhat.com
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
15
include/hw/pci/pcie.h | 2 --
16
hw/pci-bridge/pcie_pci_bridge.c | 1 -
17
hw/virtio/virtio-pci.c | 8 +++-----
18
3 files changed, 3 insertions(+), 8 deletions(-)
19
20
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/pci/pcie.h
23
+++ b/include/hw/pci/pcie.h
24
@@ -XXX,XX +XXX,XX @@ typedef enum {
25
struct PCIExpressDevice {
26
/* Offset of express capability in config space */
27
uint8_t exp_cap;
28
- /* Offset of Power Management capability in config space */
29
- uint8_t pm_cap;
30
31
/* SLOT */
32
bool hpev_notified; /* Logical AND of conditions for hot plug event.
33
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/pci-bridge/pcie_pci_bridge.c
36
+++ b/hw/pci-bridge/pcie_pci_bridge.c
37
@@ -XXX,XX +XXX,XX @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp)
38
if (pos < 0) {
39
goto pm_error;
40
}
41
- d->exp.pm_cap = pos;
42
pci_set_word(d->config + pos + PCI_PM_PMC, 0x3);
43
44
pcie_cap_arifwd_init(d);
45
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/hw/virtio/virtio-pci.c
48
+++ b/hw/virtio/virtio-pci.c
49
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
50
return;
51
}
52
53
- pci_dev->exp.pm_cap = pos;
54
-
55
/*
56
* Indicates that this function complies with revision 1.2 of the
57
* PCI Power Management Interface Specification.
58
@@ -XXX,XX +XXX,XX @@ static bool virtio_pci_no_soft_reset(PCIDevice *dev)
59
{
60
uint16_t pmcsr;
61
62
- if (!pci_is_express(dev) || !dev->exp.pm_cap) {
63
+ if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) {
64
return false;
65
}
66
67
- pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL);
68
+ pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL);
69
70
/*
71
* When No_Soft_Reset bit is set and the device
72
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
73
74
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
75
pci_word_test_and_clear_mask(
76
- dev->config + dev->exp.pm_cap + PCI_PM_CTRL,
77
+ dev->config + dev->pm_cap + PCI_PM_CTRL,
78
PCI_PM_CTRL_STATE_MASK);
79
}
80
}
81
--
82
2.48.1
83
84
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Migration code wants to manage device data sending threads in one place.
3
The KVMGT/GVT-g vGPU also exposes OpRegion. But unlike IGD passthrough,
4
it only needs the OpRegion quirk. A previous change moved x-igd-opregion
5
handling to config quirk breaks KVMGT functionality as it brings extra
6
checks and applied other quirks. Here we check if the device is mdev
7
(KVMGT) or not (passthrough), and then applies corresponding quirks.
4
8
5
QEMU has an existing thread pool implementation, however it is limited
9
As before, users must manually specify x-igd-opregion=on to enable it
6
to queuing AIO operations only and essentially has a 1:1 mapping between
10
on KVMGT devices. In the future, we may check the VID/DID and enable
7
the current AioContext and the AIO ThreadPool in use.
11
OpRegion automatically.
8
12
9
Implement generic (non-AIO) ThreadPool by essentially wrapping Glib's
13
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
10
GThreadPool.
14
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
11
15
Tested-by: Alex Williamson <alex.williamson@redhat.com>
12
This brings a few new operations on a pool:
16
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
13
* thread_pool_wait() operation waits until all the submitted work requests
17
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-11-tomitamoeko@gmail.com
14
have finished.
15
16
* thread_pool_set_max_threads() explicitly sets the maximum thread count
17
in the pool.
18
19
* thread_pool_adjust_max_threads_to_work() adjusts the maximum thread count
20
in the pool to equal the number of still waiting in queue or unfinished work.
21
22
Reviewed-by: Fabiano Rosas <farosas@suse.de>
23
Reviewed-by: Peter Xu <peterx@redhat.com>
24
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
25
Link: https://lore.kernel.org/qemu-devel/b1efaebdbea7cb7068b8fb74148777012383e12b.1741124640.git.maciej.szmigiero@oracle.com
26
Signed-off-by: Cédric Le Goater <clg@redhat.com>
18
Signed-off-by: Cédric Le Goater <clg@redhat.com>
27
---
19
---
28
include/block/thread-pool.h | 51 ++++++++++++++++
20
hw/vfio/igd.c | 27 ++++++++++++++++++++++++++-
29
util/thread-pool.c | 119 ++++++++++++++++++++++++++++++++++++
21
1 file changed, 26 insertions(+), 1 deletion(-)
30
2 files changed, 170 insertions(+)
31
22
32
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
23
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
33
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/thread-pool.h
25
--- a/hw/vfio/igd.c
35
+++ b/include/block/thread-pool.h
26
+++ b/hw/vfio/igd.c
36
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
27
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
37
int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
28
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
38
void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx);
29
}
39
30
40
+/* ------------------------------------------- */
31
-bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
41
+/* Generic thread pool types and methods below */
32
+static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
42
+typedef struct ThreadPool ThreadPool;
33
{
43
+
34
int ret, gen;
44
+/* Create a new thread pool. Never returns NULL. */
35
uint64_t gms_size;
45
+ThreadPool *thread_pool_new(void);
36
@@ -XXX,XX +XXX,XX @@ error:
37
error_propagate(errp, err);
38
return false;
39
}
46
+
40
+
47
+/*
41
+/*
48
+ * Free the thread pool.
42
+ * KVMGT/GVT-g vGPU exposes an emulated OpRegion. So far, users have to specify
49
+ * Waits for all the previously submitted work to complete before performing
43
+ * x-igd-opregion=on to enable the access.
50
+ * the actual freeing operation.
44
+ * TODO: Check VID/DID and enable opregion access automatically
51
+ */
45
+ */
52
+void thread_pool_free(ThreadPool *pool);
46
+static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
53
+
54
+/*
55
+ * Submit a new work (task) for the pool.
56
+ *
57
+ * @opaque_destroy is an optional GDestroyNotify for the @opaque argument
58
+ * to the work function at @func.
59
+ */
60
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func,
61
+ void *opaque, GDestroyNotify opaque_destroy);
62
+
63
+/*
64
+ * Submit a new work (task) for the pool, making sure it starts getting
65
+ * processed immediately, launching a new thread for it if necessary.
66
+ *
67
+ * @opaque_destroy is an optional GDestroyNotify for the @opaque argument
68
+ * to the work function at @func.
69
+ */
70
+void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func,
71
+ void *opaque, GDestroyNotify opaque_destroy);
72
+
73
+/*
74
+ * Wait for all previously submitted work to complete before returning.
75
+ *
76
+ * Can be used as a barrier between two sets of tasks executed on a thread
77
+ * pool without destroying it or in a performance sensitive path where the
78
+ * caller just wants to wait for all tasks to complete while deferring the
79
+ * pool free operation for later, less performance sensitive time.
80
+ */
81
+void thread_pool_wait(ThreadPool *pool);
82
+
83
+/* Set the maximum number of threads in the pool. */
84
+bool thread_pool_set_max_threads(ThreadPool *pool, int max_threads);
85
+
86
+/*
87
+ * Adjust the maximum number of threads in the pool to give each task its
88
+ * own thread (exactly one thread per task).
89
+ */
90
+bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool);
91
92
#endif
93
diff --git a/util/thread-pool.c b/util/thread-pool.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/util/thread-pool.c
96
+++ b/util/thread-pool.c
97
@@ -XXX,XX +XXX,XX @@ void thread_pool_free_aio(ThreadPoolAio *pool)
98
qemu_mutex_destroy(&pool->lock);
99
g_free(pool);
100
}
101
+
102
+struct ThreadPool {
103
+ GThreadPool *t;
104
+ size_t cur_work;
105
+ QemuMutex cur_work_lock;
106
+ QemuCond all_finished_cond;
107
+};
108
+
109
+typedef struct {
110
+ ThreadPoolFunc *func;
111
+ void *opaque;
112
+ GDestroyNotify opaque_destroy;
113
+} ThreadPoolElement;
114
+
115
+static void thread_pool_func(gpointer data, gpointer user_data)
116
+{
47
+{
117
+ ThreadPool *pool = user_data;
48
+ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) &&
118
+ g_autofree ThreadPoolElement *el = data;
49
+ !vfio_pci_igd_setup_opregion(vdev, errp)) {
119
+
50
+ return false;
120
+ el->func(el->opaque);
121
+
122
+ if (el->opaque_destroy) {
123
+ el->opaque_destroy(el->opaque);
124
+ }
51
+ }
125
+
52
+
126
+ QEMU_LOCK_GUARD(&pool->cur_work_lock);
53
+ return true;
127
+
128
+ assert(pool->cur_work > 0);
129
+ pool->cur_work--;
130
+
131
+ if (pool->cur_work == 0) {
132
+ qemu_cond_signal(&pool->all_finished_cond);
133
+ }
134
+}
54
+}
135
+
55
+
136
+ThreadPool *thread_pool_new(void)
56
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
137
+{
57
+{
138
+ ThreadPool *pool = g_new(ThreadPool, 1);
58
+ /* KVMGT/GVT-g vGPU is exposed as mdev */
139
+
59
+ if (vdev->vbasedev.mdev) {
140
+ pool->cur_work = 0;
60
+ return vfio_pci_kvmgt_config_quirk(vdev, errp);
141
+ qemu_mutex_init(&pool->cur_work_lock);
142
+ qemu_cond_init(&pool->all_finished_cond);
143
+
144
+ pool->t = g_thread_pool_new(thread_pool_func, pool, 0, TRUE, NULL);
145
+ /*
146
+ * g_thread_pool_new() can only return errors if initial thread(s)
147
+ * creation fails but we ask for 0 initial threads above.
148
+ */
149
+ assert(pool->t);
150
+
151
+ return pool;
152
+}
153
+
154
+void thread_pool_free(ThreadPool *pool)
155
+{
156
+ /*
157
+ * With _wait = TRUE this effectively waits for all
158
+ * previously submitted work to complete first.
159
+ */
160
+ g_thread_pool_free(pool->t, FALSE, TRUE);
161
+
162
+ qemu_cond_destroy(&pool->all_finished_cond);
163
+ qemu_mutex_destroy(&pool->cur_work_lock);
164
+
165
+ g_free(pool);
166
+}
167
+
168
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func,
169
+ void *opaque, GDestroyNotify opaque_destroy)
170
+{
171
+ ThreadPoolElement *el = g_new(ThreadPoolElement, 1);
172
+
173
+ el->func = func;
174
+ el->opaque = opaque;
175
+ el->opaque_destroy = opaque_destroy;
176
+
177
+ WITH_QEMU_LOCK_GUARD(&pool->cur_work_lock) {
178
+ pool->cur_work++;
179
+ }
61
+ }
180
+
62
+
181
+ /*
63
+ return vfio_pci_igd_config_quirk(vdev, errp);
182
+ * Ignore the return value since this function can only return errors
183
+ * if creation of an additional thread fails but even in this case the
184
+ * provided work is still getting queued (just for the existing threads).
185
+ */
186
+ g_thread_pool_push(pool->t, el, NULL);
187
+}
188
+
189
+void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func,
190
+ void *opaque, GDestroyNotify opaque_destroy)
191
+{
192
+ thread_pool_submit(pool, func, opaque, opaque_destroy);
193
+ thread_pool_adjust_max_threads_to_work(pool);
194
+}
195
+
196
+void thread_pool_wait(ThreadPool *pool)
197
+{
198
+ QEMU_LOCK_GUARD(&pool->cur_work_lock);
199
+
200
+ while (pool->cur_work > 0) {
201
+ qemu_cond_wait(&pool->all_finished_cond,
202
+ &pool->cur_work_lock);
203
+ }
204
+}
205
+
206
+bool thread_pool_set_max_threads(ThreadPool *pool,
207
+ int max_threads)
208
+{
209
+ assert(max_threads > 0);
210
+
211
+ return g_thread_pool_set_max_threads(pool->t, max_threads, NULL);
212
+}
213
+
214
+bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool)
215
+{
216
+ QEMU_LOCK_GUARD(&pool->cur_work_lock);
217
+
218
+ return thread_pool_set_max_threads(pool, pool->cur_work);
219
+}
64
+}
220
--
65
--
221
2.48.1
66
2.48.1
222
67
223
68
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
2
3
Load device config received via multifd using the existing machinery
3
Wire data commonly use BE byte order (including in the existing migration
4
behind vfio_load_device_config_state().
4
protocol), so use it for VFIO device state packets as well.
5
5
6
Also, make sure to process the relevant main migration channel flags.
6
This will allow VFIO multifd device state transfer between hosts with
7
different endianness.
8
Although currently there is no such use case, it's good to have it now
9
for completeness.
7
10
11
Reviewed-by: Avihai Horon <avihaih@nvidia.com>
8
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
12
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
13
Link: https://lore.kernel.org/qemu-devel/dcfc04cc1a50655650dbac8398e2742ada84ee39.1741611079.git.maciej.szmigiero@oracle.com
10
Link: https://lore.kernel.org/qemu-devel/5dbd3f3703ec1097da2cf82a7262233452146fee.1741124640.git.maciej.szmigiero@oracle.com
11
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
---
15
---
13
include/hw/vfio/vfio-common.h | 2 ++
16
hw/vfio/migration-multifd.c | 15 ++++++++++-----
14
hw/vfio/migration-multifd.c | 49 +++++++++++++++++++++++++++++++++--
17
1 file changed, 10 insertions(+), 5 deletions(-)
15
hw/vfio/migration.c | 9 ++++++-
16
3 files changed, 57 insertions(+), 3 deletions(-)
17
18
18
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/vfio/vfio-common.h
21
+++ b/include/hw/vfio/vfio-common.h
22
@@ -XXX,XX +XXX,XX @@ void vfio_mig_add_bytes_transferred(unsigned long val);
23
bool vfio_device_state_is_running(VFIODevice *vbasedev);
24
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
25
26
+int vfio_load_device_config_state(QEMUFile *f, void *opaque);
27
+
28
#ifdef CONFIG_LINUX
29
int vfio_get_region_info(VFIODevice *vbasedev, int index,
30
struct vfio_region_info **info);
31
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
19
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
32
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/vfio/migration-multifd.c
21
--- a/hw/vfio/migration-multifd.c
34
+++ b/hw/vfio/migration-multifd.c
22
+++ b/hw/vfio/migration-multifd.c
35
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@
24
#include "hw/vfio/vfio-common.h"
25
#include "migration/misc.h"
26
#include "qapi/error.h"
27
+#include "qemu/bswap.h"
28
#include "qemu/error-report.h"
36
#include "qemu/lockable.h"
29
#include "qemu/lockable.h"
37
#include "qemu/main-loop.h"
30
#include "qemu/main-loop.h"
38
#include "qemu/thread.h"
39
+#include "io/channel-buffer.h"
40
#include "migration/qemu-file.h"
41
#include "migration-multifd.h"
42
#include "trace.h"
43
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
31
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
44
static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
32
return false;
45
Error **errp)
33
}
46
{
34
47
- error_setg(errp, "not yet there");
35
+ packet->version = be32_to_cpu(packet->version);
48
- return false;
36
if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
49
+ VFIOMigration *migration = vbasedev->migration;
37
error_setg(errp, "%s: packet has unknown version %" PRIu32,
50
+ VFIOMultifd *multifd = migration->multifd;
38
vbasedev->name, packet->version);
51
+ VFIOStateBuffer *lb;
39
return false;
52
+ g_autoptr(QIOChannelBuffer) bioc = NULL;
40
}
53
+ g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
41
54
+ uint64_t mig_header;
42
+ packet->idx = be32_to_cpu(packet->idx);
55
+ int ret;
43
+ packet->flags = be32_to_cpu(packet->flags);
56
+
44
+
57
+ assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
45
if (packet->idx == UINT32_MAX) {
58
+ lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
46
error_setg(errp, "%s: packet index is invalid", vbasedev->name);
59
+ assert(lb->is_present);
47
return false;
60
+
48
@@ -XXX,XX +XXX,XX @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
61
+ bioc = qio_channel_buffer_new(lb->len);
49
62
+ qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");
50
packet_len = sizeof(*packet) + bioc->usage;
63
+
51
packet = g_malloc0(packet_len);
64
+ f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
52
- packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
65
+ qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);
53
- packet->idx = idx;
66
+
54
- packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
67
+ ret = qemu_fflush(f_out);
55
+ packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
68
+ if (ret) {
56
+ packet->idx = cpu_to_be32(idx);
69
+ error_setg(errp, "%s: load config state flush failed: %d",
57
+ packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE);
70
+ vbasedev->name, ret);
58
memcpy(&packet->data, bioc->data, bioc->usage);
71
+ return false;
59
72
+ }
60
if (!multifd_queue_device_state(idstr, instance_id,
73
+
61
@@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
74
+ qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
62
}
75
+ f_in = qemu_file_new_input(QIO_CHANNEL(bioc));
63
76
+
64
packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
77
+ mig_header = qemu_get_be64(f_in);
65
- packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
78
+ if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
66
+ packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
79
+ error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
67
80
+ vbasedev->name, mig_header);
68
for (idx = 0; ; idx++) {
81
+ return false;
69
ssize_t data_size;
82
+ }
70
@@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
83
+
71
break;
84
+ bql_lock();
85
+ ret = vfio_load_device_config_state(f_in, vbasedev);
86
+ bql_unlock();
87
+
88
+ if (ret < 0) {
89
+ error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
90
+ vbasedev->name, ret);
91
+ return false;
92
+ }
93
+
94
+ return true;
95
}
96
97
static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
98
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/hw/vfio/migration.c
101
+++ b/hw/vfio/migration.c
102
@@ -XXX,XX +XXX,XX @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
103
return ret;
104
}
105
106
-static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
107
+int vfio_load_device_config_state(QEMUFile *f, void *opaque)
108
{
109
VFIODevice *vbasedev = opaque;
110
uint64_t data;
111
@@ -XXX,XX +XXX,XX @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
112
switch (data) {
113
case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
114
{
115
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
116
+ error_report("%s: got DEV_CONFIG_STATE in main migration "
117
+ "channel but doing multifd transfer",
118
+ vbasedev->name);
119
+ return -EINVAL;
120
+ }
121
+
122
return vfio_load_device_config_state(f, opaque);
123
}
72
}
124
case VFIO_MIG_FLAG_DEV_SETUP_STATE:
73
74
- packet->idx = idx;
75
+ packet->idx = cpu_to_be32(idx);
76
packet_size = sizeof(*packet) + data_size;
77
78
if (!multifd_queue_device_state(d->idstr, d->instance_id,
125
--
79
--
126
2.48.1
80
2.48.1
127
81
128
82
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add a thread which loads the VFIO device state buffers that were received
3
Both qemu_minrampagesize() and qemu_maxrampagesize() are
4
via multifd.
4
related to host memory backends, having the following call
5
stack:
5
6
6
Each VFIO device that has multifd device state transfer enabled has one
7
qemu_minrampagesize()
7
such thread, which is created using migration core API
8
-> find_min_backend_pagesize()
8
qemu_loadvm_start_load_thread().
9
-> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)
9
10
10
Since it's important to finish loading device state transferred via the
11
qemu_maxrampagesize()
11
main migration channel (via save_live_iterate SaveVMHandler) before
12
-> find_max_backend_pagesize()
12
starting loading the data asynchronously transferred via multifd the thread
13
-> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)
13
doing the actual loading of the multifd transferred data is only started
14
from switchover_start SaveVMHandler.
15
14
16
switchover_start handler is called when MIG_CMD_SWITCHOVER_START
15
Having TYPE_MEMORY_BACKEND defined in "system/hostmem.h":
17
sub-command of QEMU_VM_COMMAND is received via the main migration channel.
18
16
19
This sub-command is only sent after all save_live_iterate data have already
17
include/system/hostmem.h:23:#define TYPE_MEMORY_BACKEND "memory-backend"
20
been posted so it is safe to commence loading of the multifd-transferred
21
device state upon receiving it - loading of save_live_iterate data happens
22
synchronously in the main migration thread (much like the processing of
23
MIG_CMD_SWITCHOVER_START) so by the time MIG_CMD_SWITCHOVER_START is
24
processed all the preceding data must have already been loaded.
25
18
26
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
19
Move their prototype declaration to "system/hostmem.h".
27
Reviewed-by: Cédric Le Goater <clg@redhat.com>
20
28
Link: https://lore.kernel.org/qemu-devel/9abe612d775aaf42e31646796acd2363c723a57a.1741124640.git.maciej.szmigiero@oracle.com
21
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
29
[ clg: - Reordered savevm_vfio_handlers
22
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
30
- Added switchover_start documentation ]
23
Reviewed-by: Eric Auger <eric.auger@redhat.com>
24
Message-Id: <20250308230917.18907-7-philmd@linaro.org>
25
Acked-by: David Hildenbrand <david@redhat.com>
26
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-2-philmd@linaro.org
31
Signed-off-by: Cédric Le Goater <clg@redhat.com>
27
Signed-off-by: Cédric Le Goater <clg@redhat.com>
32
---
28
---
33
docs/devel/migration/vfio.rst | 4 +
29
include/exec/ram_addr.h | 3 ---
34
hw/vfio/migration-multifd.h | 2 +
30
include/system/hostmem.h | 3 +++
35
hw/vfio/migration-multifd.c | 226 ++++++++++++++++++++++++++++++++++
31
hw/ppc/spapr_caps.c | 1 +
36
hw/vfio/migration.c | 12 ++
32
hw/s390x/s390-virtio-ccw.c | 1 +
37
hw/vfio/trace-events | 7 ++
33
hw/vfio/spapr.c | 1 +
38
5 files changed, 251 insertions(+)
34
5 files changed, 6 insertions(+), 3 deletions(-)
39
35
40
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
36
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
41
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
42
--- a/docs/devel/migration/vfio.rst
38
--- a/include/exec/ram_addr.h
43
+++ b/docs/devel/migration/vfio.rst
39
+++ b/include/exec/ram_addr.h
44
@@ -XXX,XX +XXX,XX @@ VFIO implements the device hooks for the iterative approach as follows:
40
@@ -XXX,XX +XXX,XX @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
45
* A ``switchover_ack_needed`` function that checks if the VFIO device uses
41
46
"switchover-ack" migration capability when this capability is enabled.
42
bool ramblock_is_pmem(RAMBlock *rb);
47
43
48
+* A ``switchover_start`` function that in the multifd mode starts a thread that
44
-long qemu_minrampagesize(void);
49
+ reassembles the multifd received data and loads it in-order into the device.
45
-long qemu_maxrampagesize(void);
50
+ In the non-multifd mode this function is a NOP.
46
-
51
+
47
/**
52
* A ``save_state`` function to save the device config space if it is present.
48
* qemu_ram_alloc_from_file,
53
49
* qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
54
* A ``save_live_complete_precopy`` function that sets the VFIO device in
50
diff --git a/include/system/hostmem.h b/include/system/hostmem.h
55
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
56
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
57
--- a/hw/vfio/migration-multifd.h
52
--- a/include/system/hostmem.h
58
+++ b/hw/vfio/migration-multifd.h
53
+++ b/include/system/hostmem.h
59
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
54
@@ -XXX,XX +XXX,XX @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend);
60
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
55
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev);
61
Error **errp);
56
char *host_memory_backend_get_name(HostMemoryBackend *backend);
62
57
63
+int vfio_multifd_switchover_start(VFIODevice *vbasedev);
58
+long qemu_minrampagesize(void);
59
+long qemu_maxrampagesize(void);
64
+
60
+
65
#endif
61
#endif
66
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
62
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
67
index XXXXXXX..XXXXXXX 100644
63
index XXXXXXX..XXXXXXX 100644
68
--- a/hw/vfio/migration-multifd.c
64
--- a/hw/ppc/spapr_caps.c
69
+++ b/hw/vfio/migration-multifd.c
65
+++ b/hw/ppc/spapr_caps.c
70
@@ -XXX,XX +XXX,XX @@ typedef struct VFIOStateBuffer {
66
@@ -XXX,XX +XXX,XX @@
71
} VFIOStateBuffer;
67
#include "kvm_ppc.h"
72
68
#include "migration/vmstate.h"
73
typedef struct VFIOMultifd {
69
#include "system/tcg.h"
74
+ bool load_bufs_thread_running;
70
+#include "system/hostmem.h"
75
+ bool load_bufs_thread_want_exit;
71
76
+
72
#include "hw/ppc/spapr.h"
77
VFIOStateBuffers load_bufs;
73
78
QemuCond load_bufs_buffer_ready_cond;
74
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
79
+ QemuCond load_bufs_thread_finished_cond;
80
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
81
uint32_t load_buf_idx;
82
uint32_t load_buf_idx_last;
83
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
84
return true;
85
}
86
87
+static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
88
+ Error **errp)
89
+{
90
+ error_setg(errp, "not yet there");
91
+ return false;
92
+}
93
+
94
+static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
95
+{
96
+ VFIOStateBuffer *lb;
97
+ unsigned int bufs_len;
98
+
99
+ bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
100
+ if (multifd->load_buf_idx >= bufs_len) {
101
+ assert(multifd->load_buf_idx == bufs_len);
102
+ return NULL;
103
+ }
104
+
105
+ lb = vfio_state_buffers_at(&multifd->load_bufs,
106
+ multifd->load_buf_idx);
107
+ if (!lb->is_present) {
108
+ return NULL;
109
+ }
110
+
111
+ return lb;
112
+}
113
+
114
+static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
115
+ VFIOStateBuffer *lb,
116
+ Error **errp)
117
+{
118
+ VFIOMigration *migration = vbasedev->migration;
119
+ VFIOMultifd *multifd = migration->multifd;
120
+ g_autofree char *buf = NULL;
121
+ char *buf_cur;
122
+ size_t buf_len;
123
+
124
+ if (!lb->len) {
125
+ return true;
126
+ }
127
+
128
+ trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
129
+ multifd->load_buf_idx);
130
+
131
+ /* lb might become re-allocated when we drop the lock */
132
+ buf = g_steal_pointer(&lb->data);
133
+ buf_cur = buf;
134
+ buf_len = lb->len;
135
+ while (buf_len > 0) {
136
+ ssize_t wr_ret;
137
+ int errno_save;
138
+
139
+ /*
140
+ * Loading data to the device takes a while,
141
+ * drop the lock during this process.
142
+ */
143
+ qemu_mutex_unlock(&multifd->load_bufs_mutex);
144
+ wr_ret = write(migration->data_fd, buf_cur, buf_len);
145
+ errno_save = errno;
146
+ qemu_mutex_lock(&multifd->load_bufs_mutex);
147
+
148
+ if (wr_ret < 0) {
149
+ error_setg(errp,
150
+ "%s: writing state buffer %" PRIu32 " failed: %d",
151
+ vbasedev->name, multifd->load_buf_idx, errno_save);
152
+ return false;
153
+ }
154
+
155
+ assert(wr_ret <= buf_len);
156
+ buf_len -= wr_ret;
157
+ buf_cur += wr_ret;
158
+ }
159
+
160
+ trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
161
+ multifd->load_buf_idx);
162
+
163
+ return true;
164
+}
165
+
166
+static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
167
+ bool *should_quit)
168
+{
169
+ return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
170
+}
171
+
172
+/*
173
+ * This thread is spawned by vfio_multifd_switchover_start() which gets
174
+ * called upon encountering the switchover point marker in main migration
175
+ * stream.
176
+ *
177
+ * It exits after either:
178
+ * * completing loading the remaining device state and device config, OR:
179
+ * * encountering some error while doing the above, OR:
180
+ * * being forcefully aborted by the migration core by it setting should_quit
181
+ * or by vfio_load_cleanup_load_bufs_thread() setting
182
+ * multifd->load_bufs_thread_want_exit.
183
+ */
184
+static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
185
+{
186
+ VFIODevice *vbasedev = opaque;
187
+ VFIOMigration *migration = vbasedev->migration;
188
+ VFIOMultifd *multifd = migration->multifd;
189
+ bool ret = false;
190
+
191
+ trace_vfio_load_bufs_thread_start(vbasedev->name);
192
+
193
+ assert(multifd);
194
+ QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);
195
+
196
+ assert(multifd->load_bufs_thread_running);
197
+
198
+ while (true) {
199
+ VFIOStateBuffer *lb;
200
+
201
+ /*
202
+ * Always check cancellation first after the buffer_ready wait below in
203
+ * case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
204
+ */
205
+ if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
206
+ error_setg(errp, "operation cancelled");
207
+ goto thread_exit;
208
+ }
209
+
210
+ assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);
211
+
212
+ lb = vfio_load_state_buffer_get(multifd);
213
+ if (!lb) {
214
+ trace_vfio_load_state_device_buffer_starved(vbasedev->name,
215
+ multifd->load_buf_idx);
216
+ qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
217
+ &multifd->load_bufs_mutex);
218
+ continue;
219
+ }
220
+
221
+ if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
222
+ break;
223
+ }
224
+
225
+ if (multifd->load_buf_idx == 0) {
226
+ trace_vfio_load_state_device_buffer_start(vbasedev->name);
227
+ }
228
+
229
+ if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
230
+ goto thread_exit;
231
+ }
232
+
233
+ if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
234
+ trace_vfio_load_state_device_buffer_end(vbasedev->name);
235
+ }
236
+
237
+ multifd->load_buf_idx++;
238
+ }
239
+
240
+ if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
241
+ goto thread_exit;
242
+ }
243
+
244
+ ret = true;
245
+
246
+thread_exit:
247
+ /*
248
+ * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
249
+ * this thread is exiting.
250
+ */
251
+ multifd->load_bufs_thread_running = false;
252
+ qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);
253
+
254
+ trace_vfio_load_bufs_thread_end(vbasedev->name);
255
+
256
+ return ret;
257
+}
258
+
259
static VFIOMultifd *vfio_multifd_new(void)
260
{
261
VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
262
@@ -XXX,XX +XXX,XX @@ static VFIOMultifd *vfio_multifd_new(void)
263
multifd->load_buf_idx_last = UINT32_MAX;
264
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
265
266
+ multifd->load_bufs_thread_running = false;
267
+ multifd->load_bufs_thread_want_exit = false;
268
+ qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
269
+
270
return multifd;
271
}
272
273
+/*
274
+ * Terminates vfio_load_bufs_thread by setting
275
+ * multifd->load_bufs_thread_want_exit and signalling all the conditions
276
+ * the thread could be blocked on.
277
+ *
278
+ * Waits for the thread to signal that it had finished.
279
+ */
280
+static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
281
+{
282
+ /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
283
+ bql_unlock();
284
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
285
+ while (multifd->load_bufs_thread_running) {
286
+ multifd->load_bufs_thread_want_exit = true;
287
+
288
+ qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
289
+ qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
290
+ &multifd->load_bufs_mutex);
291
+ }
292
+ }
293
+ bql_lock();
294
+}
295
+
296
static void vfio_multifd_free(VFIOMultifd *multifd)
297
{
298
+ vfio_load_cleanup_load_bufs_thread(multifd);
299
+
300
+ qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
301
vfio_state_buffers_destroy(&multifd->load_bufs);
302
qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
303
qemu_mutex_destroy(&multifd->load_bufs_mutex);
304
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
305
306
return true;
307
}
308
+
309
+int vfio_multifd_switchover_start(VFIODevice *vbasedev)
310
+{
311
+ VFIOMigration *migration = vbasedev->migration;
312
+ VFIOMultifd *multifd = migration->multifd;
313
+
314
+ assert(multifd);
315
+
316
+ /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
317
+ bql_unlock();
318
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
319
+ assert(!multifd->load_bufs_thread_running);
320
+ multifd->load_bufs_thread_running = true;
321
+ }
322
+ bql_lock();
323
+
324
+ qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);
325
+
326
+ return 0;
327
+}
328
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
329
index XXXXXXX..XXXXXXX 100644
75
index XXXXXXX..XXXXXXX 100644
330
--- a/hw/vfio/migration.c
76
--- a/hw/s390x/s390-virtio-ccw.c
331
+++ b/hw/vfio/migration.c
77
+++ b/hw/s390x/s390-virtio-ccw.c
332
@@ -XXX,XX +XXX,XX @@ static bool vfio_switchover_ack_needed(void *opaque)
78
@@ -XXX,XX +XXX,XX @@
333
return vfio_precopy_supported(vbasedev);
79
#include "hw/s390x/tod.h"
334
}
80
#include "system/system.h"
335
81
#include "system/cpus.h"
336
+static int vfio_switchover_start(void *opaque)
82
+#include "system/hostmem.h"
337
+{
83
#include "target/s390x/kvm/pv.h"
338
+ VFIODevice *vbasedev = opaque;
84
#include "migration/blocker.h"
339
+
85
#include "qapi/visitor.h"
340
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
86
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
341
+ return vfio_multifd_switchover_start(vbasedev);
342
+ }
343
+
344
+ return 0;
345
+}
346
+
347
static const SaveVMHandlers savevm_vfio_handlers = {
348
.save_prepare = vfio_save_prepare,
349
.save_setup = vfio_save_setup,
350
@@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = {
351
* Multifd support
352
*/
353
.load_state_buffer = vfio_multifd_load_state_buffer,
354
+ .switchover_start = vfio_switchover_start,
355
};
356
357
/* ---------------------------------------------------------------------- */
358
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
359
index XXXXXXX..XXXXXXX 100644
87
index XXXXXXX..XXXXXXX 100644
360
--- a/hw/vfio/trace-events
88
--- a/hw/vfio/spapr.c
361
+++ b/hw/vfio/trace-events
89
+++ b/hw/vfio/spapr.c
362
@@ -XXX,XX +XXX,XX @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
90
@@ -XXX,XX +XXX,XX @@
363
vfio_display_edid_write_error(void) ""
91
#include <linux/kvm.h>
364
92
#endif
365
# migration.c
93
#include "system/kvm.h"
366
+vfio_load_bufs_thread_start(const char *name) " (%s)"
94
+#include "system/hostmem.h"
367
+vfio_load_bufs_thread_end(const char *name) " (%s)"
95
#include "exec/address-spaces.h"
368
vfio_load_cleanup(const char *name) " (%s)"
96
369
vfio_load_device_config_state_start(const char *name) " (%s)"
97
#include "hw/vfio/vfio-common.h"
370
vfio_load_device_config_state_end(const char *name) " (%s)"
371
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
372
vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
373
vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32
374
+vfio_load_state_device_buffer_start(const char *name) " (%s)"
375
+vfio_load_state_device_buffer_starved(const char *name, uint32_t idx) " (%s) idx %"PRIu32
376
+vfio_load_state_device_buffer_load_start(const char *name, uint32_t idx) " (%s) idx %"PRIu32
377
+vfio_load_state_device_buffer_load_end(const char *name, uint32_t idx) " (%s) idx %"PRIu32
378
+vfio_load_state_device_buffer_end(const char *name) " (%s)"
379
vfio_migration_realize(const char *name) " (%s)"
380
vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
381
vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
382
--
98
--
383
2.48.1
99
2.48.1
384
100
385
101
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Automatic memory management helps avoid memory safety issues.
3
<linux/kvm.h> is already included by "system/kvm.h" in the next line.
4
4
5
Reviewed-by: Fabiano Rosas <farosas@suse.de>
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Peter Xu <peterx@redhat.com>
6
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Link: https://lore.kernel.org/qemu-devel/2fd01d773a783d572dcf538a064a98cc09e75c12.1741124640.git.maciej.szmigiero@oracle.com
8
Reviewed-by: Cédric Le Goater <clg@redhat.com>
9
Reviewed-by: Eric Auger <eric.auger@redhat.com>
10
Message-Id: <20250307180337.14811-3-philmd@linaro.org>
11
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-3-philmd@linaro.org
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
13
---
11
migration/qemu-file.h | 2 ++
14
hw/vfio/spapr.c | 3 ---
12
1 file changed, 2 insertions(+)
15
1 file changed, 3 deletions(-)
13
16
14
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
17
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
15
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
16
--- a/migration/qemu-file.h
19
--- a/hw/vfio/spapr.c
17
+++ b/migration/qemu-file.h
20
+++ b/hw/vfio/spapr.c
18
@@ -XXX,XX +XXX,XX @@ QEMUFile *qemu_file_new_input(QIOChannel *ioc);
21
@@ -XXX,XX +XXX,XX @@
19
QEMUFile *qemu_file_new_output(QIOChannel *ioc);
22
#include "qemu/osdep.h"
20
int qemu_fclose(QEMUFile *f);
23
#include <sys/ioctl.h>
21
24
#include <linux/vfio.h>
22
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QEMUFile, qemu_fclose)
25
-#ifdef CONFIG_KVM
23
+
26
-#include <linux/kvm.h>
24
/*
27
-#endif
25
* qemu_file_transferred:
28
#include "system/kvm.h"
26
*
29
#include "system/hostmem.h"
30
#include "exec/address-spaces.h"
27
--
31
--
28
2.48.1
32
2.48.1
29
33
30
34
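With the cleanup function registered above, callers can rely on g_autoptr() for QEMUFile lifetimes. A minimal sketch, assuming a hypothetical caller (qemu_file_new_input(), qemu_get_be64() and qemu_fclose() are the existing APIs named in the hunk):

    #include "qemu/osdep.h"
    #include "io/channel.h"
    #include "migration/qemu-file.h"

    /* Hypothetical caller: the file is closed via qemu_fclose()
     * automatically on every return path once it goes out of scope. */
    static uint64_t peek_first_be64(QIOChannel *ioc)
    {
        g_autoptr(QEMUFile) f = qemu_file_new_input(ioc);

        return qemu_get_be64(f);
    }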
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add a hw_compat entry for recently added x-migration-multifd-transfer VFIO
3
Always include necessary headers explicitly, to avoid
4
property.
4
when refactoring unrelated ones:
5
5
6
hw/vfio/common.c:1176:45: error: implicit declaration of function ‘tcg_enabled’;
7
1176 | tcg_enabled() ? DIRTY_CLIENTS_ALL :
8
| ^~~~~~~~~~~
9
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Cédric Le Goater <clg@redhat.com>
13
Reviewed-by: Cédric Le Goater <clg@redhat.com>
7
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
14
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Link: https://lore.kernel.org/qemu-devel/92c354f0457c152d1f267cc258c6967fff551cb1.1741124640.git.maciej.szmigiero@oracle.com
15
Message-Id: <20250307180337.14811-2-philmd@linaro.org>
16
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-4-philmd@linaro.org
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
17
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
18
---
11
hw/core/machine.c | 1 +
19
hw/vfio/common.c | 1 +
12
1 file changed, 1 insertion(+)
20
1 file changed, 1 insertion(+)
13
21
14
diff --git a/hw/core/machine.c b/hw/core/machine.c
22
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/core/machine.c
24
--- a/hw/vfio/common.c
17
+++ b/hw/core/machine.c
25
+++ b/hw/vfio/common.c
18
@@ -XXX,XX +XXX,XX @@ GlobalProperty hw_compat_9_2[] = {
26
@@ -XXX,XX +XXX,XX @@
19
{ "virtio-mem-pci", "vectors", "0" },
27
#include "migration/misc.h"
20
{ "migration", "multifd-clean-tls-termination", "false" },
28
#include "migration/blocker.h"
21
{ "migration", "send-switchover-start", "off"},
29
#include "migration/qemu-file.h"
22
+ { "vfio-pci", "x-migration-multifd-transfer", "off" },
30
+#include "system/tcg.h"
23
};
31
#include "system/tpm.h"
24
const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
32
25
33
VFIODeviceList vfio_device_list =
26
--
34
--
27
2.48.1
35
2.48.1
28
36
29
37
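The compat entry keeps the property off for 9.2-and-older machine types; on a newer machine type it can still be enabled per device. A hypothetical invocation (machine type and host address are placeholders):

    qemu-system-x86_64 -machine pc-q35-10.0 \
        -device vfio-pci,host=0000:01:00.0,x-migration-multifd-transfer=on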
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Wire VFIO multifd transfer specific setup and cleanup functions into
3
Prefer runtime helpers to get target page size.
4
general VFIO load/save setup and cleanup methods.
5
4
6
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
7
Reviewed-by: Cédric Le Goater <clg@redhat.com>
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Link: https://lore.kernel.org/qemu-devel/b1f864a65fafd4fdab1f89230df52e46ae41f2ac.1741124640.git.maciej.szmigiero@oracle.com
7
Message-Id: <20250305153929.43687-3-philmd@linaro.org>
8
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-5-philmd@linaro.org
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
10
---
11
hw/vfio/migration.c | 24 ++++++++++++++++++++++--
11
hw/vfio/common.c | 8 +++++---
12
1 file changed, 22 insertions(+), 2 deletions(-)
12
1 file changed, 5 insertions(+), 3 deletions(-)
13
13
14
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
14
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/vfio/migration.c
16
--- a/hw/vfio/common.c
17
+++ b/hw/vfio/migration.c
17
+++ b/hw/vfio/common.c
18
@@ -XXX,XX +XXX,XX @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
18
@@ -XXX,XX +XXX,XX @@
19
uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
19
#include "exec/address-spaces.h"
20
int ret;
20
#include "exec/memory.h"
21
21
#include "exec/ram_addr.h"
22
+ if (!vfio_multifd_setup(vbasedev, false, errp)) {
22
+#include "exec/target_page.h"
23
+ return -EINVAL;
23
#include "hw/hw.h"
24
+ }
24
#include "qemu/error-report.h"
25
+
25
#include "qemu/main-loop.h"
26
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
26
@@ -XXX,XX +XXX,XX @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
27
27
MemoryRegionSection *section)
28
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
29
@@ -XXX,XX +XXX,XX @@ static void vfio_save_cleanup(void *opaque)
30
Error *local_err = NULL;
31
int ret;
32
33
+ /* Currently a NOP, done for symmetry with load_cleanup() */
34
+ vfio_multifd_cleanup(vbasedev);
35
+
36
/*
37
* Changing device state from STOP_COPY to STOP can take time. Do it here,
38
* after migration has completed, so it won't increase downtime.
39
@@ -XXX,XX +XXX,XX @@ static void vfio_save_state(QEMUFile *f, void *opaque)
40
static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
41
{
28
{
42
VFIODevice *vbasedev = opaque;
29
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
43
+ VFIOMigration *migration = vbasedev->migration;
30
+ int target_page_size = qemu_target_page_size();
44
+ int ret;
31
VFIORamDiscardListener *vrdl;
45
32
46
- return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
33
/* Ignore some corner cases not relevant in practice. */
47
- vbasedev->migration->device_state, errp);
34
- g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
48
+ if (!vfio_multifd_setup(vbasedev, true, errp)) {
35
+ g_assert(QEMU_IS_ALIGNED(section->offset_within_region, target_page_size));
49
+ return -EINVAL;
36
g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
50
+ }
37
- TARGET_PAGE_SIZE));
51
+
38
- g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
52
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
39
+ target_page_size));
53
+ migration->device_state, errp);
40
+ g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), target_page_size));
54
+ if (ret) {
41
55
+ return ret;
42
vrdl = g_new0(VFIORamDiscardListener, 1);
56
+ }
43
vrdl->bcontainer = bcontainer;
57
+
58
+ return 0;
59
}
60
61
static int vfio_load_cleanup(void *opaque)
62
{
63
VFIODevice *vbasedev = opaque;
64
65
+ vfio_multifd_cleanup(vbasedev);
66
+
67
vfio_migration_cleanup(vbasedev);
68
trace_vfio_load_cleanup(vbasedev->name);
69
70
--
44
--
71
2.48.1
45
2.48.1
72
46
73
47
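A minimal sketch of the pattern the common.c change applies, using the runtime helper instead of the compile-time TARGET_PAGE_SIZE macro (the wrapper function is hypothetical):

    #include "qemu/osdep.h"
    #include "exec/target_page.h"

    /* Hypothetical helper: page-alignment check that works in a
     * target-independent ("compile once") translation unit. */
    static bool is_target_page_aligned(uint64_t value)
    {
        return QEMU_IS_ALIGNED(value, qemu_target_page_size());
    }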
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
This way they can also be referenced in other translation
3
Some files don't rely on any target-specific knowledge
4
units than migration.c.
4
and can be compiled once:
5
5
6
- helpers.c
7
- container-base.c
8
- migration.c (removing unnecessary "exec/ram_addr.h")
9
- migration-multifd.c
10
- cpr.c
11
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Cédric Le Goater <clg@redhat.com>
15
Reviewed-by: Cédric Le Goater <clg@redhat.com>
7
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
16
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Link: https://lore.kernel.org/qemu-devel/26a940f6b22c1b685818251b7a3ddbbca601b1d6.1741124640.git.maciej.szmigiero@oracle.com
17
Message-Id: <20250308230917.18907-4-philmd@linaro.org>
18
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-6-philmd@linaro.org
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
20
---
11
include/hw/vfio/vfio-common.h | 17 +++++++++++++++++
21
hw/vfio/migration.c | 1 -
12
hw/vfio/migration.c | 17 -----------------
22
hw/vfio/meson.build | 13 ++++++++-----
13
2 files changed, 17 insertions(+), 17 deletions(-)
23
2 files changed, 8 insertions(+), 6 deletions(-)
14
24
15
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/hw/vfio/vfio-common.h
18
+++ b/include/hw/vfio/vfio-common.h
19
@@ -XXX,XX +XXX,XX @@
20
21
#define VFIO_MSG_PREFIX "vfio %s: "
22
23
+/*
24
+ * Flags to be used as unique delimiters for VFIO devices in the migration
25
+ * stream. These flags are composed as:
26
+ * 0xffffffff => MSB 32-bit all 1s
27
+ * 0xef10 => Magic ID, represents emulated (virtual) function IO
28
+ * 0x0000 => 16-bits reserved for flags
29
+ *
30
+ * The beginning of state information is marked by _DEV_CONFIG_STATE,
31
+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
32
+ * certain state information is marked by _END_OF_STATE.
33
+ */
34
+#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
35
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
36
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
37
+#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
38
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
39
+
40
enum {
41
VFIO_DEVICE_TYPE_PCI = 0,
42
VFIO_DEVICE_TYPE_PLATFORM = 1,
43
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
25
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
44
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
45
--- a/hw/vfio/migration.c
27
--- a/hw/vfio/migration.c
46
+++ b/hw/vfio/migration.c
28
+++ b/hw/vfio/migration.c
47
@@ -XXX,XX +XXX,XX @@
29
@@ -XXX,XX +XXX,XX @@
30
#include "qapi/error.h"
31
#include "qapi/qapi-events-vfio.h"
32
#include "exec/ramlist.h"
33
-#include "exec/ram_addr.h"
34
#include "pci.h"
48
#include "trace.h"
35
#include "trace.h"
49
#include "hw/hw.h"
36
#include "hw/hw.h"
50
37
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
51
-/*
38
index XXXXXXX..XXXXXXX 100644
52
- * Flags to be used as unique delimiters for VFIO devices in the migration
39
--- a/hw/vfio/meson.build
53
- * stream. These flags are composed as:
40
+++ b/hw/vfio/meson.build
54
- * 0xffffffff => MSB 32-bit all 1s
41
@@ -XXX,XX +XXX,XX @@
55
- * 0xef10 => Magic ID, represents emulated (virtual) function IO
42
vfio_ss = ss.source_set()
56
- * 0x0000 => 16-bits reserved for flags
43
vfio_ss.add(files(
57
- *
44
- 'helpers.c',
58
- * The beginning of state information is marked by _DEV_CONFIG_STATE,
45
'common.c',
59
- * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
46
- 'container-base.c',
60
- * certain state information is marked by _END_OF_STATE.
47
'container.c',
61
- */
48
- 'migration.c',
62
-#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
49
- 'migration-multifd.c',
63
-#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
50
- 'cpr.c',
64
-#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
51
))
65
-#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
52
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
66
-#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
53
vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
67
-
54
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c'))
68
/*
55
vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c'))
69
* This is an arbitrary size based on migration of mlx5 devices, where typically
56
70
* total device migration size is on the order of 100s of MB. Testing with
57
specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss)
58
+
59
+system_ss.add(when: 'CONFIG_VFIO', if_true: files(
60
+ 'helpers.c',
61
+ 'container-base.c',
62
+ 'migration.c',
63
+ 'migration-multifd.c',
64
+ 'cpr.c',
65
+))
71
--
66
--
72
2.48.1
67
2.48.1
73
68
74
69
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add multifd setup/cleanup functions and an associated VFIOMultifd data
3
These files depend on the VFIO symbol in their Kconfig
4
structure that will contain most of the receive-side data together
4
definition. They don't rely on target specific definitions,
5
with its init/cleanup methods.
5
move them to system_ss[] to build them once.
6
6
7
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
9
Link: https://lore.kernel.org/qemu-devel/c0520523053b1087787152ddf2163257d3030be0.1741124640.git.maciej.szmigiero@oracle.com
11
Reviewed-by: Eric Auger <eric.auger@redhat.com>
12
Message-Id: <20250308230917.18907-5-philmd@linaro.org>
13
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-7-philmd@linaro.org
10
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
11
---
15
---
12
hw/vfio/migration-multifd.h | 4 ++++
16
hw/vfio/meson.build | 4 ++--
13
include/hw/vfio/vfio-common.h | 3 +++
17
1 file changed, 2 insertions(+), 2 deletions(-)
14
hw/vfio/migration-multifd.c | 44 +++++++++++++++++++++++++++++++++++
15
3 files changed, 51 insertions(+)
16
18
17
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
19
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
18
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/vfio/migration-multifd.h
21
--- a/hw/vfio/meson.build
20
+++ b/hw/vfio/migration-multifd.h
22
+++ b/hw/vfio/meson.build
21
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
22
24
))
23
#include "hw/vfio/vfio-common.h"
25
vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
24
26
vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
25
+bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp);
27
-vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
26
+void vfio_multifd_cleanup(VFIODevice *vbasedev);
28
-vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
27
+
29
vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c'))
28
bool vfio_multifd_transfer_supported(void);
30
vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c'))
29
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
31
30
32
specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss)
31
#endif
33
32
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
34
+system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
33
index XXXXXXX..XXXXXXX 100644
35
+system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
34
--- a/include/hw/vfio/vfio-common.h
36
system_ss.add(when: 'CONFIG_VFIO', if_true: files(
35
+++ b/include/hw/vfio/vfio-common.h
37
'helpers.c',
36
@@ -XXX,XX +XXX,XX @@ typedef struct VFIORegion {
38
'container-base.c',
37
uint8_t nr; /* cache the region number for debug */
38
} VFIORegion;
39
40
+typedef struct VFIOMultifd VFIOMultifd;
41
+
42
typedef struct VFIOMigration {
43
struct VFIODevice *vbasedev;
44
VMChangeStateEntry *vm_state;
45
@@ -XXX,XX +XXX,XX @@ typedef struct VFIOMigration {
46
uint64_t mig_flags;
47
uint64_t precopy_init_size;
48
uint64_t precopy_dirty_size;
49
+ VFIOMultifd *multifd;
50
bool initial_data_sent;
51
52
bool event_save_iterate_started;
53
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/hw/vfio/migration-multifd.c
56
+++ b/hw/vfio/migration-multifd.c
57
@@ -XXX,XX +XXX,XX @@ typedef struct VFIODeviceStatePacket {
58
uint8_t data[0];
59
} QEMU_PACKED VFIODeviceStatePacket;
60
61
+typedef struct VFIOMultifd {
62
+} VFIOMultifd;
63
+
64
+static VFIOMultifd *vfio_multifd_new(void)
65
+{
66
+ VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
67
+
68
+ return multifd;
69
+}
70
+
71
+static void vfio_multifd_free(VFIOMultifd *multifd)
72
+{
73
+ g_free(multifd);
74
+}
75
+
76
+void vfio_multifd_cleanup(VFIODevice *vbasedev)
77
+{
78
+ VFIOMigration *migration = vbasedev->migration;
79
+
80
+ g_clear_pointer(&migration->multifd, vfio_multifd_free);
81
+}
82
+
83
bool vfio_multifd_transfer_supported(void)
84
{
85
return multifd_device_state_supported() &&
86
migrate_send_switchover_start();
87
}
88
+
89
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
90
+{
91
+ return false;
92
+}
93
+
94
+bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
95
+{
96
+ VFIOMigration *migration = vbasedev->migration;
97
+
98
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
99
+ /* Nothing further to check or do */
100
+ return true;
101
+ }
102
+
103
+ if (alloc_multifd) {
104
+ assert(!migration->multifd);
105
+ migration->multifd = vfio_multifd_new();
106
+ }
107
+
108
+ return true;
109
+}
110
--
39
--
111
2.48.1
40
2.48.1
112
41
113
42
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add vfio_multifd_transfer_supported() function that tells whether the
3
Removing unused "exec/ram_addr.h" header allow to compile
4
multifd device state transfer is supported.
4
iommufd.c once for all targets.
5
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Cédric Le Goater <clg@redhat.com>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
7
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Link: https://lore.kernel.org/qemu-devel/8ce50256f341b3d47342bb217cb5fbb2deb14639.1741124640.git.maciej.szmigiero@oracle.com
11
Message-Id: <20250308230917.18907-6-philmd@linaro.org>
12
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-8-philmd@linaro.org
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
14
---
11
hw/vfio/migration-multifd.h | 2 ++
15
hw/vfio/iommufd.c | 1 -
12
hw/vfio/migration-multifd.c | 6 ++++++
16
hw/vfio/meson.build | 6 +++---
13
2 files changed, 8 insertions(+)
17
2 files changed, 3 insertions(+), 4 deletions(-)
14
18
15
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
19
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
16
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/vfio/migration-multifd.h
21
--- a/hw/vfio/iommufd.c
18
+++ b/hw/vfio/migration-multifd.h
22
+++ b/hw/vfio/iommufd.c
19
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@
20
24
#include "qemu/cutils.h"
21
#include "hw/vfio/vfio-common.h"
25
#include "qemu/chardev_open.h"
22
26
#include "pci.h"
23
+bool vfio_multifd_transfer_supported(void);
27
-#include "exec/ram_addr.h"
24
+
28
25
#endif
29
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
26
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
30
ram_addr_t size, void *vaddr, bool readonly)
31
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
27
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/vfio/migration-multifd.c
33
--- a/hw/vfio/meson.build
29
+++ b/hw/vfio/migration-multifd.c
34
+++ b/hw/vfio/meson.build
30
@@ -XXX,XX +XXX,XX @@ typedef struct VFIODeviceStatePacket {
35
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(files(
31
uint32_t flags;
36
'container.c',
32
uint8_t data[0];
37
))
33
} QEMU_PACKED VFIODeviceStatePacket;
38
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
34
+
39
-vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
35
+bool vfio_multifd_transfer_supported(void)
40
- 'iommufd.c',
36
+{
41
-))
37
+ return multifd_device_state_supported() &&
42
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
38
+ migrate_send_switchover_start();
43
'display.c',
39
+}
44
'pci-quirks.c',
45
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
46
'migration-multifd.c',
47
'cpr.c',
48
))
49
+system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
50
+ 'iommufd.c',
51
+))
40
--
52
--
41
2.48.1
53
2.48.1
42
54
43
55
diff view generated by jsdifflib
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Add basic types and flags used by VFIO multifd device state transfer
3
display.c doesn't rely on target specific definitions,
4
support.
4
move it to system_ss[] to build it once.
5
5
6
Since we'll be introducing a lot of multifd transfer specific code,
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
add a new file migration-multifd.c to home it, wired into main VFIO
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
migration code (migration.c) via migration-multifd.h header file.
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
10
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
11
Reviewed-by: Cédric Le Goater <clg@redhat.com>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
12
Link: https://lore.kernel.org/qemu-devel/4eedd529e6617f80f3d6a66d7268a0db2bc173fa.1741124640.git.maciej.szmigiero@oracle.com
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Message-Id: <20250308230917.18907-8-philmd@linaro.org>
12
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-9-philmd@linaro.org
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
14
---
15
hw/vfio/migration-multifd.h | 17 +++++++++++++++++
15
hw/vfio/meson.build | 4 +++-
16
hw/vfio/migration-multifd.c | 33 +++++++++++++++++++++++++++++++++
16
1 file changed, 3 insertions(+), 1 deletion(-)
17
hw/vfio/migration.c | 1 +
18
hw/vfio/meson.build | 1 +
19
4 files changed, 52 insertions(+)
20
create mode 100644 hw/vfio/migration-multifd.h
21
create mode 100644 hw/vfio/migration-multifd.c
22
17
23
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
24
new file mode 100644
25
index XXXXXXX..XXXXXXX
26
--- /dev/null
27
+++ b/hw/vfio/migration-multifd.h
28
@@ -XXX,XX +XXX,XX @@
29
+/*
30
+ * Multifd VFIO migration
31
+ *
32
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
33
+ *
34
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
35
+ * See the COPYING file in the top-level directory.
36
+ *
37
+ * SPDX-License-Identifier: GPL-2.0-or-later
38
+ */
39
+
40
+#ifndef HW_VFIO_MIGRATION_MULTIFD_H
41
+#define HW_VFIO_MIGRATION_MULTIFD_H
42
+
43
+#include "hw/vfio/vfio-common.h"
44
+
45
+#endif
46
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
47
new file mode 100644
48
index XXXXXXX..XXXXXXX
49
--- /dev/null
50
+++ b/hw/vfio/migration-multifd.c
51
@@ -XXX,XX +XXX,XX @@
52
+/*
53
+ * Multifd VFIO migration
54
+ *
55
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
56
+ *
57
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
58
+ * See the COPYING file in the top-level directory.
59
+ *
60
+ * SPDX-License-Identifier: GPL-2.0-or-later
61
+ */
62
+
63
+#include "qemu/osdep.h"
64
+#include "hw/vfio/vfio-common.h"
65
+#include "migration/misc.h"
66
+#include "qapi/error.h"
67
+#include "qemu/error-report.h"
68
+#include "qemu/lockable.h"
69
+#include "qemu/main-loop.h"
70
+#include "qemu/thread.h"
71
+#include "migration/qemu-file.h"
72
+#include "migration-multifd.h"
73
+#include "trace.h"
74
+
75
+#define VFIO_DEVICE_STATE_CONFIG_STATE (1)
76
+
77
+#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)
78
+
79
+typedef struct VFIODeviceStatePacket {
80
+ uint32_t version;
81
+ uint32_t idx;
82
+ uint32_t flags;
83
+ uint8_t data[0];
84
+} QEMU_PACKED VFIODeviceStatePacket;
85
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/hw/vfio/migration.c
88
+++ b/hw/vfio/migration.c
89
@@ -XXX,XX +XXX,XX @@
90
#include "migration/qemu-file.h"
91
#include "migration/register.h"
92
#include "migration/blocker.h"
93
+#include "migration-multifd.h"
94
#include "qapi/error.h"
95
#include "qapi/qapi-events-vfio.h"
96
#include "exec/ramlist.h"
97
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
18
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
98
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
99
--- a/hw/vfio/meson.build
20
--- a/hw/vfio/meson.build
100
+++ b/hw/vfio/meson.build
21
+++ b/hw/vfio/meson.build
101
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(files(
22
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(files(
102
'container-base.c',
103
'container.c',
104
'migration.c',
105
+ 'migration-multifd.c',
106
'cpr.c',
107
))
23
))
108
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
24
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
25
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
26
- 'display.c',
27
'pci-quirks.c',
28
'pci.c',
29
))
30
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
31
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
32
'iommufd.c',
33
))
34
+system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
35
+ 'display.c',
36
+))
109
--
37
--
110
2.48.1
38
2.48.1
111
39
112
40
1
From: Peter Xu <peterx@redhat.com>
1
From: Vasilis Liaskovitis <vliaskovitis@suse.com>
2
2
3
The newly introduced device state buffer can be used for either storing
3
The ATI BAR4 quirk is targeting an ioport BAR. Older devices may
4
VFIO's read() raw data, but already also possible to store generic device
4
have a BAR4 which is not an ioport, causing a segfault here. Test
5
states. After noticing that device states may not easily provide a max
5
the BAR type to skip these devices.
6
buffer size (also the fact that RAM MultiFDPages_t after all also want to
7
have flexibility on managing offset[] array), it may not be a good idea to
8
stick with union on MultiFDSendData.. as it won't play well with such
9
flexibility.
10
6
11
Switch MultiFDSendData to a struct.
7
Similar to
8
"8f419c5b: vfio/pci-quirks: Exclude non-ioport BAR from NVIDIA quirk"
12
9
13
It won't consume a lot more space in reality, after all the real buffers
10
Untested, as I don't have the card to test.
14
were already dynamically allocated, so it's so far only about the two
15
structs (pages, device_state) that will be duplicated, but they're small.
16
11
17
With this, we can remove the pretty hard to understand alloc size logic.
12
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2856
18
Because now we can allocate offset[] together with the SendData, and
13
Signed-off-by: Vasilis Liaskovitis <vliaskovitis@suse.com>
19
properly free it when the SendData is freed.
14
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
20
15
Link: https://lore.kernel.org/qemu-devel/20250310235833.41026-1-vliaskovitis@suse.com
21
[MSS: Make sure to clear possible device state payload before freeing
22
MultiFDSendData, remove placeholders for other patches not included]
23
24
Signed-off-by: Peter Xu <peterx@redhat.com>
25
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
26
Acked-by: Fabiano Rosas <farosas@suse.de>
27
Link: https://lore.kernel.org/qemu-devel/7b02baba8e6ddb23ef7c349d312b9b631db09d7e.1741124640.git.maciej.szmigiero@oracle.com
28
Signed-off-by: Cédric Le Goater <clg@redhat.com>
16
Signed-off-by: Cédric Le Goater <clg@redhat.com>
29
---
17
---
30
migration/multifd.h | 15 +++++++++------
18
hw/vfio/pci-quirks.c | 2 +-
31
migration/multifd-device-state.c | 5 -----
19
1 file changed, 1 insertion(+), 1 deletion(-)
32
migration/multifd-nocomp.c | 13 ++++++-------
33
migration/multifd.c | 25 +++++++------------------
34
4 files changed, 22 insertions(+), 36 deletions(-)
35
20
36
diff --git a/migration/multifd.h b/migration/multifd.h
21
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
37
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
38
--- a/migration/multifd.h
23
--- a/hw/vfio/pci-quirks.c
39
+++ b/migration/multifd.h
24
+++ b/hw/vfio/pci-quirks.c
40
@@ -XXX,XX +XXX,XX @@ typedef struct {
25
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
41
uint32_t num;
26
42
/* number of normal pages */
27
/* This windows doesn't seem to be used except by legacy VGA code */
43
uint32_t normal_num;
28
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
44
+ /*
29
- !vdev->vga || nr != 4) {
45
+ * Pointer to the ramblock. NOTE: it's caller's responsibility to make
30
+ !vdev->vga || nr != 4 || !vdev->bars[4].ioport) {
46
+ * sure the pointer is always valid!
47
+ */
48
RAMBlock *block;
49
- /* offset of each page */
50
- ram_addr_t offset[];
51
+ /* offset array of each page, managed by multifd */
52
+ ram_addr_t *offset;
53
} MultiFDPages_t;
54
55
struct MultiFDRecvData {
56
@@ -XXX,XX +XXX,XX @@ typedef enum {
57
MULTIFD_PAYLOAD_DEVICE_STATE,
58
} MultiFDPayloadType;
59
60
-typedef union MultiFDPayload {
61
+typedef struct MultiFDPayload {
62
MultiFDPages_t ram;
63
MultiFDDeviceState_t device_state;
64
} MultiFDPayload;
65
@@ -XXX,XX +XXX,XX @@ void multifd_ram_save_cleanup(void);
66
int multifd_ram_flush_and_sync(QEMUFile *f);
67
bool multifd_ram_sync_per_round(void);
68
bool multifd_ram_sync_per_section(void);
69
-size_t multifd_ram_payload_size(void);
70
+void multifd_ram_payload_alloc(MultiFDPages_t *pages);
71
+void multifd_ram_payload_free(MultiFDPages_t *pages);
72
void multifd_ram_fill_packet(MultiFDSendParams *p);
73
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
74
75
-size_t multifd_device_state_payload_size(void);
76
-
77
void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);
78
79
void multifd_device_state_send_setup(void);
80
diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/migration/multifd-device-state.c
83
+++ b/migration/multifd-device-state.c
84
@@ -XXX,XX +XXX,XX @@ static struct {
85
MultiFDSendData *send_data;
86
} *multifd_send_device_state;
87
88
-size_t multifd_device_state_payload_size(void)
89
-{
90
- return sizeof(MultiFDDeviceState_t);
91
-}
92
-
93
void multifd_device_state_send_setup(void)
94
{
95
assert(!multifd_send_device_state);
96
diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/migration/multifd-nocomp.c
99
+++ b/migration/multifd-nocomp.c
100
@@ -XXX,XX +XXX,XX @@
101
102
static MultiFDSendData *multifd_ram_send;
103
104
-size_t multifd_ram_payload_size(void)
105
+void multifd_ram_payload_alloc(MultiFDPages_t *pages)
106
{
107
- uint32_t n = multifd_ram_page_count();
108
+ pages->offset = g_new0(ram_addr_t, multifd_ram_page_count());
109
+}
110
111
- /*
112
- * We keep an array of page offsets at the end of MultiFDPages_t,
113
- * add space for it in the allocation.
114
- */
115
- return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
116
+void multifd_ram_payload_free(MultiFDPages_t *pages)
117
+{
118
+ g_clear_pointer(&pages->offset, g_free);
119
}
120
121
void multifd_ram_save_setup(void)
122
diff --git a/migration/multifd.c b/migration/multifd.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/migration/multifd.c
125
+++ b/migration/multifd.c
126
@@ -XXX,XX +XXX,XX @@ struct {
127
128
MultiFDSendData *multifd_send_data_alloc(void)
129
{
130
- size_t max_payload_size, size_minus_payload;
131
+ MultiFDSendData *new = g_new0(MultiFDSendData, 1);
132
133
- /*
134
- * MultiFDPages_t has a flexible array at the end, account for it
135
- * when allocating MultiFDSendData. Use max() in case other types
136
- * added to the union in the future are larger than
137
- * (MultiFDPages_t + flex array).
138
- */
139
- max_payload_size = MAX(multifd_ram_payload_size(),
140
- multifd_device_state_payload_size());
141
- max_payload_size = MAX(max_payload_size, sizeof(MultiFDPayload));
142
-
143
- /*
144
- * Account for any holes the compiler might insert. We can't pack
145
- * the structure because that misaligns the members and triggers
146
- * Waddress-of-packed-member.
147
- */
148
- size_minus_payload = sizeof(MultiFDSendData) - sizeof(MultiFDPayload);
149
+ multifd_ram_payload_alloc(&new->u.ram);
150
+ /* Device state allocates its payload on-demand */
151
152
- return g_malloc0(size_minus_payload + max_payload_size);
153
+ return new;
154
}
155
156
void multifd_send_data_clear(MultiFDSendData *data)
157
@@ -XXX,XX +XXX,XX @@ void multifd_send_data_free(MultiFDSendData *data)
158
return;
31
return;
159
}
32
}
160
161
+ /* This also free's device state payload */
162
multifd_send_data_clear(data);
163
164
+ multifd_ram_payload_free(&data->u.ram);
165
+
166
g_free(data);
167
}
168
33
169
--
34
--
170
2.48.1
35
2.48.1
171
36
172
37
1
From: Alex Williamson <alex.williamson@redhat.com>
1
From: Joao Martins <joao.m.martins@oracle.com>
2
2
3
We want the device in the D0 power state going into reset, but the
3
The intent behind the x-device-dirty-page-tracking option is twofold:
4
config write can enable the BARs in the address space, which are
5
then removed from the address space once we clear the memory enable
6
bit in the command register. Re-order to clear the command bit
7
first, so the power state change doesn't enable the BARs.
8
4
9
Cc: Cédric Le Goater <clg@redhat.com>
5
1) development/testing in the presence of VFs with VF dirty page tracking
10
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
6
11
Reviewed-by: Eric Auger <eric.auger@redhat.com>
7
2) deliberately choosing platform dirty tracker over the VF one.
12
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
8
13
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Item 2) scenario is useful when VF dirty tracker is not as fast as
14
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-6-alex.williamson@redhat.com
10
IOMMU, or there's some limitations around it (e.g. number of them is
11
limited; aggregated address space under tracking is limited),
12
efficiency/scalability (e.g. 1 pagetable in IOMMU dirty tracker to scan
13
vs N VFs) or just troubleshooting. Given item 2 it is not restricted to
14
debugging, hence drop the debug parenthesis from the option description.
15
16
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
17
Reviewed-by: Cédric Le Goater <clg@redhat.com>
18
Link: https://lore.kernel.org/qemu-devel/20250311174807.79825-1-joao.m.martins@oracle.com
19
[ clg: Fixed subject spelling ]
15
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
Signed-off-by: Cédric Le Goater <clg@redhat.com>
16
---
21
---
17
hw/vfio/pci.c | 18 +++++++++---------
22
hw/vfio/pci.c | 2 +-
18
1 file changed, 9 insertions(+), 9 deletions(-)
23
1 file changed, 1 insertion(+), 1 deletion(-)
19
24
20
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
25
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
21
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/vfio/pci.c
27
--- a/hw/vfio/pci.c
23
+++ b/hw/vfio/pci.c
28
+++ b/hw/vfio/pci.c
24
@@ -XXX,XX +XXX,XX @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
29
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
25
30
object_class_property_set_description(klass, /* 9.1 */
26
vfio_disable_interrupts(vdev);
31
"x-device-dirty-page-tracking",
27
32
"Disable device dirty page tracking and use "
28
+ /*
33
- "container-based dirty page tracking (DEBUG)");
29
+ * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
34
+ "container-based dirty page tracking");
30
+ * Also put INTx Disable in known state.
35
object_class_property_set_description(klass, /* 9.1 */
31
+ */
36
"migration-events",
32
+ cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
37
"Emit VFIO migration QAPI event when a VFIO device "
33
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
34
+ PCI_COMMAND_INTX_DISABLE);
35
+ vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
36
+
37
/* Make sure the device is in D0 */
38
if (pdev->pm_cap) {
39
uint16_t pmcsr;
40
@@ -XXX,XX +XXX,XX @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
41
}
42
}
43
}
44
-
45
- /*
46
- * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
47
- * Also put INTx Disable in known state.
48
- */
49
- cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
50
- cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
51
- PCI_COMMAND_INTX_DISABLE);
52
- vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
53
}
54
55
void vfio_pci_post_reset(VFIOPCIDevice *vdev)
56
--
38
--
57
2.48.1
39
2.48.1
58
40
59
41
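For completeness, a hypothetical command line that opts a device out of its own dirty tracker and falls back to container-based tracking (the host address is a placeholder):

    -device vfio-pci,host=0000:3b:00.1,x-device-dirty-page-tracking=off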
Deleted patch
1
From: Eric Auger <eric.auger@redhat.com>
2
1
3
As an outcome of KVM forum 2024 "vfio-platform: live and let die?"
4
talk, let's deprecate vfio-platform devices.
5
6
Signed-off-by: Eric Auger <eric.auger@redhat.com>
7
Reviewed-by: Cédric Le Goater <clg@redhat.com>
8
Link: https://lore.kernel.org/qemu-devel/20250305124225.952791-1-eric.auger@redhat.com
9
[ clg: Fixed spelling in vfio-amd-xgbe section ]
10
Signed-off-by: Cédric Le Goater <clg@redhat.com>
11
---
12
docs/about/deprecated.rst | 25 +++++++++++++++++++++++++
13
hw/vfio/amd-xgbe.c | 2 ++
14
hw/vfio/calxeda-xgmac.c | 2 ++
15
hw/vfio/platform.c | 1 +
16
4 files changed, 30 insertions(+)
17
18
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
19
index XXXXXXX..XXXXXXX 100644
20
--- a/docs/about/deprecated.rst
21
+++ b/docs/about/deprecated.rst
22
@@ -XXX,XX +XXX,XX @@ Stream ``reconnect`` (since 9.2)
23
The ``reconnect`` option only allows specifiying second granularity timeouts,
24
which is not enough for all types of use cases, use ``reconnect-ms`` instead.
25
26
+VFIO device options
27
+'''''''''''''''''''
28
+
29
+``-device vfio-calxeda-xgmac`` (since 10.0)
30
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
31
+The vfio-calxeda-xgmac device allows to assign a host Calxeda Highbank
32
+10Gb XGMAC Ethernet controller device ("calxeda,hb-xgmac" compatibility
33
+string) to a guest. Calxeda HW has been ewasted now and there is no point
34
+keeping that device.
35
+
36
+``-device vfio-amd-xgbe`` (since 10.0)
37
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
38
+The vfio-amd-xgbe device allows assigning a host AMD 10GbE controller
39
+to a guest ("amd,xgbe-seattle-v1a" compatibility string). AMD "Seattle"
40
+is not supported anymore and there is no point keeping that device.
41
+
42
+``-device vfio-platform`` (since 10.0)
43
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
44
+The vfio-platform device allows assigning a host platform device
45
+to a guest in a generic manner. Integrating a new device into
46
+the vfio-platform infrastructure requires some adaptation at
47
+both the kernel and QEMU levels. No such attempt has been made for years
48
+and the conclusion is that vfio-platform has not gained any traction.
49
+PCIe passthrough shall be the mainline solution.
50
+
51
CPU device properties
52
'''''''''''''''''''''
53
54
diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/hw/vfio/amd-xgbe.c
57
+++ b/hw/vfio/amd-xgbe.c
58
@@ -XXX,XX +XXX,XX @@
59
#include "hw/vfio/vfio-amd-xgbe.h"
60
#include "migration/vmstate.h"
61
#include "qemu/module.h"
62
+#include "qemu/error-report.h"
63
64
static void amd_xgbe_realize(DeviceState *dev, Error **errp)
65
{
66
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
67
VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev);
68
69
+ warn_report("-device vfio-amd-xgbe is deprecated");
70
vdev->compat = g_strdup("amd,xgbe-seattle-v1a");
71
vdev->num_compat = 1;
72
73
diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/hw/vfio/calxeda-xgmac.c
76
+++ b/hw/vfio/calxeda-xgmac.c
77
@@ -XXX,XX +XXX,XX @@
78
#include "hw/vfio/vfio-calxeda-xgmac.h"
79
#include "migration/vmstate.h"
80
#include "qemu/module.h"
81
+#include "qemu/error-report.h"
82
83
static void calxeda_xgmac_realize(DeviceState *dev, Error **errp)
84
{
85
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
86
VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev);
87
88
+ warn_report("-device vfio-calxeda-xgmac is deprecated");
89
vdev->compat = g_strdup("calxeda,hb-xgmac");
90
vdev->num_compat = 1;
91
92
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/hw/vfio/platform.c
95
+++ b/hw/vfio/platform.c
96
@@ -XXX,XX +XXX,XX @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
97
VFIODevice *vbasedev = &vdev->vbasedev;
98
int i;
99
100
+ warn_report("-device vfio-platform is deprecated");
101
qemu_mutex_init(&vdev->intp_mutex);
102
103
trace_vfio_platform_realize(vbasedev->sysfsdev ?
104
--
105
2.48.1
106
107
diff view generated by jsdifflib
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
It's possible for {load,save}_cleanup SaveVMHandlers to get called without
4
the corresponding {load,save}_setup handler being called first.
5
6
One such example is when the {load,save}_setup handler of a preceding device
7
returns an error.
8
In this case, the migration core cleanup code will call all corresponding
9
cleanup handlers, even for those devices which haven't had their setup
10
handler called.
11
12
Since this behavior can be surprising, let's clearly document it
13
in the description of these SaveVMHandlers.
14
15
Reviewed-by: Fabiano Rosas <farosas@suse.de>
16
Reviewed-by: Cédric Le Goater <clg@redhat.com>
17
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
18
Link: https://lore.kernel.org/qemu-devel/991636623fb780350f493b5f045cb17e13ce4c0f.1741124640.git.maciej.szmigiero@oracle.com
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
---
21
include/migration/register.h | 6 +++++-
22
1 file changed, 5 insertions(+), 1 deletion(-)
23
24
diff --git a/include/migration/register.h b/include/migration/register.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/migration/register.h
27
+++ b/include/migration/register.h
28
@@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers {
29
/**
30
* @save_cleanup
31
*
32
- * Uninitializes the data structures on the source
33
+ * Uninitializes the data structures on the source.
34
+ * Note that this handler can be called even if save_setup
35
+ * wasn't called earlier.
36
*
37
* @opaque: data pointer passed to register_savevm_live()
38
*/
39
@@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers {
40
* @load_cleanup
41
*
42
* Uninitializes the data structures on the destination.
43
+ * Note that this handler can be called even if load_setup
44
+ * wasn't called earlier.
45
*
46
* @opaque: data pointer passed to register_savevm_live()
47
*
48
--
49
2.48.1
50
51
diff view generated by jsdifflib
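For illustration, a handler affected by this guarantee has to tolerate cleanup
without a prior setup call. A minimal sketch, assuming a hypothetical
ExampleDevState whose buffer is only allocated in load_setup (nothing below is
part of this series):

    #include <glib.h>

    typedef struct ExampleDevState {
        void *load_buf;              /* allocated by load_setup, else NULL */
    } ExampleDevState;

    /* Matches the load_cleanup SaveVMHandlers signature: int (*)(void *). */
    static int example_load_cleanup(void *opaque)
    {
        ExampleDevState *s = opaque;

        /*
         * load_setup may never have run (e.g. a preceding device's setup
         * failed), so the buffer can legitimately still be NULL here.
         */
        g_clear_pointer(&s->load_buf, g_free);
        return 0;
    }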
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
This function's name conflicts with one used by a future generic thread pool
4
function and it was only used by one test anyway.
5
6
Update the trace event name in thread_pool_submit_aio() accordingly.
7
8
Acked-by: Fabiano Rosas <farosas@suse.de>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
Reviewed-by: Peter Xu <peterx@redhat.com>
11
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
12
Link: https://lore.kernel.org/qemu-devel/6830f07777f939edaf0a2d301c39adcaaf3817f0.1741124640.git.maciej.szmigiero@oracle.com
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
15
include/block/thread-pool.h | 3 +--
16
tests/unit/test-thread-pool.c | 6 +++---
17
util/thread-pool.c | 7 +------
18
util/trace-events | 2 +-
19
4 files changed, 6 insertions(+), 12 deletions(-)
20
21
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/include/block/thread-pool.h
24
+++ b/include/block/thread-pool.h
25
@@ -XXX,XX +XXX,XX @@ ThreadPool *thread_pool_new(struct AioContext *ctx);
26
void thread_pool_free(ThreadPool *pool);
27
28
/*
29
- * thread_pool_submit* API: submit I/O requests in the thread's
30
+ * thread_pool_submit_{aio,co} API: submit I/O requests in the thread's
31
* current AioContext.
32
*/
33
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
34
BlockCompletionFunc *cb, void *opaque);
35
int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
36
-void thread_pool_submit(ThreadPoolFunc *func, void *arg);
37
38
void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
39
40
diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tests/unit/test-thread-pool.c
43
+++ b/tests/unit/test-thread-pool.c
44
@@ -XXX,XX +XXX,XX @@ static void done_cb(void *opaque, int ret)
45
active--;
46
}
47
48
-static void test_submit(void)
49
+static void test_submit_no_complete(void)
50
{
51
WorkerTestData data = { .n = 0 };
52
- thread_pool_submit(worker_cb, &data);
53
+ thread_pool_submit_aio(worker_cb, &data, NULL, NULL);
54
while (data.n == 0) {
55
aio_poll(ctx, true);
56
}
57
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
58
ctx = qemu_get_current_aio_context();
59
60
g_test_init(&argc, &argv, NULL);
61
- g_test_add_func("/thread-pool/submit", test_submit);
62
+ g_test_add_func("/thread-pool/submit-no-complete", test_submit_no_complete);
63
g_test_add_func("/thread-pool/submit-aio", test_submit_aio);
64
g_test_add_func("/thread-pool/submit-co", test_submit_co);
65
g_test_add_func("/thread-pool/submit-many", test_submit_many);
66
diff --git a/util/thread-pool.c b/util/thread-pool.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/util/thread-pool.c
69
+++ b/util/thread-pool.c
70
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
71
72
QLIST_INSERT_HEAD(&pool->head, req, all);
73
74
- trace_thread_pool_submit(pool, req, arg);
75
+ trace_thread_pool_submit_aio(pool, req, arg);
76
77
qemu_mutex_lock(&pool->lock);
78
if (pool->idle_threads == 0 && pool->cur_threads < pool->max_threads) {
79
@@ -XXX,XX +XXX,XX @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
80
return tpc.ret;
81
}
82
83
-void thread_pool_submit(ThreadPoolFunc *func, void *arg)
84
-{
85
- thread_pool_submit_aio(func, arg, NULL, NULL);
86
-}
87
-
88
void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
89
{
90
qemu_mutex_lock(&pool->lock);
91
diff --git a/util/trace-events b/util/trace-events
92
index XXXXXXX..XXXXXXX 100644
93
--- a/util/trace-events
94
+++ b/util/trace-events
95
@@ -XXX,XX +XXX,XX @@ aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
96
reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
97
98
# thread-pool.c
99
-thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
100
+thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
101
thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
102
thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
103
104
--
105
2.48.1
106
107
diff view generated by jsdifflib
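Out-of-tree callers of the removed wrapper can switch to the AIO variant with
NULL completion arguments, exactly as the test above was converted. A one-line
sketch (worker_fn and worker_arg are placeholders):

    /* Fire-and-forget submission: no completion callback, no opaque pointer. */
    thread_pool_submit_aio(worker_fn, worker_arg, NULL, NULL);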
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
These names conflict with ones used by future generic thread pool
4
equivalents.
5
Generic names should belong to the generic pool type, not to the specific (AIO)
6
type.
7
8
Acked-by: Fabiano Rosas <farosas@suse.de>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
Reviewed-by: Peter Xu <peterx@redhat.com>
11
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
12
Link: https://lore.kernel.org/qemu-devel/70f9e0fb4b01042258a1a57996c64d19779dc7f0.1741124640.git.maciej.szmigiero@oracle.com
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
15
include/block/aio.h | 8 ++---
16
include/block/thread-pool.h | 8 ++---
17
util/async.c | 6 ++--
18
util/thread-pool.c | 58 ++++++++++++++++++-------------------
19
util/trace-events | 4 +--
20
5 files changed, 42 insertions(+), 42 deletions(-)
21
22
diff --git a/include/block/aio.h b/include/block/aio.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/block/aio.h
25
+++ b/include/block/aio.h
26
@@ -XXX,XX +XXX,XX @@ typedef void QEMUBHFunc(void *opaque);
27
typedef bool AioPollFn(void *opaque);
28
typedef void IOHandler(void *opaque);
29
30
-struct ThreadPool;
31
+struct ThreadPoolAio;
32
struct LinuxAioState;
33
typedef struct LuringState LuringState;
34
35
@@ -XXX,XX +XXX,XX @@ struct AioContext {
36
/* Thread pool for performing work and receiving completion callbacks.
37
* Has its own locking.
38
*/
39
- struct ThreadPool *thread_pool;
40
+ struct ThreadPoolAio *thread_pool;
41
42
#ifdef CONFIG_LINUX_AIO
43
struct LinuxAioState *linux_aio;
44
@@ -XXX,XX +XXX,XX @@ void aio_set_event_notifier_poll(AioContext *ctx,
45
*/
46
GSource *aio_get_g_source(AioContext *ctx);
47
48
-/* Return the ThreadPool bound to this AioContext */
49
-struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
50
+/* Return the ThreadPoolAio bound to this AioContext */
51
+struct ThreadPoolAio *aio_get_thread_pool(AioContext *ctx);
52
53
/* Setup the LinuxAioState bound to this AioContext */
54
struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
55
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/include/block/thread-pool.h
58
+++ b/include/block/thread-pool.h
59
@@ -XXX,XX +XXX,XX @@
60
61
typedef int ThreadPoolFunc(void *opaque);
62
63
-typedef struct ThreadPool ThreadPool;
64
+typedef struct ThreadPoolAio ThreadPoolAio;
65
66
-ThreadPool *thread_pool_new(struct AioContext *ctx);
67
-void thread_pool_free(ThreadPool *pool);
68
+ThreadPoolAio *thread_pool_new_aio(struct AioContext *ctx);
69
+void thread_pool_free_aio(ThreadPoolAio *pool);
70
71
/*
72
* thread_pool_submit_{aio,co} API: submit I/O requests in the thread's
73
@@ -XXX,XX +XXX,XX @@ void thread_pool_free(ThreadPool *pool);
74
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
75
BlockCompletionFunc *cb, void *opaque);
76
int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
77
+void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx);
78
79
-void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
80
81
#endif
82
diff --git a/util/async.c b/util/async.c
83
index XXXXXXX..XXXXXXX 100644
84
--- a/util/async.c
85
+++ b/util/async.c
86
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
87
QEMUBH *bh;
88
unsigned flags;
89
90
- thread_pool_free(ctx->thread_pool);
91
+ thread_pool_free_aio(ctx->thread_pool);
92
93
#ifdef CONFIG_LINUX_AIO
94
if (ctx->linux_aio) {
95
@@ -XXX,XX +XXX,XX @@ GSource *aio_get_g_source(AioContext *ctx)
96
return &ctx->source;
97
}
98
99
-ThreadPool *aio_get_thread_pool(AioContext *ctx)
100
+ThreadPoolAio *aio_get_thread_pool(AioContext *ctx)
101
{
102
if (!ctx->thread_pool) {
103
- ctx->thread_pool = thread_pool_new(ctx);
104
+ ctx->thread_pool = thread_pool_new_aio(ctx);
105
}
106
return ctx->thread_pool;
107
}
108
diff --git a/util/thread-pool.c b/util/thread-pool.c
109
index XXXXXXX..XXXXXXX 100644
110
--- a/util/thread-pool.c
111
+++ b/util/thread-pool.c
112
@@ -XXX,XX +XXX,XX @@
113
#include "block/thread-pool.h"
114
#include "qemu/main-loop.h"
115
116
-static void do_spawn_thread(ThreadPool *pool);
117
+static void do_spawn_thread(ThreadPoolAio *pool);
118
119
-typedef struct ThreadPoolElement ThreadPoolElement;
120
+typedef struct ThreadPoolElementAio ThreadPoolElementAio;
121
122
enum ThreadState {
123
THREAD_QUEUED,
124
@@ -XXX,XX +XXX,XX @@ enum ThreadState {
125
THREAD_DONE,
126
};
127
128
-struct ThreadPoolElement {
129
+struct ThreadPoolElementAio {
130
BlockAIOCB common;
131
- ThreadPool *pool;
132
+ ThreadPoolAio *pool;
133
ThreadPoolFunc *func;
134
void *arg;
135
136
@@ -XXX,XX +XXX,XX @@ struct ThreadPoolElement {
137
int ret;
138
139
/* Access to this list is protected by lock. */
140
- QTAILQ_ENTRY(ThreadPoolElement) reqs;
141
+ QTAILQ_ENTRY(ThreadPoolElementAio) reqs;
142
143
/* This list is only written by the thread pool's mother thread. */
144
- QLIST_ENTRY(ThreadPoolElement) all;
145
+ QLIST_ENTRY(ThreadPoolElementAio) all;
146
};
147
148
-struct ThreadPool {
149
+struct ThreadPoolAio {
150
AioContext *ctx;
151
QEMUBH *completion_bh;
152
QemuMutex lock;
153
@@ -XXX,XX +XXX,XX @@ struct ThreadPool {
154
QEMUBH *new_thread_bh;
155
156
/* The following variables are only accessed from one AioContext. */
157
- QLIST_HEAD(, ThreadPoolElement) head;
158
+ QLIST_HEAD(, ThreadPoolElementAio) head;
159
160
/* The following variables are protected by lock. */
161
- QTAILQ_HEAD(, ThreadPoolElement) request_list;
162
+ QTAILQ_HEAD(, ThreadPoolElementAio) request_list;
163
int cur_threads;
164
int idle_threads;
165
int new_threads; /* backlog of threads we need to create */
166
@@ -XXX,XX +XXX,XX @@ struct ThreadPool {
167
168
static void *worker_thread(void *opaque)
169
{
170
- ThreadPool *pool = opaque;
171
+ ThreadPoolAio *pool = opaque;
172
173
qemu_mutex_lock(&pool->lock);
174
pool->pending_threads--;
175
do_spawn_thread(pool);
176
177
while (pool->cur_threads <= pool->max_threads) {
178
- ThreadPoolElement *req;
179
+ ThreadPoolElementAio *req;
180
int ret;
181
182
if (QTAILQ_EMPTY(&pool->request_list)) {
183
@@ -XXX,XX +XXX,XX @@ static void *worker_thread(void *opaque)
184
return NULL;
185
}
186
187
-static void do_spawn_thread(ThreadPool *pool)
188
+static void do_spawn_thread(ThreadPoolAio *pool)
189
{
190
QemuThread t;
191
192
@@ -XXX,XX +XXX,XX @@ static void do_spawn_thread(ThreadPool *pool)
193
194
static void spawn_thread_bh_fn(void *opaque)
195
{
196
- ThreadPool *pool = opaque;
197
+ ThreadPoolAio *pool = opaque;
198
199
qemu_mutex_lock(&pool->lock);
200
do_spawn_thread(pool);
201
qemu_mutex_unlock(&pool->lock);
202
}
203
204
-static void spawn_thread(ThreadPool *pool)
205
+static void spawn_thread(ThreadPoolAio *pool)
206
{
207
pool->cur_threads++;
208
pool->new_threads++;
209
@@ -XXX,XX +XXX,XX @@ static void spawn_thread(ThreadPool *pool)
210
211
static void thread_pool_completion_bh(void *opaque)
212
{
213
- ThreadPool *pool = opaque;
214
- ThreadPoolElement *elem, *next;
215
+ ThreadPoolAio *pool = opaque;
216
+ ThreadPoolElementAio *elem, *next;
217
218
defer_call_begin(); /* cb() may use defer_call() to coalesce work */
219
220
@@ -XXX,XX +XXX,XX @@ restart:
221
continue;
222
}
223
224
- trace_thread_pool_complete(pool, elem, elem->common.opaque,
225
- elem->ret);
226
+ trace_thread_pool_complete_aio(pool, elem, elem->common.opaque,
227
+ elem->ret);
228
QLIST_REMOVE(elem, all);
229
230
if (elem->common.cb) {
231
@@ -XXX,XX +XXX,XX @@ restart:
232
233
static void thread_pool_cancel(BlockAIOCB *acb)
234
{
235
- ThreadPoolElement *elem = (ThreadPoolElement *)acb;
236
- ThreadPool *pool = elem->pool;
237
+ ThreadPoolElementAio *elem = (ThreadPoolElementAio *)acb;
238
+ ThreadPoolAio *pool = elem->pool;
239
240
- trace_thread_pool_cancel(elem, elem->common.opaque);
241
+ trace_thread_pool_cancel_aio(elem, elem->common.opaque);
242
243
QEMU_LOCK_GUARD(&pool->lock);
244
if (elem->state == THREAD_QUEUED) {
245
@@ -XXX,XX +XXX,XX @@ static void thread_pool_cancel(BlockAIOCB *acb)
246
}
247
248
static const AIOCBInfo thread_pool_aiocb_info = {
249
- .aiocb_size = sizeof(ThreadPoolElement),
250
+ .aiocb_size = sizeof(ThreadPoolElementAio),
251
.cancel_async = thread_pool_cancel,
252
};
253
254
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
255
BlockCompletionFunc *cb, void *opaque)
256
{
257
- ThreadPoolElement *req;
258
+ ThreadPoolElementAio *req;
259
AioContext *ctx = qemu_get_current_aio_context();
260
- ThreadPool *pool = aio_get_thread_pool(ctx);
261
+ ThreadPoolAio *pool = aio_get_thread_pool(ctx);
262
263
/* Assert that the thread submitting work is the same running the pool */
264
assert(pool->ctx == qemu_get_current_aio_context());
265
@@ -XXX,XX +XXX,XX @@ int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
266
return tpc.ret;
267
}
268
269
-void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
270
+void thread_pool_update_params(ThreadPoolAio *pool, AioContext *ctx)
271
{
272
qemu_mutex_lock(&pool->lock);
273
274
@@ -XXX,XX +XXX,XX @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
275
qemu_mutex_unlock(&pool->lock);
276
}
277
278
-static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
279
+static void thread_pool_init_one(ThreadPoolAio *pool, AioContext *ctx)
280
{
281
if (!ctx) {
282
ctx = qemu_get_aio_context();
283
@@ -XXX,XX +XXX,XX @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
284
thread_pool_update_params(pool, ctx);
285
}
286
287
-ThreadPool *thread_pool_new(AioContext *ctx)
288
+ThreadPoolAio *thread_pool_new_aio(AioContext *ctx)
289
{
290
- ThreadPool *pool = g_new(ThreadPool, 1);
291
+ ThreadPoolAio *pool = g_new(ThreadPoolAio, 1);
292
thread_pool_init_one(pool, ctx);
293
return pool;
294
}
295
296
-void thread_pool_free(ThreadPool *pool)
297
+void thread_pool_free_aio(ThreadPoolAio *pool)
298
{
299
if (!pool) {
300
return;
301
diff --git a/util/trace-events b/util/trace-events
302
index XXXXXXX..XXXXXXX 100644
303
--- a/util/trace-events
304
+++ b/util/trace-events
305
@@ -XXX,XX +XXX,XX @@ reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
306
307
# thread-pool.c
308
thread_pool_submit_aio(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
309
-thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
310
-thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
311
+thread_pool_complete_aio(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
312
+thread_pool_cancel_aio(void *req, void *opaque) "req %p opaque %p"
313
314
# buffer.c
315
buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd"
316
--
317
2.48.1
318
319
diff view generated by jsdifflib
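The rename is mechanical for existing AIO users; only the type name changes.
A minimal sketch of a caller after this patch, using only functions declared in
the headers touched above:

    AioContext *ctx = qemu_get_current_aio_context();

    /* The pool bound to an AioContext is now of type ThreadPoolAio. */
    ThreadPoolAio *pool = aio_get_thread_pool(ctx);

    /* Per-pool tuning keeps its old signature, only the type was renamed. */
    thread_pool_update_params(pool, ctx);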
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
qemu_loadvm_load_state_buffer() and its load_state_buffer
4
SaveVMHandler allow providing a device state buffer to an explicitly
5
specified device via its idstr and instance id.
6
7
Reviewed-by: Fabiano Rosas <farosas@suse.de>
8
Reviewed-by: Peter Xu <peterx@redhat.com>
9
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
10
Link: https://lore.kernel.org/qemu-devel/71ca753286b87831ced4afd422e2e2bed071af25.1741124640.git.maciej.szmigiero@oracle.com
11
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
---
13
include/migration/register.h | 15 +++++++++++++++
14
migration/savevm.h | 3 +++
15
migration/savevm.c | 23 +++++++++++++++++++++++
16
3 files changed, 41 insertions(+)
17
18
diff --git a/include/migration/register.h b/include/migration/register.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/migration/register.h
21
+++ b/include/migration/register.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers {
23
*/
24
int (*load_state)(QEMUFile *f, void *opaque, int version_id);
25
26
+ /**
27
+ * @load_state_buffer (invoked outside the BQL)
28
+ *
29
+ * Load device state buffer provided to qemu_loadvm_load_state_buffer().
30
+ *
31
+ * @opaque: data pointer passed to register_savevm_live()
32
+ * @buf: the data buffer to load
33
+ * @len: the data length in buffer
34
+ * @errp: pointer to Error*, to store an error if it happens.
35
+ *
36
+ * Returns true to indicate success and false for errors.
37
+ */
38
+ bool (*load_state_buffer)(void *opaque, char *buf, size_t len,
39
+ Error **errp);
40
+
41
/**
42
* @load_setup
43
*
44
diff --git a/migration/savevm.h b/migration/savevm.h
45
index XXXXXXX..XXXXXXX 100644
46
--- a/migration/savevm.h
47
+++ b/migration/savevm.h
48
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_approve_switchover(void);
49
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
50
bool in_postcopy);
51
52
+bool qemu_loadvm_load_state_buffer(const char *idstr, uint32_t instance_id,
53
+ char *buf, size_t len, Error **errp);
54
+
55
#endif
56
diff --git a/migration/savevm.c b/migration/savevm.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/migration/savevm.c
59
+++ b/migration/savevm.c
60
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_approve_switchover(void)
61
return migrate_send_rp_switchover_ack(mis);
62
}
63
64
+bool qemu_loadvm_load_state_buffer(const char *idstr, uint32_t instance_id,
65
+ char *buf, size_t len, Error **errp)
66
+{
67
+ SaveStateEntry *se;
68
+
69
+ se = find_se(idstr, instance_id);
70
+ if (!se) {
71
+ error_setg(errp,
72
+ "Unknown idstr %s or instance id %u for load state buffer",
73
+ idstr, instance_id);
74
+ return false;
75
+ }
76
+
77
+ if (!se->ops || !se->ops->load_state_buffer) {
78
+ error_setg(errp,
79
+ "idstr %s / instance %u has no load state buffer operation",
80
+ idstr, instance_id);
81
+ return false;
82
+ }
83
+
84
+ return se->ops->load_state_buffer(se->opaque, buf, len, errp);
85
+}
86
+
87
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
88
bool has_devices, strList *devices, Error **errp)
89
{
90
--
91
2.48.1
92
93
diff view generated by jsdifflib
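A device opts in by filling the new callback in its SaveVMHandlers. A
hypothetical sketch (example_parse_chunk and ExampleDevState are made up; only
the handler signature comes from this patch):

    static bool example_load_state_buffer(void *opaque, char *buf, size_t len,
                                          Error **errp)
    {
        ExampleDevState *s = opaque;

        /* Hypothetical parsing of one self-contained device state chunk. */
        if (!example_parse_chunk(s, buf, len)) {
            error_setg(errp, "malformed device state chunk (%zu bytes)", len);
            return false;
        }

        return true;
    }

    static const SaveVMHandlers example_savevm_handlers = {
        .load_state_buffer = example_load_state_buffer,
        /* ... the remaining handlers are omitted here ... */
    };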
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
All callers of migration_incoming_state_destroy() other than
4
postcopy_ram_listen_thread() make this call with the BQL held.
5
6
Since migration_incoming_state_destroy() ultimately calls "load_cleanup"
7
SaveVMHandlers and will soon call BQL-sensitive code, it makes sense
8
to always call that function under the BQL rather than have it deal with
9
both cases (with and without the BQL).
10
Add the necessary bql_lock() and bql_unlock() to
11
postcopy_ram_listen_thread().
12
13
qemu_loadvm_state_main() in postcopy_ram_listen_thread() could call
14
"load_state" SaveVMHandlers that are expecting BQL to be held.
15
16
In principle, the only devices that should be arriving on migration
17
channel serviced by postcopy_ram_listen_thread() are those that are
18
postcopiable and whose load handlers are safe to be called without BQL
19
being held.
20
21
But nothing currently prevents the source from sending data for "unsafe"
22
devices which would cause trouble there.
23
Add a TODO comment there so it's clear that it would be good to improve
24
handling of such an (erroneous) case in the future.
25
26
Acked-by: Peter Xu <peterx@redhat.com>
27
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
28
Link: https://lore.kernel.org/qemu-devel/21bb5ca337b1d5a802e697f553f37faf296b5ff4.1741193259.git.maciej.szmigiero@oracle.com
29
Signed-off-by: Cédric Le Goater <clg@redhat.com>
30
---
31
migration/migration.c | 13 +++++++++++++
32
migration/savevm.c | 4 ++++
33
2 files changed, 17 insertions(+)
34
35
diff --git a/migration/migration.c b/migration/migration.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/migration/migration.c
38
+++ b/migration/migration.c
39
@@ -XXX,XX +XXX,XX @@ void migration_incoming_state_destroy(void)
40
struct MigrationIncomingState *mis = migration_incoming_get_current();
41
42
multifd_recv_cleanup();
43
+
44
/*
45
* RAM state cleanup needs to happen after multifd cleanup, because
46
* multifd threads can use some of its states (receivedmap).
47
+ * The VFIO load_cleanup() implementation is BQL-sensitive. It requires
48
+ * BQL must NOT be taken when recycling load threads, so that it won't
49
+ * block the load threads from making progress on address space
50
+ * modification operations.
51
+ *
52
+ * To make it work, we could try to not take BQL for all load_cleanup(),
53
+ * or conditionally unlock BQL only if bql_locked() in VFIO.
54
+ *
55
+ * Since most existing call sites take BQL for load_cleanup(), make
56
+ * it simple by taking BQL always as the rule, so that VFIO can unlock
57
+ * BQL and retake unconditionally.
58
*/
59
+ assert(bql_locked());
60
qemu_loadvm_state_cleanup();
61
62
if (mis->to_src_file) {
63
diff --git a/migration/savevm.c b/migration/savevm.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/migration/savevm.c
66
+++ b/migration/savevm.c
67
@@ -XXX,XX +XXX,XX @@ static void *postcopy_ram_listen_thread(void *opaque)
68
* in qemu_file, and thus we must be blocking now.
69
*/
70
qemu_file_set_blocking(f, true);
71
+
72
+ /* TODO: sanity check that only postcopiable data will be loaded here */
73
load_res = qemu_loadvm_state_main(f, mis);
74
75
/*
76
@@ -XXX,XX +XXX,XX @@ static void *postcopy_ram_listen_thread(void *opaque)
77
* (If something broke then qemu will have to exit anyway since it's
78
* got a bad migration state).
79
*/
80
+ bql_lock();
81
migration_incoming_state_destroy();
82
+ bql_unlock();
83
84
rcu_unregister_thread();
85
mis->have_listen_thread = false;
86
--
87
2.48.1
88
89
diff view generated by jsdifflib
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
Automatic memory management helps avoid memory safety issues.
4
5
Reviewed-by: Peter Xu <peterx@redhat.com>
6
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
7
Link: https://lore.kernel.org/qemu-devel/a5843c5fa64d7e5239a4316092ec0ef0d10c2320.1741124640.git.maciej.szmigiero@oracle.com
8
Signed-off-by: Cédric Le Goater <clg@redhat.com>
9
---
10
include/qapi/error.h | 2 ++
11
1 file changed, 2 insertions(+)
12
13
diff --git a/include/qapi/error.h b/include/qapi/error.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/qapi/error.h
16
+++ b/include/qapi/error.h
17
@@ -XXX,XX +XXX,XX @@ Error *error_copy(const Error *err);
18
*/
19
void error_free(Error *err);
20
21
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(Error, error_free)
22
+
23
/*
24
* Convenience function to assert that *@errp is set, then silently free it.
25
*/
26
--
27
2.48.1
28
29
diff view generated by jsdifflib
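With the cleanup function registered, an Error pointer declared via g_autoptr()
is released automatically when it leaves scope. A small sketch
(example_operation is hypothetical):

    static bool example_caller(void)
    {
        g_autoptr(Error) local_err = NULL;

        if (!example_operation(&local_err)) {
            /*
             * No explicit error_free() is needed: local_err is freed
             * automatically when it goes out of scope.
             */
            warn_report("example operation failed: %s",
                        error_get_pretty(local_err));
            return false;
        }

        return true;
    }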
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
Some drivers might want to make use of auxiliary helper threads during VM
4
state loading, for example to make sure that their blocking (sync) I/O
5
operations don't block the rest of the migration process.
6
7
Add a migration core managed thread pool to facilitate this use case.
8
9
The migration core will wait for these threads to finish before
10
(re)starting the VM at the destination.
11
12
Reviewed-by: Fabiano Rosas <farosas@suse.de>
13
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
14
Link: https://lore.kernel.org/qemu-devel/b09fd70369b6159c75847e69f235cb908b02570c.1741124640.git.maciej.szmigiero@oracle.com
15
Signed-off-by: Cédric Le Goater <clg@redhat.com>
16
---
17
include/migration/misc.h | 3 ++
18
include/qemu/typedefs.h | 2 +
19
migration/migration.h | 5 +++
20
migration/savevm.h | 2 +-
21
migration/migration.c | 2 +-
22
migration/savevm.c | 95 +++++++++++++++++++++++++++++++++++++++-
23
6 files changed, 105 insertions(+), 4 deletions(-)
24
25
diff --git a/include/migration/misc.h b/include/migration/misc.h
26
index XXXXXXX..XXXXXXX 100644
27
--- a/include/migration/misc.h
28
+++ b/include/migration/misc.h
29
@@ -XXX,XX +XXX,XX @@ bool migrate_ram_is_ignored(RAMBlock *block);
30
/* migration/block.c */
31
32
AnnounceParameters *migrate_announce_params(void);
33
+
34
/* migration/savevm.c */
35
36
void dump_vmstate_json_to_file(FILE *out_fp);
37
+void qemu_loadvm_start_load_thread(MigrationLoadThread function,
38
+ void *opaque);
39
40
/* migration/migration.c */
41
void migration_object_init(void);
42
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/include/qemu/typedefs.h
45
+++ b/include/qemu/typedefs.h
46
@@ -XXX,XX +XXX,XX @@ typedef struct IRQState *qemu_irq;
47
* Function types
48
*/
49
typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
50
+typedef bool (*MigrationLoadThread)(void *opaque, bool *should_quit,
51
+ Error **errp);
52
53
#endif /* QEMU_TYPEDEFS_H */
54
diff --git a/migration/migration.h b/migration/migration.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/migration/migration.h
57
+++ b/migration/migration.h
58
@@ -XXX,XX +XXX,XX @@
59
#define MIGRATION_THREAD_DST_PREEMPT "mig/dst/preempt"
60
61
struct PostcopyBlocktimeContext;
62
+typedef struct ThreadPool ThreadPool;
63
64
#define MIGRATION_RESUME_ACK_VALUE (1)
65
66
@@ -XXX,XX +XXX,XX @@ struct MigrationIncomingState {
67
Coroutine *colo_incoming_co;
68
QemuSemaphore colo_incoming_sem;
69
70
+ /* Optional load threads pool and its thread exit request flag */
71
+ ThreadPool *load_threads;
72
+ bool load_threads_abort;
73
+
74
/*
75
* PostcopyBlocktimeContext to keep information for postcopy
76
* live migration, to calculate vCPU block time
77
diff --git a/migration/savevm.h b/migration/savevm.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/migration/savevm.h
80
+++ b/migration/savevm.h
81
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_live_state(QEMUFile *f);
82
int qemu_save_device_state(QEMUFile *f);
83
84
int qemu_loadvm_state(QEMUFile *f);
85
-void qemu_loadvm_state_cleanup(void);
86
+void qemu_loadvm_state_cleanup(MigrationIncomingState *mis);
87
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
88
int qemu_load_device_state(QEMUFile *f);
89
int qemu_loadvm_approve_switchover(void);
90
diff --git a/migration/migration.c b/migration/migration.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/migration/migration.c
93
+++ b/migration/migration.c
94
@@ -XXX,XX +XXX,XX @@ void migration_incoming_state_destroy(void)
95
* BQL and retake unconditionally.
96
*/
97
assert(bql_locked());
98
- qemu_loadvm_state_cleanup();
99
+ qemu_loadvm_state_cleanup(mis);
100
101
if (mis->to_src_file) {
102
/* Tell source that we are done */
103
diff --git a/migration/savevm.c b/migration/savevm.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/migration/savevm.c
106
+++ b/migration/savevm.c
107
@@ -XXX,XX +XXX,XX @@
108
#include "qemu/job.h"
109
#include "qemu/main-loop.h"
110
#include "block/snapshot.h"
111
+#include "block/thread-pool.h"
112
#include "qemu/cutils.h"
113
#include "io/channel-buffer.h"
114
#include "io/channel-file.h"
115
@@ -XXX,XX +XXX,XX @@ static struct mig_cmd_args {
116
* generic extendable format with an exception for two old entities.
117
*/
118
119
+/***********************************************************/
120
+/* Optional load threads pool support */
121
+
122
+static void qemu_loadvm_thread_pool_create(MigrationIncomingState *mis)
123
+{
124
+ assert(!mis->load_threads);
125
+ mis->load_threads = thread_pool_new();
126
+ mis->load_threads_abort = false;
127
+}
128
+
129
+static void qemu_loadvm_thread_pool_destroy(MigrationIncomingState *mis)
130
+{
131
+ qatomic_set(&mis->load_threads_abort, true);
132
+
133
+ bql_unlock(); /* Load threads might be waiting for BQL */
134
+ g_clear_pointer(&mis->load_threads, thread_pool_free);
135
+ bql_lock();
136
+}
137
+
138
+static bool qemu_loadvm_thread_pool_wait(MigrationState *s,
139
+ MigrationIncomingState *mis)
140
+{
141
+ bql_unlock(); /* Let load threads do work requiring BQL */
142
+ thread_pool_wait(mis->load_threads);
143
+ bql_lock();
144
+
145
+ return !migrate_has_error(s);
146
+}
147
+
148
/***********************************************************/
149
/* savevm/loadvm support */
150
151
@@ -XXX,XX +XXX,XX @@ static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
152
return 0;
153
}
154
155
-void qemu_loadvm_state_cleanup(void)
156
+struct LoadThreadData {
157
+ MigrationLoadThread function;
158
+ void *opaque;
159
+};
160
+
161
+static int qemu_loadvm_load_thread(void *thread_opaque)
162
+{
163
+ struct LoadThreadData *data = thread_opaque;
164
+ MigrationIncomingState *mis = migration_incoming_get_current();
165
+ g_autoptr(Error) local_err = NULL;
166
+
167
+ if (!data->function(data->opaque, &mis->load_threads_abort, &local_err)) {
168
+ MigrationState *s = migrate_get_current();
169
+
170
+ /*
171
+ * Can't set load_threads_abort here since processing of main migration
172
+ * channel data could still be happening, resulting in launching of new
173
+ * load threads.
174
+ */
175
+
176
+ assert(local_err);
177
+
178
+ /*
179
+ * In case of multiple load threads failing which thread error
180
+ * return we end setting is purely arbitrary.
181
+ */
182
+ migrate_set_error(s, local_err);
183
+ }
184
+
185
+ return 0;
186
+}
187
+
188
+void qemu_loadvm_start_load_thread(MigrationLoadThread function,
189
+ void *opaque)
190
+{
191
+ MigrationIncomingState *mis = migration_incoming_get_current();
192
+ struct LoadThreadData *data;
193
+
194
+ /* We only set it from this thread so it's okay to read it directly */
195
+ assert(!mis->load_threads_abort);
196
+
197
+ data = g_new(struct LoadThreadData, 1);
198
+ data->function = function;
199
+ data->opaque = opaque;
200
+
201
+ thread_pool_submit_immediate(mis->load_threads, qemu_loadvm_load_thread,
202
+ data, g_free);
203
+}
204
+
205
+void qemu_loadvm_state_cleanup(MigrationIncomingState *mis)
206
{
207
SaveStateEntry *se;
208
209
trace_loadvm_state_cleanup();
210
+
211
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
212
if (se->ops && se->ops->load_cleanup) {
213
se->ops->load_cleanup(se->opaque);
214
}
215
}
216
+
217
+ qemu_loadvm_thread_pool_destroy(mis);
218
}
219
220
/* Return true if we should continue the migration, or false. */
221
@@ -XXX,XX +XXX,XX @@ out:
222
223
int qemu_loadvm_state(QEMUFile *f)
224
{
225
+ MigrationState *s = migrate_get_current();
226
MigrationIncomingState *mis = migration_incoming_get_current();
227
Error *local_err = NULL;
228
int ret;
229
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f)
230
return -EINVAL;
231
}
232
233
+ qemu_loadvm_thread_pool_create(mis);
234
+
235
ret = qemu_loadvm_state_header(f);
236
if (ret) {
237
return ret;
238
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f)
239
240
/* When reaching here, it must be precopy */
241
if (ret == 0) {
242
- if (migrate_has_error(migrate_get_current())) {
243
+ if (migrate_has_error(migrate_get_current()) ||
244
+ !qemu_loadvm_thread_pool_wait(s, mis)) {
245
ret = -EINVAL;
246
} else {
247
ret = qemu_file_get_error(f);
248
}
249
}
250
+ /*
251
+ * Set this flag unconditionally so we'll catch further attempts to
252
+ * start additional threads via an appropriate assert()
253
+ */
254
+ qatomic_set(&mis->load_threads_abort, true);
255
256
/*
257
* Try to read in the VMDESC section as well, so that dumping tools that
258
--
259
2.48.1
260
261
diff view generated by jsdifflib
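A device would start such a worker from its load path and poll the abort flag
it is handed. A hypothetical sketch built on the MigrationLoadThread signature
added above (ExampleDevState and the example_load_* helpers are made up):

    /* Runs in a migration-core managed thread, outside the BQL. */
    static bool example_load_worker(void *opaque, bool *should_quit,
                                    Error **errp)
    {
        ExampleDevState *s = opaque;

        while (!qatomic_read(should_quit)) {
            /* Hypothetical blocking I/O step kept off the main channel. */
            if (!example_load_one_chunk(s, errp)) {
                return false;
            }
            if (example_load_finished(s)) {
                return true;
            }
        }

        return true;
    }

    /* Somewhere in the device's load path: */
    qemu_loadvm_start_load_thread(example_load_worker, s);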
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
Read the packet header first so that in the future we will be able to
4
differentiate between a RAM multifd packet and a device state multifd
5
packet.
6
7
Since these two are of different sizes, we can't read the packet body until
8
we know which packet type it is.
9
10
Reviewed-by: Fabiano Rosas <farosas@suse.de>
11
Reviewed-by: Peter Xu <peterx@redhat.com>
12
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
13
Link: https://lore.kernel.org/qemu-devel/832ad055fe447561ac1ad565d61658660cb3f63f.1741124640.git.maciej.szmigiero@oracle.com
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
15
---
16
migration/multifd.h | 5 +++++
17
migration/multifd.c | 55 ++++++++++++++++++++++++++++++++++++---------
18
2 files changed, 49 insertions(+), 11 deletions(-)
19
20
diff --git a/migration/multifd.h b/migration/multifd.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/migration/multifd.h
23
+++ b/migration/multifd.h
24
@@ -XXX,XX +XXX,XX @@ typedef struct {
25
uint32_t magic;
26
uint32_t version;
27
uint32_t flags;
28
+} __attribute__((packed)) MultiFDPacketHdr_t;
29
+
30
+typedef struct {
31
+ MultiFDPacketHdr_t hdr;
32
+
33
/* maximum number of allocated pages */
34
uint32_t pages_alloc;
35
/* non zero pages */
36
diff --git a/migration/multifd.c b/migration/multifd.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/migration/multifd.c
39
+++ b/migration/multifd.c
40
@@ -XXX,XX +XXX,XX @@ void multifd_send_fill_packet(MultiFDSendParams *p)
41
42
memset(packet, 0, p->packet_len);
43
44
- packet->magic = cpu_to_be32(MULTIFD_MAGIC);
45
- packet->version = cpu_to_be32(MULTIFD_VERSION);
46
+ packet->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
47
+ packet->hdr.version = cpu_to_be32(MULTIFD_VERSION);
48
49
- packet->flags = cpu_to_be32(p->flags);
50
+ packet->hdr.flags = cpu_to_be32(p->flags);
51
packet->next_packet_size = cpu_to_be32(p->next_packet_size);
52
53
packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
54
@@ -XXX,XX +XXX,XX @@ void multifd_send_fill_packet(MultiFDSendParams *p)
55
p->flags, p->next_packet_size);
56
}
57
58
-static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
59
+static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p,
60
+ const MultiFDPacketHdr_t *hdr,
61
+ Error **errp)
62
{
63
- const MultiFDPacket_t *packet = p->packet;
64
- uint32_t magic = be32_to_cpu(packet->magic);
65
- uint32_t version = be32_to_cpu(packet->version);
66
- int ret = 0;
67
+ uint32_t magic = be32_to_cpu(hdr->magic);
68
+ uint32_t version = be32_to_cpu(hdr->version);
69
70
if (magic != MULTIFD_MAGIC) {
71
error_setg(errp, "multifd: received packet magic %x, expected %x",
72
@@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
73
return -1;
74
}
75
76
- p->flags = be32_to_cpu(packet->flags);
77
+ p->flags = be32_to_cpu(hdr->flags);
78
+
79
+ return 0;
80
+}
81
+
82
+static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
83
+{
84
+ const MultiFDPacket_t *packet = p->packet;
85
+ int ret = 0;
86
+
87
p->next_packet_size = be32_to_cpu(packet->next_packet_size);
88
p->packet_num = be64_to_cpu(packet->packet_num);
89
p->packets_recved++;
90
@@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque)
91
}
92
93
while (true) {
94
+ MultiFDPacketHdr_t hdr;
95
uint32_t flags = 0;
96
bool has_data = false;
97
+ uint8_t *pkt_buf;
98
+ size_t pkt_len;
99
+
100
p->normal_num = 0;
101
102
if (use_packets) {
103
struct iovec iov = {
104
- .iov_base = (void *)p->packet,
105
- .iov_len = p->packet_len
106
+ .iov_base = (void *)&hdr,
107
+ .iov_len = sizeof(hdr)
108
};
109
110
if (multifd_recv_should_exit()) {
111
@@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque)
112
break;
113
}
114
115
+ ret = multifd_recv_unfill_packet_header(p, &hdr, &local_err);
116
+ if (ret) {
117
+ break;
118
+ }
119
+
120
+ pkt_buf = (uint8_t *)p->packet + sizeof(hdr);
121
+ pkt_len = p->packet_len - sizeof(hdr);
122
+
123
+ ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len,
124
+ &local_err);
125
+ if (!ret) {
126
+ /* EOF */
127
+ error_setg(&local_err, "multifd: unexpected EOF after packet header");
128
+ break;
129
+ }
130
+
131
+ if (ret == -1) {
132
+ break;
133
+ }
134
+
135
qemu_mutex_lock(&p->mutex);
136
ret = multifd_recv_unfill_packet(p, &local_err);
137
if (ret) {
138
--
139
2.48.1
140
141
diff view generated by jsdifflib
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
Add basic support for receiving device state via multifd channels -
4
channels that are shared with RAM transfers.
5
6
Depending on whether the MULTIFD_FLAG_DEVICE_STATE flag is present in the
7
packet header, either device state (MultiFDPacketDeviceState_t) or RAM
8
data (existing MultiFDPacket_t) is read.
9
10
The received device state data is provided to
11
qemu_loadvm_load_state_buffer() function for processing in the
12
device's load_state_buffer handler.
13
14
Reviewed-by: Peter Xu <peterx@redhat.com>
15
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
16
Link: https://lore.kernel.org/qemu-devel/9b86f806c134e7815ecce0eee84f0e0e34aa0146.1741124640.git.maciej.szmigiero@oracle.com
17
Signed-off-by: Cédric Le Goater <clg@redhat.com>
18
---
19
migration/multifd.h | 19 ++++++++-
20
migration/multifd.c | 101 +++++++++++++++++++++++++++++++++++++++-----
21
2 files changed, 108 insertions(+), 12 deletions(-)
22
23
diff --git a/migration/multifd.h b/migration/multifd.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/migration/multifd.h
26
+++ b/migration/multifd.h
27
@@ -XXX,XX +XXX,XX @@ MultiFDRecvData *multifd_get_recv_data(void);
28
#define MULTIFD_FLAG_UADK (8 << 1)
29
#define MULTIFD_FLAG_QATZIP (16 << 1)
30
31
+/*
32
+ * If set it means that this packet contains device state
33
+ * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
34
+ */
35
+#define MULTIFD_FLAG_DEVICE_STATE (32 << 1)
36
+
37
/* This value needs to be a multiple of qemu_target_page_size() */
38
#define MULTIFD_PACKET_SIZE (512 * 1024)
39
40
@@ -XXX,XX +XXX,XX @@ typedef struct {
41
uint64_t offset[];
42
} __attribute__((packed)) MultiFDPacket_t;
43
44
+typedef struct {
45
+ MultiFDPacketHdr_t hdr;
46
+
47
+ char idstr[256];
48
+ uint32_t instance_id;
49
+
50
+ /* size of the next packet that contains the actual data */
51
+ uint32_t next_packet_size;
52
+} __attribute__((packed)) MultiFDPacketDeviceState_t;
53
+
54
typedef struct {
55
/* number of used pages */
56
uint32_t num;
57
@@ -XXX,XX +XXX,XX @@ typedef struct {
58
59
/* thread local variables. No locking required */
60
61
- /* pointer to the packet */
62
+ /* pointers to the possible packet types */
63
MultiFDPacket_t *packet;
64
+ MultiFDPacketDeviceState_t *packet_dev_state;
65
/* size of the next packet that contains pages */
66
uint32_t next_packet_size;
67
/* packets received through this channel */
68
diff --git a/migration/multifd.c b/migration/multifd.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/migration/multifd.c
71
+++ b/migration/multifd.c
72
@@ -XXX,XX +XXX,XX @@
73
#include "file.h"
74
#include "migration.h"
75
#include "migration-stats.h"
76
+#include "savevm.h"
77
#include "socket.h"
78
#include "tls.h"
79
#include "qemu-file.h"
80
@@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p,
81
return 0;
82
}
83
84
-static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
85
+static int multifd_recv_unfill_packet_device_state(MultiFDRecvParams *p,
86
+ Error **errp)
87
+{
88
+ MultiFDPacketDeviceState_t *packet = p->packet_dev_state;
89
+
90
+ packet->instance_id = be32_to_cpu(packet->instance_id);
91
+ p->next_packet_size = be32_to_cpu(packet->next_packet_size);
92
+
93
+ return 0;
94
+}
95
+
96
+static int multifd_recv_unfill_packet_ram(MultiFDRecvParams *p, Error **errp)
97
{
98
const MultiFDPacket_t *packet = p->packet;
99
int ret = 0;
100
101
p->next_packet_size = be32_to_cpu(packet->next_packet_size);
102
p->packet_num = be64_to_cpu(packet->packet_num);
103
- p->packets_recved++;
104
105
/* Always unfill, old QEMUs (<9.0) send data along with SYNC */
106
ret = multifd_ram_unfill_packet(p, errp);
107
@@ -XXX,XX +XXX,XX @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
108
return ret;
109
}
110
111
+static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
112
+{
113
+ p->packets_recved++;
114
+
115
+ if (p->flags & MULTIFD_FLAG_DEVICE_STATE) {
116
+ return multifd_recv_unfill_packet_device_state(p, errp);
117
+ }
118
+
119
+ return multifd_recv_unfill_packet_ram(p, errp);
120
+}
121
+
122
static bool multifd_send_should_exit(void)
123
{
124
return qatomic_read(&multifd_send_state->exiting);
125
@@ -XXX,XX +XXX,XX @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
126
p->packet_len = 0;
127
g_free(p->packet);
128
p->packet = NULL;
129
+ g_clear_pointer(&p->packet_dev_state, g_free);
130
g_free(p->normal);
131
p->normal = NULL;
132
g_free(p->zero);
133
@@ -XXX,XX +XXX,XX @@ void multifd_recv_sync_main(void)
134
trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
135
}
136
137
+static int multifd_device_state_recv(MultiFDRecvParams *p, Error **errp)
138
+{
139
+ g_autofree char *dev_state_buf = NULL;
140
+ int ret;
141
+
142
+ dev_state_buf = g_malloc(p->next_packet_size);
143
+
144
+ ret = qio_channel_read_all(p->c, dev_state_buf, p->next_packet_size, errp);
145
+ if (ret != 0) {
146
+ return ret;
147
+ }
148
+
149
+ if (p->packet_dev_state->idstr[sizeof(p->packet_dev_state->idstr) - 1]
150
+ != 0) {
151
+ error_setg(errp, "unterminated multifd device state idstr");
152
+ return -1;
153
+ }
154
+
155
+ if (!qemu_loadvm_load_state_buffer(p->packet_dev_state->idstr,
156
+ p->packet_dev_state->instance_id,
157
+ dev_state_buf, p->next_packet_size,
158
+ errp)) {
159
+ ret = -1;
160
+ }
161
+
162
+ return ret;
163
+}
164
+
165
static void *multifd_recv_thread(void *opaque)
166
{
167
MigrationState *s = migrate_get_current();
168
@@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque)
169
while (true) {
170
MultiFDPacketHdr_t hdr;
171
uint32_t flags = 0;
172
+ bool is_device_state = false;
173
bool has_data = false;
174
uint8_t *pkt_buf;
175
size_t pkt_len;
176
@@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque)
177
break;
178
}
179
180
- pkt_buf = (uint8_t *)p->packet + sizeof(hdr);
181
- pkt_len = p->packet_len - sizeof(hdr);
182
+ is_device_state = p->flags & MULTIFD_FLAG_DEVICE_STATE;
183
+ if (is_device_state) {
184
+ pkt_buf = (uint8_t *)p->packet_dev_state + sizeof(hdr);
185
+ pkt_len = sizeof(*p->packet_dev_state) - sizeof(hdr);
186
+ } else {
187
+ pkt_buf = (uint8_t *)p->packet + sizeof(hdr);
188
+ pkt_len = p->packet_len - sizeof(hdr);
189
+ }
190
191
ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len,
192
&local_err);
193
@@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque)
194
/* recv methods don't know how to handle the SYNC flag */
195
p->flags &= ~MULTIFD_FLAG_SYNC;
196
197
- /*
198
- * Even if it's a SYNC packet, this needs to be set
199
- * because older QEMUs (<9.0) still send data along with
200
- * the SYNC packet.
201
- */
202
- has_data = p->normal_num || p->zero_num;
203
+ if (is_device_state) {
204
+ has_data = p->next_packet_size > 0;
205
+ } else {
206
+ /*
207
+ * Even if it's a SYNC packet, this needs to be set
208
+ * because older QEMUs (<9.0) still send data along with
209
+ * the SYNC packet.
210
+ */
211
+ has_data = p->normal_num || p->zero_num;
212
+ }
213
+
214
qemu_mutex_unlock(&p->mutex);
215
} else {
216
/*
217
@@ -XXX,XX +XXX,XX @@ static void *multifd_recv_thread(void *opaque)
218
}
219
220
if (has_data) {
221
- ret = multifd_recv_state->ops->recv(p, &local_err);
222
+ if (is_device_state) {
223
+ assert(use_packets);
224
+ ret = multifd_device_state_recv(p, &local_err);
225
+ } else {
226
+ ret = multifd_recv_state->ops->recv(p, &local_err);
227
+ }
228
if (ret != 0) {
229
break;
230
}
231
+ } else if (is_device_state) {
232
+ error_setg(&local_err,
233
+ "multifd: received empty device state packet");
234
+ break;
235
}
236
237
if (use_packets) {
238
if (flags & MULTIFD_FLAG_SYNC) {
239
+ if (is_device_state) {
240
+ error_setg(&local_err,
241
+ "multifd: received SYNC device state packet");
242
+ break;
243
+ }
244
+
245
qemu_sem_post(&multifd_recv_state->sem_sync);
246
qemu_sem_wait(&p->sem_sync);
247
}
248
@@ -XXX,XX +XXX,XX @@ int multifd_recv_setup(Error **errp)
249
p->packet_len = sizeof(MultiFDPacket_t)
250
+ sizeof(uint64_t) * page_count;
251
p->packet = g_malloc0(p->packet_len);
252
+ p->packet_dev_state = g_malloc0(sizeof(*p->packet_dev_state));
253
}
254
p->name = g_strdup_printf(MIGRATION_THREAD_DST_MULTIFD, i);
255
p->normal = g_new0(ram_addr_t, page_count);
256
--
257
2.48.1
258
259
diff view generated by jsdifflib
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
The multifd_send() function is currently not thread safe; make it thread safe
4
by holding a lock during its execution.
5
6
This way it will be possible to safely call it concurrently from multiple
7
threads.
8
9
Reviewed-by: Peter Xu <peterx@redhat.com>
10
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
11
Link: https://lore.kernel.org/qemu-devel/dd0f3bcc02ca96a7d523ca58ea69e495a33b453b.1741124640.git.maciej.szmigiero@oracle.com
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
---
14
migration/multifd.c | 8 ++++++++
15
1 file changed, 8 insertions(+)
16
17
diff --git a/migration/multifd.c b/migration/multifd.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/migration/multifd.c
20
+++ b/migration/multifd.c
21
@@ -XXX,XX +XXX,XX @@ typedef struct {
22
23
struct {
24
MultiFDSendParams *params;
25
+
26
+ /* multifd_send() body is not thread safe, needs serialization */
27
+ QemuMutex multifd_send_mutex;
28
+
29
/*
30
* Global number of generated multifd packets.
31
*
32
@@ -XXX,XX +XXX,XX @@ bool multifd_send(MultiFDSendData **send_data)
33
return false;
34
}
35
36
+ QEMU_LOCK_GUARD(&multifd_send_state->multifd_send_mutex);
37
+
38
/* We wait here, until at least one channel is ready */
39
qemu_sem_wait(&multifd_send_state->channels_ready);
40
41
@@ -XXX,XX +XXX,XX @@ static void multifd_send_cleanup_state(void)
42
socket_cleanup_outgoing_migration();
43
qemu_sem_destroy(&multifd_send_state->channels_created);
44
qemu_sem_destroy(&multifd_send_state->channels_ready);
45
+ qemu_mutex_destroy(&multifd_send_state->multifd_send_mutex);
46
g_free(multifd_send_state->params);
47
multifd_send_state->params = NULL;
48
g_free(multifd_send_state);
49
@@ -XXX,XX +XXX,XX @@ bool multifd_send_setup(void)
50
thread_count = migrate_multifd_channels();
51
multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
52
multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
53
+ qemu_mutex_init(&multifd_send_state->multifd_send_mutex);
54
qemu_sem_init(&multifd_send_state->channels_created, 0);
55
qemu_sem_init(&multifd_send_state->channels_ready, 0);
56
qatomic_set(&multifd_send_state->exiting, 0);
57
--
58
2.48.1
59
60
diff view generated by jsdifflib
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
This way, if there are fields there that need explicit disposal (like, for
4
example, some attached buffers) they will be handled appropriately.
5
6
Add a related assert to multifd_set_payload_type() in order to make sure
7
that this function is only used to fill a previously empty MultiFDSendData
8
with some payload, not the other way around.
9
10
Reviewed-by: Fabiano Rosas <farosas@suse.de>
11
Reviewed-by: Peter Xu <peterx@redhat.com>
12
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
13
Link: https://lore.kernel.org/qemu-devel/6755205f2b95abbed251f87061feee1c0e410836.1741124640.git.maciej.szmigiero@oracle.com
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
15
---
16
migration/multifd.h | 5 +++++
17
migration/multifd-nocomp.c | 3 +--
18
migration/multifd.c | 31 ++++++++++++++++++++++++++++---
19
3 files changed, 34 insertions(+), 5 deletions(-)
20
21
diff --git a/migration/multifd.h b/migration/multifd.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/migration/multifd.h
24
+++ b/migration/multifd.h
25
@@ -XXX,XX +XXX,XX @@ static inline bool multifd_payload_empty(MultiFDSendData *data)
26
static inline void multifd_set_payload_type(MultiFDSendData *data,
27
MultiFDPayloadType type)
28
{
29
+ assert(multifd_payload_empty(data));
30
+ assert(type != MULTIFD_PAYLOAD_NONE);
31
+
32
data->type = type;
33
}
34
35
@@ -XXX,XX +XXX,XX @@ static inline void multifd_send_prepare_header(MultiFDSendParams *p)
36
void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
37
bool multifd_send(MultiFDSendData **send_data);
38
MultiFDSendData *multifd_send_data_alloc(void);
39
+void multifd_send_data_clear(MultiFDSendData *data);
40
+void multifd_send_data_free(MultiFDSendData *data);
41
42
static inline uint32_t multifd_ram_page_size(void)
43
{
44
diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/migration/multifd-nocomp.c
47
+++ b/migration/multifd-nocomp.c
48
@@ -XXX,XX +XXX,XX @@ void multifd_ram_save_setup(void)
49
50
void multifd_ram_save_cleanup(void)
51
{
52
- g_free(multifd_ram_send);
53
- multifd_ram_send = NULL;
54
+ g_clear_pointer(&multifd_ram_send, multifd_send_data_free);
55
}
56
57
static void multifd_set_file_bitmap(MultiFDSendParams *p)
58
diff --git a/migration/multifd.c b/migration/multifd.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/migration/multifd.c
61
+++ b/migration/multifd.c
62
@@ -XXX,XX +XXX,XX @@ MultiFDSendData *multifd_send_data_alloc(void)
63
return g_malloc0(size_minus_payload + max_payload_size);
64
}
65
66
+void multifd_send_data_clear(MultiFDSendData *data)
67
+{
68
+ if (multifd_payload_empty(data)) {
69
+ return;
70
+ }
71
+
72
+ switch (data->type) {
73
+ default:
74
+ /* Nothing to do */
75
+ break;
76
+ }
77
+
78
+ data->type = MULTIFD_PAYLOAD_NONE;
79
+}
80
+
81
+void multifd_send_data_free(MultiFDSendData *data)
82
+{
83
+ if (!data) {
84
+ return;
85
+ }
86
+
87
+ multifd_send_data_clear(data);
88
+
89
+ g_free(data);
90
+}
91
+
92
static bool multifd_use_packets(void)
93
{
94
return !migrate_mapped_ram();
95
@@ -XXX,XX +XXX,XX @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
96
qemu_sem_destroy(&p->sem_sync);
97
g_free(p->name);
98
p->name = NULL;
99
- g_free(p->data);
100
- p->data = NULL;
101
+ g_clear_pointer(&p->data, multifd_send_data_free);
102
p->packet_len = 0;
103
g_free(p->packet);
104
p->packet = NULL;
105
@@ -XXX,XX +XXX,XX @@ static void *multifd_send_thread(void *opaque)
106
(uint64_t)p->next_packet_size + p->packet_len);
107
108
p->next_packet_size = 0;
109
- multifd_set_payload_type(p->data, MULTIFD_PAYLOAD_NONE);
110
+ multifd_send_data_clear(p->data);
111
112
/*
113
* Making sure p->data is published before saying "we're
114
--
115
2.48.1
116
117
diff view generated by jsdifflib
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
Since device state transfer via multifd channels requires multifd
4
channels with packets and is currently not compatible with multifd
5
compression, add an appropriate query function so a device can learn
6
whether it can actually make use of it.
7
8
Reviewed-by: Fabiano Rosas <farosas@suse.de>
9
Reviewed-by: Peter Xu <peterx@redhat.com>
10
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
11
Link: https://lore.kernel.org/qemu-devel/1ff0d98b85f470e5a33687406e877583b8fab74e.1741124640.git.maciej.szmigiero@oracle.com
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
---
14
include/migration/misc.h | 1 +
15
migration/multifd-device-state.c | 7 +++++++
16
2 files changed, 8 insertions(+)
17
18
diff --git a/include/migration/misc.h b/include/migration/misc.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/migration/misc.h
21
+++ b/include/migration/misc.h
22
@@ -XXX,XX +XXX,XX @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
23
/* migration/multifd-device-state.c */
24
bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
25
char *data, size_t len);
26
+bool multifd_device_state_supported(void);
27
28
#endif
29
diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/migration/multifd-device-state.c
32
+++ b/migration/multifd-device-state.c
33
@@ -XXX,XX +XXX,XX @@
34
#include "qemu/lockable.h"
35
#include "migration/misc.h"
36
#include "multifd.h"
37
+#include "options.h"
38
39
static struct {
40
QemuMutex queue_job_mutex;
41
@@ -XXX,XX +XXX,XX @@ bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
42
43
return true;
44
}
45
+
46
+bool multifd_device_state_supported(void)
47
+{
48
+ return migrate_multifd() && !migrate_mapped_ram() &&
49
+ migrate_multifd_compression() == MULTIFD_COMPRESSION_NONE;
50
+}
51
--
52
2.48.1
53
54
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
This SaveVMHandler helps a device provide its own asynchronous transmission
4
of the remaining data at the end of a precopy phase via multifd channels,
5
in parallel with the transfer done by save_live_complete_precopy handlers.
6
7
These threads are launched only when multifd device state transfer is
8
supported.
9
10
Management of these threads is done in the multifd migration code,
11
wrapping them in the generic thread pool.
12
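For orientation only, a device's thread handler could look roughly like the
sketch below; MyDevState, my_dev_more_state() and my_dev_read_chunk() are
made-up placeholders, while the multifd_*() helpers are the interfaces added
by this series:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "migration/misc.h"

static bool my_dev_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
                                                Error **errp)
{
    MyDevState *s = d->handler_opaque;  /* opaque passed to register_savevm_live() */

    while (my_dev_more_state(s)) {
        g_autofree char *buf = NULL;
        size_t len;

        /* Another save thread may have failed; bail out early if so. */
        if (multifd_device_state_save_thread_should_exit()) {
            error_setg(errp, "operation cancelled");
            return false;
        }

        len = my_dev_read_chunk(s, &buf);
        if (!multifd_queue_device_state(d->idstr, d->instance_id, buf, len)) {
            error_setg(errp, "%s: queueing device state failed", d->idstr);
            return false;
        }
    }

    return true;
}

Such a handler would then be wired up through the new
save_live_complete_precopy_thread member of SaveVMHandlers.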
13
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
14
Reviewed-by: Peter Xu <peterx@redhat.com>
15
Link: https://lore.kernel.org/qemu-devel/eac74a4ca7edd8968bbf72aa07b9041c76364a16.1741124640.git.maciej.szmigiero@oracle.com
16
Signed-off-by: Cédric Le Goater <clg@redhat.com>
17
---
18
include/migration/misc.h | 17 ++++++
19
include/migration/register.h | 19 +++++++
20
include/qemu/typedefs.h | 3 ++
21
migration/multifd-device-state.c | 92 ++++++++++++++++++++++++++++++++
22
migration/savevm.c | 40 +++++++++++++-
23
5 files changed, 170 insertions(+), 1 deletion(-)
24
25
diff --git a/include/migration/misc.h b/include/migration/misc.h
26
index XXXXXXX..XXXXXXX 100644
27
--- a/include/migration/misc.h
28
+++ b/include/migration/misc.h
29
@@ -XXX,XX +XXX,XX @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
30
Error **errp);
31
32
/* migration/multifd-device-state.c */
33
+typedef struct SaveLiveCompletePrecopyThreadData {
34
+ SaveLiveCompletePrecopyThreadHandler hdlr;
35
+ char *idstr;
36
+ uint32_t instance_id;
37
+ void *handler_opaque;
38
+} SaveLiveCompletePrecopyThreadData;
39
+
40
bool multifd_queue_device_state(char *idstr, uint32_t instance_id,
41
char *data, size_t len);
42
bool multifd_device_state_supported(void);
43
44
+void
45
+multifd_spawn_device_state_save_thread(SaveLiveCompletePrecopyThreadHandler hdlr,
46
+ char *idstr, uint32_t instance_id,
47
+ void *opaque);
48
+
49
+bool multifd_device_state_save_thread_should_exit(void);
50
+
51
+void multifd_abort_device_state_save_threads(void);
52
+bool multifd_join_device_state_save_threads(void);
53
+
54
#endif
55
diff --git a/include/migration/register.h b/include/migration/register.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/include/migration/register.h
58
+++ b/include/migration/register.h
59
@@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers {
60
*/
61
int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
62
63
+ /**
64
+ * @save_live_complete_precopy_thread (invoked in a separate thread)
65
+ *
66
+ * Called at the end of a precopy phase from a separate worker thread
67
+ * in configurations where multifd device state transfer is supported
68
+ * in order to perform asynchronous transmission of the remaining data in
69
+ * parallel with @save_live_complete_precopy handlers.
70
+ * When postcopy is enabled, devices that support postcopy will skip this
71
+ * step.
72
+ *
73
+ * @d: a #SaveLiveCompletePrecopyThreadData containing parameters that the
74
+ * handler may need, including this device's section idstr and instance_id,
75
+ * and the opaque data pointer passed to register_savevm_live().
76
+ * @errp: pointer to Error*, to store an error if it happens.
77
+ *
78
+ * Returns true to indicate success and false for errors.
79
+ */
80
+ SaveLiveCompletePrecopyThreadHandler save_live_complete_precopy_thread;
81
+
82
/* This runs both outside and inside the BQL. */
83
84
/**
85
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
86
index XXXXXXX..XXXXXXX 100644
87
--- a/include/qemu/typedefs.h
88
+++ b/include/qemu/typedefs.h
89
@@ -XXX,XX +XXX,XX @@ typedef struct QString QString;
90
typedef struct RAMBlock RAMBlock;
91
typedef struct Range Range;
92
typedef struct ReservedRegion ReservedRegion;
93
+typedef struct SaveLiveCompletePrecopyThreadData SaveLiveCompletePrecopyThreadData;
94
typedef struct SHPCDevice SHPCDevice;
95
typedef struct SSIBus SSIBus;
96
typedef struct TCGCPUOps TCGCPUOps;
97
@@ -XXX,XX +XXX,XX @@ typedef struct IRQState *qemu_irq;
98
typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
99
typedef bool (*MigrationLoadThread)(void *opaque, bool *should_quit,
100
Error **errp);
101
+typedef bool (*SaveLiveCompletePrecopyThreadHandler)(SaveLiveCompletePrecopyThreadData *d,
102
+ Error **errp);
103
104
#endif /* QEMU_TYPEDEFS_H */
105
diff --git a/migration/multifd-device-state.c b/migration/multifd-device-state.c
106
index XXXXXXX..XXXXXXX 100644
107
--- a/migration/multifd-device-state.c
108
+++ b/migration/multifd-device-state.c
109
@@ -XXX,XX +XXX,XX @@
110
*/
111
112
#include "qemu/osdep.h"
113
+#include "qapi/error.h"
114
#include "qemu/lockable.h"
115
+#include "block/thread-pool.h"
116
+#include "migration.h"
117
#include "migration/misc.h"
118
#include "multifd.h"
119
#include "options.h"
120
@@ -XXX,XX +XXX,XX @@ static struct {
121
QemuMutex queue_job_mutex;
122
123
MultiFDSendData *send_data;
124
+
125
+ ThreadPool *threads;
126
+ bool threads_abort;
127
} *multifd_send_device_state;
128
129
void multifd_device_state_send_setup(void)
130
@@ -XXX,XX +XXX,XX @@ void multifd_device_state_send_setup(void)
131
qemu_mutex_init(&multifd_send_device_state->queue_job_mutex);
132
133
multifd_send_device_state->send_data = multifd_send_data_alloc();
134
+
135
+ multifd_send_device_state->threads = thread_pool_new();
136
+ multifd_send_device_state->threads_abort = false;
137
}
138
139
void multifd_device_state_send_cleanup(void)
140
{
141
+ g_clear_pointer(&multifd_send_device_state->threads, thread_pool_free);
142
g_clear_pointer(&multifd_send_device_state->send_data,
143
multifd_send_data_free);
144
145
@@ -XXX,XX +XXX,XX @@ bool multifd_device_state_supported(void)
146
return migrate_multifd() && !migrate_mapped_ram() &&
147
migrate_multifd_compression() == MULTIFD_COMPRESSION_NONE;
148
}
149
+
150
+static void multifd_device_state_save_thread_data_free(void *opaque)
151
+{
152
+ SaveLiveCompletePrecopyThreadData *data = opaque;
153
+
154
+ g_clear_pointer(&data->idstr, g_free);
155
+ g_free(data);
156
+}
157
+
158
+static int multifd_device_state_save_thread(void *opaque)
159
+{
160
+ SaveLiveCompletePrecopyThreadData *data = opaque;
161
+ g_autoptr(Error) local_err = NULL;
162
+
163
+ if (!data->hdlr(data, &local_err)) {
164
+ MigrationState *s = migrate_get_current();
165
+
166
+ /*
167
+ * Can't call abort_device_state_save_threads() here since new
168
+ * save threads could still be in the process of being launched
169
+ * (if, for example, the very first save thread launched exited
170
+ * with an error very quickly).
171
+ */
172
+
173
+ assert(local_err);
174
+
175
+ /*
176
+ * If multiple save threads fail, which thread's error
177
+ * we end up setting is purely arbitrary.
178
+ */
179
+ migrate_set_error(s, local_err);
180
+ }
181
+
182
+ return 0;
183
+}
184
+
185
+bool multifd_device_state_save_thread_should_exit(void)
186
+{
187
+ return qatomic_read(&multifd_send_device_state->threads_abort);
188
+}
189
+
190
+void
191
+multifd_spawn_device_state_save_thread(SaveLiveCompletePrecopyThreadHandler hdlr,
192
+ char *idstr, uint32_t instance_id,
193
+ void *opaque)
194
+{
195
+ SaveLiveCompletePrecopyThreadData *data;
196
+
197
+ assert(multifd_device_state_supported());
198
+ assert(multifd_send_device_state);
199
+
200
+ assert(!qatomic_read(&multifd_send_device_state->threads_abort));
201
+
202
+ data = g_new(SaveLiveCompletePrecopyThreadData, 1);
203
+ data->hdlr = hdlr;
204
+ data->idstr = g_strdup(idstr);
205
+ data->instance_id = instance_id;
206
+ data->handler_opaque = opaque;
207
+
208
+ thread_pool_submit_immediate(multifd_send_device_state->threads,
209
+ multifd_device_state_save_thread,
210
+ data,
211
+ multifd_device_state_save_thread_data_free);
212
+}
213
+
214
+void multifd_abort_device_state_save_threads(void)
215
+{
216
+ assert(multifd_device_state_supported());
217
+
218
+ qatomic_set(&multifd_send_device_state->threads_abort, true);
219
+}
220
+
221
+bool multifd_join_device_state_save_threads(void)
222
+{
223
+ MigrationState *s = migrate_get_current();
224
+
225
+ assert(multifd_device_state_supported());
226
+
227
+ thread_pool_wait(multifd_send_device_state->threads);
228
+
229
+ return !migrate_has_error(s);
230
+}
231
diff --git a/migration/savevm.c b/migration/savevm.c
232
index XXXXXXX..XXXXXXX 100644
233
--- a/migration/savevm.c
234
+++ b/migration/savevm.c
235
@@ -XXX,XX +XXX,XX @@
236
#include "migration/register.h"
237
#include "migration/global_state.h"
238
#include "migration/channel-block.h"
239
+#include "multifd.h"
240
#include "ram.h"
241
#include "qemu-file.h"
242
#include "savevm.h"
243
@@ -XXX,XX +XXX,XX @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
244
int64_t start_ts_each, end_ts_each;
245
SaveStateEntry *se;
246
int ret;
247
+ bool multifd_device_state = multifd_device_state_supported();
248
+
249
+ if (multifd_device_state) {
250
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
251
+ SaveLiveCompletePrecopyThreadHandler hdlr;
252
+
253
+ if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
254
+ se->ops->has_postcopy(se->opaque)) ||
255
+ !se->ops->save_live_complete_precopy_thread) {
256
+ continue;
257
+ }
258
+
259
+ hdlr = se->ops->save_live_complete_precopy_thread;
260
+ multifd_spawn_device_state_save_thread(hdlr,
261
+ se->idstr, se->instance_id,
262
+ se->opaque);
263
+ }
264
+ }
265
266
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
267
if (!se->ops ||
268
@@ -XXX,XX +XXX,XX @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
269
save_section_footer(f, se);
270
if (ret < 0) {
271
qemu_file_set_error(f, ret);
272
- return -1;
273
+ goto ret_fail_abort_threads;
274
}
275
end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
276
trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
277
end_ts_each - start_ts_each);
278
}
279
280
+ if (multifd_device_state) {
281
+ if (migrate_has_error(migrate_get_current())) {
282
+ multifd_abort_device_state_save_threads();
283
+ }
284
+
285
+ if (!multifd_join_device_state_save_threads()) {
286
+ qemu_file_set_error(f, -EINVAL);
287
+ return -1;
288
+ }
289
+ }
290
+
291
trace_vmstate_downtime_checkpoint("src-iterable-saved");
292
293
return 0;
294
+
295
+ret_fail_abort_threads:
296
+ if (multifd_device_state) {
297
+ multifd_abort_device_state_save_threads();
298
+ multifd_join_device_state_save_threads();
299
+ }
300
+
301
+ return -1;
302
}
303
304
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
305
--
306
2.48.1
307
308
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
And rename the existing load_device_config_state trace event to
4
load_device_config_state_end for consistency since it is triggered at the
5
end of loading of the VFIO device config state.
6
7
This way both the start and end points of particular device config
8
loading operation (a long, BQL-serialized operation) are known.
9
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
11
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
12
Link: https://lore.kernel.org/qemu-devel/1b6c5a2097e64c272eb7e53f9e4cca4b79581b38.1741124640.git.maciej.szmigiero@oracle.com
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
15
hw/vfio/migration.c | 4 +++-
16
hw/vfio/trace-events | 3 ++-
17
2 files changed, 5 insertions(+), 2 deletions(-)
18
19
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/vfio/migration.c
22
+++ b/hw/vfio/migration.c
23
@@ -XXX,XX +XXX,XX @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
24
VFIODevice *vbasedev = opaque;
25
uint64_t data;
26
27
+ trace_vfio_load_device_config_state_start(vbasedev->name);
28
+
29
if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
30
int ret;
31
32
@@ -XXX,XX +XXX,XX @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
33
return -EINVAL;
34
}
35
36
- trace_vfio_load_device_config_state(vbasedev->name);
37
+ trace_vfio_load_device_config_state_end(vbasedev->name);
38
return qemu_file_get_error(f);
39
}
40
41
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/vfio/trace-events
44
+++ b/hw/vfio/trace-events
45
@@ -XXX,XX +XXX,XX @@ vfio_display_edid_write_error(void) ""
46
47
# migration.c
48
vfio_load_cleanup(const char *name) " (%s)"
49
-vfio_load_device_config_state(const char *name) " (%s)"
50
+vfio_load_device_config_state_start(const char *name) " (%s)"
51
+vfio_load_device_config_state_end(const char *name) " (%s)"
52
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
53
vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
54
vfio_migration_realize(const char *name) " (%s)"
55
--
56
2.48.1
57
58
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
So it can be safely accessed from multiple threads.
4
5
This variable type needs to be changed to unsigned long since
6
32-bit host platforms lack the necessary addition atomics on 64-bit
7
variables.
8
9
Using 32-bit counters on 32-bit host platforms should not be a problem
10
in practice since such hosts can't realistically address more memory than that anyway.
11
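The resulting pattern, shown standalone for clarity (names simplified; the
real change is in the diff below): writers use qatomic_add() on an unsigned
long counter, and the int64_t getter clamps the value it returns:

#include "qemu/osdep.h"
#include "qemu/atomic.h"

static unsigned long counter;

static void counter_add(unsigned long val)
{
    qatomic_add(&counter, val);
}

static int64_t counter_get(void)
{
    /* clamp: on 64-bit hosts an unsigned long can exceed INT64_MAX */
    return MIN(qatomic_read(&counter), INT64_MAX);
}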
12
Reviewed-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
14
Link: https://lore.kernel.org/qemu-devel/dc391771d2d9ad0f311994f0cb9e666da564aeaf.1741124640.git.maciej.szmigiero@oracle.com
15
Signed-off-by: Cédric Le Goater <clg@redhat.com>
16
---
17
hw/vfio/migration.c | 8 ++++----
18
1 file changed, 4 insertions(+), 4 deletions(-)
19
20
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/vfio/migration.c
23
+++ b/hw/vfio/migration.c
24
@@ -XXX,XX +XXX,XX @@
25
*/
26
#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
27
28
-static int64_t bytes_transferred;
29
+static unsigned long bytes_transferred;
30
31
static const char *mig_state_to_str(enum vfio_device_mig_state state)
32
{
33
@@ -XXX,XX +XXX,XX @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
34
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
35
qemu_put_be64(f, data_size);
36
qemu_put_buffer(f, migration->data_buffer, data_size);
37
- bytes_transferred += data_size;
38
+ qatomic_add(&bytes_transferred, data_size);
39
40
trace_vfio_save_block(migration->vbasedev->name, data_size);
41
42
@@ -XXX,XX +XXX,XX @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
43
44
int64_t vfio_mig_bytes_transferred(void)
45
{
46
- return bytes_transferred;
47
+ return MIN(qatomic_read(&bytes_transferred), INT64_MAX);
48
}
49
50
void vfio_reset_bytes_transferred(void)
51
{
52
- bytes_transferred = 0;
53
+ qatomic_set(&bytes_transferred, 0);
54
}
55
56
/*
57
--
58
2.48.1
59
60
Deleted patch
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
1
3
This way bytes_transferred can also be incremented in translation
4
units other than migration.c.
5
6
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
7
Reviewed-by: Cédric Le Goater <clg@redhat.com>
8
Link: https://lore.kernel.org/qemu-devel/d1fbc27ac2417b49892f354ba20f6c6b3f7209f8.1741124640.git.maciej.szmigiero@oracle.com
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
10
---
11
include/hw/vfio/vfio-common.h | 1 +
12
hw/vfio/migration.c | 7 ++++++-
13
2 files changed, 7 insertions(+), 1 deletion(-)
14
15
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/hw/vfio/vfio-common.h
18
+++ b/include/hw/vfio/vfio-common.h
19
@@ -XXX,XX +XXX,XX @@ void vfio_unblock_multiple_devices_migration(void);
20
bool vfio_viommu_preset(VFIODevice *vbasedev);
21
int64_t vfio_mig_bytes_transferred(void);
22
void vfio_reset_bytes_transferred(void);
23
+void vfio_mig_add_bytes_transferred(unsigned long val);
24
bool vfio_device_state_is_running(VFIODevice *vbasedev);
25
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
26
27
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/hw/vfio/migration.c
30
+++ b/hw/vfio/migration.c
31
@@ -XXX,XX +XXX,XX @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
32
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
33
qemu_put_be64(f, data_size);
34
qemu_put_buffer(f, migration->data_buffer, data_size);
35
- qatomic_add(&bytes_transferred, data_size);
36
+ vfio_mig_add_bytes_transferred(data_size);
37
38
trace_vfio_save_block(migration->vbasedev->name, data_size);
39
40
@@ -XXX,XX +XXX,XX @@ void vfio_reset_bytes_transferred(void)
41
qatomic_set(&bytes_transferred, 0);
42
}
43
44
+void vfio_mig_add_bytes_transferred(unsigned long val)
45
+{
46
+ qatomic_add(&bytes_transferred, val);
47
+}
48
+
49
/*
50
* Return true when either migration initialized or blocker registered.
51
* Currently only return false when adding blocker fails which will
52
--
53
2.48.1
54
55