1
The following changes since commit c5ea91da443b458352c1b629b490ee6631775cb4:
1
The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32:
2
2
3
Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2023-09-08 10:06:25 -0400)
3
Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/legoater/qemu/ tags/pull-vfio-20230911
7
https://github.com/legoater/qemu/ tags/pull-vfio-20250311
8
8
9
for you to fetch changes up to a31fe5daeaa230556145bfc04af1bd4e68f377fa:
9
for you to fetch changes up to 4d9607481560e6c8e1508a0aafe94f86a0503c8c:
10
10
11
vfio/common: Separate vfio-pci ranges (2023-09-11 08:34:06 +0200)
11
vfio/pci: Drop debug commentary from x-device-dirty-page-tracking (2025-03-11 19:04:58 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
vfio queue:
14
vfio queue:
15
15
16
* Small downtime optimisation for VFIO migration
16
* Fixed endianness of VFIO device state packets
17
* P2P support for VFIO migration
17
* Improved IGD passthrough support with legacy mode
18
* Introduction of a save_prepare() handler to fail VFIO migration
18
* Improved build
19
* Fix on DMA logging ranges calculation for OVMF enabling dynamic window
19
* Added support for old AMD GPUs (x550)
20
* Updated property documentation
20
21
21
----------------------------------------------------------------
22
----------------------------------------------------------------
22
Avihai Horon (11):
23
Joao Martins (1):
23
vfio/migration: Move from STOP_COPY to STOP in vfio_save_cleanup()
24
vfio/pci: Drop debug commentary from x-device-dirty-page-tracking
24
sysemu: Add prepare callback to struct VMChangeStateEntry
25
qdev: Add qdev_add_vm_change_state_handler_full()
26
vfio/migration: Add P2P support for VFIO migration
27
vfio/migration: Allow migration of multiple P2P supporting devices
28
migration: Add migration prefix to functions in target.c
29
vfio/migration: Fail adding device with enable-migration=on and existing blocker
30
migration: Move more initializations to migrate_init()
31
migration: Add .save_prepare() handler to struct SaveVMHandlers
32
vfio/migration: Block VFIO migration with postcopy migration
33
vfio/migration: Block VFIO migration with background snapshot
34
25
35
Joao Martins (2):
26
Maciej S. Szmigiero (1):
36
vfio/migration: Refactor PRE_COPY and RUNNING state checks
27
vfio/migration: Use BE byte order for device state wire packets
37
vfio/common: Separate vfio-pci ranges
38
28
39
docs/devel/vfio-migration.rst | 93 +++++++++++++++++-----------
29
Philippe Mathieu-Daudé (8):
40
include/hw/vfio/vfio-common.h | 2 +
30
system: Declare qemu_[min/max]rampagesize() in 'system/hostmem.h'
41
include/migration/register.h | 5 ++
31
hw/vfio/spapr: Do not include <linux/kvm.h>
42
include/sysemu/runstate.h | 7 +++
32
hw/vfio/common: Include missing 'system/tcg.h' header
43
migration/migration.h | 6 +-
33
hw/vfio/common: Get target page size using runtime helpers
44
migration/savevm.h | 1 +
34
hw/vfio: Compile some common objects once
45
hw/core/vm-change-state-handler.c | 14 ++++-
35
hw/vfio: Compile more objects once
46
hw/vfio/common.c | 126 ++++++++++++++++++++++++++++++--------
36
hw/vfio: Compile iommufd.c once
47
hw/vfio/migration.c | 106 +++++++++++++++++++++++++++-----
37
hw/vfio: Compile display.c once
48
migration/migration.c | 33 ++++++----
38
49
migration/savevm.c | 32 ++++++++--
39
Tomita Moeko (10):
50
migration/target.c | 8 +--
40
vfio/igd: Remove GTT write quirk in IO BAR 4
51
softmmu/runstate.c | 40 ++++++++++++
41
vfio/igd: Do not include GTT stolen size in etc/igd-bdsm-size
52
hw/vfio/trace-events | 3 +-
42
vfio/igd: Consolidate OpRegion initialization into a single function
53
14 files changed, 377 insertions(+), 99 deletions(-)
43
vfio/igd: Move LPC bridge initialization to a separate function
44
vfio/pci: Add placeholder for device-specific config space quirks
45
vfio/igd: Refactor vfio_probe_igd_bar4_quirk into pci config quirk
46
vfio/igd: Decouple common quirks from legacy mode
47
vfio/igd: Handle x-igd-opregion option in config quirk
48
vfio/igd: Introduce x-igd-lpc option for LPC bridge ID quirk
49
vfio/igd: Fix broken KVMGT OpRegion support
50
51
Vasilis Liaskovitis (1):
52
vfio/pci-quirks: Exclude non-ioport BAR from ATI quirk
53
54
hw/vfio/pci.h | 11 +-
55
include/exec/ram_addr.h | 3 -
56
include/system/hostmem.h | 3 +
57
hw/ppc/spapr_caps.c | 1 +
58
hw/s390x/s390-virtio-ccw.c | 1 +
59
hw/vfio/common.c | 9 +-
60
hw/vfio/igd.c | 529 +++++++++++++++++++-------------------------
61
hw/vfio/iommufd.c | 1 -
62
hw/vfio/migration-multifd.c | 15 +-
63
hw/vfio/migration.c | 1 -
64
hw/vfio/pci-quirks.c | 53 +----
65
hw/vfio/pci.c | 35 +--
66
hw/vfio/spapr.c | 4 +-
67
hw/vfio/meson.build | 27 ++-
68
14 files changed, 288 insertions(+), 405 deletions(-)
69
70
diff view generated by jsdifflib
New patch
1
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
3
The IO BAR4 of IGD devices contains a pair of 32-bit address/data
4
registers, MMIO_Index (0x0) and MMIO_Data (0x4), which provide access
5
to the MMIO BAR0 (GTTMMADR) from IO space. These registers are probably
6
only used by the VBIOS, and are not documented by Intel. The observed
7
layout of MMIO_Index register is:
8
31 2 1 0
9
+-------------------------------------------------------------------+
10
| Offset | Rsvd | Sel |
11
+-------------------------------------------------------------------+
12
- Offset: Byte offset in specified region, 4-byte aligned.
13
- Sel: Region selector
14
0: MMIO register region (first half of MMIO BAR0)
15
1: GTT region (second half of MMIO BAR0). Pre Gen11 only.
16
17
Currently, QEMU implements a quirk that adjusts the guest Data Stolen
18
Memory (DSM) region address to be (addr - host BDSM + guest BDSM) when
19
programming GTT entries via IO BAR4, assuming guest still programs GTT
20
with host DSM address, which is not the case. Guest's BDSM register is
21
emulated and initialized to 0 at startup by QEMU, then SeaBIOS programs
22
its value[1]. As a result, the addresses programmed to GTT entries by VBIOS
23
running in the guest are valid GPAs, and this unnecessary adjustment brings
24
inconsistency.
25
26
[1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332
27
28
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
29
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
30
Tested-by: Alex Williamson <alex.williamson@redhat.com>
31
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
32
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-2-tomitamoeko@gmail.com
33
Signed-off-by: Cédric Le Goater <clg@redhat.com>
34
---
35
hw/vfio/igd.c | 191 +-------------------------------------------------
36
1 file changed, 1 insertion(+), 190 deletions(-)
37
38
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/hw/vfio/igd.c
41
+++ b/hw/vfio/igd.c
42
@@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev)
43
return -1;
44
}
45
46
-typedef struct VFIOIGDQuirk {
47
- struct VFIOPCIDevice *vdev;
48
- uint32_t index;
49
- uint64_t bdsm;
50
-} VFIOIGDQuirk;
51
-
52
#define IGD_GMCH 0x50 /* Graphics Control Register */
53
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
54
#define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */
55
@@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
56
return ret;
57
}
58
59
-/*
60
- * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE
61
- * entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore
62
- * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index
63
- * for programming the GTT.
64
- *
65
- * See linux:include/drm/i915_drm.h for shift and mask values.
66
- */
67
-static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
68
-{
69
- uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
70
- int gen = igd_gen(vdev);
71
- uint64_t ggms_size = igd_gtt_memory_size(gen, gmch);
72
-
73
- return (ggms_size / (4 * KiB)) * (gen < 8 ? 4 : 8);
74
-}
75
-
76
-/*
77
- * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes.
78
- * Somehow the host stolen memory range is used for this, but how the ROM gets
79
- * it is a mystery, perhaps it's hardcoded into the ROM. Thankfully though, it
80
- * reprograms the GTT through the IOBAR where we can trap it and transpose the
81
- * programming to the VM allocated buffer. That buffer gets reserved by the VM
82
- * firmware via the fw_cfg entry added below. Here we're just monitoring the
83
- * IOBAR address and data registers to detect a write sequence targeting the
84
- * GTTADR. This code is developed by observed behavior and doesn't have a
85
- * direct spec reference, unfortunately.
86
- */
87
-static uint64_t vfio_igd_quirk_data_read(void *opaque,
88
- hwaddr addr, unsigned size)
89
-{
90
- VFIOIGDQuirk *igd = opaque;
91
- VFIOPCIDevice *vdev = igd->vdev;
92
-
93
- igd->index = ~0;
94
-
95
- return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
96
-}
97
-
98
-static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
99
- uint64_t data, unsigned size)
100
-{
101
- VFIOIGDQuirk *igd = opaque;
102
- VFIOPCIDevice *vdev = igd->vdev;
103
- uint64_t val = data;
104
- int gen = igd_gen(vdev);
105
-
106
- /*
107
- * Programming the GGMS starts at index 0x1 and uses every 4th index (ie.
108
- * 0x1, 0x5, 0x9, 0xd,...). For pre-Gen8 each 4-byte write is a whole PTE
109
- * entry, with 0th bit enable set. For Gen8 and up, PTEs are 64bit, so
110
- * entries 0x5 & 0xd are the high dword, in our case zero. Each PTE points
111
- * to a 4k page, which we translate to a page from the VM allocated region,
112
- * pointed to by the BDSM register. If this is not set, we fail.
113
- *
114
- * We trap writes to the full configured GTT size, but we typically only
115
- * see the vBIOS writing up to (nearly) the 1MB barrier. In fact it often
116
- * seems to miss the last entry for an even 1MB GTT. Doing a gratuitous
117
- * write of that last entry does work, but is hopefully unnecessary since
118
- * we clear the previous GTT on initialization.
119
- */
120
- if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
121
- if (gen < 8 || (igd->index % 8 == 1)) {
122
- uint64_t base;
123
-
124
- if (gen < 11) {
125
- base = pci_get_long(vdev->pdev.config + IGD_BDSM);
126
- } else {
127
- base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11);
128
- }
129
- if (!base) {
130
- hw_error("vfio-igd: Guest attempted to program IGD GTT before "
131
- "BIOS reserved stolen memory. Unsupported BIOS?");
132
- }
133
-
134
- val = data - igd->bdsm + base;
135
- } else {
136
- val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */
137
- }
138
-
139
- trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
140
- igd->index, data, val);
141
- }
142
-
143
- vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);
144
-
145
- igd->index = ~0;
146
-}
147
-
148
-static const MemoryRegionOps vfio_igd_data_quirk = {
149
- .read = vfio_igd_quirk_data_read,
150
- .write = vfio_igd_quirk_data_write,
151
- .endianness = DEVICE_LITTLE_ENDIAN,
152
-};
153
-
154
-static uint64_t vfio_igd_quirk_index_read(void *opaque,
155
- hwaddr addr, unsigned size)
156
-{
157
- VFIOIGDQuirk *igd = opaque;
158
- VFIOPCIDevice *vdev = igd->vdev;
159
-
160
- igd->index = ~0;
161
-
162
- return vfio_region_read(&vdev->bars[4].region, addr, size);
163
-}
164
-
165
-static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
166
- uint64_t data, unsigned size)
167
-{
168
- VFIOIGDQuirk *igd = opaque;
169
- VFIOPCIDevice *vdev = igd->vdev;
170
-
171
- igd->index = data;
172
-
173
- vfio_region_write(&vdev->bars[4].region, addr, data, size);
174
-}
175
-
176
-static const MemoryRegionOps vfio_igd_index_quirk = {
177
- .read = vfio_igd_quirk_index_read,
178
- .write = vfio_igd_quirk_index_write,
179
- .endianness = DEVICE_LITTLE_ENDIAN,
180
-};
181
-
182
#define IGD_GGC_MMIO_OFFSET 0x108040
183
#define IGD_BDSM_MMIO_OFFSET 0x1080C0
184
185
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
186
g_autofree struct vfio_region_info *opregion = NULL;
187
g_autofree struct vfio_region_info *host = NULL;
188
g_autofree struct vfio_region_info *lpc = NULL;
189
- VFIOQuirk *quirk;
190
- VFIOIGDQuirk *igd;
191
PCIDevice *lpc_bridge;
192
- int i, ret, gen;
193
+ int ret, gen;
194
uint64_t ggms_size, gms_size;
195
uint64_t *bdsm_size;
196
uint32_t gmch;
197
- uint16_t cmd_orig, cmd;
198
Error *err = NULL;
199
200
/*
201
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
202
return;
203
}
204
205
- /* Setup our quirk to munge GTT addresses to the VM allocated buffer */
206
- quirk = vfio_quirk_alloc(2);
207
- igd = quirk->data = g_malloc0(sizeof(*igd));
208
- igd->vdev = vdev;
209
- igd->index = ~0;
210
- if (gen < 11) {
211
- igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
212
- } else {
213
- igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4);
214
- igd->bdsm |=
215
- (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32;
216
- }
217
- igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */
218
-
219
- memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
220
- igd, "vfio-igd-index-quirk", 4);
221
- memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
222
- 0, &quirk->mem[0], 1);
223
-
224
- memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
225
- igd, "vfio-igd-data-quirk", 4);
226
- memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
227
- 4, &quirk->mem[1], 1);
228
-
229
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
230
-
231
/*
232
* Allow user to override dsm size using x-igd-gms option, in multiples of
233
* 32MiB. This option should only be used when the desired size cannot be
234
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
235
pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0);
236
}
237
238
- /*
239
- * This IOBAR gives us access to GTTADR, which allows us to write to
240
- * the GTT itself. So let's go ahead and write zero to all the GTT
241
- * entries to avoid spurious DMA faults. Be sure I/O access is enabled
242
- * before talking to the device.
243
- */
244
- if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
245
- vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
246
- error_report("IGD device %s - failed to read PCI command register",
247
- vdev->vbasedev.name);
248
- }
249
-
250
- cmd = cmd_orig | PCI_COMMAND_IO;
251
-
252
- if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
253
- vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
254
- error_report("IGD device %s - failed to write PCI command register",
255
- vdev->vbasedev.name);
256
- }
257
-
258
- for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
259
- vfio_region_write(&vdev->bars[4].region, 0, i, 4);
260
- vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
261
- }
262
-
263
- if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
264
- vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
265
- error_report("IGD device %s - failed to restore PCI command register",
266
- vdev->vbasedev.name);
267
- }
268
-
269
trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name,
270
(ggms_size + gms_size) / MiB);
271
}
272
--
273
2.48.1
274
275
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Add qdev_add_vm_change_state_handler_full() variant that allows setting
3
Though GTT Stolen Memory (GSM) is right below Data Stolen Memory (DSM)
4
a prepare callback in addition to the main callback.
4
in host address space, direct access to GSM is prohibited, and it is
5
not mapped to guest address space. Both host and guest access GSM
6
indirectly through the second half of MMIO BAR0 (GTTMMADR).
5
7
6
This will facilitate adding P2P support for VFIO migration in the
8
Guest firmware only needs to reserve a memory region for DSM and program
7
following patches.
9
the BDSM register with the base address of that region, that's actually
10
what both SeaBIOS[1] and IgdAssignmentDxe do now.
8
11
9
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
12
[1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332
10
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
13
11
Reviewed-by: Cédric Le Goater <clg@redhat.com>
14
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
12
Tested-by: YangHang Liu <yanghliu@redhat.com>
15
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
16
Tested-by: Alex Williamson <alex.williamson@redhat.com>
17
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
18
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-3-tomitamoeko@gmail.com
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
20
---
15
include/sysemu/runstate.h | 3 +++
21
hw/vfio/igd.c | 28 +++-------------------------
16
hw/core/vm-change-state-handler.c | 14 +++++++++++++-
22
1 file changed, 3 insertions(+), 25 deletions(-)
17
2 files changed, 16 insertions(+), 1 deletion(-)
18
23
19
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
24
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
20
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
21
--- a/include/sysemu/runstate.h
26
--- a/hw/vfio/igd.c
22
+++ b/include/sysemu/runstate.h
27
+++ b/hw/vfio/igd.c
23
@@ -XXX,XX +XXX,XX @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
28
@@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev)
24
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
29
25
VMChangeStateHandler *cb,
30
#define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */
26
void *opaque);
31
#define IGD_GMCH_GEN6_GMS_MASK 0x1f
27
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
32
-#define IGD_GMCH_GEN6_GGMS_SHIFT 8
28
+ DeviceState *dev, VMChangeStateHandler *cb,
33
-#define IGD_GMCH_GEN6_GGMS_MASK 0x3
29
+ VMChangeStateHandler *prepare_cb, void *opaque);
34
#define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */
30
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
35
#define IGD_GMCH_GEN8_GMS_MASK 0xff
31
/**
36
-#define IGD_GMCH_GEN8_GGMS_SHIFT 6
32
* vm_state_notify: Notify the state of the VM
37
-#define IGD_GMCH_GEN8_GGMS_MASK 0x3
33
diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c
38
-
34
index XXXXXXX..XXXXXXX 100644
39
-static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch)
35
--- a/hw/core/vm-change-state-handler.c
40
-{
36
+++ b/hw/core/vm-change-state-handler.c
41
- uint64_t ggms;
37
@@ -XXX,XX +XXX,XX @@ static int qdev_get_dev_tree_depth(DeviceState *dev)
42
-
38
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
43
- if (gen < 8) {
39
VMChangeStateHandler *cb,
44
- ggms = (gmch >> IGD_GMCH_GEN6_GGMS_SHIFT) & IGD_GMCH_GEN6_GGMS_MASK;
40
void *opaque)
45
- } else {
41
+{
46
- ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK;
42
+ return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque);
47
- if (ggms != 0) {
43
+}
48
- ggms = 1ULL << ggms;
44
+
49
- }
45
+/*
50
- }
46
+ * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb
51
-
47
+ * argument too.
52
- return ggms * MiB;
48
+ */
53
-}
49
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
54
50
+ DeviceState *dev, VMChangeStateHandler *cb,
55
static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch)
51
+ VMChangeStateHandler *prepare_cb, void *opaque)
52
{
56
{
53
int depth = qdev_get_dev_tree_depth(dev);
57
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
54
58
g_autofree struct vfio_region_info *lpc = NULL;
55
- return qemu_add_vm_change_state_handler_prio(cb, opaque, depth);
59
PCIDevice *lpc_bridge;
56
+ return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque,
60
int ret, gen;
57
+ depth);
61
- uint64_t ggms_size, gms_size;
62
+ uint64_t gms_size;
63
uint64_t *bdsm_size;
64
uint32_t gmch;
65
Error *err = NULL;
66
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
67
}
68
}
69
70
- ggms_size = igd_gtt_memory_size(gen, gmch);
71
gms_size = igd_stolen_memory_size(gen, gmch);
72
73
/*
74
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
75
* config offset 0x5C.
76
*/
77
bdsm_size = g_malloc(sizeof(*bdsm_size));
78
- *bdsm_size = cpu_to_le64(ggms_size + gms_size);
79
+ *bdsm_size = cpu_to_le64(gms_size);
80
fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
81
bdsm_size, sizeof(*bdsm_size));
82
83
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
84
pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0);
85
}
86
87
- trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name,
88
- (ggms_size + gms_size) / MiB);
89
+ trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
58
}
90
}
59
--
91
--
60
2.41.0
92
2.48.1
61
93
62
94
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
VFIO migration is not compatible with postcopy migration. A VFIO device
3
Both x-igd-opregion option and legacy mode require identical steps to
4
in the destination can't handle page faults for pages that have not been
4
set up OpRegion for IGD devices. Consolidate these steps into a single
5
sent yet.
5
vfio_pci_igd_setup_opregion function.
6
6
7
Doing such migration will cause the VM to crash in the destination:
7
The function call in pci.c is wrapped with ifdef temporarily to prevent
8
8
build errors for non-x86 archs; it will be removed after we decouple it
9
qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address
9
from legacy mode.
10
qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address)
10
11
qemu: hardware error: vfio: DMA mapping failed, unable to continue
11
Additionally, move vfio_pci_igd_opregion_init to igd.c to prevent it
12
12
from being compiled in non-x86 builds.
13
To prevent this, block VFIO migration with postcopy migration.
13
14
14
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
15
Reported-by: Yanghang Liu <yanghliu@redhat.com>
15
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
16
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
16
Tested-by: Alex Williamson <alex.williamson@redhat.com>
17
Tested-by: Yanghang Liu <yanghliu@redhat.com>
17
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
18
Reviewed-by: Peter Xu <peterx@redhat.com>
18
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-4-tomitamoeko@gmail.com
19
[ clg: Fixed spelling in vfio_pci_igd_setup_opregion() ]
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
---
21
---
21
hw/vfio/migration.c | 22 ++++++++++++++++++++++
22
hw/vfio/pci.h | 4 +-
22
1 file changed, 22 insertions(+)
23
hw/vfio/igd.c | 101 +++++++++++++++++++++++++++++++++++--------
23
24
hw/vfio/pci-quirks.c | 50 ---------------------
24
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
25
hw/vfio/pci.c | 22 ++--------
25
index XXXXXXX..XXXXXXX 100644
26
4 files changed, 88 insertions(+), 89 deletions(-)
26
--- a/hw/vfio/migration.c
27
27
+++ b/hw/vfio/migration.c
28
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
28
@@ -XXX,XX +XXX,XX @@ static bool vfio_precopy_supported(VFIODevice *vbasedev)
29
index XXXXXXX..XXXXXXX 100644
29
30
--- a/hw/vfio/pci.h
30
/* ---------------------------------------------------------------------- */
31
+++ b/hw/vfio/pci.h
31
32
@@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
32
+static int vfio_save_prepare(void *opaque, Error **errp)
33
34
bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
35
36
-bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
37
- struct vfio_region_info *info,
38
- Error **errp);
39
+bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp);
40
41
void vfio_display_reset(VFIOPCIDevice *vdev);
42
bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
43
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/hw/vfio/igd.c
46
+++ b/hw/vfio/igd.c
47
@@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev)
48
return -1;
49
}
50
51
+#define IGD_ASLS 0xfc /* ASL Storage Register */
52
#define IGD_GMCH 0x50 /* Graphics Control Register */
53
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
54
#define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */
55
@@ -XXX,XX +XXX,XX @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch)
56
return 0;
57
}
58
59
+/*
60
+ * The OpRegion includes the Video BIOS Table, which seems important for
61
+ * telling the driver what sort of outputs it has. Without this, the device
62
+ * may work in the guest, but we may not get output. This also requires BIOS
63
+ * support to reserve and populate a section of guest memory sufficient for
64
+ * the table and to write the base address of that memory to the ASLS register
65
+ * of the IGD device.
66
+ */
67
+static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
68
+ struct vfio_region_info *info,
69
+ Error **errp)
33
+{
70
+{
34
+ VFIODevice *vbasedev = opaque;
71
+ int ret;
72
+
73
+ vdev->igd_opregion = g_malloc0(info->size);
74
+ ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
75
+ info->size, info->offset);
76
+ if (ret != info->size) {
77
+ error_setg(errp, "failed to read IGD OpRegion");
78
+ g_free(vdev->igd_opregion);
79
+ vdev->igd_opregion = NULL;
80
+ return false;
81
+ }
35
+
82
+
36
+ /*
83
+ /*
37
+ * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on.
84
+ * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
85
+ * allocate 32bit reserved memory for, copy these contents into, and write
86
+ * the reserved memory base address to the device ASLS register at 0xFC.
87
+ * Alignment of this reserved region seems flexible, but using a 4k page
88
+ * alignment seems to work well. This interface assumes a single IGD
89
+ * device, which may be at VM address 00:02.0 in legacy mode or another
90
+ * address in UPT mode.
91
+ *
92
+ * NB, there may be future use cases discovered where the VM should have
93
+ * direct interaction with the host OpRegion, in which case the write to
94
+ * the ASLS register would trigger MemoryRegion setup to enable that.
38
+ */
95
+ */
39
+ if (runstate_check(RUN_STATE_SAVE_VM)) {
96
+ fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
40
+ return 0;
97
+ vdev->igd_opregion, info->size);
41
+ }
98
+
42
+
99
+ trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
43
+ if (migrate_postcopy_ram()) {
100
+
44
+ error_setg(
101
+ pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
45
+ errp, "%s: VFIO migration is not supported with postcopy migration",
102
+ pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
46
+ vbasedev->name);
103
+ pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
47
+ return -EOPNOTSUPP;
104
+
48
+ }
105
+ return true;
49
+
50
+ return 0;
51
+}
106
+}
52
+
107
+
53
static int vfio_save_setup(QEMUFile *f, void *opaque)
108
+bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
109
+{
110
+ g_autofree struct vfio_region_info *opregion = NULL;
111
+ int ret;
112
+
113
+ /* Hotplugging is not supported for opregion access */
114
+ if (vdev->pdev.qdev.hotplugged) {
115
+ error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
116
+ return false;
117
+ }
118
+
119
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
120
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
121
+ VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
122
+ if (ret) {
123
+ error_setg_errno(errp, -ret,
124
+ "Device does not supports IGD OpRegion feature");
125
+ return false;
126
+ }
127
+
128
+ if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
129
+ return false;
130
+ }
131
+
132
+ return true;
133
+}
134
+
135
/*
136
* The rather short list of registers that we copy from the host devices.
137
* The LPC/ISA bridge values are definitely needed to support the vBIOS, the
138
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
139
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
54
{
140
{
55
VFIODevice *vbasedev = opaque;
141
g_autofree struct vfio_region_info *rom = NULL;
56
@@ -XXX,XX +XXX,XX @@ static bool vfio_switchover_ack_needed(void *opaque)
142
- g_autofree struct vfio_region_info *opregion = NULL;
143
g_autofree struct vfio_region_info *host = NULL;
144
g_autofree struct vfio_region_info *lpc = NULL;
145
PCIDevice *lpc_bridge;
146
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
147
* Check whether we have all the vfio device specific regions to
148
* support legacy mode (added in Linux v4.6). If not, bail.
149
*/
150
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
151
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
152
- VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
153
- if (ret) {
154
- error_report("IGD device %s does not support OpRegion access,"
155
- "legacy mode disabled", vdev->vbasedev.name);
156
- return;
157
- }
158
-
159
ret = vfio_get_dev_region_info(&vdev->vbasedev,
160
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
161
VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
162
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
163
return;
164
}
165
166
+ /* Setup OpRegion access */
167
+ if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
168
+ error_append_hint(&err, "IGD legacy mode disabled\n");
169
+ error_report_err(err);
170
+ return;
171
+ }
172
+
173
/* Create our LPC/ISA bridge */
174
ret = vfio_pci_igd_lpc_init(vdev, lpc);
175
if (ret) {
176
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
177
return;
178
}
179
180
- /* Setup OpRegion access */
181
- if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) {
182
- error_append_hint(&err, "IGD legacy mode disabled\n");
183
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
184
- return;
185
- }
186
-
187
/*
188
* Allow user to override dsm size using x-igd-gms option, in multiples of
189
* 32MiB. This option should only be used when the desired size cannot be
190
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
191
index XXXXXXX..XXXXXXX 100644
192
--- a/hw/vfio/pci-quirks.c
193
+++ b/hw/vfio/pci-quirks.c
194
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
195
trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
57
}
196
}
58
197
59
static const SaveVMHandlers savevm_vfio_handlers = {
198
-#define IGD_ASLS 0xfc /* ASL Storage Register */
60
+ .save_prepare = vfio_save_prepare,
199
-
61
.save_setup = vfio_save_setup,
200
-/*
62
.save_cleanup = vfio_save_cleanup,
201
- * The OpRegion includes the Video BIOS Table, which seems important for
63
.state_pending_estimate = vfio_state_pending_estimate,
202
- * telling the driver what sort of outputs it has. Without this, the device
203
- * may work in the guest, but we may not get output. This also requires BIOS
204
- * support to reserve and populate a section of guest memory sufficient for
205
- * the table and to write the base address of that memory to the ASLS register
206
- * of the IGD device.
207
- */
208
-bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
209
- struct vfio_region_info *info, Error **errp)
210
-{
211
- int ret;
212
-
213
- vdev->igd_opregion = g_malloc0(info->size);
214
- ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
215
- info->size, info->offset);
216
- if (ret != info->size) {
217
- error_setg(errp, "failed to read IGD OpRegion");
218
- g_free(vdev->igd_opregion);
219
- vdev->igd_opregion = NULL;
220
- return false;
221
- }
222
-
223
- /*
224
- * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
225
- * allocate 32bit reserved memory for, copy these contents into, and write
226
- * the reserved memory base address to the device ASLS register at 0xFC.
227
- * Alignment of this reserved region seems flexible, but using a 4k page
228
- * alignment seems to work well. This interface assumes a single IGD
229
- * device, which may be at VM address 00:02.0 in legacy mode or another
230
- * address in UPT mode.
231
- *
232
- * NB, there may be future use cases discovered where the VM should have
233
- * direct interaction with the host OpRegion, in which case the write to
234
- * the ASLS register would trigger MemoryRegion setup to enable that.
235
- */
236
- fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
237
- vdev->igd_opregion, info->size);
238
-
239
- trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
240
-
241
- pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
242
- pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
243
- pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
244
-
245
- return true;
246
-}
247
-
248
/*
249
* Common quirk probe entry points.
250
*/
251
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
252
index XXXXXXX..XXXXXXX 100644
253
--- a/hw/vfio/pci.c
254
+++ b/hw/vfio/pci.c
255
@@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
256
vfio_bar_quirk_setup(vdev, i);
257
}
258
259
+#ifdef CONFIG_VFIO_IGD
260
if (!vdev->igd_opregion &&
261
vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
262
- g_autofree struct vfio_region_info *opregion = NULL;
263
-
264
- if (vdev->pdev.qdev.hotplugged) {
265
- error_setg(errp,
266
- "cannot support IGD OpRegion feature on hotplugged "
267
- "device");
268
- goto out_unset_idev;
269
- }
270
-
271
- ret = vfio_get_dev_region_info(vbasedev,
272
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
273
- VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
274
- if (ret) {
275
- error_setg_errno(errp, -ret,
276
- "does not support requested IGD OpRegion feature");
277
- goto out_unset_idev;
278
- }
279
-
280
- if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
281
+ if (!vfio_pci_igd_setup_opregion(vdev, errp)) {
282
goto out_unset_idev;
283
}
284
}
285
+#endif
286
287
/* QEMU emulates all of MSI & MSIX */
288
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
64
--
289
--
65
2.41.0
290
2.48.1
66
291
67
292
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
VFIO migration uAPI defines an optional intermediate P2P quiescent
3
A new option will soon be introduced to decouple the LPC bridge/Host
4
state. While in the P2P quiescent state, P2P DMA transactions cannot be
4
bridge ID quirk from legacy mode. To prepare for this, move the LPC
5
initiated by the device, but the device can respond to incoming ones.
5
bridge initialization into a separate function.
6
Additionally, all outstanding P2P transactions are guaranteed to have
7
been completed by the time the device enters this state.
8
6
9
The purpose of this state is to support migration of multiple devices
7
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
10
that might do P2P transactions between themselves.
8
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
11
9
Tested-by: Alex Williamson <alex.williamson@redhat.com>
12
Add support for P2P migration by transitioning all the devices to the
10
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
13
P2P quiescent state before stopping or starting the devices. Use the new
11
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-5-tomitamoeko@gmail.com
14
VMChangeStateHandler prepare_cb to achieve that behavior.
15
16
This will allow migration of multiple VFIO devices if all of them
17
support P2P migration.
18
19
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
20
Tested-by: YangHang Liu <yanghliu@redhat.com>
21
Reviewed-by: Cédric Le Goater <clg@redhat.com>
22
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
23
---
13
---
24
docs/devel/vfio-migration.rst | 93 +++++++++++++++++++++--------------
14
hw/vfio/igd.c | 122 +++++++++++++++++++++++++++++---------------------
25
hw/vfio/common.c | 6 ++-
15
1 file changed, 70 insertions(+), 52 deletions(-)
26
hw/vfio/migration.c | 46 +++++++++++++++--
27
hw/vfio/trace-events | 1 +
28
4 files changed, 105 insertions(+), 41 deletions(-)
29
16
30
diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
17
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
31
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
32
--- a/docs/devel/vfio-migration.rst
19
--- a/hw/vfio/igd.c
33
+++ b/docs/devel/vfio-migration.rst
20
+++ b/hw/vfio/igd.c
34
@@ -XXX,XX +XXX,XX @@ and recommends that the initial bytes are sent and loaded in the destination
21
@@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
35
before stopping the source VM. Enabling this migration capability will
22
return ret;
36
guarantee that and thus, can potentially reduce downtime even further.
37
38
-Note that currently VFIO migration is supported only for a single device. This
39
-is due to VFIO migration's lack of P2P support. However, P2P support is planned
40
-to be added later on.
41
+To support migration of multiple devices that might do P2P transactions between
42
+themselves, VFIO migration uAPI defines an intermediate P2P quiescent state.
43
+While in the P2P quiescent state, P2P DMA transactions cannot be initiated by
44
+the device, but the device can respond to incoming ones. Additionally, all
45
+outstanding P2P transactions are guaranteed to have been completed by the time
46
+the device enters this state.
47
+
48
+All the devices that support P2P migration are first transitioned to the P2P
49
+quiescent state and only then are they stopped or started. This makes migration
50
+safe P2P-wise, since starting and stopping the devices is not done atomically
51
+for all the devices together.
52
+
53
+Thus, migration of multiple VFIO devices is allowed only if all the devices
54
+support P2P migration. Single VFIO device migration is allowed regardless of
55
+P2P migration support.
56
57
A detailed description of the UAPI for VFIO device migration can be found in
58
the comment for the ``vfio_device_mig_state`` structure in the header file
59
@@ -XXX,XX +XXX,XX @@ will be blocked.
60
Flow of state changes during Live migration
61
===========================================
62
63
-Below is the flow of state change during live migration.
64
+Below is the state change flow during live migration for a VFIO device that
65
+supports both precopy and P2P migration. The flow for devices that don't
66
+support it is similar, except that the relevant states for precopy and P2P are
67
+skipped.
68
The values in the parentheses represent the VM state, the migration state, and
69
the VFIO device state, respectively.
70
-The text in the square brackets represents the flow if the VFIO device supports
71
-pre-copy.
72
73
Live migration save path
74
------------------------
75
76
::
77
78
- QEMU normal running state
79
- (RUNNING, _NONE, _RUNNING)
80
- |
81
+ QEMU normal running state
82
+ (RUNNING, _NONE, _RUNNING)
83
+ |
84
migrate_init spawns migration_thread
85
- Migration thread then calls each device's .save_setup()
86
- (RUNNING, _SETUP, _RUNNING [_PRE_COPY])
87
- |
88
- (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY])
89
- If device is active, get pending_bytes by .state_pending_{estimate,exact}()
90
- If total pending_bytes >= threshold_size, call .save_live_iterate()
91
- [Data of VFIO device for pre-copy phase is copied]
92
- Iterate till total pending bytes converge and are less than threshold
93
- |
94
- On migration completion, vCPU stops and calls .save_live_complete_precopy for
95
- each active device. The VFIO device is then transitioned into _STOP_COPY state
96
- (FINISH_MIGRATE, _DEVICE, _STOP_COPY)
97
- |
98
- For the VFIO device, iterate in .save_live_complete_precopy until
99
- pending data is 0
100
- (FINISH_MIGRATE, _DEVICE, _STOP)
101
- |
102
- (FINISH_MIGRATE, _COMPLETED, _STOP)
103
- Migraton thread schedules cleanup bottom half and exits
104
+ Migration thread then calls each device's .save_setup()
105
+ (RUNNING, _SETUP, _PRE_COPY)
106
+ |
107
+ (RUNNING, _ACTIVE, _PRE_COPY)
108
+ If device is active, get pending_bytes by .state_pending_{estimate,exact}()
109
+ If total pending_bytes >= threshold_size, call .save_live_iterate()
110
+ Data of VFIO device for pre-copy phase is copied
111
+ Iterate till total pending bytes converge and are less than threshold
112
+ |
113
+ On migration completion, the vCPUs and the VFIO device are stopped
114
+ The VFIO device is first put in P2P quiescent state
115
+ (FINISH_MIGRATE, _ACTIVE, _PRE_COPY_P2P)
116
+ |
117
+ Then the VFIO device is put in _STOP_COPY state
118
+ (FINISH_MIGRATE, _ACTIVE, _STOP_COPY)
119
+ .save_live_complete_precopy() is called for each active device
120
+ For the VFIO device, iterate in .save_live_complete_precopy() until
121
+ pending data is 0
122
+ |
123
+ (POSTMIGRATE, _COMPLETED, _STOP_COPY)
124
+ Migration thread schedules cleanup bottom half and exits
125
+ |
126
+ .save_cleanup() is called
127
+ (POSTMIGRATE, _COMPLETED, _STOP)
128
129
Live migration resume path
130
--------------------------
131
132
::
133
134
- Incoming migration calls .load_setup for each device
135
- (RESTORE_VM, _ACTIVE, _STOP)
136
- |
137
- For each device, .load_state is called for that device section data
138
- (RESTORE_VM, _ACTIVE, _RESUMING)
139
- |
140
- At the end, .load_cleanup is called for each device and vCPUs are started
141
- (RUNNING, _NONE, _RUNNING)
142
+ Incoming migration calls .load_setup() for each device
143
+ (RESTORE_VM, _ACTIVE, _STOP)
144
+ |
145
+ For each device, .load_state() is called for that device section data
146
+ (RESTORE_VM, _ACTIVE, _RESUMING)
147
+ |
148
+ At the end, .load_cleanup() is called for each device and vCPUs are started
149
+ The VFIO device is first put in P2P quiescent state
150
+ (RUNNING, _ACTIVE, _RUNNING_P2P)
151
+ |
152
+ (RUNNING, _NONE, _RUNNING)
153
154
Postcopy
155
========
156
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
157
index XXXXXXX..XXXXXXX 100644
158
--- a/hw/vfio/common.c
159
+++ b/hw/vfio/common.c
160
@@ -XXX,XX +XXX,XX @@ bool vfio_device_state_is_running(VFIODevice *vbasedev)
161
{
162
VFIOMigration *migration = vbasedev->migration;
163
164
- return migration->device_state == VFIO_DEVICE_STATE_RUNNING;
165
+ return migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
166
+ migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P;
167
}
23
}
168
24
169
bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
25
+static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
170
{
171
VFIOMigration *migration = vbasedev->migration;
172
173
- return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
174
+ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ||
175
+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P;
176
}
177
178
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
179
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/hw/vfio/migration.c
182
+++ b/hw/vfio/migration.c
183
@@ -XXX,XX +XXX,XX @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
184
return "STOP_COPY";
185
case VFIO_DEVICE_STATE_RESUMING:
186
return "RESUMING";
187
+ case VFIO_DEVICE_STATE_RUNNING_P2P:
188
+ return "RUNNING_P2P";
189
case VFIO_DEVICE_STATE_PRE_COPY:
190
return "PRE_COPY";
191
+ case VFIO_DEVICE_STATE_PRE_COPY_P2P:
192
+ return "PRE_COPY_P2P";
193
default:
194
return "UNKNOWN STATE";
195
}
196
@@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = {
197
198
/* ---------------------------------------------------------------------- */
199
200
+static void vfio_vmstate_change_prepare(void *opaque, bool running,
201
+ RunState state)
202
+{
26
+{
203
+ VFIODevice *vbasedev = opaque;
27
+ g_autofree struct vfio_region_info *host = NULL;
204
+ VFIOMigration *migration = vbasedev->migration;
28
+ g_autofree struct vfio_region_info *lpc = NULL;
205
+ enum vfio_device_mig_state new_state;
29
+ PCIDevice *lpc_bridge;
206
+ int ret;
30
+ int ret;
207
+
31
+
208
+ new_state = migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ?
32
+ /*
209
+ VFIO_DEVICE_STATE_PRE_COPY_P2P :
33
+ * Copying IDs or creating new devices are not supported on hotplug
210
+ VFIO_DEVICE_STATE_RUNNING_P2P;
34
+ */
35
+ if (vdev->pdev.qdev.hotplugged) {
36
+ error_setg(errp, "IGD LPC is not supported on hotplugged device");
37
+ return false;
38
+ }
211
+
39
+
212
+ /*
40
+ /*
213
+ * If setting the device in new_state fails, the device should be reset.
41
+ * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
214
+ * To do so, use ERROR state as a recover state.
42
+ * can stuff host values into, so if there's already one there and it's not
43
+ * one we can hack on, this quirk is no-go. Sorry Q35.
215
+ */
44
+ */
216
+ ret = vfio_migration_set_state(vbasedev, new_state,
45
+ lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
217
+ VFIO_DEVICE_STATE_ERROR);
46
+ 0, PCI_DEVFN(0x1f, 0));
218
+ if (ret) {
47
+ if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
219
+ /*
48
+ "vfio-pci-igd-lpc-bridge")) {
220
+ * Migration should be aborted in this case, but vm_state_notify()
49
+ error_setg(errp,
221
+ * currently does not support reporting failures.
50
+ "Cannot create LPC bridge due to existing device at 1f.0");
222
+ */
51
+ return false;
223
+ if (migrate_get_current()->to_dst_file) {
224
+ qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
225
+ }
226
+ }
52
+ }
227
+
53
+
228
+ trace_vfio_vmstate_change_prepare(vbasedev->name, running,
54
+ /*
229
+ RunState_str(state),
55
+ * Check whether we have all the vfio device specific regions to
230
+ mig_state_to_str(new_state));
56
+ * support LPC quirk (added in Linux v4.6).
57
+ */
58
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
59
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
60
+ VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
61
+ if (ret) {
62
+ error_setg(errp, "IGD LPC bridge access is not supported by kernel");
63
+ return false;
64
+ }
65
+
66
+ ret = vfio_get_dev_region_info(&vdev->vbasedev,
67
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
68
+ VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
69
+ if (ret) {
70
+ error_setg(errp, "IGD host bridge access is not supported by kernel");
71
+ return false;
72
+ }
73
+
74
+ /* Create/modify LPC bridge */
75
+ ret = vfio_pci_igd_lpc_init(vdev, lpc);
76
+ if (ret) {
77
+ error_setg(errp, "Failed to create/modify LPC bridge for IGD");
78
+ return false;
79
+ }
80
+
81
+ /* Stuff some host values into the VM PCI host bridge */
82
+ ret = vfio_pci_igd_host_init(vdev, host);
83
+ if (ret) {
84
+ error_setg(errp, "Failed to modify host bridge for IGD");
85
+ return false;
86
+ }
87
+
88
+ return true;
231
+}
89
+}
232
+
90
+
233
static void vfio_vmstate_change(void *opaque, bool running, RunState state)
91
#define IGD_GGC_MMIO_OFFSET 0x108040
92
#define IGD_BDSM_MMIO_OFFSET 0x1080C0
93
94
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
95
void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
234
{
96
{
235
VFIODevice *vbasedev = opaque;
97
g_autofree struct vfio_region_info *rom = NULL;
236
@@ -XXX,XX +XXX,XX @@ static int vfio_migration_init(VFIODevice *vbasedev)
98
- g_autofree struct vfio_region_info *host = NULL;
237
char id[256] = "";
99
- g_autofree struct vfio_region_info *lpc = NULL;
238
g_autofree char *path = NULL, *oid = NULL;
100
- PCIDevice *lpc_bridge;
239
uint64_t mig_flags = 0;
101
int ret, gen;
240
+ VMChangeStateHandler *prepare_cb;
102
uint64_t gms_size;
241
103
uint64_t *bdsm_size;
242
if (!vbasedev->ops->vfio_get_object) {
104
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
243
return -EINVAL;
105
return;
244
@@ -XXX,XX +XXX,XX @@ static int vfio_migration_init(VFIODevice *vbasedev)
106
}
245
register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
107
246
vbasedev);
108
- /*
247
109
- * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
248
- migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
110
- * can stuff host values into, so if there's already one there and it's not
249
- vfio_vmstate_change,
111
- * one we can hack on, legacy mode is no-go. Sorry Q35.
250
- vbasedev);
112
- */
251
+ prepare_cb = migration->mig_flags & VFIO_MIGRATION_P2P ?
113
- lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
252
+ vfio_vmstate_change_prepare :
114
- 0, PCI_DEVFN(0x1f, 0));
253
+ NULL;
115
- if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
254
+ migration->vm_state = qdev_add_vm_change_state_handler_full(
116
- "vfio-pci-igd-lpc-bridge")) {
255
+ vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev);
117
- error_report("IGD device %s cannot support legacy mode due to existing "
256
migration->migration_state.notify = vfio_migration_state_notifier;
118
- "devices at address 1f.0", vdev->vbasedev.name);
257
add_migration_state_change_notifier(&migration->migration_state);
119
- return;
258
120
- }
259
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
121
-
260
index XXXXXXX..XXXXXXX 100644
122
/*
261
--- a/hw/vfio/trace-events
123
* IGD is not a standard, they like to change their specs often. We
262
+++ b/hw/vfio/trace-events
124
* only attempt to support back to SandBridge and we hope that newer
263
@@ -XXX,XX +XXX,XX @@ vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer
125
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
264
vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
126
return;
265
vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
127
}
266
vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
128
267
+vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
129
- /*
130
- * Check whether we have all the vfio device specific regions to
131
- * support legacy mode (added in Linux v4.6). If not, bail.
132
- */
133
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
134
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
135
- VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
136
- if (ret) {
137
- error_report("IGD device %s does not support host bridge access,"
138
- "legacy mode disabled", vdev->vbasedev.name);
139
- return;
140
- }
141
-
142
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
143
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
144
- VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
145
- if (ret) {
146
- error_report("IGD device %s does not support LPC bridge access,"
147
- "legacy mode disabled", vdev->vbasedev.name);
148
- return;
149
- }
150
-
151
gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
152
153
/*
154
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
155
return;
156
}
157
158
- /* Create our LPC/ISA bridge */
159
- ret = vfio_pci_igd_lpc_init(vdev, lpc);
160
- if (ret) {
161
- error_report("IGD device %s failed to create LPC bridge, "
162
- "legacy mode disabled", vdev->vbasedev.name);
163
- return;
164
- }
165
-
166
- /* Stuff some host values into the VM PCI host bridge */
167
- ret = vfio_pci_igd_host_init(vdev, host);
168
- if (ret) {
169
- error_report("IGD device %s failed to modify host bridge, "
170
- "legacy mode disabled", vdev->vbasedev.name);
171
+ /* Setup LPC bridge / Host bridge PCI IDs */
172
+ if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
173
+ error_append_hint(&err, "IGD legacy mode disabled\n");
174
+ error_report_err(err);
175
return;
176
}
177
268
--
178
--
269
2.41.0
179
2.48.1
270
180
271
181
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Background snapshot allows creating a snapshot of the VM while it's
3
IGD devices require a device-specific quirk to be applied to their PCI
4
running and keeping it small by not including dirty RAM pages.
4
config space. Currently, it is put in the BAR4 quirk that does nothing
5
to BAR4 itself. Add a placeholder for PCI config space quirks to hold
6
that quirk later.
5
7
6
The way it works is by first stopping the VM, saving the non-iterable
8
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
7
devices' state and then starting the VM and saving the RAM while write
9
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
8
protecting it with UFFD. The resulting snapshot represents the VM state
10
Tested-by: Alex Williamson <alex.williamson@redhat.com>
9
at snapshot start.
11
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
10
12
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-6-tomitamoeko@gmail.com
11
VFIO migration is not compatible with background snapshot.
12
First of all, VFIO device state is not even saved in background snapshot
13
because only non-iterable device state is saved. But even if it was
14
saved, after starting the VM, a VFIO device could dirty pages without it
15
being detected by UFFD write protection. This would corrupt the
16
snapshot, as the RAM in it would not represent the RAM at snapshot
17
start.
18
19
To prevent this, block VFIO migration with background snapshot.
20
21
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
22
Reviewed-by: Peter Xu <peterx@redhat.com>
23
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
24
---
14
---
25
hw/vfio/migration.c | 11 ++++++++++-
15
hw/vfio/pci.h | 1 +
26
1 file changed, 10 insertions(+), 1 deletion(-)
16
hw/vfio/pci-quirks.c | 5 +++++
17
hw/vfio/pci.c | 4 ++++
18
3 files changed, 10 insertions(+)
27
19
28
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
20
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
29
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
30
--- a/hw/vfio/migration.c
22
--- a/hw/vfio/pci.h
31
+++ b/hw/vfio/migration.c
23
+++ b/hw/vfio/pci.h
32
@@ -XXX,XX +XXX,XX @@ static int vfio_save_prepare(void *opaque, Error **errp)
24
@@ -XXX,XX +XXX,XX @@ uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
33
VFIODevice *vbasedev = opaque;
25
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
34
26
35
/*
27
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
36
- * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on.
28
+bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp);
37
+ * Snapshot doesn't use postcopy nor background snapshot, so allow snapshot
29
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
38
+ * even if they are on.
30
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
39
*/
31
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
40
if (runstate_check(RUN_STATE_SAVE_VM)) {
32
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
41
return 0;
33
index XXXXXXX..XXXXXXX 100644
42
@@ -XXX,XX +XXX,XX @@ static int vfio_save_prepare(void *opaque, Error **errp)
34
--- a/hw/vfio/pci-quirks.c
43
return -EOPNOTSUPP;
35
+++ b/hw/vfio/pci-quirks.c
36
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
37
/*
38
* Common quirk probe entry points.
39
*/
40
+bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp)
41
+{
42
+ return true;
43
+}
44
+
45
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
46
{
47
vfio_vga_probe_ati_3c3_quirk(vdev);
48
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/hw/vfio/pci.c
51
+++ b/hw/vfio/pci.c
52
@@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
53
goto out_unset_idev;
44
}
54
}
45
55
46
+ if (migrate_background_snapshot()) {
56
+ if (!vfio_config_quirk_setup(vdev, errp)) {
47
+ error_setg(
57
+ goto out_unset_idev;
48
+ errp,
49
+ "%s: VFIO migration is not supported with background snapshot",
50
+ vbasedev->name);
51
+ return -EOPNOTSUPP;
52
+ }
58
+ }
53
+
59
+
54
return 0;
60
if (vdev->vga) {
55
}
61
vfio_vga_quirk_setup(vdev);
56
62
}
57
--
63
--
58
2.41.0
64
2.48.1
59
65
60
66
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
The functions in target.c are not static, yet they don't have a proper
3
The actual IO BAR4 write quirk in vfio_probe_igd_bar4_quirk was removed
4
migration prefix. Add such prefix.
4
in a previous change, leaving the function not matching its name, so move
5
it into the newly introduced vfio_config_quirk_setup. There is no
6
functional change in this commit.
5
7
6
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
8
For now, to align with current legacy mode behavior, it returns and
7
Reviewed-by: Cédric Le Goater <clg@redhat.com>
9
proceeds on error. Later it will fail on error after decoupling the
10
quirks from legacy mode.
11
12
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
13
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
14
Tested-by: Alex Williamson <alex.williamson@redhat.com>
15
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
16
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-7-tomitamoeko@gmail.com
8
Signed-off-by: Cédric Le Goater <clg@redhat.com>
17
Signed-off-by: Cédric Le Goater <clg@redhat.com>
9
---
18
---
10
migration/migration.h | 4 ++--
19
hw/vfio/pci.h | 2 +-
11
migration/migration.c | 6 +++---
20
hw/vfio/igd.c | 21 ++++++++++++---------
12
migration/savevm.c | 2 +-
21
hw/vfio/pci-quirks.c | 6 +++++-
13
migration/target.c | 8 ++++----
22
3 files changed, 18 insertions(+), 11 deletions(-)
14
4 files changed, 10 insertions(+), 10 deletions(-)
15
23
16
diff --git a/migration/migration.h b/migration/migration.h
24
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
17
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/migration.h
26
--- a/hw/vfio/pci.h
19
+++ b/migration/migration.h
27
+++ b/hw/vfio/pci.h
20
@@ -XXX,XX +XXX,XX @@ void migration_consume_urgent_request(void);
28
@@ -XXX,XX +XXX,XX @@ bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
21
bool migration_rate_limit(void);
29
void vfio_quirk_reset(VFIOPCIDevice *vdev);
22
void migration_cancel(const Error *error);
30
VFIOQuirk *vfio_quirk_alloc(int nr_mem);
23
31
void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
24
-void populate_vfio_info(MigrationInfo *info);
32
-void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
25
-void reset_vfio_bytes_transferred(void);
33
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp);
26
+void migration_populate_vfio_info(MigrationInfo *info);
34
27
+void migration_reset_vfio_bytes_transferred(void);
35
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
28
void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
36
29
37
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
30
#endif
31
diff --git a/migration/migration.c b/migration/migration.c
32
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
33
--- a/migration/migration.c
39
--- a/hw/vfio/igd.c
34
+++ b/migration/migration.c
40
+++ b/hw/vfio/igd.c
35
@@ -XXX,XX +XXX,XX @@ static void fill_source_migration_info(MigrationInfo *info)
41
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
36
populate_time_info(info, s);
42
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
37
populate_ram_info(info, s);
43
}
38
populate_disk_info(info);
44
39
- populate_vfio_info(info);
45
-void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
40
+ migration_populate_vfio_info(info);
46
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
41
break;
47
+ Error **errp G_GNUC_UNUSED)
42
case MIGRATION_STATUS_COLO:
48
{
43
info->has_status = true;
49
g_autofree struct vfio_region_info *rom = NULL;
44
@@ -XXX,XX +XXX,XX @@ static void fill_source_migration_info(MigrationInfo *info)
50
int ret, gen;
45
case MIGRATION_STATUS_COMPLETED:
51
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
46
populate_time_info(info, s);
52
* PCI bus address.
47
populate_ram_info(info, s);
48
- populate_vfio_info(info);
49
+ migration_populate_vfio_info(info);
50
break;
51
case MIGRATION_STATUS_FAILED:
52
info->has_status = true;
53
@@ -XXX,XX +XXX,XX @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
54
*/
53
*/
55
memset(&mig_stats, 0, sizeof(mig_stats));
54
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
56
memset(&compression_counters, 0, sizeof(compression_counters));
55
- !vfio_is_vga(vdev) || nr != 4 ||
57
- reset_vfio_bytes_transferred();
56
+ !vfio_is_vga(vdev) ||
58
+ migration_reset_vfio_bytes_transferred();
57
&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
59
58
0, PCI_DEVFN(0x2, 0))) {
59
- return;
60
+ return true;
61
}
62
63
/*
64
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
65
if (gen == -1) {
66
error_report("IGD device %s is unsupported in legacy mode, "
67
"try SandyBridge or newer", vdev->vbasedev.name);
68
- return;
69
+ return true;
70
}
71
72
/*
73
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
74
if ((ret || !rom->size) && !vdev->pdev.romfile) {
75
error_report("IGD device %s has no ROM, legacy mode disabled",
76
vdev->vbasedev.name);
77
- return;
78
+ return true;
79
}
80
81
/*
82
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
83
error_report("IGD device %s hotplugged, ROM disabled, "
84
"legacy mode disabled", vdev->vbasedev.name);
85
vdev->rom_read_failed = true;
86
- return;
87
+ return true;
88
}
89
90
gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
91
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
92
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
93
error_report("IGD device %s failed to enable VGA access, "
94
"legacy mode disabled", vdev->vbasedev.name);
95
- return;
96
+ return true;
97
}
98
99
/* Setup OpRegion access */
100
if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
101
error_append_hint(&err, "IGD legacy mode disabled\n");
102
error_report_err(err);
103
- return;
104
+ return true;
105
}
106
107
/* Setup LPC bridge / Host bridge PCI IDs */
108
if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
109
error_append_hint(&err, "IGD legacy mode disabled\n");
110
error_report_err(err);
111
- return;
112
+ return true;
113
}
114
115
/*
116
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
117
}
118
119
trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
120
+
121
+ return true;
122
}
123
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
124
index XXXXXXX..XXXXXXX 100644
125
--- a/hw/vfio/pci-quirks.c
126
+++ b/hw/vfio/pci-quirks.c
127
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
128
*/
129
bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp)
130
{
131
+#ifdef CONFIG_VFIO_IGD
132
+ if (!vfio_probe_igd_config_quirk(vdev, errp)) {
133
+ return false;
134
+ }
135
+#endif
60
return true;
136
return true;
61
}
137
}
62
diff --git a/migration/savevm.c b/migration/savevm.c
138
63
index XXXXXXX..XXXXXXX 100644
139
@@ -XXX,XX +XXX,XX @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
64
--- a/migration/savevm.c
140
vfio_probe_rtl8168_bar2_quirk(vdev, nr);
65
+++ b/migration/savevm.c
141
#ifdef CONFIG_VFIO_IGD
66
@@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
142
vfio_probe_igd_bar0_quirk(vdev, nr);
67
migrate_init(ms);
143
- vfio_probe_igd_bar4_quirk(vdev, nr);
68
memset(&mig_stats, 0, sizeof(mig_stats));
69
memset(&compression_counters, 0, sizeof(compression_counters));
70
- reset_vfio_bytes_transferred();
71
+ migration_reset_vfio_bytes_transferred();
72
ms->to_dst_file = f;
73
74
qemu_mutex_unlock_iothread();
75
diff --git a/migration/target.c b/migration/target.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/migration/target.c
78
+++ b/migration/target.c
79
@@ -XXX,XX +XXX,XX @@
80
#endif
144
#endif
81
82
#ifdef CONFIG_VFIO
83
-void populate_vfio_info(MigrationInfo *info)
84
+void migration_populate_vfio_info(MigrationInfo *info)
85
{
86
if (vfio_mig_active()) {
87
info->vfio = g_malloc0(sizeof(*info->vfio));
88
@@ -XXX,XX +XXX,XX @@ void populate_vfio_info(MigrationInfo *info)
89
}
90
}
145
}
91
146
92
-void reset_vfio_bytes_transferred(void)
93
+void migration_reset_vfio_bytes_transferred(void)
94
{
95
vfio_reset_bytes_transferred();
96
}
97
#else
98
-void populate_vfio_info(MigrationInfo *info)
99
+void migration_populate_vfio_info(MigrationInfo *info)
100
{
101
}
102
103
-void reset_vfio_bytes_transferred(void)
104
+void migration_reset_vfio_bytes_transferred(void)
105
{
106
}
107
#endif
108
--
147
--
109
2.41.0
148
2.48.1
110
149
111
150
diff view generated by jsdifflib
New patch
1
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
3
So far, IGD-specific quirks all require enabling legacy mode, which is
4
toggled by assigning IGD to 00:02.0. However, some quirks, like the BDSM
5
and GGC register quirks, should be applied to all supported IGD devices.
6
A new config option, x-igd-legacy-mode=[on|off|auto], is introduced to
7
control the legacy mode only quirks. The default value is "auto", which
8
keeps current behavior that enables legacy mode implicitly and continues
9
on error when all following conditions are met.
10
* Machine type is i440fx
11
* IGD device is at guest BDF 00:02.0
12
13
If any one of the conditions above is not met, the default behavior is
14
equivalent to "off", QEMU will fail immediately if any error occurs.
15
16
Users can also use "on" to force enabling legacy mode. It checks if all
17
the conditions above are met and sets up legacy mode. QEMU will also fail
18
immediately on error in this case.
19
20
Additionally, the hotplug check in legacy mode is removed as hotplugging
21
IGD device is never supported, and it will be checked when enabling the
22
OpRegion quirk.
23
24
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
25
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
26
Tested-by: Alex Williamson <alex.williamson@redhat.com>
27
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
28
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-8-tomitamoeko@gmail.com
29
[ clg: - Changed warn_report() by info_report() in
30
vfio_probe_igd_config_quirk() as suggested by Alex W.
31
- Fixed spelling in vfio_probe_igd_config_quirk () ]
32
Signed-off-by: Cédric Le Goater <clg@redhat.com>
33
---
34
hw/vfio/pci.h | 1 +
35
hw/vfio/igd.c | 127 +++++++++++++++++++++++++++++---------------------
36
hw/vfio/pci.c | 2 +
37
3 files changed, 77 insertions(+), 53 deletions(-)
38
39
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
40
index XXXXXXX..XXXXXXX 100644
41
--- a/hw/vfio/pci.h
42
+++ b/hw/vfio/pci.h
43
@@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice {
44
uint32_t display_xres;
45
uint32_t display_yres;
46
int32_t bootindex;
47
+ OnOffAuto igd_legacy_mode;
48
uint32_t igd_gms;
49
OffAutoPCIBAR msix_relo;
50
uint8_t nv_gpudirect_clique;
51
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/hw/vfio/igd.c
54
+++ b/hw/vfio/igd.c
55
@@ -XXX,XX +XXX,XX @@
56
#include "qemu/error-report.h"
57
#include "qapi/error.h"
58
#include "qapi/qmp/qerror.h"
59
+#include "hw/boards.h"
60
#include "hw/hw.h"
61
#include "hw/nvram/fw_cfg.h"
62
#include "pci.h"
63
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
64
* bus address.
65
*/
66
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
67
- !vfio_is_vga(vdev) || nr != 0 ||
68
- &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
69
- 0, PCI_DEVFN(0x2, 0))) {
70
+ !vfio_is_vga(vdev) || nr != 0) {
71
return;
72
}
73
74
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
75
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
76
}
77
78
-bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
79
- Error **errp G_GNUC_UNUSED)
80
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
81
{
82
- g_autofree struct vfio_region_info *rom = NULL;
83
int ret, gen;
84
uint64_t gms_size;
85
uint64_t *bdsm_size;
86
uint32_t gmch;
87
+ bool legacy_mode_enabled = false;
88
Error *err = NULL;
89
90
/*
91
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
92
* PCI bus address.
93
*/
94
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
95
- !vfio_is_vga(vdev) ||
96
- &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
97
- 0, PCI_DEVFN(0x2, 0))) {
98
+ !vfio_is_vga(vdev)) {
99
return true;
100
}
101
102
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
103
return true;
104
}
105
106
- /*
107
- * Most of what we're doing here is to enable the ROM to run, so if
108
- * there's no ROM, there's no point in setting up this quirk.
109
- * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
110
- */
111
- ret = vfio_get_region_info(&vdev->vbasedev,
112
- VFIO_PCI_ROM_REGION_INDEX, &rom);
113
- if ((ret || !rom->size) && !vdev->pdev.romfile) {
114
- error_report("IGD device %s has no ROM, legacy mode disabled",
115
- vdev->vbasedev.name);
116
- return true;
117
- }
118
-
119
- /*
120
- * Ignore the hotplug corner case, mark the ROM failed, we can't
121
- * create the devices we need for legacy mode in the hotplug scenario.
122
- */
123
- if (vdev->pdev.qdev.hotplugged) {
124
- error_report("IGD device %s hotplugged, ROM disabled, "
125
- "legacy mode disabled", vdev->vbasedev.name);
126
- vdev->rom_read_failed = true;
127
- return true;
128
- }
129
-
130
gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
131
132
/*
133
- * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
134
- * try to enable it. Probably shouldn't be using legacy mode without VGA,
135
- * but also no point in us enabling VGA if disabled in hardware.
136
+ * For backward compatibility, enable legacy mode when
137
+ * - Machine type is i440fx (pc_piix)
138
+ * - IGD device is at guest BDF 00:02.0
139
+ * - Not manually disabled by x-igd-legacy-mode=off
140
*/
141
- if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
142
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
143
- error_report("IGD device %s failed to enable VGA access, "
144
- "legacy mode disabled", vdev->vbasedev.name);
145
- return true;
146
- }
147
+ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) &&
148
+ !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") &&
149
+ (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev),
150
+ 0, PCI_DEVFN(0x2, 0)))) {
151
+ /*
152
+ * IGD legacy mode requires:
153
+ * - VBIOS in ROM BAR or file
154
+ * - VGA IO/MMIO ranges are claimed by IGD
155
+ * - OpRegion
156
+ * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
157
+ */
158
+ g_autofree struct vfio_region_info *rom = NULL;
159
+
160
+ legacy_mode_enabled = true;
161
+ info_report("IGD legacy mode enabled, "
162
+ "use x-igd-legacy-mode=off to disable it if unwanted.");
163
+
164
+ /*
165
+ * Most of what we're doing here is to enable the ROM to run, so if
166
+ * there's no ROM, there's no point in setting up this quirk.
167
+ * NB. We only seem to get BIOS ROMs, so UEFI VM would need CSM support.
168
+ */
169
+ ret = vfio_get_region_info(&vdev->vbasedev,
170
+ VFIO_PCI_ROM_REGION_INDEX, &rom);
171
+ if ((ret || !rom->size) && !vdev->pdev.romfile) {
172
+ error_setg(&err, "Device has no ROM");
173
+ goto error;
174
+ }
175
176
- /* Setup OpRegion access */
177
- if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
178
- error_append_hint(&err, "IGD legacy mode disabled\n");
179
- error_report_err(err);
180
- return true;
181
- }
182
+ /*
183
+ * If IGD VGA Disable is clear (expected) and VGA is not already
184
+ * enabled, try to enable it. Probably shouldn't be using legacy mode
185
+ * without VGA, but also no point in us enabling VGA if disabled in
186
+ * hardware.
187
+ */
188
+ if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
189
+ error_setg(&err, "Unable to enable VGA access");
190
+ goto error;
191
+ }
192
193
- /* Setup LPC bridge / Host bridge PCI IDs */
194
- if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
195
- error_append_hint(&err, "IGD legacy mode disabled\n");
196
- error_report_err(err);
197
- return true;
198
+ /* Setup OpRegion access */
199
+ if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
200
+ goto error;
201
+ }
202
+
203
+ /* Setup LPC bridge / Host bridge PCI IDs */
204
+ if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
205
+ goto error;
206
+ }
207
+ } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) {
208
+ error_setg(&err,
209
+ "Machine is not i440fx or assigned BDF is not 00:02.0");
210
+ goto error;
211
}
212
213
/*
214
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
215
trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB));
216
217
return true;
218
+
219
+error:
220
+ /*
221
+ * When legacy mode is implicitly enabled, continue on error,
222
+ * to keep compatibility
223
+ */
224
+ if (legacy_mode_enabled && (vdev->igd_legacy_mode == ON_OFF_AUTO_AUTO)) {
225
+ error_report_err(err);
226
+ error_report("IGD legacy mode disabled");
227
+ return true;
228
+ }
229
+
230
+ error_propagate(errp, err);
231
+ return false;
232
}
233
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
234
index XXXXXXX..XXXXXXX 100644
235
--- a/hw/vfio/pci.c
236
+++ b/hw/vfio/pci.c
237
@@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = {
238
VFIO_FEATURE_ENABLE_REQ_BIT, true),
239
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
240
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
241
+ DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,
242
+ igd_legacy_mode, ON_OFF_AUTO_AUTO),
243
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
244
vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
245
DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
246
--
247
2.48.1
248
249
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Add a new .save_prepare() handler to struct SaveVMHandlers. This handler
3
Both enable OpRegion option (x-igd-opregion) and legacy mode require
4
is called early, even before migration starts, and can be used by
4
setting up OpRegion copy for IGD devices. As the config quirk no longer
5
devices to perform early checks.
5
depends on legacy mode, we can now handle x-igd-opregion option there
6
instead of in vfio_realize.
6
7
7
Refactor migrate_init() to be able to return errors and call
8
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
8
.save_prepare() from there.
9
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
9
10
Tested-by: Alex Williamson <alex.williamson@redhat.com>
10
Suggested-by: Peter Xu <peterx@redhat.com>
11
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
11
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
12
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-9-tomitamoeko@gmail.com
12
Reviewed-by: Peter Xu <peterx@redhat.com>
13
Reviewed-by: Cédric Le Goater <clg@redhat.com>
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
15
---
14
---
16
include/migration/register.h | 5 +++++
15
hw/vfio/pci.h | 2 --
17
migration/migration.h | 2 +-
16
hw/vfio/igd.c | 14 +++++++++-----
18
migration/savevm.h | 1 +
17
hw/vfio/pci.c | 9 ---------
19
migration/migration.c | 15 +++++++++++++--
18
3 files changed, 9 insertions(+), 16 deletions(-)
20
migration/savevm.c | 29 ++++++++++++++++++++++++++++-
21
5 files changed, 48 insertions(+), 4 deletions(-)
22
19
23
diff --git a/include/migration/register.h b/include/migration/register.h
20
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
24
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
25
--- a/include/migration/register.h
22
--- a/hw/vfio/pci.h
26
+++ b/include/migration/register.h
23
+++ b/hw/vfio/pci.h
27
@@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers {
24
@@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
28
/* This runs inside the iothread lock. */
25
29
SaveStateHandler *save_state;
26
bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
30
27
31
+ /*
28
-bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp);
32
+ * save_prepare is called early, even before migration starts, and can be
29
-
33
+ * used to perform early checks.
30
void vfio_display_reset(VFIOPCIDevice *vdev);
34
+ */
31
bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
35
+ int (*save_prepare)(void *opaque, Error **errp);
32
void vfio_display_finalize(VFIOPCIDevice *vdev);
36
void (*save_cleanup)(void *opaque);
33
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
37
int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
38
int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
39
diff --git a/migration/migration.h b/migration/migration.h
40
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
41
--- a/migration/migration.h
35
--- a/hw/vfio/igd.c
42
+++ b/migration/migration.h
36
+++ b/hw/vfio/igd.c
43
@@ -XXX,XX +XXX,XX @@ void migrate_fd_connect(MigrationState *s, Error *error_in);
37
@@ -XXX,XX +XXX,XX @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
44
bool migration_is_setup_or_active(int state);
38
return true;
45
bool migration_is_running(int state);
46
47
-void migrate_init(MigrationState *s);
48
+int migrate_init(MigrationState *s, Error **errp);
49
bool migration_is_blocked(Error **errp);
50
/* True if outgoing migration has entered postcopy phase */
51
bool migration_in_postcopy(void);
52
diff --git a/migration/savevm.h b/migration/savevm.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/migration/savevm.h
55
+++ b/migration/savevm.h
56
@@ -XXX,XX +XXX,XX @@
57
58
bool qemu_savevm_state_blocked(Error **errp);
59
void qemu_savevm_non_migratable_list(strList **reasons);
60
+int qemu_savevm_state_prepare(Error **errp);
61
void qemu_savevm_state_setup(QEMUFile *f);
62
bool qemu_savevm_state_guest_unplug_pending(void);
63
int qemu_savevm_state_resume_prepare(MigrationState *s);
64
diff --git a/migration/migration.c b/migration/migration.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/migration/migration.c
67
+++ b/migration/migration.c
68
@@ -XXX,XX +XXX,XX @@ bool migration_is_active(MigrationState *s)
69
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
70
}
39
}
71
40
72
-void migrate_init(MigrationState *s)
41
-bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
73
+int migrate_init(MigrationState *s, Error **errp)
42
+static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
74
{
43
{
75
+ int ret;
44
g_autofree struct vfio_region_info *opregion = NULL;
76
+
45
int ret;
77
+ ret = qemu_savevm_state_prepare(errp);
46
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
78
+ if (ret) {
47
goto error;
79
+ return ret;
48
}
49
50
- /* Setup OpRegion access */
51
- if (!vfio_pci_igd_setup_opregion(vdev, &err)) {
52
- goto error;
53
- }
54
+ /* Enable OpRegion quirk */
55
+ vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION;
56
57
/* Setup LPC bridge / Host bridge PCI IDs */
58
if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
59
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
60
goto error;
61
}
62
63
+ /* Setup OpRegion access */
64
+ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) &&
65
+ !vfio_pci_igd_setup_opregion(vdev, errp)) {
66
+ goto error;
80
+ }
67
+ }
81
+
68
+
82
/*
69
/*
83
* Reinitialise all migration state, except
70
* Allow user to override dsm size using x-igd-gms option, in multiples of
84
* parameters/capabilities that the user set, and
71
* 32MiB. This option should only be used when the desired size cannot be
85
@@ -XXX,XX +XXX,XX @@ void migrate_init(MigrationState *s)
72
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
86
memset(&mig_stats, 0, sizeof(mig_stats));
73
index XXXXXXX..XXXXXXX 100644
87
memset(&compression_counters, 0, sizeof(compression_counters));
74
--- a/hw/vfio/pci.c
88
migration_reset_vfio_bytes_transferred();
75
+++ b/hw/vfio/pci.c
89
+
76
@@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
90
+ return 0;
77
vfio_bar_quirk_setup(vdev, i);
91
}
92
93
int migrate_add_blocker_internal(Error *reason, Error **errp)
94
@@ -XXX,XX +XXX,XX @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
95
migrate_set_block_incremental(true);
96
}
78
}
97
79
98
- migrate_init(s);
80
-#ifdef CONFIG_VFIO_IGD
99
+ if (migrate_init(s, errp)) {
81
- if (!vdev->igd_opregion &&
100
+ return false;
82
- vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
101
+ }
83
- if (!vfio_pci_igd_setup_opregion(vdev, errp)) {
102
84
- goto out_unset_idev;
103
return true;
85
- }
104
}
86
- }
105
diff --git a/migration/savevm.c b/migration/savevm.c
87
-#endif
106
index XXXXXXX..XXXXXXX 100644
88
-
107
--- a/migration/savevm.c
89
/* QEMU emulates all of MSI & MSIX */
108
+++ b/migration/savevm.c
90
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
109
@@ -XXX,XX +XXX,XX @@ bool qemu_savevm_state_guest_unplug_pending(void)
91
memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
110
return false;
111
}
112
113
+int qemu_savevm_state_prepare(Error **errp)
114
+{
115
+ SaveStateEntry *se;
116
+ int ret;
117
+
118
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
119
+ if (!se->ops || !se->ops->save_prepare) {
120
+ continue;
121
+ }
122
+ if (se->ops->is_active) {
123
+ if (!se->ops->is_active(se->opaque)) {
124
+ continue;
125
+ }
126
+ }
127
+
128
+ ret = se->ops->save_prepare(se->opaque, errp);
129
+ if (ret < 0) {
130
+ return ret;
131
+ }
132
+ }
133
+
134
+ return 0;
135
+}
136
+
137
void qemu_savevm_state_setup(QEMUFile *f)
138
{
139
MigrationState *ms = migrate_get_current();
140
@@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
141
return -EINVAL;
142
}
143
144
- migrate_init(ms);
145
+ ret = migrate_init(ms, errp);
146
+ if (ret) {
147
+ return ret;
148
+ }
149
ms->to_dst_file = f;
150
151
qemu_mutex_unlock_iothread();
152
--
92
--
153
2.41.0
93
2.48.1
154
94
155
95
diff view generated by jsdifflib
New patch
1
From: Tomita Moeko <tomitamoeko@gmail.com>
1
2
3
The LPC bridge/Host bridge IDs quirk is also not dependent on legacy
4
mode. Recent Windows driver no longer depends on these IDs, as well as
5
Linux i915 driver, while UEFI GOP seems still needs them. Make it an
6
option to allow users enabling and disabling it as needed.
7
8
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
9
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
10
Tested-by: Alex Williamson <alex.williamson@redhat.com>
11
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
12
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-10-tomitamoeko@gmail.com
13
[ clg: - Fixed spelling in vfio_probe_igd_config_quirk() ]
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
15
---
16
hw/vfio/pci.h | 3 +++
17
hw/vfio/igd.c | 14 ++++++++------
18
hw/vfio/pci.c | 2 ++
19
3 files changed, 13 insertions(+), 6 deletions(-)
20
21
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/vfio/pci.h
24
+++ b/hw/vfio/pci.h
25
@@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice {
26
#define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
27
#define VFIO_FEATURE_ENABLE_IGD_OPREGION \
28
(1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
29
+#define VFIO_FEATURE_ENABLE_IGD_LPC_BIT 3
30
+#define VFIO_FEATURE_ENABLE_IGD_LPC \
31
+ (1 << VFIO_FEATURE_ENABLE_IGD_LPC_BIT)
32
OnOffAuto display;
33
uint32_t display_xres;
34
uint32_t display_yres;
35
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/hw/vfio/igd.c
38
+++ b/hw/vfio/igd.c
39
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
40
goto error;
41
}
42
43
- /* Enable OpRegion quirk */
44
+ /* Enable OpRegion and LPC bridge quirk */
45
vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION;
46
-
47
- /* Setup LPC bridge / Host bridge PCI IDs */
48
- if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) {
49
- goto error;
50
- }
51
+ vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC;
52
} else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) {
53
error_setg(&err,
54
"Machine is not i440fx or assigned BDF is not 00:02.0");
55
@@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
56
goto error;
57
}
58
59
+ /* Setup LPC bridge / Host bridge PCI IDs */
60
+ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) &&
61
+ !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) {
62
+ goto error;
63
+ }
64
+
65
/*
66
* Allow user to override dsm size using x-igd-gms option, in multiples of
67
* 32MiB. This option should only be used when the desired size cannot be
68
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/hw/vfio/pci.c
71
+++ b/hw/vfio/pci.c
72
@@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = {
73
VFIO_FEATURE_ENABLE_REQ_BIT, true),
74
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
75
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
76
+ DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features,
77
+ VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false),
78
DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,
79
igd_legacy_mode, ON_OFF_AUTO_AUTO),
80
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
81
--
82
2.48.1
83
84
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Tomita Moeko <tomitamoeko@gmail.com>
2
2
3
Add prepare callback to struct VMChangeStateEntry.
3
The KVMGT/GVT-g vGPU also exposes OpRegion. But unlike IGD passthrough,
4
it only needs the OpRegion quirk. A previous change that moved x-igd-opregion
5
handling to config quirk breaks KVMGT functionality as it brings extra
6
checks and applies other quirks. Here we check if the device is mdev
7
(KVMGT) or not (passthrough), and then apply the corresponding quirks.
4
8
5
The prepare callback is optional and can be set by the new function
9
As before, users must manually specify x-igd-opregion=on to enable it
6
qemu_add_vm_change_state_handler_prio_full() that allows setting this
10
on KVMGT devices. In the future, we may check the VID/DID and enable
7
callback in addition to the main callback.
11
OpRegion automatically.
8
12
9
The prepare callbacks and main callbacks are called in two separate
13
Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
10
phases: First all prepare callbacks are called and only then all main
14
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
11
callbacks are called.
15
Tested-by: Alex Williamson <alex.williamson@redhat.com>
12
16
Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com>
13
The purpose of the new prepare callback is to allow all devices to run a
17
Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-11-tomitamoeko@gmail.com
14
preliminary task before calling the devices' main callbacks.
15
16
This will facilitate adding P2P support for VFIO migration where all
17
VFIO devices need to be put in an intermediate P2P quiescent state
18
before being stopped or started by the main callback.
19
20
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
21
Reviewed-by: Cédric Le Goater <clg@redhat.com>
22
Tested-by: YangHang Liu <yanghliu@redhat.com>
23
Signed-off-by: Cédric Le Goater <clg@redhat.com>
18
Signed-off-by: Cédric Le Goater <clg@redhat.com>
24
---
19
---
25
include/sysemu/runstate.h | 4 ++++
20
hw/vfio/igd.c | 27 ++++++++++++++++++++++++++-
26
softmmu/runstate.c | 40 +++++++++++++++++++++++++++++++++++++++
21
1 file changed, 26 insertions(+), 1 deletion(-)
27
2 files changed, 44 insertions(+)
28
22
29
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
23
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
30
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
31
--- a/include/sysemu/runstate.h
25
--- a/hw/vfio/igd.c
32
+++ b/include/sysemu/runstate.h
26
+++ b/hw/vfio/igd.c
33
@@ -XXX,XX +XXX,XX @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
27
@@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
34
void *opaque);
28
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
35
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
36
VMChangeStateHandler *cb, void *opaque, int priority);
37
+VMChangeStateEntry *
38
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
39
+ VMChangeStateHandler *prepare_cb,
40
+ void *opaque, int priority);
41
VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
42
VMChangeStateHandler *cb,
43
void *opaque);
44
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/softmmu/runstate.c
47
+++ b/softmmu/runstate.c
48
@@ -XXX,XX +XXX,XX @@ void qemu_system_vmstop_request(RunState state)
49
}
29
}
50
struct VMChangeStateEntry {
30
51
VMChangeStateHandler *cb;
31
-bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
52
+ VMChangeStateHandler *prepare_cb;
32
+static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
53
void *opaque;
33
{
54
QTAILQ_ENTRY(VMChangeStateEntry) entries;
34
int ret, gen;
55
int priority;
35
uint64_t gms_size;
56
@@ -XXX,XX +XXX,XX @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
36
@@ -XXX,XX +XXX,XX @@ error:
57
*/
37
error_propagate(errp, err);
58
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
38
return false;
59
VMChangeStateHandler *cb, void *opaque, int priority)
39
}
40
+
41
+/*
42
+ * KVMGT/GVT-g vGPU exposes an emulated OpRegion. So far, users have to specify
43
+ * x-igd-opregion=on to enable the access.
44
+ * TODO: Check VID/DID and enable opregion access automatically
45
+ */
46
+static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
60
+{
47
+{
61
+ return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
48
+ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) &&
62
+ priority);
49
+ !vfio_pci_igd_setup_opregion(vdev, errp)) {
50
+ return false;
51
+ }
52
+
53
+ return true;
63
+}
54
+}
64
+
55
+
65
+/**
56
+bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
66
+ * qemu_add_vm_change_state_handler_prio_full:
57
+{
67
+ * @cb: the main callback to invoke
58
+ /* KVMGT/GVT-g vGPU is exposed as mdev */
68
+ * @prepare_cb: a callback to invoke before the main callback
59
+ if (vdev->vbasedev.mdev) {
69
+ * @opaque: user data passed to the callbacks
60
+ return vfio_pci_kvmgt_config_quirk(vdev, errp);
70
+ * @priority: low priorities execute first when the vm runs and the reverse is
61
+ }
71
+ * true when the vm stops
72
+ *
73
+ * Register a main callback function and an optional prepare callback function
74
+ * that are invoked when the vm starts or stops running. The main callback and
75
+ * the prepare callback are called in two separate phases: First all prepare
76
+ * callbacks are called and only then all main callbacks are called. As its
77
+ * name suggests, the prepare callback can be used to do some preparatory work
78
+ * before invoking the main callback.
79
+ *
80
+ * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
81
+ */
82
+VMChangeStateEntry *
83
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
84
+ VMChangeStateHandler *prepare_cb,
85
+ void *opaque, int priority)
86
{
87
VMChangeStateEntry *e;
88
VMChangeStateEntry *other;
89
90
e = g_malloc0(sizeof(*e));
91
e->cb = cb;
92
+ e->prepare_cb = prepare_cb;
93
e->opaque = opaque;
94
e->priority = priority;
95
96
@@ -XXX,XX +XXX,XX @@ void vm_state_notify(bool running, RunState state)
97
trace_vm_state_notify(running, state, RunState_str(state));
98
99
if (running) {
100
+ QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
101
+ if (e->prepare_cb) {
102
+ e->prepare_cb(e->opaque, running, state);
103
+ }
104
+ }
105
+
62
+
106
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
63
+ return vfio_pci_igd_config_quirk(vdev, errp);
107
e->cb(e->opaque, running, state);
64
+}
108
}
109
} else {
110
+ QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
111
+ if (e->prepare_cb) {
112
+ e->prepare_cb(e->opaque, running, state);
113
+ }
114
+ }
115
+
116
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
117
e->cb(e->opaque, running, state);
118
}
119
--
65
--
120
2.41.0
66
2.48.1
121
67
122
68
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
2
2
3
Initialization of mig_stats, compression_counters and VFIO bytes
3
Wire data commonly use BE byte order (including in the existing migration
4
transferred is hard-coded in migration code path and snapshot code path.
4
protocol), use it also for VFIO device state packets.
5
5
6
Make the code cleaner by initializing them in migrate_init().
6
This will allow VFIO multifd device state transfer between hosts with
7
different endianness.
8
Although currently there is no such use case, it's good to have it now
9
for completeness.
7
10
8
Suggested-by: Cédric Le Goater <clg@redhat.com>
11
Reviewed-by: Avihai Horon <avihaih@nvidia.com>
9
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
12
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
13
Link: https://lore.kernel.org/qemu-devel/dcfc04cc1a50655650dbac8398e2742ada84ee39.1741611079.git.maciej.szmigiero@oracle.com
11
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
12
---
15
---
13
migration/migration.c | 14 +++++++-------
16
hw/vfio/migration-multifd.c | 15 ++++++++++-----
14
migration/savevm.c | 3 ---
17
1 file changed, 10 insertions(+), 5 deletions(-)
15
2 files changed, 7 insertions(+), 10 deletions(-)
16
18
17
diff --git a/migration/migration.c b/migration/migration.c
19
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
18
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
19
--- a/migration/migration.c
21
--- a/hw/vfio/migration-multifd.c
20
+++ b/migration/migration.c
22
+++ b/hw/vfio/migration-multifd.c
21
@@ -XXX,XX +XXX,XX @@ void migrate_init(MigrationState *s)
23
@@ -XXX,XX +XXX,XX @@
22
s->iteration_initial_bytes = 0;
24
#include "hw/vfio/vfio-common.h"
23
s->threshold_size = 0;
25
#include "migration/misc.h"
24
s->switchover_acked = false;
26
#include "qapi/error.h"
25
+ /*
27
+#include "qemu/bswap.h"
26
+ * set mig_stats compression_counters memory to zero for a
28
#include "qemu/error-report.h"
27
+ * new migration
29
#include "qemu/lockable.h"
28
+ */
30
#include "qemu/main-loop.h"
29
+ memset(&mig_stats, 0, sizeof(mig_stats));
31
@@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
30
+ memset(&compression_counters, 0, sizeof(compression_counters));
32
return false;
31
+ migration_reset_vfio_bytes_transferred();
32
}
33
34
int migrate_add_blocker_internal(Error *reason, Error **errp)
35
@@ -XXX,XX +XXX,XX @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
36
}
33
}
37
34
38
migrate_init(s);
35
+ packet->version = be32_to_cpu(packet->version);
39
- /*
36
if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
40
- * set mig_stats compression_counters memory to zero for a
37
error_setg(errp, "%s: packet has unknown version %" PRIu32,
41
- * new migration
38
vbasedev->name, packet->version);
42
- */
39
return false;
43
- memset(&mig_stats, 0, sizeof(mig_stats));
44
- memset(&compression_counters, 0, sizeof(compression_counters));
45
- migration_reset_vfio_bytes_transferred();
46
47
return true;
48
}
49
diff --git a/migration/savevm.c b/migration/savevm.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/migration/savevm.c
52
+++ b/migration/savevm.c
53
@@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
54
}
40
}
55
41
56
migrate_init(ms);
42
+ packet->idx = be32_to_cpu(packet->idx);
57
- memset(&mig_stats, 0, sizeof(mig_stats));
43
+ packet->flags = be32_to_cpu(packet->flags);
58
- memset(&compression_counters, 0, sizeof(compression_counters));
44
+
59
- migration_reset_vfio_bytes_transferred();
45
if (packet->idx == UINT32_MAX) {
60
ms->to_dst_file = f;
46
error_setg(errp, "%s: packet index is invalid", vbasedev->name);
61
47
return false;
62
qemu_mutex_unlock_iothread();
48
@@ -XXX,XX +XXX,XX @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
49
50
packet_len = sizeof(*packet) + bioc->usage;
51
packet = g_malloc0(packet_len);
52
- packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
53
- packet->idx = idx;
54
- packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
55
+ packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
56
+ packet->idx = cpu_to_be32(idx);
57
+ packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE);
58
memcpy(&packet->data, bioc->data, bioc->usage);
59
60
if (!multifd_queue_device_state(idstr, instance_id,
61
@@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
62
}
63
64
packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
65
- packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
66
+ packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT);
67
68
for (idx = 0; ; idx++) {
69
ssize_t data_size;
70
@@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
71
break;
72
}
73
74
- packet->idx = idx;
75
+ packet->idx = cpu_to_be32(idx);
76
packet_size = sizeof(*packet) + data_size;
77
78
if (!multifd_queue_device_state(d->idstr, d->instance_id,
63
--
79
--
64
2.41.0
80
2.48.1
65
81
66
82
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
2
3
Both qemu_minrampagesize() and qemu_maxrampagesize() are
4
related to host memory backends, having the following call
5
stack:
6
7
qemu_minrampagesize()
8
-> find_min_backend_pagesize()
9
-> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)
10
11
qemu_maxrampagesize()
12
-> find_max_backend_pagesize()
13
-> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)
14
15
Having TYPE_MEMORY_BACKEND defined in "system/hostmem.h":
16
17
include/system/hostmem.h:23:#define TYPE_MEMORY_BACKEND "memory-backend"
18
19
Move their prototype declaration to "system/hostmem.h".
20
21
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
22
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
23
Reviewed-by: Eric Auger <eric.auger@redhat.com>
24
Message-Id: <20250308230917.18907-7-philmd@linaro.org>
25
Acked-by: David Hildenbrand <david@redhat.com>
26
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-2-philmd@linaro.org
27
Signed-off-by: Cédric Le Goater <clg@redhat.com>
28
---
29
include/exec/ram_addr.h | 3 ---
30
include/system/hostmem.h | 3 +++
31
hw/ppc/spapr_caps.c | 1 +
32
hw/s390x/s390-virtio-ccw.c | 1 +
33
hw/vfio/spapr.c | 1 +
34
5 files changed, 6 insertions(+), 3 deletions(-)
35
36
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
37
index XXXXXXX..XXXXXXX 100644
38
--- a/include/exec/ram_addr.h
39
+++ b/include/exec/ram_addr.h
40
@@ -XXX,XX +XXX,XX @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
41
42
bool ramblock_is_pmem(RAMBlock *rb);
43
44
-long qemu_minrampagesize(void);
45
-long qemu_maxrampagesize(void);
46
-
47
/**
48
* qemu_ram_alloc_from_file,
49
* qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
50
diff --git a/include/system/hostmem.h b/include/system/hostmem.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/include/system/hostmem.h
53
+++ b/include/system/hostmem.h
54
@@ -XXX,XX +XXX,XX @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend);
55
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev);
56
char *host_memory_backend_get_name(HostMemoryBackend *backend);
57
58
+long qemu_minrampagesize(void);
59
+long qemu_maxrampagesize(void);
60
+
61
#endif
62
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/hw/ppc/spapr_caps.c
65
+++ b/hw/ppc/spapr_caps.c
66
@@ -XXX,XX +XXX,XX @@
67
#include "kvm_ppc.h"
68
#include "migration/vmstate.h"
69
#include "system/tcg.h"
70
+#include "system/hostmem.h"
71
72
#include "hw/ppc/spapr.h"
73
74
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/hw/s390x/s390-virtio-ccw.c
77
+++ b/hw/s390x/s390-virtio-ccw.c
78
@@ -XXX,XX +XXX,XX @@
79
#include "hw/s390x/tod.h"
80
#include "system/system.h"
81
#include "system/cpus.h"
82
+#include "system/hostmem.h"
83
#include "target/s390x/kvm/pv.h"
84
#include "migration/blocker.h"
85
#include "qapi/visitor.h"
86
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/hw/vfio/spapr.c
89
+++ b/hw/vfio/spapr.c
90
@@ -XXX,XX +XXX,XX @@
91
#include <linux/kvm.h>
92
#endif
93
#include "system/kvm.h"
94
+#include "system/hostmem.h"
95
#include "exec/address-spaces.h"
96
97
#include "hw/vfio/vfio-common.h"
98
--
99
2.48.1
100
101
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
2
3
<linux/kvm.h> is already included by "system/kvm.h" in the next line.
4
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reviewed-by: Cédric Le Goater <clg@redhat.com>
9
Reviewed-by: Eric Auger <eric.auger@redhat.com>
10
Message-Id: <20250307180337.14811-3-philmd@linaro.org>
11
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-3-philmd@linaro.org
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
---
14
hw/vfio/spapr.c | 3 ---
15
1 file changed, 3 deletions(-)
16
17
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/vfio/spapr.c
20
+++ b/hw/vfio/spapr.c
21
@@ -XXX,XX +XXX,XX @@
22
#include "qemu/osdep.h"
23
#include <sys/ioctl.h>
24
#include <linux/vfio.h>
25
-#ifdef CONFIG_KVM
26
-#include <linux/kvm.h>
27
-#endif
28
#include "system/kvm.h"
29
#include "system/hostmem.h"
30
#include "exec/address-spaces.h"
31
--
32
2.48.1
33
34
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
If a device with enable-migration=on is added and it causes a migration
3
Always include necessary headers explicitly, to avoid build failures
4
blocker, adding the device should fail with a proper error.
4
when refactoring unrelated ones:
5
5
6
This is not the case with multiple device migration blocker when the
6
hw/vfio/common.c:1176:45: error: implicit declaration of function ‘tcg_enabled’;
7
blocker already exists. If the blocker already exists and a device with
7
1176 | tcg_enabled() ? DIRTY_CLIENTS_ALL :
8
enable-migration=on is added which causes a migration blocker, adding
8
| ^~~~~~~~~~~
9
the device will succeed.
10
9
11
Fix it by failing adding the device in such case.
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
11
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
13
Fixes: 8bbcb64a71d8 ("vfio/migration: Make VFIO migration non-experimental")
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
14
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
15
Reviewed-by: Cédric Le Goater <clg@redhat.com>
13
Reviewed-by: Cédric Le Goater <clg@redhat.com>
14
Reviewed-by: Eric Auger <eric.auger@redhat.com>
15
Message-Id: <20250307180337.14811-2-philmd@linaro.org>
16
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-4-philmd@linaro.org
16
Signed-off-by: Cédric Le Goater <clg@redhat.com>
17
Signed-off-by: Cédric Le Goater <clg@redhat.com>
17
---
18
---
18
hw/vfio/common.c | 7 +++++--
19
hw/vfio/common.c | 1 +
19
1 file changed, 5 insertions(+), 2 deletions(-)
20
1 file changed, 1 insertion(+)
20
21
21
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
22
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
22
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/vfio/common.c
24
--- a/hw/vfio/common.c
24
+++ b/hw/vfio/common.c
25
+++ b/hw/vfio/common.c
25
@@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
26
@@ -XXX,XX +XXX,XX @@
26
{
27
#include "migration/misc.h"
27
int ret;
28
#include "migration/blocker.h"
28
29
#include "migration/qemu-file.h"
29
- if (multiple_devices_migration_blocker ||
30
+#include "system/tcg.h"
30
- vfio_multiple_devices_migration_is_supported()) {
31
#include "system/tpm.h"
31
+ if (vfio_multiple_devices_migration_is_supported()) {
32
32
return 0;
33
VFIODeviceList vfio_device_list =
33
}
34
35
@@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
36
return -EINVAL;
37
}
38
39
+ if (multiple_devices_migration_blocker) {
40
+ return 0;
41
+ }
42
+
43
error_setg(&multiple_devices_migration_blocker,
44
"Multiple VFIO devices migration is supported only if all of "
45
"them support P2P migration");
46
--
34
--
47
2.41.0
35
2.48.1
48
36
49
37
diff view generated by jsdifflib
1
From: Joao Martins <joao.m.martins@oracle.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
QEMU computes the DMA logging ranges for two predefined ranges: 32-bit
3
Prefer runtime helpers to get target page size.
4
and 64-bit. In the OVMF case, when the dynamic MMIO window is enabled,
5
QEMU includes in the 64-bit range the RAM regions at the lower part
6
and vfio-pci device RAM regions which are at the top of the address
7
space. This range contains a large gap and the size can be bigger than
8
the dirty tracking HW limits of some devices (MLX5 has a 2^42 limit).
9
4
10
To avoid such large ranges, introduce a new PCI range covering the
5
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
vfio-pci device RAM regions, this only if the addresses are above 4GB
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
to avoid breaking potential SeaBIOS guests.
7
Message-Id: <20250305153929.43687-3-philmd@linaro.org>
13
8
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-5-philmd@linaro.org
14
[ clg: - wrote commit log
15
- fixed overlapping 32-bit and PCI ranges when using SeaBIOS ]
16
17
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
18
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
Fixes: 5255bbf4ec16 ("vfio/common: Add device dirty page tracking start/stop")
20
Signed-off-by: Cédric Le Goater <clg@redhat.com>
9
Signed-off-by: Cédric Le Goater <clg@redhat.com>
21
---
10
---
22
hw/vfio/common.c | 71 +++++++++++++++++++++++++++++++++++++-------
11
hw/vfio/common.c | 8 +++++---
23
hw/vfio/trace-events | 2 +-
12
1 file changed, 5 insertions(+), 3 deletions(-)
24
2 files changed, 61 insertions(+), 12 deletions(-)
25
13
26
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
14
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
27
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/vfio/common.c
16
--- a/hw/vfio/common.c
29
+++ b/hw/vfio/common.c
17
+++ b/hw/vfio/common.c
30
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@
31
32
#include "hw/vfio/vfio-common.h"
33
#include "hw/vfio/vfio.h"
34
+#include "hw/vfio/pci.h"
35
#include "exec/address-spaces.h"
19
#include "exec/address-spaces.h"
36
#include "exec/memory.h"
20
#include "exec/memory.h"
37
#include "exec/ram_addr.h"
21
#include "exec/ram_addr.h"
38
@@ -XXX,XX +XXX,XX @@ typedef struct VFIODirtyRanges {
22
+#include "exec/target_page.h"
39
hwaddr max32;
23
#include "hw/hw.h"
40
hwaddr min64;
24
#include "qemu/error-report.h"
41
hwaddr max64;
25
#include "qemu/main-loop.h"
42
+ hwaddr minpci64;
26
@@ -XXX,XX +XXX,XX @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
43
+ hwaddr maxpci64;
27
MemoryRegionSection *section)
44
} VFIODirtyRanges;
45
46
typedef struct VFIODirtyRangesListener {
47
@@ -XXX,XX +XXX,XX @@ typedef struct VFIODirtyRangesListener {
48
MemoryListener listener;
49
} VFIODirtyRangesListener;
50
51
+static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
52
+ VFIOContainer *container)
53
+{
54
+ VFIOPCIDevice *pcidev;
55
+ VFIODevice *vbasedev;
56
+ VFIOGroup *group;
57
+ Object *owner;
58
+
59
+ owner = memory_region_owner(section->mr);
60
+
61
+ QLIST_FOREACH(group, &container->group_list, container_next) {
62
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
63
+ if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
64
+ continue;
65
+ }
66
+ pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
67
+ if (OBJECT(pcidev) == owner) {
68
+ return true;
69
+ }
70
+ }
71
+ }
72
+
73
+ return false;
74
+}
75
+
76
static void vfio_dirty_tracking_update(MemoryListener *listener,
77
MemoryRegionSection *section)
78
{
28
{
79
@@ -XXX,XX +XXX,XX @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
29
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
80
}
30
+ int target_page_size = qemu_target_page_size();
81
31
VFIORamDiscardListener *vrdl;
82
/*
32
83
- * The address space passed to the dirty tracker is reduced to two ranges:
33
/* Ignore some corner cases not relevant in practice. */
84
- * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
34
- g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
85
+ * The address space passed to the dirty tracker is reduced to three ranges:
35
+ g_assert(QEMU_IS_ALIGNED(section->offset_within_region, target_page_size));
86
+ * one for 32-bit DMA ranges, one for 64-bit DMA ranges and one for the
36
g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
87
+ * PCI 64-bit hole.
37
- TARGET_PAGE_SIZE));
88
+ *
38
- g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
89
* The underlying reports of dirty will query a sub-interval of each of
39
+ target_page_size));
90
* these ranges.
40
+ g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), target_page_size));
91
*
41
92
- * The purpose of the dual range handling is to handle known cases of big
42
vrdl = g_new0(VFIORamDiscardListener, 1);
93
- * holes in the address space, like the x86 AMD 1T hole. The alternative
43
vrdl->bcontainer = bcontainer;
94
- * would be an IOVATree but that has a much bigger runtime overhead and
95
- * unnecessary complexity.
96
+ * The purpose of the three range handling is to handle known cases of big
97
+ * holes in the address space, like the x86 AMD 1T hole, and firmware (like
98
+ * OVMF) which may relocate the pci-hole64 to the end of the address space.
99
+ * The latter would otherwise generate large ranges for tracking, stressing
100
+ * the limits of supported hardware. The pci-hole32 will always be below 4G
101
+ * (overlapping or not) so it doesn't need special handling and is part of
102
+ * the 32-bit range.
103
+ *
104
+ * The alternative would be an IOVATree but that has a much bigger runtime
105
+ * overhead and unnecessary complexity.
106
*/
107
- min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
108
- max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
109
-
110
+ if (vfio_section_is_vfio_pci(section, dirty->container) &&
111
+ iova >= UINT32_MAX) {
112
+ min = &range->minpci64;
113
+ max = &range->maxpci64;
114
+ } else {
115
+ min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
116
+ max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
117
+ }
118
if (*min > iova) {
119
*min = iova;
120
}
121
@@ -XXX,XX +XXX,XX @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
122
memset(&dirty, 0, sizeof(dirty));
123
dirty.ranges.min32 = UINT32_MAX;
124
dirty.ranges.min64 = UINT64_MAX;
125
+ dirty.ranges.minpci64 = UINT64_MAX;
126
dirty.listener = vfio_dirty_tracking_listener;
127
dirty.container = container;
128
129
@@ -XXX,XX +XXX,XX @@ vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
130
* DMA logging uAPI guarantees to support at least a number of ranges that
131
* fits into a single host kernel base page.
132
*/
133
- control->num_ranges = !!tracking->max32 + !!tracking->max64;
134
+ control->num_ranges = !!tracking->max32 + !!tracking->max64 +
135
+ !!tracking->maxpci64;
136
ranges = g_try_new0(struct vfio_device_feature_dma_logging_range,
137
control->num_ranges);
138
if (!ranges) {
139
@@ -XXX,XX +XXX,XX @@ vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
140
if (tracking->max64) {
141
ranges->iova = tracking->min64;
142
ranges->length = (tracking->max64 - tracking->min64) + 1;
143
+ ranges++;
144
+ }
145
+ if (tracking->maxpci64) {
146
+ ranges->iova = tracking->minpci64;
147
+ ranges->length = (tracking->maxpci64 - tracking->minpci64) + 1;
148
}
149
150
trace_vfio_device_dirty_tracking_start(control->num_ranges,
151
tracking->min32, tracking->max32,
152
- tracking->min64, tracking->max64);
153
+ tracking->min64, tracking->max64,
154
+ tracking->minpci64, tracking->maxpci64);
155
156
return feature;
157
}
158
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
159
index XXXXXXX..XXXXXXX 100644
160
--- a/hw/vfio/trace-events
161
+++ b/hw/vfio/trace-events
162
@@ -XXX,XX +XXX,XX @@ vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_wi
163
vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
164
vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
165
vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
166
-vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"]"
167
+vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
168
vfio_disconnect_container(int fd) "close container->fd=%d"
169
vfio_put_group(int fd) "close group->fd=%d"
170
vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
171
--
44
--
172
2.41.0
45
2.48.1
173
46
174
47
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Changing the device state from STOP_COPY to STOP can take time as the
3
Some files don't rely on any target-specific knowledge
4
device may need to free resources and do other operations as part of the
4
and can be compiled once:
5
transition. Currently, this is done in vfio_save_complete_precopy() and
6
therefore it is counted in the migration downtime.
7
5
8
To avoid this, change the device state from STOP_COPY to STOP in
6
- helpers.c
9
vfio_save_cleanup(), which is called after migration has completed and
7
- container-base.c
10
thus is not part of migration downtime.
8
- migration.c (removing unnecessary "exec/ram_addr.h")
9
- migration-multifd.c
10
- cpr.c
11
11
12
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
12
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
13
Tested-by: YangHang Liu <yanghliu@redhat.com>
13
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Reviewed-by: Cédric Le Goater <clg@redhat.com>
16
Reviewed-by: Eric Auger <eric.auger@redhat.com>
17
Message-Id: <20250308230917.18907-4-philmd@linaro.org>
18
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-6-philmd@linaro.org
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
19
Signed-off-by: Cédric Le Goater <clg@redhat.com>
15
---
20
---
16
hw/vfio/migration.c | 19 +++++++++++++------
21
hw/vfio/migration.c | 1 -
17
1 file changed, 13 insertions(+), 6 deletions(-)
22
hw/vfio/meson.build | 13 ++++++++-----
23
2 files changed, 8 insertions(+), 6 deletions(-)
18
24
19
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
25
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
20
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/vfio/migration.c
27
--- a/hw/vfio/migration.c
22
+++ b/hw/vfio/migration.c
28
+++ b/hw/vfio/migration.c
23
@@ -XXX,XX +XXX,XX @@ static void vfio_save_cleanup(void *opaque)
29
@@ -XXX,XX +XXX,XX @@
24
VFIODevice *vbasedev = opaque;
30
#include "qapi/error.h"
25
VFIOMigration *migration = vbasedev->migration;
31
#include "qapi/qapi-events-vfio.h"
26
32
#include "exec/ramlist.h"
27
+ /*
33
-#include "exec/ram_addr.h"
28
+ * Changing device state from STOP_COPY to STOP can take time. Do it here,
34
#include "pci.h"
29
+ * after migration has completed, so it won't increase downtime.
35
#include "trace.h"
30
+ */
36
#include "hw/hw.h"
31
+ if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
37
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
32
+ /*
38
index XXXXXXX..XXXXXXX 100644
33
+ * If setting the device in STOP state fails, the device should be
39
--- a/hw/vfio/meson.build
34
+ * reset. To do so, use ERROR state as a recover state.
40
+++ b/hw/vfio/meson.build
35
+ */
41
@@ -XXX,XX +XXX,XX @@
36
+ vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
42
vfio_ss = ss.source_set()
37
+ VFIO_DEVICE_STATE_ERROR);
43
vfio_ss.add(files(
38
+ }
44
- 'helpers.c',
45
'common.c',
46
- 'container-base.c',
47
'container.c',
48
- 'migration.c',
49
- 'migration-multifd.c',
50
- 'cpr.c',
51
))
52
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
53
vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
54
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c'))
55
vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c'))
56
57
specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss)
39
+
58
+
40
g_free(migration->data_buffer);
59
+system_ss.add(when: 'CONFIG_VFIO', if_true: files(
41
migration->data_buffer = NULL;
60
+ 'helpers.c',
42
migration->precopy_init_size = 0;
61
+ 'container-base.c',
43
@@ -XXX,XX +XXX,XX @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
62
+ 'migration.c',
44
return ret;
63
+ 'migration-multifd.c',
45
}
64
+ 'cpr.c',
46
65
+))
47
- /*
48
- * If setting the device in STOP state fails, the device should be reset.
49
- * To do so, use ERROR state as a recover state.
50
- */
51
- ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
52
- VFIO_DEVICE_STATE_ERROR);
53
trace_vfio_save_complete_precopy(vbasedev->name, ret);
54
55
return ret;
56
--
66
--
57
2.41.0
67
2.48.1
58
68
59
69
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
2
3
These files depend on the VFIO symbol in their Kconfig
4
definition. They don't rely on target specific definitions,
5
move them to system_ss[] to build them once.
6
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
11
Reviewed-by: Eric Auger <eric.auger@redhat.com>
12
Message-Id: <20250308230917.18907-5-philmd@linaro.org>
13
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-7-philmd@linaro.org
14
Signed-off-by: Cédric Le Goater <clg@redhat.com>
15
---
16
hw/vfio/meson.build | 4 ++--
17
1 file changed, 2 insertions(+), 2 deletions(-)
18
19
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/vfio/meson.build
22
+++ b/hw/vfio/meson.build
23
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
24
))
25
vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
26
vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
27
-vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
28
-vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
29
vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c'))
30
vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c'))
31
32
specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss)
33
34
+system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
35
+system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
36
system_ss.add(when: 'CONFIG_VFIO', if_true: files(
37
'helpers.c',
38
'container-base.c',
39
--
40
2.48.1
41
42
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
2
3
Removing the unused "exec/ram_addr.h" header allows compiling
4
iommufd.c once for all targets.
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Message-Id: <20250308230917.18907-6-philmd@linaro.org>
12
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-8-philmd@linaro.org
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
15
hw/vfio/iommufd.c | 1 -
16
hw/vfio/meson.build | 6 +++---
17
2 files changed, 3 insertions(+), 4 deletions(-)
18
19
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/vfio/iommufd.c
22
+++ b/hw/vfio/iommufd.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "qemu/cutils.h"
25
#include "qemu/chardev_open.h"
26
#include "pci.h"
27
-#include "exec/ram_addr.h"
28
29
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
30
ram_addr_t size, void *vaddr, bool readonly)
31
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
32
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/vfio/meson.build
34
+++ b/hw/vfio/meson.build
35
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(files(
36
'container.c',
37
))
38
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
39
-vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
40
- 'iommufd.c',
41
-))
42
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
43
'display.c',
44
'pci-quirks.c',
45
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
46
'migration-multifd.c',
47
'cpr.c',
48
))
49
+system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
50
+ 'iommufd.c',
51
+))
52
--
53
2.48.1
54
55
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
2
3
display.c doesn't rely on target specific definitions,
4
move it to system_ss[] to build it once.
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Cédric Le Goater <clg@redhat.com>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Message-Id: <20250308230917.18907-8-philmd@linaro.org>
12
Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-9-philmd@linaro.org
13
Signed-off-by: Cédric Le Goater <clg@redhat.com>
14
---
15
hw/vfio/meson.build | 4 +++-
16
1 file changed, 3 insertions(+), 1 deletion(-)
17
18
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/vfio/meson.build
21
+++ b/hw/vfio/meson.build
22
@@ -XXX,XX +XXX,XX @@ vfio_ss.add(files(
23
))
24
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
25
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
26
- 'display.c',
27
'pci-quirks.c',
28
'pci.c',
29
))
30
@@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
31
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
32
'iommufd.c',
33
))
34
+system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
35
+ 'display.c',
36
+))
37
--
38
2.48.1
39
40
diff view generated by jsdifflib
1
From: Avihai Horon <avihaih@nvidia.com>
1
From: Vasilis Liaskovitis <vliaskovitis@suse.com>
2
2
3
Now that P2P support has been added to VFIO migration, allow migration
3
The ATI BAR4 quirk is targeting an ioport BAR. Older devices may
4
of multiple devices if all of them support P2P migration.
4
have a BAR4 which is not an ioport, causing a segfault here. Test
5
the BAR type to skip these devices.
5
6
6
Single device migration is allowed regardless of P2P migration support.
7
Similar to
8
"8f419c5b: vfio/pci-quirks: Exclude non-ioport BAR from NVIDIA quirk"
7
9
8
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
10
Untested, as I don't have the card to test.
9
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
11
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
12
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2856
11
Tested-by: YangHang Liu <yanghliu@redhat.com>
13
Signed-off-by: Vasilis Liaskovitis <vliaskovitis@suse.com>
14
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
15
Link: https://lore.kernel.org/qemu-devel/20250310235833.41026-1-vliaskovitis@suse.com
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
16
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
---
17
---
14
hw/vfio/common.c | 26 ++++++++++++++++++--------
18
hw/vfio/pci-quirks.c | 2 +-
15
1 file changed, 18 insertions(+), 8 deletions(-)
19
1 file changed, 1 insertion(+), 1 deletion(-)
16
20
17
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
21
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
18
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/vfio/common.c
23
--- a/hw/vfio/pci-quirks.c
20
+++ b/hw/vfio/common.c
24
+++ b/hw/vfio/pci-quirks.c
21
@@ -XXX,XX +XXX,XX @@ bool vfio_mig_active(void)
25
@@ -XXX,XX +XXX,XX @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
22
26
23
static Error *multiple_devices_migration_blocker;
27
/* This windows doesn't seem to be used except by legacy VGA code */
24
28
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
25
-static unsigned int vfio_migratable_device_num(void)
29
- !vdev->vga || nr != 4) {
26
+/*
30
+ !vdev->vga || nr != 4 || !vdev->bars[4].ioport) {
27
+ * Multiple devices migration is allowed only if all devices support P2P
28
+ * migration. Single device migration is allowed regardless of P2P migration
29
+ * support.
30
+ */
31
+static bool vfio_multiple_devices_migration_is_supported(void)
32
{
33
VFIOGroup *group;
34
VFIODevice *vbasedev;
35
unsigned int device_num = 0;
36
+ bool all_support_p2p = true;
37
38
QLIST_FOREACH(group, &vfio_group_list, next) {
39
QLIST_FOREACH(vbasedev, &group->device_list, next) {
40
if (vbasedev->migration) {
41
device_num++;
42
+
43
+ if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) {
44
+ all_support_p2p = false;
45
+ }
46
}
47
}
48
}
49
50
- return device_num;
51
+ return all_support_p2p || device_num <= 1;
52
}
53
54
int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
55
@@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
56
int ret;
57
58
if (multiple_devices_migration_blocker ||
59
- vfio_migratable_device_num() <= 1) {
60
+ vfio_multiple_devices_migration_is_supported()) {
61
return 0;
62
}
63
64
if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
65
- error_setg(errp, "Migration is currently not supported with multiple "
66
- "VFIO devices");
67
+ error_setg(errp, "Multiple VFIO devices migration is supported only if "
68
+ "all of them support P2P migration");
69
return -EINVAL;
70
}
71
72
error_setg(&multiple_devices_migration_blocker,
73
- "Migration is currently not supported with multiple "
74
- "VFIO devices");
75
+ "Multiple VFIO devices migration is supported only if all of "
76
+ "them support P2P migration");
77
ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
78
if (ret < 0) {
79
error_free(multiple_devices_migration_blocker);
80
@@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
81
void vfio_unblock_multiple_devices_migration(void)
82
{
83
if (!multiple_devices_migration_blocker ||
84
- vfio_migratable_device_num() > 1) {
85
+ !vfio_multiple_devices_migration_is_supported()) {
86
return;
31
return;
87
}
32
}
88
33
89
--
34
--
90
2.41.0
35
2.48.1
91
36
92
37
diff view generated by jsdifflib
1
From: Joao Martins <joao.m.martins@oracle.com>
1
From: Joao Martins <joao.m.martins@oracle.com>
2
2
3
Move the PRE_COPY and RUNNING state checks to helper functions.
3
The intent behind the x-device-dirty-page-tracking option is twofold:
4
4
5
This is in preparation for adding P2P VFIO migration support, where
5
1) development/testing in the presence of VFs with VF dirty page tracking
6
these helpers will also test for PRE_COPY_P2P and RUNNING_P2P states.
6
7
2) deliberately choosing platform dirty tracker over the VF one.
8
9
Item 2) scenario is useful when VF dirty tracker is not as fast as
10
IOMMU, or there are some limitations around it (e.g. number of them is
11
limited; aggregated address space under tracking is limited),
12
efficiency/scalability (e.g. 1 pagetable in IOMMU dirty tracker to scan
13
vs N VFs) or just troubleshooting. Given item 2, it is not restricted to
14
debugging, hence drop the debug parenthesis from the option description.
7
15
8
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
16
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
9
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
10
Reviewed-by: Cédric Le Goater <clg@redhat.com>
17
Reviewed-by: Cédric Le Goater <clg@redhat.com>
11
Tested-by: YangHang Liu <yanghliu@redhat.com>
18
Link: https://lore.kernel.org/qemu-devel/20250311174807.79825-1-joao.m.martins@oracle.com
19
[ clg: Fixed subject spelling ]
12
Signed-off-by: Cédric Le Goater <clg@redhat.com>
20
Signed-off-by: Cédric Le Goater <clg@redhat.com>
13
---
21
---
14
include/hw/vfio/vfio-common.h | 2 ++
22
hw/vfio/pci.c | 2 +-
15
hw/vfio/common.c | 22 ++++++++++++++++++----
23
1 file changed, 1 insertion(+), 1 deletion(-)
16
hw/vfio/migration.c | 10 ++++------
17
3 files changed, 24 insertions(+), 10 deletions(-)
18
24
19
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
25
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
20
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
21
--- a/include/hw/vfio/vfio-common.h
27
--- a/hw/vfio/pci.c
22
+++ b/include/hw/vfio/vfio-common.h
28
+++ b/hw/vfio/pci.c
23
@@ -XXX,XX +XXX,XX @@ void vfio_unblock_multiple_devices_migration(void);
29
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
24
bool vfio_viommu_preset(VFIODevice *vbasedev);
30
object_class_property_set_description(klass, /* 9.1 */
25
int64_t vfio_mig_bytes_transferred(void);
31
"x-device-dirty-page-tracking",
26
void vfio_reset_bytes_transferred(void);
32
"Disable device dirty page tracking and use "
27
+bool vfio_device_state_is_running(VFIODevice *vbasedev);
33
- "container-based dirty page tracking (DEBUG)");
28
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
34
+ "container-based dirty page tracking");
29
35
object_class_property_set_description(klass, /* 9.1 */
30
#ifdef CONFIG_LINUX
36
"migration-events",
31
int vfio_get_region_info(VFIODevice *vbasedev, int index,
37
"Emit VFIO migration QAPI event when a VFIO device "
32
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/vfio/common.c
35
+++ b/hw/vfio/common.c
36
@@ -XXX,XX +XXX,XX @@ static void vfio_set_migration_error(int err)
37
}
38
}
39
40
+bool vfio_device_state_is_running(VFIODevice *vbasedev)
41
+{
42
+ VFIOMigration *migration = vbasedev->migration;
43
+
44
+ return migration->device_state == VFIO_DEVICE_STATE_RUNNING;
45
+}
46
+
47
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
48
+{
49
+ VFIOMigration *migration = vbasedev->migration;
50
+
51
+ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
52
+}
53
+
54
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
55
{
56
VFIOGroup *group;
57
@@ -XXX,XX +XXX,XX @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
58
}
59
60
if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
61
- (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
62
- migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) {
63
+ (vfio_device_state_is_running(vbasedev) ||
64
+ vfio_device_state_is_precopy(vbasedev))) {
65
return false;
66
}
67
}
68
@@ -XXX,XX +XXX,XX @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
69
return false;
70
}
71
72
- if (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
73
- migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
74
+ if (vfio_device_state_is_running(vbasedev) ||
75
+ vfio_device_state_is_precopy(vbasedev)) {
76
continue;
77
} else {
78
return false;
79
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/hw/vfio/migration.c
82
+++ b/hw/vfio/migration.c
83
@@ -XXX,XX +XXX,XX @@ static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy,
84
VFIODevice *vbasedev = opaque;
85
VFIOMigration *migration = vbasedev->migration;
86
87
- if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) {
88
+ if (!vfio_device_state_is_precopy(vbasedev)) {
89
return;
90
}
91
92
@@ -XXX,XX +XXX,XX @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
93
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
94
*must_precopy += stop_copy_size;
95
96
- if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
97
+ if (vfio_device_state_is_precopy(vbasedev)) {
98
vfio_query_precopy_size(migration);
99
100
*must_precopy +=
101
@@ -XXX,XX +XXX,XX @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
102
static bool vfio_is_active_iterate(void *opaque)
103
{
104
VFIODevice *vbasedev = opaque;
105
- VFIOMigration *migration = vbasedev->migration;
106
107
- return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
108
+ return vfio_device_state_is_precopy(vbasedev);
109
}
110
111
static int vfio_save_iterate(QEMUFile *f, void *opaque)
112
@@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = {
113
static void vfio_vmstate_change(void *opaque, bool running, RunState state)
114
{
115
VFIODevice *vbasedev = opaque;
116
- VFIOMigration *migration = vbasedev->migration;
117
enum vfio_device_mig_state new_state;
118
int ret;
119
120
@@ -XXX,XX +XXX,XX @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
121
new_state = VFIO_DEVICE_STATE_RUNNING;
122
} else {
123
new_state =
124
- (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY &&
125
+ (vfio_device_state_is_precopy(vbasedev) &&
126
(state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ?
127
VFIO_DEVICE_STATE_STOP_COPY :
128
VFIO_DEVICE_STATE_STOP;
129
--
38
--
130
2.41.0
39
2.48.1
131
40
132
41
diff view generated by jsdifflib