1 | The following changes since commit c5ea91da443b458352c1b629b490ee6631775cb4: | 1 | The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32: |
---|---|---|---|
2 | 2 | ||
3 | Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2023-09-08 10:06:25 -0400) | 3 | Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/legoater/qemu/ tags/pull-vfio-20230911 | 7 | https://github.com/legoater/qemu/ tags/pull-vfio-20250311 |
8 | 8 | ||
9 | for you to fetch changes up to a31fe5daeaa230556145bfc04af1bd4e68f377fa: | 9 | for you to fetch changes up to 4d9607481560e6c8e1508a0aafe94f86a0503c8c: |
10 | 10 | ||
11 | vfio/common: Separate vfio-pci ranges (2023-09-11 08:34:06 +0200) | 11 | vfio/pci: Drop debug commentary from x-device-dirty-page-tracking (2025-03-11 19:04:58 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | vfio queue: | 14 | vfio queue: |
15 | 15 | ||
16 | * Small downtime optimisation for VFIO migration | 16 | * Fixed endianness of VFIO device state packets |
17 | * P2P support for VFIO migration | 17 | * Improved IGD passthrough support with legacy mode |
18 | * Introduction of a save_prepare() handler to fail VFIO migration | 18 | * Improved build |
19 | * Fix on DMA logging ranges calculation for OVMF enabling dynamic window | 19 | * Added support for old AMD GPUs (x550) |
20 | * Updated property documentation | ||
20 | 21 | ||
21 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
22 | Avihai Horon (11): | 23 | Joao Martins (1): |
23 | vfio/migration: Move from STOP_COPY to STOP in vfio_save_cleanup() | 24 | vfio/pci: Drop debug commentary from x-device-dirty-page-tracking |
24 | sysemu: Add prepare callback to struct VMChangeStateEntry | ||
25 | qdev: Add qdev_add_vm_change_state_handler_full() | ||
26 | vfio/migration: Add P2P support for VFIO migration | ||
27 | vfio/migration: Allow migration of multiple P2P supporting devices | ||
28 | migration: Add migration prefix to functions in target.c | ||
29 | vfio/migration: Fail adding device with enable-migration=on and existing blocker | ||
30 | migration: Move more initializations to migrate_init() | ||
31 | migration: Add .save_prepare() handler to struct SaveVMHandlers | ||
32 | vfio/migration: Block VFIO migration with postcopy migration | ||
33 | vfio/migration: Block VFIO migration with background snapshot | ||
34 | 25 | ||
35 | Joao Martins (2): | 26 | Maciej S. Szmigiero (1): |
36 | vfio/migration: Refactor PRE_COPY and RUNNING state checks | 27 | vfio/migration: Use BE byte order for device state wire packets |
37 | vfio/common: Separate vfio-pci ranges | ||
38 | 28 | ||
39 | docs/devel/vfio-migration.rst | 93 +++++++++++++++++----------- | 29 | Philippe Mathieu-Daudé (8): |
40 | include/hw/vfio/vfio-common.h | 2 + | 30 | system: Declare qemu_[min/max]rampagesize() in 'system/hostmem.h' |
41 | include/migration/register.h | 5 ++ | 31 | hw/vfio/spapr: Do not include <linux/kvm.h> |
42 | include/sysemu/runstate.h | 7 +++ | 32 | hw/vfio/common: Include missing 'system/tcg.h' header |
43 | migration/migration.h | 6 +- | 33 | hw/vfio/common: Get target page size using runtime helpers |
44 | migration/savevm.h | 1 + | 34 | hw/vfio: Compile some common objects once |
45 | hw/core/vm-change-state-handler.c | 14 ++++- | 35 | hw/vfio: Compile more objects once |
46 | hw/vfio/common.c | 126 ++++++++++++++++++++++++++++++-------- | 36 | hw/vfio: Compile iommufd.c once |
47 | hw/vfio/migration.c | 106 +++++++++++++++++++++++++++----- | 37 | hw/vfio: Compile display.c once |
48 | migration/migration.c | 33 ++++++---- | 38 | |
49 | migration/savevm.c | 32 ++++++++-- | 39 | Tomita Moeko (10): |
50 | migration/target.c | 8 +-- | 40 | vfio/igd: Remove GTT write quirk in IO BAR 4 |
51 | softmmu/runstate.c | 40 ++++++++++++ | 41 | vfio/igd: Do not include GTT stolen size in etc/igd-bdsm-size |
52 | hw/vfio/trace-events | 3 +- | 42 | vfio/igd: Consolidate OpRegion initialization into a single function |
53 | 14 files changed, 377 insertions(+), 99 deletions(-) | 43 | vfio/igd: Move LPC bridge initialization to a separate function |
44 | vfio/pci: Add placeholder for device-specific config space quirks | ||
45 | vfio/igd: Refactor vfio_probe_igd_bar4_quirk into pci config quirk | ||
46 | vfio/igd: Decouple common quirks from legacy mode | ||
47 | vfio/igd: Handle x-igd-opregion option in config quirk | ||
48 | vfio/igd: Introduce x-igd-lpc option for LPC bridge ID quirk | ||
49 | vfio/igd: Fix broken KVMGT OpRegion support | ||
50 | |||
51 | Vasilis Liaskovitis (1): | ||
52 | vfio/pci-quirks: Exclude non-ioport BAR from ATI quirk | ||
53 | |||
54 | hw/vfio/pci.h | 11 +- | ||
55 | include/exec/ram_addr.h | 3 - | ||
56 | include/system/hostmem.h | 3 + | ||
57 | hw/ppc/spapr_caps.c | 1 + | ||
58 | hw/s390x/s390-virtio-ccw.c | 1 + | ||
59 | hw/vfio/common.c | 9 +- | ||
60 | hw/vfio/igd.c | 529 +++++++++++++++++++------------------------- | ||
61 | hw/vfio/iommufd.c | 1 - | ||
62 | hw/vfio/migration-multifd.c | 15 +- | ||
63 | hw/vfio/migration.c | 1 - | ||
64 | hw/vfio/pci-quirks.c | 53 +---- | ||
65 | hw/vfio/pci.c | 35 +-- | ||
66 | hw/vfio/spapr.c | 4 +- | ||
67 | hw/vfio/meson.build | 27 ++- | ||
68 | 14 files changed, 288 insertions(+), 405 deletions(-) | ||
69 | |||
70 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> | |
2 | |||
3 | The IO BAR4 of IGD devices contains a pair of 32-bit address/data | ||
4 | registers, MMIO_Index (0x0) and MMIO_Data (0x4), which provide access | ||
5 | to the MMIO BAR0 (GTTMMADR) from IO space. These registers are probably | ||
6 | only used by the VBIOS, and are not documented by Intel. The observed | ||
7 | layout of MMIO_Index register is: | ||
8 | 31 2 1 0 | ||
9 | +-------------------------------------------------------------------+ | ||
10 | | Offset | Rsvd | Sel | | ||
11 | +-------------------------------------------------------------------+ | ||
12 | - Offset: Byte offset in specified region, 4-byte aligned. | ||
13 | - Sel: Region selector | ||
14 | 0: MMIO register region (first half of MMIO BAR0) | ||
15 | 1: GTT region (second half of MMIO BAR0). Pre Gen11 only. | ||
16 | |||
17 | Currently, QEMU implements a quirk that adjusts the guest Data Stolen | ||
18 | Memory (DSM) region address to be (addr - host BDSM + guest BDSM) when | ||
19 | programming GTT entries via IO BAR4, assuming guest still programs GTT | ||
20 | with host DSM address, which is not the case. Guest's BDSM register is | ||
21 | emulated and initialized to 0 at startup by QEMU, then SeaBIOS programs | ||
22 | its value[1]. As a result, the addresses programmed to GTT entries by VBIOS | ||
23 | running in guest are valid GPAs, and this unnecessary adjustment brings | ||
24 | inconsistency. | ||
25 | |||
26 | [1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332 | ||
27 | |||
28 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | ||
29 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
30 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
31 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
32 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-2-tomitamoeko@gmail.com | ||
33 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
34 | --- | ||
35 | hw/vfio/igd.c | 191 +------------------------------------------------- | ||
36 | 1 file changed, 1 insertion(+), 190 deletions(-) | ||
37 | |||
38 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/hw/vfio/igd.c | ||
41 | +++ b/hw/vfio/igd.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev) | ||
43 | return -1; | ||
44 | } | ||
45 | |||
46 | -typedef struct VFIOIGDQuirk { | ||
47 | - struct VFIOPCIDevice *vdev; | ||
48 | - uint32_t index; | ||
49 | - uint64_t bdsm; | ||
50 | -} VFIOIGDQuirk; | ||
51 | - | ||
52 | #define IGD_GMCH 0x50 /* Graphics Control Register */ | ||
53 | #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ | ||
54 | #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ | ||
55 | @@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, | ||
56 | return ret; | ||
57 | } | ||
58 | |||
59 | -/* | ||
60 | - * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE | ||
61 | - * entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore | ||
62 | - * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index | ||
63 | - * for programming the GTT. | ||
64 | - * | ||
65 | - * See linux:include/drm/i915_drm.h for shift and mask values. | ||
66 | - */ | ||
67 | -static int vfio_igd_gtt_max(VFIOPCIDevice *vdev) | ||
68 | -{ | ||
69 | - uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch)); | ||
70 | - int gen = igd_gen(vdev); | ||
71 | - uint64_t ggms_size = igd_gtt_memory_size(gen, gmch); | ||
72 | - | ||
73 | - return (ggms_size / (4 * KiB)) * (gen < 8 ? 4 : 8); | ||
74 | -} | ||
75 | - | ||
76 | -/* | ||
77 | - * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes. | ||
78 | - * Somehow the host stolen memory range is used for this, but how the ROM gets | ||
79 | - * it is a mystery, perhaps it's hardcoded into the ROM. Thankfully though, it | ||
80 | - * reprograms the GTT through the IOBAR where we can trap it and transpose the | ||
81 | - * programming to the VM allocated buffer. That buffer gets reserved by the VM | ||
82 | - * firmware via the fw_cfg entry added below. Here we're just monitoring the | ||
83 | - * IOBAR address and data registers to detect a write sequence targeting the | ||
84 | - * GTTADR. This code is developed by observed behavior and doesn't have a | ||
85 | - * direct spec reference, unfortunately. | ||
86 | - */ | ||
87 | -static uint64_t vfio_igd_quirk_data_read(void *opaque, | ||
88 | - hwaddr addr, unsigned size) | ||
89 | -{ | ||
90 | - VFIOIGDQuirk *igd = opaque; | ||
91 | - VFIOPCIDevice *vdev = igd->vdev; | ||
92 | - | ||
93 | - igd->index = ~0; | ||
94 | - | ||
95 | - return vfio_region_read(&vdev->bars[4].region, addr + 4, size); | ||
96 | -} | ||
97 | - | ||
98 | -static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr, | ||
99 | - uint64_t data, unsigned size) | ||
100 | -{ | ||
101 | - VFIOIGDQuirk *igd = opaque; | ||
102 | - VFIOPCIDevice *vdev = igd->vdev; | ||
103 | - uint64_t val = data; | ||
104 | - int gen = igd_gen(vdev); | ||
105 | - | ||
106 | - /* | ||
107 | - * Programming the GGMS starts at index 0x1 and uses every 4th index (ie. | ||
108 | - * 0x1, 0x5, 0x9, 0xd,...). For pre-Gen8 each 4-byte write is a whole PTE | ||
109 | - * entry, with 0th bit enable set. For Gen8 and up, PTEs are 64bit, so | ||
110 | - * entries 0x5 & 0xd are the high dword, in our case zero. Each PTE points | ||
111 | - * to a 4k page, which we translate to a page from the VM allocated region, | ||
112 | - * pointed to by the BDSM register. If this is not set, we fail. | ||
113 | - * | ||
114 | - * We trap writes to the full configured GTT size, but we typically only | ||
115 | - * see the vBIOS writing up to (nearly) the 1MB barrier. In fact it often | ||
116 | - * seems to miss the last entry for an even 1MB GTT. Doing a gratuitous | ||
117 | - * write of that last entry does work, but is hopefully unnecessary since | ||
118 | - * we clear the previous GTT on initialization. | ||
119 | - */ | ||
120 | - if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) { | ||
121 | - if (gen < 8 || (igd->index % 8 == 1)) { | ||
122 | - uint64_t base; | ||
123 | - | ||
124 | - if (gen < 11) { | ||
125 | - base = pci_get_long(vdev->pdev.config + IGD_BDSM); | ||
126 | - } else { | ||
127 | - base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11); | ||
128 | - } | ||
129 | - if (!base) { | ||
130 | - hw_error("vfio-igd: Guest attempted to program IGD GTT before " | ||
131 | - "BIOS reserved stolen memory. Unsupported BIOS?"); | ||
132 | - } | ||
133 | - | ||
134 | - val = data - igd->bdsm + base; | ||
135 | - } else { | ||
136 | - val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */ | ||
137 | - } | ||
138 | - | ||
139 | - trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name, | ||
140 | - igd->index, data, val); | ||
141 | - } | ||
142 | - | ||
143 | - vfio_region_write(&vdev->bars[4].region, addr + 4, val, size); | ||
144 | - | ||
145 | - igd->index = ~0; | ||
146 | -} | ||
147 | - | ||
148 | -static const MemoryRegionOps vfio_igd_data_quirk = { | ||
149 | - .read = vfio_igd_quirk_data_read, | ||
150 | - .write = vfio_igd_quirk_data_write, | ||
151 | - .endianness = DEVICE_LITTLE_ENDIAN, | ||
152 | -}; | ||
153 | - | ||
154 | -static uint64_t vfio_igd_quirk_index_read(void *opaque, | ||
155 | - hwaddr addr, unsigned size) | ||
156 | -{ | ||
157 | - VFIOIGDQuirk *igd = opaque; | ||
158 | - VFIOPCIDevice *vdev = igd->vdev; | ||
159 | - | ||
160 | - igd->index = ~0; | ||
161 | - | ||
162 | - return vfio_region_read(&vdev->bars[4].region, addr, size); | ||
163 | -} | ||
164 | - | ||
165 | -static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr, | ||
166 | - uint64_t data, unsigned size) | ||
167 | -{ | ||
168 | - VFIOIGDQuirk *igd = opaque; | ||
169 | - VFIOPCIDevice *vdev = igd->vdev; | ||
170 | - | ||
171 | - igd->index = data; | ||
172 | - | ||
173 | - vfio_region_write(&vdev->bars[4].region, addr, data, size); | ||
174 | -} | ||
175 | - | ||
176 | -static const MemoryRegionOps vfio_igd_index_quirk = { | ||
177 | - .read = vfio_igd_quirk_index_read, | ||
178 | - .write = vfio_igd_quirk_index_write, | ||
179 | - .endianness = DEVICE_LITTLE_ENDIAN, | ||
180 | -}; | ||
181 | - | ||
182 | #define IGD_GGC_MMIO_OFFSET 0x108040 | ||
183 | #define IGD_BDSM_MMIO_OFFSET 0x1080C0 | ||
184 | |||
185 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
186 | g_autofree struct vfio_region_info *opregion = NULL; | ||
187 | g_autofree struct vfio_region_info *host = NULL; | ||
188 | g_autofree struct vfio_region_info *lpc = NULL; | ||
189 | - VFIOQuirk *quirk; | ||
190 | - VFIOIGDQuirk *igd; | ||
191 | PCIDevice *lpc_bridge; | ||
192 | - int i, ret, gen; | ||
193 | + int ret, gen; | ||
194 | uint64_t ggms_size, gms_size; | ||
195 | uint64_t *bdsm_size; | ||
196 | uint32_t gmch; | ||
197 | - uint16_t cmd_orig, cmd; | ||
198 | Error *err = NULL; | ||
199 | |||
200 | /* | ||
201 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
202 | return; | ||
203 | } | ||
204 | |||
205 | - /* Setup our quirk to munge GTT addresses to the VM allocated buffer */ | ||
206 | - quirk = vfio_quirk_alloc(2); | ||
207 | - igd = quirk->data = g_malloc0(sizeof(*igd)); | ||
208 | - igd->vdev = vdev; | ||
209 | - igd->index = ~0; | ||
210 | - if (gen < 11) { | ||
211 | - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4); | ||
212 | - } else { | ||
213 | - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4); | ||
214 | - igd->bdsm |= | ||
215 | - (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32; | ||
216 | - } | ||
217 | - igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */ | ||
218 | - | ||
219 | - memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk, | ||
220 | - igd, "vfio-igd-index-quirk", 4); | ||
221 | - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, | ||
222 | - 0, &quirk->mem[0], 1); | ||
223 | - | ||
224 | - memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk, | ||
225 | - igd, "vfio-igd-data-quirk", 4); | ||
226 | - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, | ||
227 | - 4, &quirk->mem[1], 1); | ||
228 | - | ||
229 | - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); | ||
230 | - | ||
231 | /* | ||
232 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
233 | * 32MiB. This option should only be used when the desired size cannot be | ||
234 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
235 | pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); | ||
236 | } | ||
237 | |||
238 | - /* | ||
239 | - * This IOBAR gives us access to GTTADR, which allows us to write to | ||
240 | - * the GTT itself. So let's go ahead and write zero to all the GTT | ||
241 | - * entries to avoid spurious DMA faults. Be sure I/O access is enabled | ||
242 | - * before talking to the device. | ||
243 | - */ | ||
244 | - if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig), | ||
245 | - vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) { | ||
246 | - error_report("IGD device %s - failed to read PCI command register", | ||
247 | - vdev->vbasedev.name); | ||
248 | - } | ||
249 | - | ||
250 | - cmd = cmd_orig | PCI_COMMAND_IO; | ||
251 | - | ||
252 | - if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd), | ||
253 | - vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) { | ||
254 | - error_report("IGD device %s - failed to write PCI command register", | ||
255 | - vdev->vbasedev.name); | ||
256 | - } | ||
257 | - | ||
258 | - for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) { | ||
259 | - vfio_region_write(&vdev->bars[4].region, 0, i, 4); | ||
260 | - vfio_region_write(&vdev->bars[4].region, 4, 0, 4); | ||
261 | - } | ||
262 | - | ||
263 | - if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig), | ||
264 | - vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) { | ||
265 | - error_report("IGD device %s - failed to restore PCI command register", | ||
266 | - vdev->vbasedev.name); | ||
267 | - } | ||
268 | - | ||
269 | trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, | ||
270 | (ggms_size + gms_size) / MiB); | ||
271 | } | ||
272 | -- | ||
273 | 2.48.1 | ||
274 | |||
275 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Add qdev_add_vm_change_state_handler_full() variant that allows setting | 3 | Though GTT Stolen Memory (GSM) is right below Data Stolen Memory (DSM) |
4 | a prepare callback in addition to the main callback. | 4 | in host address space, direct access to GSM is prohibited, and it is |
5 | not mapped to guest address space. Both host and guest access GSM | ||
6 | indirectly through the second half of MMIO BAR0 (GTTMMADR). | ||
5 | 7 | ||
6 | This will facilitate adding P2P support for VFIO migration in the | 8 | Guest firmware only needs to reserve a memory region for DSM and program |
7 | following patches. | 9 | the BDSM register with the base address of that region, that's actually |
10 | what both SeaBIOS[1] and IgdAssignmentDxe do now. | ||
8 | 11 | ||
9 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 12 | [1] https://gitlab.com/qemu-project/seabios/-/blob/1.12-stable/src/fw/pciinit.c#L319-332 |
10 | Signed-off-by: Joao Martins <joao.m.martins@oracle.com> | 13 | |
11 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 14 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
12 | Tested-by: YangHang Liu <yanghliu@redhat.com> | 15 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
16 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
17 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
18 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-3-tomitamoeko@gmail.com | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
14 | --- | 20 | --- |
15 | include/sysemu/runstate.h | 3 +++ | 21 | hw/vfio/igd.c | 28 +++------------------------- |
16 | hw/core/vm-change-state-handler.c | 14 +++++++++++++- | 22 | 1 file changed, 3 insertions(+), 25 deletions(-) |
17 | 2 files changed, 16 insertions(+), 1 deletion(-) | ||
18 | 23 | ||
19 | diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h | 24 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
20 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/include/sysemu/runstate.h | 26 | --- a/hw/vfio/igd.c |
22 | +++ b/include/sysemu/runstate.h | 27 | +++ b/hw/vfio/igd.c |
23 | @@ -XXX,XX +XXX,XX @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, | 28 | @@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev) |
24 | VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, | 29 | |
25 | VMChangeStateHandler *cb, | 30 | #define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */ |
26 | void *opaque); | 31 | #define IGD_GMCH_GEN6_GMS_MASK 0x1f |
27 | +VMChangeStateEntry *qdev_add_vm_change_state_handler_full( | 32 | -#define IGD_GMCH_GEN6_GGMS_SHIFT 8 |
28 | + DeviceState *dev, VMChangeStateHandler *cb, | 33 | -#define IGD_GMCH_GEN6_GGMS_MASK 0x3 |
29 | + VMChangeStateHandler *prepare_cb, void *opaque); | 34 | #define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */ |
30 | void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); | 35 | #define IGD_GMCH_GEN8_GMS_MASK 0xff |
31 | /** | 36 | -#define IGD_GMCH_GEN8_GGMS_SHIFT 6 |
32 | * vm_state_notify: Notify the state of the VM | 37 | -#define IGD_GMCH_GEN8_GGMS_MASK 0x3 |
33 | diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c | 38 | - |
34 | index XXXXXXX..XXXXXXX 100644 | 39 | -static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch) |
35 | --- a/hw/core/vm-change-state-handler.c | 40 | -{ |
36 | +++ b/hw/core/vm-change-state-handler.c | 41 | - uint64_t ggms; |
37 | @@ -XXX,XX +XXX,XX @@ static int qdev_get_dev_tree_depth(DeviceState *dev) | 42 | - |
38 | VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, | 43 | - if (gen < 8) { |
39 | VMChangeStateHandler *cb, | 44 | - ggms = (gmch >> IGD_GMCH_GEN6_GGMS_SHIFT) & IGD_GMCH_GEN6_GGMS_MASK; |
40 | void *opaque) | 45 | - } else { |
41 | +{ | 46 | - ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK; |
42 | + return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque); | 47 | - if (ggms != 0) { |
43 | +} | 48 | - ggms = 1ULL << ggms; |
44 | + | 49 | - } |
45 | +/* | 50 | - } |
46 | + * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb | 51 | - |
47 | + * argument too. | 52 | - return ggms * MiB; |
48 | + */ | 53 | -} |
49 | +VMChangeStateEntry *qdev_add_vm_change_state_handler_full( | 54 | |
50 | + DeviceState *dev, VMChangeStateHandler *cb, | 55 | static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) |
51 | + VMChangeStateHandler *prepare_cb, void *opaque) | ||
52 | { | 56 | { |
53 | int depth = qdev_get_dev_tree_depth(dev); | 57 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
54 | 58 | g_autofree struct vfio_region_info *lpc = NULL; | |
55 | - return qemu_add_vm_change_state_handler_prio(cb, opaque, depth); | 59 | PCIDevice *lpc_bridge; |
56 | + return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque, | 60 | int ret, gen; |
57 | + depth); | 61 | - uint64_t ggms_size, gms_size; |
62 | + uint64_t gms_size; | ||
63 | uint64_t *bdsm_size; | ||
64 | uint32_t gmch; | ||
65 | Error *err = NULL; | ||
66 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
67 | } | ||
68 | } | ||
69 | |||
70 | - ggms_size = igd_gtt_memory_size(gen, gmch); | ||
71 | gms_size = igd_stolen_memory_size(gen, gmch); | ||
72 | |||
73 | /* | ||
74 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
75 | * config offset 0x5C. | ||
76 | */ | ||
77 | bdsm_size = g_malloc(sizeof(*bdsm_size)); | ||
78 | - *bdsm_size = cpu_to_le64(ggms_size + gms_size); | ||
79 | + *bdsm_size = cpu_to_le64(gms_size); | ||
80 | fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", | ||
81 | bdsm_size, sizeof(*bdsm_size)); | ||
82 | |||
83 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
84 | pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); | ||
85 | } | ||
86 | |||
87 | - trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, | ||
88 | - (ggms_size + gms_size) / MiB); | ||
89 | + trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); | ||
58 | } | 90 | } |
59 | -- | 91 | -- |
60 | 2.41.0 | 92 | 2.48.1 |
61 | 93 | ||
62 | 94 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | VFIO migration is not compatible with postcopy migration. A VFIO device | 3 | Both x-igd-opregion option and legacy mode require identical steps to |
4 | in the destination can't handle page faults for pages that have not been | 4 | set up OpRegion for IGD devices. Consolidate these steps into a single |
5 | sent yet. | 5 | vfio_pci_igd_setup_opregion function. |
6 | 6 | ||
7 | Doing such migration will cause the VM to crash in the destination: | 7 | The function call in pci.c is wrapped with ifdef temporarily to prevent |
8 | 8 | build error for non-x86 archs, it will be removed after we decouple it | |
9 | qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address | 9 | from legacy mode. |
10 | qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address) | 10 | |
11 | qemu: hardware error: vfio: DMA mapping failed, unable to continue | 11 | Additionally, move vfio_pci_igd_opregion_init to igd.c to prevent it |
12 | 12 | from being compiled in non-x86 builds. | |
13 | To prevent this, block VFIO migration with postcopy migration. | 13 | |
14 | 14 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | |
15 | Reported-by: Yanghang Liu <yanghliu@redhat.com> | 15 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
16 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 16 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
17 | Tested-by: Yanghang Liu <yanghliu@redhat.com> | 17 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
18 | Reviewed-by: Peter Xu <peterx@redhat.com> | 18 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-4-tomitamoeko@gmail.com |
19 | [ clg: Fixed spelling in vfio_pci_igd_setup_opregion() ] | ||
19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 20 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
20 | --- | 21 | --- |
21 | hw/vfio/migration.c | 22 ++++++++++++++++++++++ | 22 | hw/vfio/pci.h | 4 +- |
22 | 1 file changed, 22 insertions(+) | 23 | hw/vfio/igd.c | 101 +++++++++++++++++++++++++++++++++++-------- |
23 | 24 | hw/vfio/pci-quirks.c | 50 --------------------- | |
24 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 25 | hw/vfio/pci.c | 22 ++-------- |
25 | index XXXXXXX..XXXXXXX 100644 | 26 | 4 files changed, 88 insertions(+), 89 deletions(-) |
26 | --- a/hw/vfio/migration.c | 27 | |
27 | +++ b/hw/vfio/migration.c | 28 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
28 | @@ -XXX,XX +XXX,XX @@ static bool vfio_precopy_supported(VFIODevice *vbasedev) | 29 | index XXXXXXX..XXXXXXX 100644 |
29 | 30 | --- a/hw/vfio/pci.h | |
30 | /* ---------------------------------------------------------------------- */ | 31 | +++ b/hw/vfio/pci.h |
31 | 32 | @@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, | |
32 | +static int vfio_save_prepare(void *opaque, Error **errp) | 33 | |
34 | bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); | ||
35 | |||
36 | -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, | ||
37 | - struct vfio_region_info *info, | ||
38 | - Error **errp); | ||
39 | +bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp); | ||
40 | |||
41 | void vfio_display_reset(VFIOPCIDevice *vdev); | ||
42 | bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); | ||
43 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/hw/vfio/igd.c | ||
46 | +++ b/hw/vfio/igd.c | ||
47 | @@ -XXX,XX +XXX,XX @@ static int igd_gen(VFIOPCIDevice *vdev) | ||
48 | return -1; | ||
49 | } | ||
50 | |||
51 | +#define IGD_ASLS 0xfc /* ASL Storage Register */ | ||
52 | #define IGD_GMCH 0x50 /* Graphics Control Register */ | ||
53 | #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ | ||
54 | #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ | ||
55 | @@ -XXX,XX +XXX,XX @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | +/* | ||
60 | + * The OpRegion includes the Video BIOS Table, which seems important for | ||
61 | + * telling the driver what sort of outputs it has. Without this, the device | ||
62 | + * may work in the guest, but we may not get output. This also requires BIOS | ||
63 | + * support to reserve and populate a section of guest memory sufficient for | ||
64 | + * the table and to write the base address of that memory to the ASLS register | ||
65 | + * of the IGD device. | ||
66 | + */ | ||
67 | +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, | ||
68 | + struct vfio_region_info *info, | ||
69 | + Error **errp) | ||
33 | +{ | 70 | +{ |
34 | + VFIODevice *vbasedev = opaque; | 71 | + int ret; |
72 | + | ||
73 | + vdev->igd_opregion = g_malloc0(info->size); | ||
74 | + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, | ||
75 | + info->size, info->offset); | ||
76 | + if (ret != info->size) { | ||
77 | + error_setg(errp, "failed to read IGD OpRegion"); | ||
78 | + g_free(vdev->igd_opregion); | ||
79 | + vdev->igd_opregion = NULL; | ||
80 | + return false; | ||
81 | + } | ||
35 | + | 82 | + |
36 | + /* | 83 | + /* |
37 | + * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on. | 84 | + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to |
85 | + * allocate 32bit reserved memory for, copy these contents into, and write | ||
86 | + * the reserved memory base address to the device ASLS register at 0xFC. | ||
87 | + * Alignment of this reserved region seems flexible, but using a 4k page | ||
88 | + * alignment seems to work well. This interface assumes a single IGD | ||
89 | + * device, which may be at VM address 00:02.0 in legacy mode or another | ||
90 | + * address in UPT mode. | ||
91 | + * | ||
92 | + * NB, there may be future use cases discovered where the VM should have | ||
93 | + * direct interaction with the host OpRegion, in which case the write to | ||
94 | + * the ASLS register would trigger MemoryRegion setup to enable that. | ||
38 | + */ | 95 | + */ |
39 | + if (runstate_check(RUN_STATE_SAVE_VM)) { | 96 | + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", |
40 | + return 0; | 97 | + vdev->igd_opregion, info->size); |
41 | + } | 98 | + |
42 | + | 99 | + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); |
43 | + if (migrate_postcopy_ram()) { | 100 | + |
44 | + error_setg( | 101 | + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); |
45 | + errp, "%s: VFIO migration is not supported with postcopy migration", | 102 | + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); |
46 | + vbasedev->name); | 103 | + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); |
47 | + return -EOPNOTSUPP; | 104 | + |
48 | + } | 105 | + return true; |
49 | + | ||
50 | + return 0; | ||
51 | +} | 106 | +} |
52 | + | 107 | + |
53 | static int vfio_save_setup(QEMUFile *f, void *opaque) | 108 | +bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) |
109 | +{ | ||
110 | + g_autofree struct vfio_region_info *opregion = NULL; | ||
111 | + int ret; | ||
112 | + | ||
113 | + /* Hotplugging is not supported for opregion access */ | ||
114 | + if (vdev->pdev.qdev.hotplugged) { | ||
115 | + error_setg(errp, "IGD OpRegion is not supported on hotplugged device"); | ||
116 | + return false; | ||
117 | + } | ||
118 | + | ||
119 | + ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
120 | + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
121 | + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); | ||
122 | + if (ret) { | ||
123 | + error_setg_errno(errp, -ret, | ||
124 | + "Device does not supports IGD OpRegion feature"); | ||
125 | + return false; | ||
126 | + } | ||
127 | + | ||
128 | + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { | ||
129 | + return false; | ||
130 | + } | ||
131 | + | ||
132 | + return true; | ||
133 | +} | ||
134 | + | ||
135 | /* | ||
136 | * The rather short list of registers that we copy from the host devices. | ||
137 | * The LPC/ISA bridge values are definitely needed to support the vBIOS, the | ||
138 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
139 | void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
54 | { | 140 | { |
55 | VFIODevice *vbasedev = opaque; | 141 | g_autofree struct vfio_region_info *rom = NULL; |
56 | @@ -XXX,XX +XXX,XX @@ static bool vfio_switchover_ack_needed(void *opaque) | 142 | - g_autofree struct vfio_region_info *opregion = NULL; |
143 | g_autofree struct vfio_region_info *host = NULL; | ||
144 | g_autofree struct vfio_region_info *lpc = NULL; | ||
145 | PCIDevice *lpc_bridge; | ||
146 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
147 | * Check whether we have all the vfio device specific regions to | ||
148 | * support legacy mode (added in Linux v4.6). If not, bail. | ||
149 | */ | ||
150 | - ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
151 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
152 | - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); | ||
153 | - if (ret) { | ||
154 | - error_report("IGD device %s does not support OpRegion access," | ||
155 | - "legacy mode disabled", vdev->vbasedev.name); | ||
156 | - return; | ||
157 | - } | ||
158 | - | ||
159 | ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
160 | VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
161 | VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); | ||
162 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
163 | return; | ||
164 | } | ||
165 | |||
166 | + /* Setup OpRegion access */ | ||
167 | + if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
168 | + error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
169 | + error_report_err(err); | ||
170 | + return; | ||
171 | + } | ||
172 | + | ||
173 | /* Create our LPC/ISA bridge */ | ||
174 | ret = vfio_pci_igd_lpc_init(vdev, lpc); | ||
175 | if (ret) { | ||
176 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
177 | return; | ||
178 | } | ||
179 | |||
180 | - /* Setup OpRegion access */ | ||
181 | - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { | ||
182 | - error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
183 | - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); | ||
184 | - return; | ||
185 | - } | ||
186 | - | ||
187 | /* | ||
188 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
189 | * 32MiB. This option should only be used when the desired size cannot be | ||
190 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c | ||
191 | index XXXXXXX..XXXXXXX 100644 | ||
192 | --- a/hw/vfio/pci-quirks.c | ||
193 | +++ b/hw/vfio/pci-quirks.c | ||
194 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) | ||
195 | trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); | ||
57 | } | 196 | } |
58 | 197 | ||
59 | static const SaveVMHandlers savevm_vfio_handlers = { | 198 | -#define IGD_ASLS 0xfc /* ASL Storage Register */ |
60 | + .save_prepare = vfio_save_prepare, | 199 | - |
61 | .save_setup = vfio_save_setup, | 200 | -/* |
62 | .save_cleanup = vfio_save_cleanup, | 201 | - * The OpRegion includes the Video BIOS Table, which seems important for |
63 | .state_pending_estimate = vfio_state_pending_estimate, | 202 | - * telling the driver what sort of outputs it has. Without this, the device |
203 | - * may work in the guest, but we may not get output. This also requires BIOS | ||
204 | - * support to reserve and populate a section of guest memory sufficient for | ||
205 | - * the table and to write the base address of that memory to the ASLS register | ||
206 | - * of the IGD device. | ||
207 | - */ | ||
208 | -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, | ||
209 | - struct vfio_region_info *info, Error **errp) | ||
210 | -{ | ||
211 | - int ret; | ||
212 | - | ||
213 | - vdev->igd_opregion = g_malloc0(info->size); | ||
214 | - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, | ||
215 | - info->size, info->offset); | ||
216 | - if (ret != info->size) { | ||
217 | - error_setg(errp, "failed to read IGD OpRegion"); | ||
218 | - g_free(vdev->igd_opregion); | ||
219 | - vdev->igd_opregion = NULL; | ||
220 | - return false; | ||
221 | - } | ||
222 | - | ||
223 | - /* | ||
224 | - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to | ||
225 | - * allocate 32bit reserved memory for, copy these contents into, and write | ||
226 | - * the reserved memory base address to the device ASLS register at 0xFC. | ||
227 | - * Alignment of this reserved region seems flexible, but using a 4k page | ||
228 | - * alignment seems to work well. This interface assumes a single IGD | ||
229 | - * device, which may be at VM address 00:02.0 in legacy mode or another | ||
230 | - * address in UPT mode. | ||
231 | - * | ||
232 | - * NB, there may be future use cases discovered where the VM should have | ||
233 | - * direct interaction with the host OpRegion, in which case the write to | ||
234 | - * the ASLS register would trigger MemoryRegion setup to enable that. | ||
235 | - */ | ||
236 | - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", | ||
237 | - vdev->igd_opregion, info->size); | ||
238 | - | ||
239 | - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); | ||
240 | - | ||
241 | - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); | ||
242 | - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); | ||
243 | - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); | ||
244 | - | ||
245 | - return true; | ||
246 | -} | ||
247 | - | ||
248 | /* | ||
249 | * Common quirk probe entry points. | ||
250 | */ | ||
251 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | ||
252 | index XXXXXXX..XXXXXXX 100644 | ||
253 | --- a/hw/vfio/pci.c | ||
254 | +++ b/hw/vfio/pci.c | ||
255 | @@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp) | ||
256 | vfio_bar_quirk_setup(vdev, i); | ||
257 | } | ||
258 | |||
259 | +#ifdef CONFIG_VFIO_IGD | ||
260 | if (!vdev->igd_opregion && | ||
261 | vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { | ||
262 | - g_autofree struct vfio_region_info *opregion = NULL; | ||
263 | - | ||
264 | - if (vdev->pdev.qdev.hotplugged) { | ||
265 | - error_setg(errp, | ||
266 | - "cannot support IGD OpRegion feature on hotplugged " | ||
267 | - "device"); | ||
268 | - goto out_unset_idev; | ||
269 | - } | ||
270 | - | ||
271 | - ret = vfio_get_dev_region_info(vbasedev, | ||
272 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
273 | - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); | ||
274 | - if (ret) { | ||
275 | - error_setg_errno(errp, -ret, | ||
276 | - "does not support requested IGD OpRegion feature"); | ||
277 | - goto out_unset_idev; | ||
278 | - } | ||
279 | - | ||
280 | - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { | ||
281 | + if (!vfio_pci_igd_setup_opregion(vdev, errp)) { | ||
282 | goto out_unset_idev; | ||
283 | } | ||
284 | } | ||
285 | +#endif | ||
286 | |||
287 | /* QEMU emulates all of MSI & MSIX */ | ||
288 | if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { | ||
64 | -- | 289 | -- |
65 | 2.41.0 | 290 | 2.48.1 |
66 | 291 | ||
67 | 292 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | VFIO migration uAPI defines an optional intermediate P2P quiescent | 3 | A new option will soon be introduced to decouple the LPC bridge/Host |
4 | state. While in the P2P quiescent state, P2P DMA transactions cannot be | 4 | bridge ID quirk from legacy mode. To prepare for this, move the LPC |
5 | initiated by the device, but the device can respond to incoming ones. | 5 | bridge initialization into a separate function. |
6 | Additionally, all outstanding P2P transactions are guaranteed to have | ||
7 | been completed by the time the device enters this state. | ||
8 | 6 | ||
9 | The purpose of this state is to support migration of multiple devices | 7 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
10 | that might do P2P transactions between themselves. | 8 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
11 | 9 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | |
12 | Add support for P2P migration by transitioning all the devices to the | 10 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
13 | P2P quiescent state before stopping or starting the devices. Use the new | 11 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-5-tomitamoeko@gmail.com |
14 | VMChangeStateHandler prepare_cb to achieve that behavior. | ||
15 | |||
16 | This will allow migration of multiple VFIO devices if all of them | ||
17 | support P2P migration. | ||
18 | |||
19 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | ||
20 | Tested-by: YangHang Liu <yanghliu@redhat.com> | ||
21 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
22 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
23 | --- | 13 | --- |
24 | docs/devel/vfio-migration.rst | 93 +++++++++++++++++++++-------------- | 14 | hw/vfio/igd.c | 122 +++++++++++++++++++++++++++++--------------------- |
25 | hw/vfio/common.c | 6 ++- | 15 | 1 file changed, 70 insertions(+), 52 deletions(-) |
26 | hw/vfio/migration.c | 46 +++++++++++++++-- | ||
27 | hw/vfio/trace-events | 1 + | ||
28 | 4 files changed, 105 insertions(+), 41 deletions(-) | ||
29 | 16 | ||
30 | diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst | 17 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
31 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/docs/devel/vfio-migration.rst | 19 | --- a/hw/vfio/igd.c |
33 | +++ b/docs/devel/vfio-migration.rst | 20 | +++ b/hw/vfio/igd.c |
34 | @@ -XXX,XX +XXX,XX @@ and recommends that the initial bytes are sent and loaded in the destination | 21 | @@ -XXX,XX +XXX,XX @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, |
35 | before stopping the source VM. Enabling this migration capability will | 22 | return ret; |
36 | guarantee that and thus, can potentially reduce downtime even further. | ||
37 | |||
38 | -Note that currently VFIO migration is supported only for a single device. This | ||
39 | -is due to VFIO migration's lack of P2P support. However, P2P support is planned | ||
40 | -to be added later on. | ||
41 | +To support migration of multiple devices that might do P2P transactions between | ||
42 | +themselves, VFIO migration uAPI defines an intermediate P2P quiescent state. | ||
43 | +While in the P2P quiescent state, P2P DMA transactions cannot be initiated by | ||
44 | +the device, but the device can respond to incoming ones. Additionally, all | ||
45 | +outstanding P2P transactions are guaranteed to have been completed by the time | ||
46 | +the device enters this state. | ||
47 | + | ||
48 | +All the devices that support P2P migration are first transitioned to the P2P | ||
49 | +quiescent state and only then are they stopped or started. This makes migration | ||
50 | +safe P2P-wise, since starting and stopping the devices is not done atomically | ||
51 | +for all the devices together. | ||
52 | + | ||
53 | +Thus, multiple VFIO devices migration is allowed only if all the devices | ||
54 | +support P2P migration. Single VFIO device migration is allowed regardless of | ||
55 | +P2P migration support. | ||
56 | |||
57 | A detailed description of the UAPI for VFIO device migration can be found in | ||
58 | the comment for the ``vfio_device_mig_state`` structure in the header file | ||
59 | @@ -XXX,XX +XXX,XX @@ will be blocked. | ||
60 | Flow of state changes during Live migration | ||
61 | =========================================== | ||
62 | |||
63 | -Below is the flow of state change during live migration. | ||
64 | +Below is the state change flow during live migration for a VFIO device that | ||
65 | +supports both precopy and P2P migration. The flow for devices that don't | ||
66 | +support it is similar, except that the relevant states for precopy and P2P are | ||
67 | +skipped. | ||
68 | The values in the parentheses represent the VM state, the migration state, and | ||
69 | the VFIO device state, respectively. | ||
70 | -The text in the square brackets represents the flow if the VFIO device supports | ||
71 | -pre-copy. | ||
72 | |||
73 | Live migration save path | ||
74 | ------------------------ | ||
75 | |||
76 | :: | ||
77 | |||
78 | - QEMU normal running state | ||
79 | - (RUNNING, _NONE, _RUNNING) | ||
80 | - | | ||
81 | + QEMU normal running state | ||
82 | + (RUNNING, _NONE, _RUNNING) | ||
83 | + | | ||
84 | migrate_init spawns migration_thread | ||
85 | - Migration thread then calls each device's .save_setup() | ||
86 | - (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) | ||
87 | - | | ||
88 | - (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) | ||
89 | - If device is active, get pending_bytes by .state_pending_{estimate,exact}() | ||
90 | - If total pending_bytes >= threshold_size, call .save_live_iterate() | ||
91 | - [Data of VFIO device for pre-copy phase is copied] | ||
92 | - Iterate till total pending bytes converge and are less than threshold | ||
93 | - | | ||
94 | - On migration completion, vCPU stops and calls .save_live_complete_precopy for | ||
95 | - each active device. The VFIO device is then transitioned into _STOP_COPY state | ||
96 | - (FINISH_MIGRATE, _DEVICE, _STOP_COPY) | ||
97 | - | | ||
98 | - For the VFIO device, iterate in .save_live_complete_precopy until | ||
99 | - pending data is 0 | ||
100 | - (FINISH_MIGRATE, _DEVICE, _STOP) | ||
101 | - | | ||
102 | - (FINISH_MIGRATE, _COMPLETED, _STOP) | ||
103 | - Migration thread schedules cleanup bottom half and exits | ||
104 | + Migration thread then calls each device's .save_setup() | ||
105 | + (RUNNING, _SETUP, _PRE_COPY) | ||
106 | + | | ||
107 | + (RUNNING, _ACTIVE, _PRE_COPY) | ||
108 | + If device is active, get pending_bytes by .state_pending_{estimate,exact}() | ||
109 | + If total pending_bytes >= threshold_size, call .save_live_iterate() | ||
110 | + Data of VFIO device for pre-copy phase is copied | ||
111 | + Iterate till total pending bytes converge and are less than threshold | ||
112 | + | | ||
113 | + On migration completion, the vCPUs and the VFIO device are stopped | ||
114 | + The VFIO device is first put in P2P quiescent state | ||
115 | + (FINISH_MIGRATE, _ACTIVE, _PRE_COPY_P2P) | ||
116 | + | | ||
117 | + Then the VFIO device is put in _STOP_COPY state | ||
118 | + (FINISH_MIGRATE, _ACTIVE, _STOP_COPY) | ||
119 | + .save_live_complete_precopy() is called for each active device | ||
120 | + For the VFIO device, iterate in .save_live_complete_precopy() until | ||
121 | + pending data is 0 | ||
122 | + | | ||
123 | + (POSTMIGRATE, _COMPLETED, _STOP_COPY) | ||
124 | + Migration thread schedules cleanup bottom half and exits | ||
125 | + | | ||
126 | + .save_cleanup() is called | ||
127 | + (POSTMIGRATE, _COMPLETED, _STOP) | ||
128 | |||
129 | Live migration resume path | ||
130 | -------------------------- | ||
131 | |||
132 | :: | ||
133 | |||
134 | - Incoming migration calls .load_setup for each device | ||
135 | - (RESTORE_VM, _ACTIVE, _STOP) | ||
136 | - | | ||
137 | - For each device, .load_state is called for that device section data | ||
138 | - (RESTORE_VM, _ACTIVE, _RESUMING) | ||
139 | - | | ||
140 | - At the end, .load_cleanup is called for each device and vCPUs are started | ||
141 | - (RUNNING, _NONE, _RUNNING) | ||
142 | + Incoming migration calls .load_setup() for each device | ||
143 | + (RESTORE_VM, _ACTIVE, _STOP) | ||
144 | + | | ||
145 | + For each device, .load_state() is called for that device section data | ||
146 | + (RESTORE_VM, _ACTIVE, _RESUMING) | ||
147 | + | | ||
148 | + At the end, .load_cleanup() is called for each device and vCPUs are started | ||
149 | + The VFIO device is first put in P2P quiescent state | ||
150 | + (RUNNING, _ACTIVE, _RUNNING_P2P) | ||
151 | + | | ||
152 | + (RUNNING, _NONE, _RUNNING) | ||
153 | |||
154 | Postcopy | ||
155 | ======== | ||
156 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c | ||
157 | index XXXXXXX..XXXXXXX 100644 | ||
158 | --- a/hw/vfio/common.c | ||
159 | +++ b/hw/vfio/common.c | ||
160 | @@ -XXX,XX +XXX,XX @@ bool vfio_device_state_is_running(VFIODevice *vbasedev) | ||
161 | { | ||
162 | VFIOMigration *migration = vbasedev->migration; | ||
163 | |||
164 | - return migration->device_state == VFIO_DEVICE_STATE_RUNNING; | ||
165 | + return migration->device_state == VFIO_DEVICE_STATE_RUNNING || | ||
166 | + migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P; | ||
167 | } | 23 | } |
168 | 24 | ||
169 | bool vfio_device_state_is_precopy(VFIODevice *vbasedev) | 25 | +static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) |
170 | { | ||
171 | VFIOMigration *migration = vbasedev->migration; | ||
172 | |||
173 | - return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; | ||
174 | + return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY || | ||
175 | + migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; | ||
176 | } | ||
177 | |||
178 | static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) | ||
179 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
180 | index XXXXXXX..XXXXXXX 100644 | ||
181 | --- a/hw/vfio/migration.c | ||
182 | +++ b/hw/vfio/migration.c | ||
183 | @@ -XXX,XX +XXX,XX @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) | ||
184 | return "STOP_COPY"; | ||
185 | case VFIO_DEVICE_STATE_RESUMING: | ||
186 | return "RESUMING"; | ||
187 | + case VFIO_DEVICE_STATE_RUNNING_P2P: | ||
188 | + return "RUNNING_P2P"; | ||
189 | case VFIO_DEVICE_STATE_PRE_COPY: | ||
190 | return "PRE_COPY"; | ||
191 | + case VFIO_DEVICE_STATE_PRE_COPY_P2P: | ||
192 | + return "PRE_COPY_P2P"; | ||
193 | default: | ||
194 | return "UNKNOWN STATE"; | ||
195 | } | ||
196 | @@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = { | ||
197 | |||
198 | /* ---------------------------------------------------------------------- */ | ||
199 | |||
200 | +static void vfio_vmstate_change_prepare(void *opaque, bool running, | ||
201 | + RunState state) | ||
202 | +{ | 26 | +{ |
203 | + VFIODevice *vbasedev = opaque; | 27 | + g_autofree struct vfio_region_info *host = NULL; |
204 | + VFIOMigration *migration = vbasedev->migration; | 28 | + g_autofree struct vfio_region_info *lpc = NULL; |
205 | + enum vfio_device_mig_state new_state; | 29 | + PCIDevice *lpc_bridge; |
206 | + int ret; | 30 | + int ret; |
207 | + | 31 | + |
208 | + new_state = migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ? | 32 | + /* |
209 | + VFIO_DEVICE_STATE_PRE_COPY_P2P : | 33 | + * Copying IDs or creating new devices are not supported on hotplug |
210 | + VFIO_DEVICE_STATE_RUNNING_P2P; | 34 | + */ |
35 | + if (vdev->pdev.qdev.hotplugged) { | ||
36 | + error_setg(errp, "IGD LPC is not supported on hotplugged device"); | ||
37 | + return false; | ||
38 | + } | ||
211 | + | 39 | + |
212 | + /* | 40 | + /* |
213 | + * If setting the device in new_state fails, the device should be reset. | 41 | + * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we |
214 | + * To do so, use ERROR state as a recover state. | 42 | + * can stuff host values into, so if there's already one there and it's not |
43 | + * one we can hack on, this quirk is no-go. Sorry Q35. | ||
215 | + */ | 44 | + */ |
216 | + ret = vfio_migration_set_state(vbasedev, new_state, | 45 | + lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), |
217 | + VFIO_DEVICE_STATE_ERROR); | 46 | + 0, PCI_DEVFN(0x1f, 0)); |
218 | + if (ret) { | 47 | + if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), |
219 | + /* | 48 | + "vfio-pci-igd-lpc-bridge")) { |
220 | + * Migration should be aborted in this case, but vm_state_notify() | 49 | + error_setg(errp, |
221 | + * currently does not support reporting failures. | 50 | + "Cannot create LPC bridge due to existing device at 1f.0"); |
222 | + */ | 51 | + return false; |
223 | + if (migrate_get_current()->to_dst_file) { | ||
224 | + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); | ||
225 | + } | ||
226 | + } | 52 | + } |
227 | + | 53 | + |
228 | + trace_vfio_vmstate_change_prepare(vbasedev->name, running, | 54 | + /* |
229 | + RunState_str(state), | 55 | + * Check whether we have all the vfio device specific regions to |
230 | + mig_state_to_str(new_state)); | 56 | + * support LPC quirk (added in Linux v4.6). |
57 | + */ | ||
58 | + ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
59 | + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
60 | + VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc); | ||
61 | + if (ret) { | ||
62 | + error_setg(errp, "IGD LPC bridge access is not supported by kernel"); | ||
63 | + return false; | ||
64 | + } | ||
65 | + | ||
66 | + ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
67 | + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
68 | + VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); | ||
69 | + if (ret) { | ||
70 | + error_setg(errp, "IGD host bridge access is not supported by kernel"); | ||
71 | + return false; | ||
72 | + } | ||
73 | + | ||
74 | + /* Create/modify LPC bridge */ | ||
75 | + ret = vfio_pci_igd_lpc_init(vdev, lpc); | ||
76 | + if (ret) { | ||
77 | + error_setg(errp, "Failed to create/modify LPC bridge for IGD"); | ||
78 | + return false; | ||
79 | + } | ||
80 | + | ||
81 | + /* Stuff some host values into the VM PCI host bridge */ | ||
82 | + ret = vfio_pci_igd_host_init(vdev, host); | ||
83 | + if (ret) { | ||
84 | + error_setg(errp, "Failed to modify host bridge for IGD"); | ||
85 | + return false; | ||
86 | + } | ||
87 | + | ||
88 | + return true; | ||
231 | +} | 89 | +} |
232 | + | 90 | + |
233 | static void vfio_vmstate_change(void *opaque, bool running, RunState state) | 91 | #define IGD_GGC_MMIO_OFFSET 0x108040 |
92 | #define IGD_BDSM_MMIO_OFFSET 0x1080C0 | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
95 | void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
234 | { | 96 | { |
235 | VFIODevice *vbasedev = opaque; | 97 | g_autofree struct vfio_region_info *rom = NULL; |
236 | @@ -XXX,XX +XXX,XX @@ static int vfio_migration_init(VFIODevice *vbasedev) | 98 | - g_autofree struct vfio_region_info *host = NULL; |
237 | char id[256] = ""; | 99 | - g_autofree struct vfio_region_info *lpc = NULL; |
238 | g_autofree char *path = NULL, *oid = NULL; | 100 | - PCIDevice *lpc_bridge; |
239 | uint64_t mig_flags = 0; | 101 | int ret, gen; |
240 | + VMChangeStateHandler *prepare_cb; | 102 | uint64_t gms_size; |
241 | 103 | uint64_t *bdsm_size; | |
242 | if (!vbasedev->ops->vfio_get_object) { | 104 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
243 | return -EINVAL; | 105 | return; |
244 | @@ -XXX,XX +XXX,XX @@ static int vfio_migration_init(VFIODevice *vbasedev) | 106 | } |
245 | register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, | 107 | |
246 | vbasedev); | 108 | - /* |
247 | 109 | - * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we | |
248 | - migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, | 110 | - * can stuff host values into, so if there's already one there and it's not |
249 | - vfio_vmstate_change, | 111 | - * one we can hack on, legacy mode is no-go. Sorry Q35. |
250 | - vbasedev); | 112 | - */ |
251 | + prepare_cb = migration->mig_flags & VFIO_MIGRATION_P2P ? | 113 | - lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), |
252 | + vfio_vmstate_change_prepare : | 114 | - 0, PCI_DEVFN(0x1f, 0)); |
253 | + NULL; | 115 | - if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), |
254 | + migration->vm_state = qdev_add_vm_change_state_handler_full( | 116 | - "vfio-pci-igd-lpc-bridge")) { |
255 | + vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); | 117 | - error_report("IGD device %s cannot support legacy mode due to existing " |
256 | migration->migration_state.notify = vfio_migration_state_notifier; | 118 | - "devices at address 1f.0", vdev->vbasedev.name); |
257 | add_migration_state_change_notifier(&migration->migration_state); | 119 | - return; |
258 | 120 | - } | |
259 | diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events | 121 | - |
260 | index XXXXXXX..XXXXXXX 100644 | 122 | /* |
261 | --- a/hw/vfio/trace-events | 123 | * IGD is not a standard, they like to change their specs often. We |
262 | +++ b/hw/vfio/trace-events | 124 | * only attempt to support back to SandBridge and we hope that newer |
263 | @@ -XXX,XX +XXX,XX @@ vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer | 125 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
264 | vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 | 126 | return; |
265 | vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 | 127 | } |
266 | vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" | 128 | |
267 | +vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" | 129 | - /* |
130 | - * Check whether we have all the vfio device specific regions to | ||
131 | - * support legacy mode (added in Linux v4.6). If not, bail. | ||
132 | - */ | ||
133 | - ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
134 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
135 | - VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); | ||
136 | - if (ret) { | ||
137 | - error_report("IGD device %s does not support host bridge access," | ||
138 | - "legacy mode disabled", vdev->vbasedev.name); | ||
139 | - return; | ||
140 | - } | ||
141 | - | ||
142 | - ret = vfio_get_dev_region_info(&vdev->vbasedev, | ||
143 | - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, | ||
144 | - VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc); | ||
145 | - if (ret) { | ||
146 | - error_report("IGD device %s does not support LPC bridge access," | ||
147 | - "legacy mode disabled", vdev->vbasedev.name); | ||
148 | - return; | ||
149 | - } | ||
150 | - | ||
151 | gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); | ||
152 | |||
153 | /* | ||
154 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
155 | return; | ||
156 | } | ||
157 | |||
158 | - /* Create our LPC/ISA bridge */ | ||
159 | - ret = vfio_pci_igd_lpc_init(vdev, lpc); | ||
160 | - if (ret) { | ||
161 | - error_report("IGD device %s failed to create LPC bridge, " | ||
162 | - "legacy mode disabled", vdev->vbasedev.name); | ||
163 | - return; | ||
164 | - } | ||
165 | - | ||
166 | - /* Stuff some host values into the VM PCI host bridge */ | ||
167 | - ret = vfio_pci_igd_host_init(vdev, host); | ||
168 | - if (ret) { | ||
169 | - error_report("IGD device %s failed to modify host bridge, " | ||
170 | - "legacy mode disabled", vdev->vbasedev.name); | ||
171 | + /* Setup LPC bridge / Host bridge PCI IDs */ | ||
172 | + if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
173 | + error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
174 | + error_report_err(err); | ||
175 | return; | ||
176 | } | ||
177 | |||
268 | -- | 178 | -- |
269 | 2.41.0 | 179 | 2.48.1 |
270 | 180 | ||
271 | 181 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Background snapshot allows creating a snapshot of the VM while it's | 3 | IGD devices require device-specific quirk to be applied to their PCI |
4 | running and keeping it small by not including dirty RAM pages. | 4 | config space. Currently, it is put in the BAR4 quirk that does nothing |
5 | to BAR4 itself. Add a placeholder for PCI config space quirks to hold | ||
6 | that quirk later. | ||
5 | 7 | ||
6 | The way it works is by first stopping the VM, saving the non-iterable | 8 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
7 | devices' state and then starting the VM and saving the RAM while write | 9 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
8 | protecting it with UFFD. The resulting snapshot represents the VM state | 10 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
9 | at snapshot start. | 11 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
10 | 12 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-6-tomitamoeko@gmail.com | |
11 | VFIO migration is not compatible with background snapshot. | ||
12 | First of all, VFIO device state is not even saved in background snapshot | ||
13 | because only non-iterable device state is saved. But even if it was | ||
14 | saved, after starting the VM, a VFIO device could dirty pages without it | ||
15 | being detected by UFFD write protection. This would corrupt the | ||
16 | snapshot, as the RAM in it would not represent the RAM at snapshot | ||
17 | start. | ||
18 | |||
19 | To prevent this, block VFIO migration with background snapshot. | ||
20 | |||
21 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | ||
22 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
23 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
24 | --- | 14 | --- |
25 | hw/vfio/migration.c | 11 ++++++++++- | 15 | hw/vfio/pci.h | 1 + |
26 | 1 file changed, 10 insertions(+), 1 deletion(-) | 16 | hw/vfio/pci-quirks.c | 5 +++++ |
17 | hw/vfio/pci.c | 4 ++++ | ||
18 | 3 files changed, 10 insertions(+) | ||
27 | 19 | ||
28 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 20 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
29 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
30 | --- a/hw/vfio/migration.c | 22 | --- a/hw/vfio/pci.h |
31 | +++ b/hw/vfio/migration.c | 23 | +++ b/hw/vfio/pci.h |
32 | @@ -XXX,XX +XXX,XX @@ static int vfio_save_prepare(void *opaque, Error **errp) | 24 | @@ -XXX,XX +XXX,XX @@ uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); |
33 | VFIODevice *vbasedev = opaque; | 25 | void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); |
34 | 26 | ||
35 | /* | 27 | bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); |
36 | - * Snapshot doesn't use postcopy, so allow snapshot even if postcopy is on. | 28 | +bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp); |
37 | + * Snapshot doesn't use postcopy nor background snapshot, so allow snapshot | 29 | void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); |
38 | + * even if they are on. | 30 | void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); |
39 | */ | 31 | void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); |
40 | if (runstate_check(RUN_STATE_SAVE_VM)) { | 32 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c |
41 | return 0; | 33 | index XXXXXXX..XXXXXXX 100644 |
42 | @@ -XXX,XX +XXX,XX @@ static int vfio_save_prepare(void *opaque, Error **errp) | 34 | --- a/hw/vfio/pci-quirks.c |
43 | return -EOPNOTSUPP; | 35 | +++ b/hw/vfio/pci-quirks.c |
36 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) | ||
37 | /* | ||
38 | * Common quirk probe entry points. | ||
39 | */ | ||
40 | +bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp) | ||
41 | +{ | ||
42 | + return true; | ||
43 | +} | ||
44 | + | ||
45 | void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) | ||
46 | { | ||
47 | vfio_vga_probe_ati_3c3_quirk(vdev); | ||
48 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/hw/vfio/pci.c | ||
51 | +++ b/hw/vfio/pci.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp) | ||
53 | goto out_unset_idev; | ||
44 | } | 54 | } |
45 | 55 | ||
46 | + if (migrate_background_snapshot()) { | 56 | + if (!vfio_config_quirk_setup(vdev, errp)) { |
47 | + error_setg( | 57 | + goto out_unset_idev; |
48 | + errp, | ||
49 | + "%s: VFIO migration is not supported with background snapshot", | ||
50 | + vbasedev->name); | ||
51 | + return -EOPNOTSUPP; | ||
52 | + } | 58 | + } |
53 | + | 59 | + |
54 | return 0; | 60 | if (vdev->vga) { |
55 | } | 61 | vfio_vga_quirk_setup(vdev); |
56 | 62 | } | |
57 | -- | 63 | -- |
58 | 2.41.0 | 64 | 2.48.1 |
59 | 65 | ||
60 | 66 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | The functions in target.c are not static, yet they don't have a proper | 3 | The actual IO BAR4 write quirk in vfio_probe_igd_bar4_quirk was removed |
4 | migration prefix. Add such prefix. | 4 | in previous change, leaving the function not matching its name, so move |
5 | it into the newly introduced vfio_config_quirk_setup. There is no | ||
6 | functional change in this commit. | ||
5 | 7 | ||
6 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 8 | For now, to align with current legacy mode behavior, it returns and |
7 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 9 | proceeds on error. Later it will fail on error after decoupling the |
10 | quirks from legacy mode. | ||
11 | |||
12 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | ||
13 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
14 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
15 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
16 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-7-tomitamoeko@gmail.com | ||
8 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 17 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
9 | --- | 18 | --- |
10 | migration/migration.h | 4 ++-- | 19 | hw/vfio/pci.h | 2 +- |
11 | migration/migration.c | 6 +++--- | 20 | hw/vfio/igd.c | 21 ++++++++++++--------- |
12 | migration/savevm.c | 2 +- | 21 | hw/vfio/pci-quirks.c | 6 +++++- |
13 | migration/target.c | 8 ++++---- | 22 | 3 files changed, 18 insertions(+), 11 deletions(-) |
14 | 4 files changed, 10 insertions(+), 10 deletions(-) | ||
15 | 23 | ||
16 | diff --git a/migration/migration.h b/migration/migration.h | 24 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
17 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/migration/migration.h | 26 | --- a/hw/vfio/pci.h |
19 | +++ b/migration/migration.h | 27 | +++ b/hw/vfio/pci.h |
20 | @@ -XXX,XX +XXX,XX @@ void migration_consume_urgent_request(void); | 28 | @@ -XXX,XX +XXX,XX @@ bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); |
21 | bool migration_rate_limit(void); | 29 | void vfio_quirk_reset(VFIOPCIDevice *vdev); |
22 | void migration_cancel(const Error *error); | 30 | VFIOQuirk *vfio_quirk_alloc(int nr_mem); |
23 | 31 | void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr); | |
24 | -void populate_vfio_info(MigrationInfo *info); | 32 | -void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); |
25 | -void reset_vfio_bytes_transferred(void); | 33 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp); |
26 | +void migration_populate_vfio_info(MigrationInfo *info); | 34 | |
27 | +void migration_reset_vfio_bytes_transferred(void); | 35 | extern const PropertyInfo qdev_prop_nv_gpudirect_clique; |
28 | void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); | 36 | |
29 | 37 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | |
30 | #endif | ||
31 | diff --git a/migration/migration.c b/migration/migration.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | 38 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/migration/migration.c | 39 | --- a/hw/vfio/igd.c |
34 | +++ b/migration/migration.c | 40 | +++ b/hw/vfio/igd.c |
35 | @@ -XXX,XX +XXX,XX @@ static void fill_source_migration_info(MigrationInfo *info) | 41 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) |
36 | populate_time_info(info, s); | 42 | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); |
37 | populate_ram_info(info, s); | 43 | } |
38 | populate_disk_info(info); | 44 | |
39 | - populate_vfio_info(info); | 45 | -void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
40 | + migration_populate_vfio_info(info); | 46 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, |
41 | break; | 47 | + Error **errp G_GNUC_UNUSED) |
42 | case MIGRATION_STATUS_COLO: | 48 | { |
43 | info->has_status = true; | 49 | g_autofree struct vfio_region_info *rom = NULL; |
44 | @@ -XXX,XX +XXX,XX @@ static void fill_source_migration_info(MigrationInfo *info) | 50 | int ret, gen; |
45 | case MIGRATION_STATUS_COMPLETED: | 51 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
46 | populate_time_info(info, s); | 52 | * PCI bus address. |
47 | populate_ram_info(info, s); | ||
48 | - populate_vfio_info(info); | ||
49 | + migration_populate_vfio_info(info); | ||
50 | break; | ||
51 | case MIGRATION_STATUS_FAILED: | ||
52 | info->has_status = true; | ||
53 | @@ -XXX,XX +XXX,XX @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, | ||
54 | */ | 53 | */ |
55 | memset(&mig_stats, 0, sizeof(mig_stats)); | 54 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || |
56 | memset(&compression_counters, 0, sizeof(compression_counters)); | 55 | - !vfio_is_vga(vdev) || nr != 4 || |
57 | - reset_vfio_bytes_transferred(); | 56 | + !vfio_is_vga(vdev) || |
58 | + migration_reset_vfio_bytes_transferred(); | 57 | &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), |
59 | 58 | 0, PCI_DEVFN(0x2, 0))) { | |
59 | - return; | ||
60 | + return true; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
65 | if (gen == -1) { | ||
66 | error_report("IGD device %s is unsupported in legacy mode, " | ||
67 | "try SandyBridge or newer", vdev->vbasedev.name); | ||
68 | - return; | ||
69 | + return true; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
74 | if ((ret || !rom->size) && !vdev->pdev.romfile) { | ||
75 | error_report("IGD device %s has no ROM, legacy mode disabled", | ||
76 | vdev->vbasedev.name); | ||
77 | - return; | ||
78 | + return true; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
83 | error_report("IGD device %s hotplugged, ROM disabled, " | ||
84 | "legacy mode disabled", vdev->vbasedev.name); | ||
85 | vdev->rom_read_failed = true; | ||
86 | - return; | ||
87 | + return true; | ||
88 | } | ||
89 | |||
90 | gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); | ||
91 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
92 | error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); | ||
93 | error_report("IGD device %s failed to enable VGA access, " | ||
94 | "legacy mode disabled", vdev->vbasedev.name); | ||
95 | - return; | ||
96 | + return true; | ||
97 | } | ||
98 | |||
99 | /* Setup OpRegion access */ | ||
100 | if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
101 | error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
102 | error_report_err(err); | ||
103 | - return; | ||
104 | + return true; | ||
105 | } | ||
106 | |||
107 | /* Setup LPC bridge / Host bridge PCI IDs */ | ||
108 | if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
109 | error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
110 | error_report_err(err); | ||
111 | - return; | ||
112 | + return true; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) | ||
117 | } | ||
118 | |||
119 | trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); | ||
120 | + | ||
121 | + return true; | ||
122 | } | ||
123 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c | ||
124 | index XXXXXXX..XXXXXXX 100644 | ||
125 | --- a/hw/vfio/pci-quirks.c | ||
126 | +++ b/hw/vfio/pci-quirks.c | ||
127 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) | ||
128 | */ | ||
129 | bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp) | ||
130 | { | ||
131 | +#ifdef CONFIG_VFIO_IGD | ||
132 | + if (!vfio_probe_igd_config_quirk(vdev, errp)) { | ||
133 | + return false; | ||
134 | + } | ||
135 | +#endif | ||
60 | return true; | 136 | return true; |
61 | } | 137 | } |
62 | diff --git a/migration/savevm.c b/migration/savevm.c | 138 | |
63 | index XXXXXXX..XXXXXXX 100644 | 139 | @@ -XXX,XX +XXX,XX @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) |
64 | --- a/migration/savevm.c | 140 | vfio_probe_rtl8168_bar2_quirk(vdev, nr); |
65 | +++ b/migration/savevm.c | 141 | #ifdef CONFIG_VFIO_IGD |
66 | @@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) | 142 | vfio_probe_igd_bar0_quirk(vdev, nr); |
67 | migrate_init(ms); | 143 | - vfio_probe_igd_bar4_quirk(vdev, nr); |
68 | memset(&mig_stats, 0, sizeof(mig_stats)); | ||
69 | memset(&compression_counters, 0, sizeof(compression_counters)); | ||
70 | - reset_vfio_bytes_transferred(); | ||
71 | + migration_reset_vfio_bytes_transferred(); | ||
72 | ms->to_dst_file = f; | ||
73 | |||
74 | qemu_mutex_unlock_iothread(); | ||
75 | diff --git a/migration/target.c b/migration/target.c | ||
76 | index XXXXXXX..XXXXXXX 100644 | ||
77 | --- a/migration/target.c | ||
78 | +++ b/migration/target.c | ||
79 | @@ -XXX,XX +XXX,XX @@ | ||
80 | #endif | 144 | #endif |
81 | |||
82 | #ifdef CONFIG_VFIO | ||
83 | -void populate_vfio_info(MigrationInfo *info) | ||
84 | +void migration_populate_vfio_info(MigrationInfo *info) | ||
85 | { | ||
86 | if (vfio_mig_active()) { | ||
87 | info->vfio = g_malloc0(sizeof(*info->vfio)); | ||
88 | @@ -XXX,XX +XXX,XX @@ void populate_vfio_info(MigrationInfo *info) | ||
89 | } | ||
90 | } | 145 | } |
91 | 146 | ||
92 | -void reset_vfio_bytes_transferred(void) | ||
93 | +void migration_reset_vfio_bytes_transferred(void) | ||
94 | { | ||
95 | vfio_reset_bytes_transferred(); | ||
96 | } | ||
97 | #else | ||
98 | -void populate_vfio_info(MigrationInfo *info) | ||
99 | +void migration_populate_vfio_info(MigrationInfo *info) | ||
100 | { | ||
101 | } | ||
102 | |||
103 | -void reset_vfio_bytes_transferred(void) | ||
104 | +void migration_reset_vfio_bytes_transferred(void) | ||
105 | { | ||
106 | } | ||
107 | #endif | ||
108 | -- | 147 | -- |
109 | 2.41.0 | 148 | 2.48.1 |
110 | 149 | ||
111 | 150 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> | |
2 | |||
3 | So far, IGD-specific quirks all require enabling legacy mode, which is | ||
4 | toggled by assigning IGD to 00:02.0. However, some quirks, like the BDSM | ||
5 | and GGC register quirks, should be applied to all supported IGD devices. | ||
6 | A new config option, x-igd-legacy-mode=[on|off|auto], is introduced to | ||
7 | control the legacy mode only quirks. The default value is "auto", which | ||
8 | keeps current behavior that enables legacy mode implicitly and continues | ||
9 | on error when all following conditions are met. | ||
10 | * Machine type is i440fx | ||
11 | * IGD device is at guest BDF 00:02.0 | ||
12 | |||
13 | If any one of the conditions above is not met, the default behavior is | ||
14 | equivalent to "off"; QEMU will fail immediately if any error occurs. | ||
15 | |||
16 | Users can also use "on" to force enabling legacy mode. It checks if all | ||
17 | the conditions above are met and sets up legacy mode. QEMU will also fail | ||
18 | immediately on error in this case. | ||
19 | |||
20 | Additionally, the hotplug check in legacy mode is removed as hotplugging | ||
21 | an IGD device is never supported, and it will be checked when enabling the | ||
22 | OpRegion quirk. | ||
23 | |||
24 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | ||
25 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
26 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
27 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
28 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-8-tomitamoeko@gmail.com | ||
29 | [ clg: - Changed warn_report() by info_report() in | ||
30 | vfio_probe_igd_config_quirk() as suggested by Alex W. | ||
31 | - Fixed spelling in vfio_probe_igd_config_quirk () ] | ||
32 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
33 | --- | ||
34 | hw/vfio/pci.h | 1 + | ||
35 | hw/vfio/igd.c | 127 +++++++++++++++++++++++++++++--------------------- | ||
36 | hw/vfio/pci.c | 2 + | ||
37 | 3 files changed, 77 insertions(+), 53 deletions(-) | ||
38 | |||
39 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/hw/vfio/pci.h | ||
42 | +++ b/hw/vfio/pci.h | ||
43 | @@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice { | ||
44 | uint32_t display_xres; | ||
45 | uint32_t display_yres; | ||
46 | int32_t bootindex; | ||
47 | + OnOffAuto igd_legacy_mode; | ||
48 | uint32_t igd_gms; | ||
49 | OffAutoPCIBAR msix_relo; | ||
50 | uint8_t nv_gpudirect_clique; | ||
51 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/hw/vfio/igd.c | ||
54 | +++ b/hw/vfio/igd.c | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | #include "qemu/error-report.h" | ||
57 | #include "qapi/error.h" | ||
58 | #include "qapi/qmp/qerror.h" | ||
59 | +#include "hw/boards.h" | ||
60 | #include "hw/hw.h" | ||
61 | #include "hw/nvram/fw_cfg.h" | ||
62 | #include "pci.h" | ||
63 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
64 | * bus address. | ||
65 | */ | ||
66 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || | ||
67 | - !vfio_is_vga(vdev) || nr != 0 || | ||
68 | - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), | ||
69 | - 0, PCI_DEVFN(0x2, 0))) { | ||
70 | + !vfio_is_vga(vdev) || nr != 0) { | ||
71 | return; | ||
72 | } | ||
73 | |||
74 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) | ||
75 | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); | ||
76 | } | ||
77 | |||
78 | -bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
79 | - Error **errp G_GNUC_UNUSED) | ||
80 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
81 | { | ||
82 | - g_autofree struct vfio_region_info *rom = NULL; | ||
83 | int ret, gen; | ||
84 | uint64_t gms_size; | ||
85 | uint64_t *bdsm_size; | ||
86 | uint32_t gmch; | ||
87 | + bool legacy_mode_enabled = false; | ||
88 | Error *err = NULL; | ||
89 | |||
90 | /* | ||
91 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
92 | * PCI bus address. | ||
93 | */ | ||
94 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || | ||
95 | - !vfio_is_vga(vdev) || | ||
96 | - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), | ||
97 | - 0, PCI_DEVFN(0x2, 0))) { | ||
98 | + !vfio_is_vga(vdev)) { | ||
99 | return true; | ||
100 | } | ||
101 | |||
102 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
103 | return true; | ||
104 | } | ||
105 | |||
106 | - /* | ||
107 | - * Most of what we're doing here is to enable the ROM to run, so if | ||
108 | - * there's no ROM, there's no point in setting up this quirk. | ||
109 | - * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support. | ||
110 | - */ | ||
111 | - ret = vfio_get_region_info(&vdev->vbasedev, | ||
112 | - VFIO_PCI_ROM_REGION_INDEX, &rom); | ||
113 | - if ((ret || !rom->size) && !vdev->pdev.romfile) { | ||
114 | - error_report("IGD device %s has no ROM, legacy mode disabled", | ||
115 | - vdev->vbasedev.name); | ||
116 | - return true; | ||
117 | - } | ||
118 | - | ||
119 | - /* | ||
120 | - * Ignore the hotplug corner case, mark the ROM failed, we can't | ||
121 | - * create the devices we need for legacy mode in the hotplug scenario. | ||
122 | - */ | ||
123 | - if (vdev->pdev.qdev.hotplugged) { | ||
124 | - error_report("IGD device %s hotplugged, ROM disabled, " | ||
125 | - "legacy mode disabled", vdev->vbasedev.name); | ||
126 | - vdev->rom_read_failed = true; | ||
127 | - return true; | ||
128 | - } | ||
129 | - | ||
130 | gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); | ||
131 | |||
132 | /* | ||
133 | - * If IGD VGA Disable is clear (expected) and VGA is not already enabled, | ||
134 | - * try to enable it. Probably shouldn't be using legacy mode without VGA, | ||
135 | - * but also no point in us enabling VGA if disabled in hardware. | ||
136 | + * For backward compatibility, enable legacy mode when | ||
137 | + * - Machine type is i440fx (pc_piix) | ||
138 | + * - IGD device is at guest BDF 00:02.0 | ||
139 | + * - Not manually disabled by x-igd-legacy-mode=off | ||
140 | */ | ||
141 | - if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { | ||
142 | - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); | ||
143 | - error_report("IGD device %s failed to enable VGA access, " | ||
144 | - "legacy mode disabled", vdev->vbasedev.name); | ||
145 | - return true; | ||
146 | - } | ||
147 | + if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && | ||
148 | + !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && | ||
149 | + (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), | ||
150 | + 0, PCI_DEVFN(0x2, 0)))) { | ||
151 | + /* | ||
152 | + * IGD legacy mode requires: | ||
153 | + * - VBIOS in ROM BAR or file | ||
154 | + * - VGA IO/MMIO ranges are claimed by IGD | ||
155 | + * - OpRegion | ||
156 | + * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host | ||
157 | + */ | ||
158 | + g_autofree struct vfio_region_info *rom = NULL; | ||
159 | + | ||
160 | + legacy_mode_enabled = true; | ||
161 | + info_report("IGD legacy mode enabled, " | ||
162 | + "use x-igd-legacy-mode=off to disable it if unwanted."); | ||
163 | + | ||
164 | + /* | ||
165 | + * Most of what we're doing here is to enable the ROM to run, so if | ||
166 | + * there's no ROM, there's no point in setting up this quirk. | ||
167 | + * NB. We only seem to get BIOS ROMs, so UEFI VM would need CSM support. | ||
168 | + */ | ||
169 | + ret = vfio_get_region_info(&vdev->vbasedev, | ||
170 | + VFIO_PCI_ROM_REGION_INDEX, &rom); | ||
171 | + if ((ret || !rom->size) && !vdev->pdev.romfile) { | ||
172 | + error_setg(&err, "Device has no ROM"); | ||
173 | + goto error; | ||
174 | + } | ||
175 | |||
176 | - /* Setup OpRegion access */ | ||
177 | - if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
178 | - error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
179 | - error_report_err(err); | ||
180 | - return true; | ||
181 | - } | ||
182 | + /* | ||
183 | + * If IGD VGA Disable is clear (expected) and VGA is not already | ||
184 | + * enabled, try to enable it. Probably shouldn't be using legacy mode | ||
185 | + * without VGA, but also no point in us enabling VGA if disabled in | ||
186 | + * hardware. | ||
187 | + */ | ||
188 | + if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { | ||
189 | + error_setg(&err, "Unable to enable VGA access"); | ||
190 | + goto error; | ||
191 | + } | ||
192 | |||
193 | - /* Setup LPC bridge / Host bridge PCI IDs */ | ||
194 | - if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
195 | - error_append_hint(&err, "IGD legacy mode disabled\n"); | ||
196 | - error_report_err(err); | ||
197 | - return true; | ||
198 | + /* Setup OpRegion access */ | ||
199 | + if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
200 | + goto error; | ||
201 | + } | ||
202 | + | ||
203 | + /* Setup LPC bridge / Host bridge PCI IDs */ | ||
204 | + if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
205 | + goto error; | ||
206 | + } | ||
207 | + } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { | ||
208 | + error_setg(&err, | ||
209 | + "Machine is not i440fx or assigned BDF is not 00:02.0"); | ||
210 | + goto error; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, | ||
215 | trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); | ||
216 | |||
217 | return true; | ||
218 | + | ||
219 | +error: | ||
220 | + /* | ||
221 | + * When legacy mode is implicitly enabled, continue on error, | ||
222 | + * to keep compatibility | ||
223 | + */ | ||
224 | + if (legacy_mode_enabled && (vdev->igd_legacy_mode == ON_OFF_AUTO_AUTO)) { | ||
225 | + error_report_err(err); | ||
226 | + error_report("IGD legacy mode disabled"); | ||
227 | + return true; | ||
228 | + } | ||
229 | + | ||
230 | + error_propagate(errp, err); | ||
231 | + return false; | ||
232 | } | ||
233 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | ||
234 | index XXXXXXX..XXXXXXX 100644 | ||
235 | --- a/hw/vfio/pci.c | ||
236 | +++ b/hw/vfio/pci.c | ||
237 | @@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = { | ||
238 | VFIO_FEATURE_ENABLE_REQ_BIT, true), | ||
239 | DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, | ||
240 | VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), | ||
241 | + DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, | ||
242 | + igd_legacy_mode, ON_OFF_AUTO_AUTO), | ||
243 | DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, | ||
244 | vbasedev.enable_migration, ON_OFF_AUTO_AUTO), | ||
245 | DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice, | ||
246 | -- | ||
247 | 2.48.1 | ||
248 | |||
249 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Add a new .save_prepare() handler to struct SaveVMHandlers. This handler | 3 | Both enable OpRegion option (x-igd-opregion) and legacy mode require |
4 | is called early, even before migration starts, and can be used by | 4 | setting up OpRegion copy for IGD devices. As the config quirk no longer |
5 | devices to perform early checks. | 5 | depends on legacy mode, we can now handle x-igd-opregion option there |
6 | instead of in vfio_realize. | ||
6 | 7 | ||
7 | Refactor migrate_init() to be able to return errors and call | 8 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
8 | .save_prepare() from there. | 9 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
9 | 10 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | |
10 | Suggested-by: Peter Xu <peterx@redhat.com> | 11 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> |
11 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 12 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-9-tomitamoeko@gmail.com |
12 | Reviewed-by: Peter Xu <peterx@redhat.com> | ||
13 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
15 | --- | 14 | --- |
16 | include/migration/register.h | 5 +++++ | 15 | hw/vfio/pci.h | 2 -- |
17 | migration/migration.h | 2 +- | 16 | hw/vfio/igd.c | 14 +++++++++----- |
18 | migration/savevm.h | 1 + | 17 | hw/vfio/pci.c | 9 --------- |
19 | migration/migration.c | 15 +++++++++++++-- | 18 | 3 files changed, 9 insertions(+), 16 deletions(-) |
20 | migration/savevm.c | 29 ++++++++++++++++++++++++++++- | ||
21 | 5 files changed, 48 insertions(+), 4 deletions(-) | ||
22 | 19 | ||
23 | diff --git a/include/migration/register.h b/include/migration/register.h | 20 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h |
24 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/include/migration/register.h | 22 | --- a/hw/vfio/pci.h |
26 | +++ b/include/migration/register.h | 23 | +++ b/hw/vfio/pci.h |
27 | @@ -XXX,XX +XXX,XX @@ typedef struct SaveVMHandlers { | 24 | @@ -XXX,XX +XXX,XX @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, |
28 | /* This runs inside the iothread lock. */ | 25 | |
29 | SaveStateHandler *save_state; | 26 | bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); |
30 | 27 | ||
31 | + /* | 28 | -bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp); |
32 | + * save_prepare is called early, even before migration starts, and can be | 29 | - |
33 | + * used to perform early checks. | 30 | void vfio_display_reset(VFIOPCIDevice *vdev); |
34 | + */ | 31 | bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); |
35 | + int (*save_prepare)(void *opaque, Error **errp); | 32 | void vfio_display_finalize(VFIOPCIDevice *vdev); |
36 | void (*save_cleanup)(void *opaque); | 33 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
37 | int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque); | ||
38 | int (*save_live_complete_precopy)(QEMUFile *f, void *opaque); | ||
39 | diff --git a/migration/migration.h b/migration/migration.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | 34 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/migration/migration.h | 35 | --- a/hw/vfio/igd.c |
42 | +++ b/migration/migration.h | 36 | +++ b/hw/vfio/igd.c |
43 | @@ -XXX,XX +XXX,XX @@ void migrate_fd_connect(MigrationState *s, Error *error_in); | 37 | @@ -XXX,XX +XXX,XX @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, |
44 | bool migration_is_setup_or_active(int state); | 38 | return true; |
45 | bool migration_is_running(int state); | ||
46 | |||
47 | -void migrate_init(MigrationState *s); | ||
48 | +int migrate_init(MigrationState *s, Error **errp); | ||
49 | bool migration_is_blocked(Error **errp); | ||
50 | /* True if outgoing migration has entered postcopy phase */ | ||
51 | bool migration_in_postcopy(void); | ||
52 | diff --git a/migration/savevm.h b/migration/savevm.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/migration/savevm.h | ||
55 | +++ b/migration/savevm.h | ||
56 | @@ -XXX,XX +XXX,XX @@ | ||
57 | |||
58 | bool qemu_savevm_state_blocked(Error **errp); | ||
59 | void qemu_savevm_non_migratable_list(strList **reasons); | ||
60 | +int qemu_savevm_state_prepare(Error **errp); | ||
61 | void qemu_savevm_state_setup(QEMUFile *f); | ||
62 | bool qemu_savevm_state_guest_unplug_pending(void); | ||
63 | int qemu_savevm_state_resume_prepare(MigrationState *s); | ||
64 | diff --git a/migration/migration.c b/migration/migration.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/migration/migration.c | ||
67 | +++ b/migration/migration.c | ||
68 | @@ -XXX,XX +XXX,XX @@ bool migration_is_active(MigrationState *s) | ||
69 | s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); | ||
70 | } | 39 | } |
71 | 40 | ||
72 | -void migrate_init(MigrationState *s) | 41 | -bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) |
73 | +int migrate_init(MigrationState *s, Error **errp) | 42 | +static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) |
74 | { | 43 | { |
75 | + int ret; | 44 | g_autofree struct vfio_region_info *opregion = NULL; |
76 | + | 45 | int ret; |
77 | + ret = qemu_savevm_state_prepare(errp); | 46 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
78 | + if (ret) { | 47 | goto error; |
79 | + return ret; | 48 | } |
49 | |||
50 | - /* Setup OpRegion access */ | ||
51 | - if (!vfio_pci_igd_setup_opregion(vdev, &err)) { | ||
52 | - goto error; | ||
53 | - } | ||
54 | + /* Enable OpRegion quirk */ | ||
55 | + vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION; | ||
56 | |||
57 | /* Setup LPC bridge / Host bridge PCI IDs */ | ||
58 | if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
59 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
60 | goto error; | ||
61 | } | ||
62 | |||
63 | + /* Setup OpRegion access */ | ||
64 | + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && | ||
65 | + !vfio_pci_igd_setup_opregion(vdev, errp)) { | ||
66 | + goto error; | ||
80 | + } | 67 | + } |
81 | + | 68 | + |
82 | /* | 69 | /* |
83 | * Reinitialise all migration state, except | 70 | * Allow user to override dsm size using x-igd-gms option, in multiples of |
84 | * parameters/capabilities that the user set, and | 71 | * 32MiB. This option should only be used when the desired size cannot be |
85 | @@ -XXX,XX +XXX,XX @@ void migrate_init(MigrationState *s) | 72 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
86 | memset(&mig_stats, 0, sizeof(mig_stats)); | 73 | index XXXXXXX..XXXXXXX 100644 |
87 | memset(&compression_counters, 0, sizeof(compression_counters)); | 74 | --- a/hw/vfio/pci.c |
88 | migration_reset_vfio_bytes_transferred(); | 75 | +++ b/hw/vfio/pci.c |
89 | + | 76 | @@ -XXX,XX +XXX,XX @@ static void vfio_realize(PCIDevice *pdev, Error **errp) |
90 | + return 0; | 77 | vfio_bar_quirk_setup(vdev, i); |
91 | } | ||
92 | |||
93 | int migrate_add_blocker_internal(Error *reason, Error **errp) | ||
94 | @@ -XXX,XX +XXX,XX @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, | ||
95 | migrate_set_block_incremental(true); | ||
96 | } | 78 | } |
97 | 79 | ||
98 | - migrate_init(s); | 80 | -#ifdef CONFIG_VFIO_IGD |
99 | + if (migrate_init(s, errp)) { | 81 | - if (!vdev->igd_opregion && |
100 | + return false; | 82 | - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { |
101 | + } | 83 | - if (!vfio_pci_igd_setup_opregion(vdev, errp)) { |
102 | 84 | - goto out_unset_idev; | |
103 | return true; | 85 | - } |
104 | } | 86 | - } |
105 | diff --git a/migration/savevm.c b/migration/savevm.c | 87 | -#endif |
106 | index XXXXXXX..XXXXXXX 100644 | 88 | - |
107 | --- a/migration/savevm.c | 89 | /* QEMU emulates all of MSI & MSIX */ |
108 | +++ b/migration/savevm.c | 90 | if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { |
109 | @@ -XXX,XX +XXX,XX @@ bool qemu_savevm_state_guest_unplug_pending(void) | 91 | memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, |
110 | return false; | ||
111 | } | ||
112 | |||
113 | +int qemu_savevm_state_prepare(Error **errp) | ||
114 | +{ | ||
115 | + SaveStateEntry *se; | ||
116 | + int ret; | ||
117 | + | ||
118 | + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { | ||
119 | + if (!se->ops || !se->ops->save_prepare) { | ||
120 | + continue; | ||
121 | + } | ||
122 | + if (se->ops->is_active) { | ||
123 | + if (!se->ops->is_active(se->opaque)) { | ||
124 | + continue; | ||
125 | + } | ||
126 | + } | ||
127 | + | ||
128 | + ret = se->ops->save_prepare(se->opaque, errp); | ||
129 | + if (ret < 0) { | ||
130 | + return ret; | ||
131 | + } | ||
132 | + } | ||
133 | + | ||
134 | + return 0; | ||
135 | +} | ||
136 | + | ||
137 | void qemu_savevm_state_setup(QEMUFile *f) | ||
138 | { | ||
139 | MigrationState *ms = migrate_get_current(); | ||
140 | @@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) | ||
141 | return -EINVAL; | ||
142 | } | ||
143 | |||
144 | - migrate_init(ms); | ||
145 | + ret = migrate_init(ms, errp); | ||
146 | + if (ret) { | ||
147 | + return ret; | ||
148 | + } | ||
149 | ms->to_dst_file = f; | ||
150 | |||
151 | qemu_mutex_unlock_iothread(); | ||
152 | -- | 92 | -- |
153 | 2.41.0 | 93 | 2.48.1 |
154 | 94 | ||
155 | 95 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Tomita Moeko <tomitamoeko@gmail.com> | ||
1 | 2 | ||
3 | The LPC bridge/Host bridge IDs quirk is also not dependent on legacy | ||
4 | mode. Recent Windows drivers no longer depend on these IDs, nor does the | ||
5 | Linux i915 driver, while UEFI GOP still seems to need them. Make it an | ||
6 | option so that users can enable or disable it as needed. | ||
7 | |||
8 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> | ||
9 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
10 | Tested-by: Alex Williamson <alex.williamson@redhat.com> | ||
11 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | ||
12 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-10-tomitamoeko@gmail.com | ||
13 | [ clg: - Fixed spelling in vfio_probe_igd_config_quirk() ] | ||
14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
15 | --- | ||
16 | hw/vfio/pci.h | 3 +++ | ||
17 | hw/vfio/igd.c | 14 ++++++++------ | ||
18 | hw/vfio/pci.c | 2 ++ | ||
19 | 3 files changed, 13 insertions(+), 6 deletions(-) | ||
20 | |||
21 | diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/hw/vfio/pci.h | ||
24 | +++ b/hw/vfio/pci.h | ||
25 | @@ -XXX,XX +XXX,XX @@ struct VFIOPCIDevice { | ||
26 | #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2 | ||
27 | #define VFIO_FEATURE_ENABLE_IGD_OPREGION \ | ||
28 | (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT) | ||
29 | +#define VFIO_FEATURE_ENABLE_IGD_LPC_BIT 3 | ||
30 | +#define VFIO_FEATURE_ENABLE_IGD_LPC \ | ||
31 | + (1 << VFIO_FEATURE_ENABLE_IGD_LPC_BIT) | ||
32 | OnOffAuto display; | ||
33 | uint32_t display_xres; | ||
34 | uint32_t display_yres; | ||
35 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/hw/vfio/igd.c | ||
38 | +++ b/hw/vfio/igd.c | ||
39 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
40 | goto error; | ||
41 | } | ||
42 | |||
43 | - /* Enable OpRegion quirk */ | ||
44 | + /* Enable OpRegion and LPC bridge quirk */ | ||
45 | vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION; | ||
46 | - | ||
47 | - /* Setup LPC bridge / Host bridge PCI IDs */ | ||
48 | - if (!vfio_pci_igd_setup_lpc_bridge(vdev, &err)) { | ||
49 | - goto error; | ||
50 | - } | ||
51 | + vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC; | ||
52 | } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { | ||
53 | error_setg(&err, | ||
54 | "Machine is not i440fx or assigned BDF is not 00:02.0"); | ||
55 | @@ -XXX,XX +XXX,XX @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
56 | goto error; | ||
57 | } | ||
58 | |||
59 | + /* Setup LPC bridge / Host bridge PCI IDs */ | ||
60 | + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) && | ||
61 | + !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) { | ||
62 | + goto error; | ||
63 | + } | ||
64 | + | ||
65 | /* | ||
66 | * Allow user to override dsm size using x-igd-gms option, in multiples of | ||
67 | * 32MiB. This option should only be used when the desired size cannot be | ||
68 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/hw/vfio/pci.c | ||
71 | +++ b/hw/vfio/pci.c | ||
72 | @@ -XXX,XX +XXX,XX @@ static const Property vfio_pci_dev_properties[] = { | ||
73 | VFIO_FEATURE_ENABLE_REQ_BIT, true), | ||
74 | DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, | ||
75 | VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), | ||
76 | + DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features, | ||
77 | + VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false), | ||
78 | DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, | ||
79 | igd_legacy_mode, ON_OFF_AUTO_AUTO), | ||
80 | DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, | ||
81 | -- | ||
82 | 2.48.1 | ||
83 | |||
84 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Tomita Moeko <tomitamoeko@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Add prepare callback to struct VMChangeStateEntry. | 3 | The KVMGT/GVT-g vGPU also exposes OpRegion. But unlike IGD passthrough, |
4 | it only needs the OpRegion quirk. A previous change that moved x-igd-opregion |
5 | handling to the config quirk broke KVMGT functionality, as it brought extra |
6 | checks and applied other quirks. Here we check if the device is mdev |
7 | (KVMGT) or not (passthrough), and then apply the corresponding quirks. |
4 | 8 | ||
5 | The prepare callback is optional and can be set by the new function | 9 | As before, users must manually specify x-igd-opregion=on to enable it |
6 | qemu_add_vm_change_state_handler_prio_full() that allows setting this | 10 | on KVMGT devices. In the future, we may check the VID/DID and enable |
7 | callback in addition to the main callback. | 11 | OpRegion automatically. |
8 | 12 | ||
9 | The prepare callbacks and main callbacks are called in two separate | 13 | Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> |
10 | phases: First all prepare callbacks are called and only then all main | 14 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> |
11 | callbacks are called. | 15 | Tested-by: Alex Williamson <alex.williamson@redhat.com> |
12 | 16 | Reviewed-by: Corvin Köhne <c.koehne@beckhoff.com> | |
13 | The purpose of the new prepare callback is to allow all devices to run a | 17 | Link: https://lore.kernel.org/qemu-devel/20250306180131.32970-11-tomitamoeko@gmail.com |
14 | preliminary task before calling the devices' main callbacks. | ||
15 | |||
16 | This will facilitate adding P2P support for VFIO migration where all | ||
17 | VFIO devices need to be put in an intermediate P2P quiescent state | ||
18 | before being stopped or started by the main callback. | ||
19 | |||
20 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | ||
21 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
22 | Tested-by: YangHang Liu <yanghliu@redhat.com> | ||
23 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 18 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
24 | --- | 19 | --- |
25 | include/sysemu/runstate.h | 4 ++++ | 20 | hw/vfio/igd.c | 27 ++++++++++++++++++++++++++- |
26 | softmmu/runstate.c | 40 +++++++++++++++++++++++++++++++++++++++ | 21 | 1 file changed, 26 insertions(+), 1 deletion(-) |
27 | 2 files changed, 44 insertions(+) | ||
28 | 22 | ||
29 | diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h | 23 | diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c |
30 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/include/sysemu/runstate.h | 25 | --- a/hw/vfio/igd.c |
32 | +++ b/include/sysemu/runstate.h | 26 | +++ b/hw/vfio/igd.c |
33 | @@ -XXX,XX +XXX,XX @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, | 27 | @@ -XXX,XX +XXX,XX @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) |
34 | void *opaque); | 28 | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); |
35 | VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( | ||
36 | VMChangeStateHandler *cb, void *opaque, int priority); | ||
37 | +VMChangeStateEntry * | ||
38 | +qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, | ||
39 | + VMChangeStateHandler *prepare_cb, | ||
40 | + void *opaque, int priority); | ||
41 | VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, | ||
42 | VMChangeStateHandler *cb, | ||
43 | void *opaque); | ||
44 | diff --git a/softmmu/runstate.c b/softmmu/runstate.c | ||
45 | index XXXXXXX..XXXXXXX 100644 | ||
46 | --- a/softmmu/runstate.c | ||
47 | +++ b/softmmu/runstate.c | ||
48 | @@ -XXX,XX +XXX,XX @@ void qemu_system_vmstop_request(RunState state) | ||
49 | } | 29 | } |
50 | struct VMChangeStateEntry { | 30 | |
51 | VMChangeStateHandler *cb; | 31 | -bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
52 | + VMChangeStateHandler *prepare_cb; | 32 | +static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
53 | void *opaque; | 33 | { |
54 | QTAILQ_ENTRY(VMChangeStateEntry) entries; | 34 | int ret, gen; |
55 | int priority; | 35 | uint64_t gms_size; |
56 | @@ -XXX,XX +XXX,XX @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head = | 36 | @@ -XXX,XX +XXX,XX @@ error: |
57 | */ | 37 | error_propagate(errp, err); |
58 | VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( | 38 | return false; |
59 | VMChangeStateHandler *cb, void *opaque, int priority) | 39 | } |
40 | + | ||
41 | +/* | ||
42 | + * KVMGT/GVT-g vGPU exposes an emulated OpRegion. So far, users have to specify | ||
43 | + * x-igd-opregion=on to enable the access. | ||
44 | + * TODO: Check VID/DID and enable opregion access automatically | ||
45 | + */ | ||
46 | +static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) | ||
60 | +{ | 47 | +{ |
61 | + return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque, | 48 | + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && |
62 | + priority); | 49 | + !vfio_pci_igd_setup_opregion(vdev, errp)) { |
50 | + return false; | ||
51 | + } | ||
52 | + | ||
53 | + return true; | ||
63 | +} | 54 | +} |
64 | + | 55 | + |
65 | +/** | 56 | +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) |
66 | + * qemu_add_vm_change_state_handler_prio_full: | 57 | +{ |
67 | + * @cb: the main callback to invoke | 58 | + /* KVMGT/GVT-g vGPU is exposed as mdev */ |
68 | + * @prepare_cb: a callback to invoke before the main callback | 59 | + if (vdev->vbasedev.mdev) { |
69 | + * @opaque: user data passed to the callbacks | 60 | + return vfio_pci_kvmgt_config_quirk(vdev, errp); |
70 | + * @priority: low priorities execute first when the vm runs and the reverse is | 61 | + } |
71 | + * true when the vm stops | ||
72 | + * | ||
73 | + * Register a main callback function and an optional prepare callback function | ||
74 | + * that are invoked when the vm starts or stops running. The main callback and | ||
75 | + * the prepare callback are called in two separate phases: First all prepare | ||
76 | + * callbacks are called and only then all main callbacks are called. As its | ||
77 | + * name suggests, the prepare callback can be used to do some preparatory work | ||
78 | + * before invoking the main callback. | ||
79 | + * | ||
80 | + * Returns: an entry to be freed using qemu_del_vm_change_state_handler() | ||
81 | + */ | ||
82 | +VMChangeStateEntry * | ||
83 | +qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, | ||
84 | + VMChangeStateHandler *prepare_cb, | ||
85 | + void *opaque, int priority) | ||
86 | { | ||
87 | VMChangeStateEntry *e; | ||
88 | VMChangeStateEntry *other; | ||
89 | |||
90 | e = g_malloc0(sizeof(*e)); | ||
91 | e->cb = cb; | ||
92 | + e->prepare_cb = prepare_cb; | ||
93 | e->opaque = opaque; | ||
94 | e->priority = priority; | ||
95 | |||
96 | @@ -XXX,XX +XXX,XX @@ void vm_state_notify(bool running, RunState state) | ||
97 | trace_vm_state_notify(running, state, RunState_str(state)); | ||
98 | |||
99 | if (running) { | ||
100 | + QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { | ||
101 | + if (e->prepare_cb) { | ||
102 | + e->prepare_cb(e->opaque, running, state); | ||
103 | + } | ||
104 | + } | ||
105 | + | 62 | + |
106 | QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { | 63 | + return vfio_pci_igd_config_quirk(vdev, errp); |
107 | e->cb(e->opaque, running, state); | 64 | +} |
108 | } | ||
109 | } else { | ||
110 | + QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { | ||
111 | + if (e->prepare_cb) { | ||
112 | + e->prepare_cb(e->opaque, running, state); | ||
113 | + } | ||
114 | + } | ||
115 | + | ||
116 | QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { | ||
117 | e->cb(e->opaque, running, state); | ||
118 | } | ||
119 | -- | 65 | -- |
120 | 2.41.0 | 66 | 2.48.1 |
121 | 67 | ||
122 | 68 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | Initialization of mig_stats, compression_counters and VFIO bytes | 3 | Wire data commonly use BE byte order (including in the existing migration |
4 | transferred is hard-coded in migration code path and snapshot code path. | 4 | protocol), use it also for VFIO device state packets. |
5 | 5 | ||
6 | Make the code cleaner by initializing them in migrate_init(). | 6 | This will allow VFIO multifd device state transfer between hosts with |
7 | different endianness. | ||
8 | Although currently there is no such use case, it's good to have it now | ||
9 | for completeness. | ||
7 | 10 | ||
8 | Suggested-by: Cédric Le Goater <clg@redhat.com> | 11 | Reviewed-by: Avihai Horon <avihaih@nvidia.com> |
9 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 12 | Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> |
10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 13 | Link: https://lore.kernel.org/qemu-devel/dcfc04cc1a50655650dbac8398e2742ada84ee39.1741611079.git.maciej.szmigiero@oracle.com |
11 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
12 | --- | 15 | --- |
13 | migration/migration.c | 14 +++++++------- | 16 | hw/vfio/migration-multifd.c | 15 ++++++++++----- |
14 | migration/savevm.c | 3 --- | 17 | 1 file changed, 10 insertions(+), 5 deletions(-) |
15 | 2 files changed, 7 insertions(+), 10 deletions(-) | ||
16 | 18 | ||
17 | diff --git a/migration/migration.c b/migration/migration.c | 19 | diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c |
18 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/migration/migration.c | 21 | --- a/hw/vfio/migration-multifd.c |
20 | +++ b/migration/migration.c | 22 | +++ b/hw/vfio/migration-multifd.c |
21 | @@ -XXX,XX +XXX,XX @@ void migrate_init(MigrationState *s) | 23 | @@ -XXX,XX +XXX,XX @@ |
22 | s->iteration_initial_bytes = 0; | 24 | #include "hw/vfio/vfio-common.h" |
23 | s->threshold_size = 0; | 25 | #include "migration/misc.h" |
24 | s->switchover_acked = false; | 26 | #include "qapi/error.h" |
25 | + /* | 27 | +#include "qemu/bswap.h" |
26 | + * set mig_stats compression_counters memory to zero for a | 28 | #include "qemu/error-report.h" |
27 | + * new migration | 29 | #include "qemu/lockable.h" |
28 | + */ | 30 | #include "qemu/main-loop.h" |
29 | + memset(&mig_stats, 0, sizeof(mig_stats)); | 31 | @@ -XXX,XX +XXX,XX @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, |
30 | + memset(&compression_counters, 0, sizeof(compression_counters)); | 32 | return false; |
31 | + migration_reset_vfio_bytes_transferred(); | ||
32 | } | ||
33 | |||
34 | int migrate_add_blocker_internal(Error *reason, Error **errp) | ||
35 | @@ -XXX,XX +XXX,XX @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, | ||
36 | } | 33 | } |
37 | 34 | ||
38 | migrate_init(s); | 35 | + packet->version = be32_to_cpu(packet->version); |
39 | - /* | 36 | if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { |
40 | - * set mig_stats compression_counters memory to zero for a | 37 | error_setg(errp, "%s: packet has unknown version %" PRIu32, |
41 | - * new migration | 38 | vbasedev->name, packet->version); |
42 | - */ | 39 | return false; |
43 | - memset(&mig_stats, 0, sizeof(mig_stats)); | ||
44 | - memset(&compression_counters, 0, sizeof(compression_counters)); | ||
45 | - migration_reset_vfio_bytes_transferred(); | ||
46 | |||
47 | return true; | ||
48 | } | ||
49 | diff --git a/migration/savevm.c b/migration/savevm.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/migration/savevm.c | ||
52 | +++ b/migration/savevm.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) | ||
54 | } | 40 | } |
55 | 41 | ||
56 | migrate_init(ms); | 42 | + packet->idx = be32_to_cpu(packet->idx); |
57 | - memset(&mig_stats, 0, sizeof(mig_stats)); | 43 | + packet->flags = be32_to_cpu(packet->flags); |
58 | - memset(&compression_counters, 0, sizeof(compression_counters)); | 44 | + |
59 | - migration_reset_vfio_bytes_transferred(); | 45 | if (packet->idx == UINT32_MAX) { |
60 | ms->to_dst_file = f; | 46 | error_setg(errp, "%s: packet index is invalid", vbasedev->name); |
61 | 47 | return false; | |
62 | qemu_mutex_unlock_iothread(); | 48 | @@ -XXX,XX +XXX,XX @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, |
49 | |||
50 | packet_len = sizeof(*packet) + bioc->usage; | ||
51 | packet = g_malloc0(packet_len); | ||
52 | - packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; | ||
53 | - packet->idx = idx; | ||
54 | - packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; | ||
55 | + packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT); | ||
56 | + packet->idx = cpu_to_be32(idx); | ||
57 | + packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE); | ||
58 | memcpy(&packet->data, bioc->data, bioc->usage); | ||
59 | |||
60 | if (!multifd_queue_device_state(idstr, instance_id, | ||
61 | @@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, | ||
62 | } | ||
63 | |||
64 | packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); | ||
65 | - packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; | ||
66 | + packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT); | ||
67 | |||
68 | for (idx = 0; ; idx++) { | ||
69 | ssize_t data_size; | ||
70 | @@ -XXX,XX +XXX,XX @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, | ||
71 | break; | ||
72 | } | ||
73 | |||
74 | - packet->idx = idx; | ||
75 | + packet->idx = cpu_to_be32(idx); | ||
76 | packet_size = sizeof(*packet) + data_size; | ||
77 | |||
78 | if (!multifd_queue_device_state(d->idstr, d->instance_id, | ||
63 | -- | 79 | -- |
64 | 2.41.0 | 80 | 2.48.1 |
65 | 81 | ||
66 | 82 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | Both qemu_minrampagesize() and qemu_maxrampagesize() are | ||
4 | related to host memory backends, having the following call | ||
5 | stack: | ||
6 | |||
7 | qemu_minrampagesize() | ||
8 | -> find_min_backend_pagesize() | ||
9 | -> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND) | ||
10 | |||
11 | qemu_maxrampagesize() | ||
12 | -> find_max_backend_pagesize() | ||
13 | -> object_dynamic_cast(obj, TYPE_MEMORY_BACKEND) | ||
14 | |||
15 | Having TYPE_MEMORY_BACKEND defined in "system/hostmem.h": | ||
16 | |||
17 | include/system/hostmem.h:23:#define TYPE_MEMORY_BACKEND "memory-backend" | ||
18 | |||
19 | Move their prototype declaration to "system/hostmem.h". | ||
20 | |||
21 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
22 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
23 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
24 | Message-Id: <20250308230917.18907-7-philmd@linaro.org> | ||
25 | Acked-by: David Hildenbrand <david@redhat.com> | ||
26 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-2-philmd@linaro.org | ||
27 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
28 | --- | ||
29 | include/exec/ram_addr.h | 3 --- | ||
30 | include/system/hostmem.h | 3 +++ | ||
31 | hw/ppc/spapr_caps.c | 1 + | ||
32 | hw/s390x/s390-virtio-ccw.c | 1 + | ||
33 | hw/vfio/spapr.c | 1 + | ||
34 | 5 files changed, 6 insertions(+), 3 deletions(-) | ||
35 | |||
36 | diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/include/exec/ram_addr.h | ||
39 | +++ b/include/exec/ram_addr.h | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, | ||
41 | |||
42 | bool ramblock_is_pmem(RAMBlock *rb); | ||
43 | |||
44 | -long qemu_minrampagesize(void); | ||
45 | -long qemu_maxrampagesize(void); | ||
46 | - | ||
47 | /** | ||
48 | * qemu_ram_alloc_from_file, | ||
49 | * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing | ||
50 | diff --git a/include/system/hostmem.h b/include/system/hostmem.h | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/include/system/hostmem.h | ||
53 | +++ b/include/system/hostmem.h | ||
54 | @@ -XXX,XX +XXX,XX @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend); | ||
55 | size_t host_memory_backend_pagesize(HostMemoryBackend *memdev); | ||
56 | char *host_memory_backend_get_name(HostMemoryBackend *backend); | ||
57 | |||
58 | +long qemu_minrampagesize(void); | ||
59 | +long qemu_maxrampagesize(void); | ||
60 | + | ||
61 | #endif | ||
62 | diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/hw/ppc/spapr_caps.c | ||
65 | +++ b/hw/ppc/spapr_caps.c | ||
66 | @@ -XXX,XX +XXX,XX @@ | ||
67 | #include "kvm_ppc.h" | ||
68 | #include "migration/vmstate.h" | ||
69 | #include "system/tcg.h" | ||
70 | +#include "system/hostmem.h" | ||
71 | |||
72 | #include "hw/ppc/spapr.h" | ||
73 | |||
74 | diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/hw/s390x/s390-virtio-ccw.c | ||
77 | +++ b/hw/s390x/s390-virtio-ccw.c | ||
78 | @@ -XXX,XX +XXX,XX @@ | ||
79 | #include "hw/s390x/tod.h" | ||
80 | #include "system/system.h" | ||
81 | #include "system/cpus.h" | ||
82 | +#include "system/hostmem.h" | ||
83 | #include "target/s390x/kvm/pv.h" | ||
84 | #include "migration/blocker.h" | ||
85 | #include "qapi/visitor.h" | ||
86 | diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/hw/vfio/spapr.c | ||
89 | +++ b/hw/vfio/spapr.c | ||
90 | @@ -XXX,XX +XXX,XX @@ | ||
91 | #include <linux/kvm.h> | ||
92 | #endif | ||
93 | #include "system/kvm.h" | ||
94 | +#include "system/hostmem.h" | ||
95 | #include "exec/address-spaces.h" | ||
96 | |||
97 | #include "hw/vfio/vfio-common.h" | ||
98 | -- | ||
99 | 2.48.1 | ||
100 | |||
101 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | <linux/kvm.h> is already included by "system/kvm.h" in the next line. | ||
4 | |||
5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
9 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
10 | Message-Id: <20250307180337.14811-3-philmd@linaro.org> | ||
11 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-3-philmd@linaro.org | ||
12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
13 | --- | ||
14 | hw/vfio/spapr.c | 3 --- | ||
15 | 1 file changed, 3 deletions(-) | ||
16 | |||
17 | diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/hw/vfio/spapr.c | ||
20 | +++ b/hw/vfio/spapr.c | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | #include "qemu/osdep.h" | ||
23 | #include <sys/ioctl.h> | ||
24 | #include <linux/vfio.h> | ||
25 | -#ifdef CONFIG_KVM | ||
26 | -#include <linux/kvm.h> | ||
27 | -#endif | ||
28 | #include "system/kvm.h" | ||
29 | #include "system/hostmem.h" | ||
30 | #include "exec/address-spaces.h" | ||
31 | -- | ||
32 | 2.48.1 | ||
33 | |||
34 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | If a device with enable-migration=on is added and it causes a migration | 3 | Always include necessary headers explicitly, to avoid build failures |
4 | blocker, adding the device should fail with a proper error. | 4 | when refactoring unrelated ones: |
5 | 5 | ||
6 | This is not the case with multiple device migration blocker when the | 6 | hw/vfio/common.c:1176:45: error: implicit declaration of function ‘tcg_enabled’; |
7 | blocker already exists. If the blocker already exists and a device with | 7 | 1176 | tcg_enabled() ? DIRTY_CLIENTS_ALL : |
8 | enable-migration=on is added which causes a migration blocker, adding | 8 | | ^~~~~~~~~~~ |
9 | the device will succeed. | ||
10 | 9 | ||
11 | Fix it by failing adding the device in such case. | 10 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
12 | 11 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | |
13 | Fixes: 8bbcb64a71d8 ("vfio/migration: Make VFIO migration non-experimental") | 12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
14 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | ||
15 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 13 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
14 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
15 | Message-Id: <20250307180337.14811-2-philmd@linaro.org> | ||
16 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-4-philmd@linaro.org | ||
16 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 17 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
17 | --- | 18 | --- |
18 | hw/vfio/common.c | 7 +++++-- | 19 | hw/vfio/common.c | 1 + |
19 | 1 file changed, 5 insertions(+), 2 deletions(-) | 20 | 1 file changed, 1 insertion(+) |
20 | 21 | ||
21 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c | 22 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c |
22 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/hw/vfio/common.c | 24 | --- a/hw/vfio/common.c |
24 | +++ b/hw/vfio/common.c | 25 | +++ b/hw/vfio/common.c |
25 | @@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) | 26 | @@ -XXX,XX +XXX,XX @@ |
26 | { | 27 | #include "migration/misc.h" |
27 | int ret; | 28 | #include "migration/blocker.h" |
28 | 29 | #include "migration/qemu-file.h" | |
29 | - if (multiple_devices_migration_blocker || | 30 | +#include "system/tcg.h" |
30 | - vfio_multiple_devices_migration_is_supported()) { | 31 | #include "system/tpm.h" |
31 | + if (vfio_multiple_devices_migration_is_supported()) { | 32 | |
32 | return 0; | 33 | VFIODeviceList vfio_device_list = |
33 | } | ||
34 | |||
35 | @@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) | ||
36 | return -EINVAL; | ||
37 | } | ||
38 | |||
39 | + if (multiple_devices_migration_blocker) { | ||
40 | + return 0; | ||
41 | + } | ||
42 | + | ||
43 | error_setg(&multiple_devices_migration_blocker, | ||
44 | "Multiple VFIO devices migration is supported only if all of " | ||
45 | "them support P2P migration"); | ||
46 | -- | 34 | -- |
47 | 2.41.0 | 35 | 2.48.1 |
48 | 36 | ||
49 | 37 | diff view generated by jsdifflib |
1 | From: Joao Martins <joao.m.martins@oracle.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | QEMU computes the DMA logging ranges for two predefined ranges: 32-bit | 3 | Prefer runtime helpers to get target page size. |
4 | and 64-bit. In the OVMF case, when the dynamic MMIO window is enabled, | ||
5 | QEMU includes in the 64-bit range the RAM regions at the lower part | ||
6 | and vfio-pci device RAM regions which are at the top of the address | ||
7 | space. This range contains a large gap and the size can be bigger than | ||
8 | the dirty tracking HW limits of some devices (MLX5 has a 2^42 limit). | ||
9 | 4 | ||
10 | To avoid such large ranges, introduce a new PCI range covering the | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
11 | vfio-pci device RAM regions, this only if the addresses are above 4GB | 6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
12 | to avoid breaking potential SeaBIOS guests. | 7 | Message-Id: <20250305153929.43687-3-philmd@linaro.org> |
13 | 8 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-5-philmd@linaro.org | |
14 | [ clg: - wrote commit log | ||
15 | - fixed overlapping 32-bit and PCI ranges when using SeaBIOS ] | ||
16 | |||
17 | Signed-off-by: Joao Martins <joao.m.martins@oracle.com> | ||
18 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
19 | Fixes: 5255bbf4ec16 ("vfio/common: Add device dirty page tracking start/stop") | ||
20 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 9 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
21 | --- | 10 | --- |
22 | hw/vfio/common.c | 71 +++++++++++++++++++++++++++++++++++++------- | 11 | hw/vfio/common.c | 8 +++++--- |
23 | hw/vfio/trace-events | 2 +- | 12 | 1 file changed, 5 insertions(+), 3 deletions(-) |
24 | 2 files changed, 61 insertions(+), 12 deletions(-) | ||
25 | 13 | ||
26 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c | 14 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c |
27 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/hw/vfio/common.c | 16 | --- a/hw/vfio/common.c |
29 | +++ b/hw/vfio/common.c | 17 | +++ b/hw/vfio/common.c |
30 | @@ -XXX,XX +XXX,XX @@ | 18 | @@ -XXX,XX +XXX,XX @@ |
31 | |||
32 | #include "hw/vfio/vfio-common.h" | ||
33 | #include "hw/vfio/vfio.h" | ||
34 | +#include "hw/vfio/pci.h" | ||
35 | #include "exec/address-spaces.h" | 19 | #include "exec/address-spaces.h" |
36 | #include "exec/memory.h" | 20 | #include "exec/memory.h" |
37 | #include "exec/ram_addr.h" | 21 | #include "exec/ram_addr.h" |
38 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIODirtyRanges { | 22 | +#include "exec/target_page.h" |
39 | hwaddr max32; | 23 | #include "hw/hw.h" |
40 | hwaddr min64; | 24 | #include "qemu/error-report.h" |
41 | hwaddr max64; | 25 | #include "qemu/main-loop.h" |
42 | + hwaddr minpci64; | 26 | @@ -XXX,XX +XXX,XX @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, |
43 | + hwaddr maxpci64; | 27 | MemoryRegionSection *section) |
44 | } VFIODirtyRanges; | ||
45 | |||
46 | typedef struct VFIODirtyRangesListener { | ||
47 | @@ -XXX,XX +XXX,XX @@ typedef struct VFIODirtyRangesListener { | ||
48 | MemoryListener listener; | ||
49 | } VFIODirtyRangesListener; | ||
50 | |||
51 | +static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, | ||
52 | + VFIOContainer *container) | ||
53 | +{ | ||
54 | + VFIOPCIDevice *pcidev; | ||
55 | + VFIODevice *vbasedev; | ||
56 | + VFIOGroup *group; | ||
57 | + Object *owner; | ||
58 | + | ||
59 | + owner = memory_region_owner(section->mr); | ||
60 | + | ||
61 | + QLIST_FOREACH(group, &container->group_list, container_next) { | ||
62 | + QLIST_FOREACH(vbasedev, &group->device_list, next) { | ||
63 | + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { | ||
64 | + continue; | ||
65 | + } | ||
66 | + pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev); | ||
67 | + if (OBJECT(pcidev) == owner) { | ||
68 | + return true; | ||
69 | + } | ||
70 | + } | ||
71 | + } | ||
72 | + | ||
73 | + return false; | ||
74 | +} | ||
75 | + | ||
76 | static void vfio_dirty_tracking_update(MemoryListener *listener, | ||
77 | MemoryRegionSection *section) | ||
78 | { | 28 | { |
79 | @@ -XXX,XX +XXX,XX @@ static void vfio_dirty_tracking_update(MemoryListener *listener, | 29 | RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); |
80 | } | 30 | + int target_page_size = qemu_target_page_size(); |
81 | 31 | VFIORamDiscardListener *vrdl; | |
82 | /* | 32 | |
83 | - * The address space passed to the dirty tracker is reduced to two ranges: | 33 | /* Ignore some corner cases not relevant in practice. */ |
84 | - * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges. | 34 | - g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE)); |
85 | + * The address space passed to the dirty tracker is reduced to three ranges: | 35 | + g_assert(QEMU_IS_ALIGNED(section->offset_within_region, target_page_size)); |
86 | + * one for 32-bit DMA ranges, one for 64-bit DMA ranges and one for the | 36 | g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space, |
87 | + * PCI 64-bit hole. | 37 | - TARGET_PAGE_SIZE)); |
88 | + * | 38 | - g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); |
89 | * The underlying reports of dirty will query a sub-interval of each of | 39 | + target_page_size)); |
90 | * these ranges. | 40 | + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), target_page_size)); |
91 | * | 41 | |
92 | - * The purpose of the dual range handling is to handle known cases of big | 42 | vrdl = g_new0(VFIORamDiscardListener, 1); |
93 | - * holes in the address space, like the x86 AMD 1T hole. The alternative | 43 | vrdl->bcontainer = bcontainer; |
94 | - * would be an IOVATree but that has a much bigger runtime overhead and | ||
95 | - * unnecessary complexity. | ||
96 | + * The purpose of the three range handling is to handle known cases of big | ||
97 | + * holes in the address space, like the x86 AMD 1T hole, and firmware (like | ||
98 | + * OVMF) which may relocate the pci-hole64 to the end of the address space. | ||
99 | + * The latter would otherwise generate large ranges for tracking, stressing | ||
100 | + * the limits of supported hardware. The pci-hole32 will always be below 4G | ||
101 | + * (overlapping or not) so it doesn't need special handling and is part of | ||
102 | + * the 32-bit range. | ||
103 | + * | ||
104 | + * The alternative would be an IOVATree but that has a much bigger runtime | ||
105 | + * overhead and unnecessary complexity. | ||
106 | */ | ||
107 | - min = (end <= UINT32_MAX) ? &range->min32 : &range->min64; | ||
108 | - max = (end <= UINT32_MAX) ? &range->max32 : &range->max64; | ||
109 | - | ||
110 | + if (vfio_section_is_vfio_pci(section, dirty->container) && | ||
111 | + iova >= UINT32_MAX) { | ||
112 | + min = &range->minpci64; | ||
113 | + max = &range->maxpci64; | ||
114 | + } else { | ||
115 | + min = (end <= UINT32_MAX) ? &range->min32 : &range->min64; | ||
116 | + max = (end <= UINT32_MAX) ? &range->max32 : &range->max64; | ||
117 | + } | ||
118 | if (*min > iova) { | ||
119 | *min = iova; | ||
120 | } | ||
121 | @@ -XXX,XX +XXX,XX @@ static void vfio_dirty_tracking_init(VFIOContainer *container, | ||
122 | memset(&dirty, 0, sizeof(dirty)); | ||
123 | dirty.ranges.min32 = UINT32_MAX; | ||
124 | dirty.ranges.min64 = UINT64_MAX; | ||
125 | + dirty.ranges.minpci64 = UINT64_MAX; | ||
126 | dirty.listener = vfio_dirty_tracking_listener; | ||
127 | dirty.container = container; | ||
128 | |||
129 | @@ -XXX,XX +XXX,XX @@ vfio_device_feature_dma_logging_start_create(VFIOContainer *container, | ||
130 | * DMA logging uAPI guarantees to support at least a number of ranges that | ||
131 | * fits into a single host kernel base page. | ||
132 | */ | ||
133 | - control->num_ranges = !!tracking->max32 + !!tracking->max64; | ||
134 | + control->num_ranges = !!tracking->max32 + !!tracking->max64 + | ||
135 | + !!tracking->maxpci64; | ||
136 | ranges = g_try_new0(struct vfio_device_feature_dma_logging_range, | ||
137 | control->num_ranges); | ||
138 | if (!ranges) { | ||
139 | @@ -XXX,XX +XXX,XX @@ vfio_device_feature_dma_logging_start_create(VFIOContainer *container, | ||
140 | if (tracking->max64) { | ||
141 | ranges->iova = tracking->min64; | ||
142 | ranges->length = (tracking->max64 - tracking->min64) + 1; | ||
143 | + ranges++; | ||
144 | + } | ||
145 | + if (tracking->maxpci64) { | ||
146 | + ranges->iova = tracking->minpci64; | ||
147 | + ranges->length = (tracking->maxpci64 - tracking->minpci64) + 1; | ||
148 | } | ||
149 | |||
150 | trace_vfio_device_dirty_tracking_start(control->num_ranges, | ||
151 | tracking->min32, tracking->max32, | ||
152 | - tracking->min64, tracking->max64); | ||
153 | + tracking->min64, tracking->max64, | ||
154 | + tracking->minpci64, tracking->maxpci64); | ||
155 | |||
156 | return feature; | ||
157 | } | ||
158 | diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events | ||
159 | index XXXXXXX..XXXXXXX 100644 | ||
160 | --- a/hw/vfio/trace-events | ||
161 | +++ b/hw/vfio/trace-events | ||
162 | @@ -XXX,XX +XXX,XX @@ vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_wi | ||
163 | vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" | ||
164 | vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 | ||
165 | vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]" | ||
166 | -vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"]" | ||
167 | +vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]" | ||
168 | vfio_disconnect_container(int fd) "close container->fd=%d" | ||
169 | vfio_put_group(int fd) "close group->fd=%d" | ||
170 | vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" | ||
171 | -- | 44 | -- |
172 | 2.41.0 | 45 | 2.48.1 |
173 | 46 | ||
174 | 47 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Changing the device state from STOP_COPY to STOP can take time as the | 3 | Some files don't rely on any target-specific knowledge |
4 | device may need to free resources and do other operations as part of the | 4 | and can be compiled once: |
5 | transition. Currently, this is done in vfio_save_complete_precopy() and | ||
6 | therefore it is counted in the migration downtime. | ||
7 | 5 | ||
8 | To avoid this, change the device state from STOP_COPY to STOP in | 6 | - helpers.c |
9 | vfio_save_cleanup(), which is called after migration has completed and | 7 | - container-base.c |
10 | thus is not part of migration downtime. | 8 | - migration.c (removing unnecessary "exec/ram_addr.h") |
9 | - migration-multifd.c | ||
10 | - cpr.c | ||
11 | 11 | ||
12 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
13 | Tested-by: YangHang Liu <yanghliu@redhat.com> | 13 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
16 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
17 | Message-Id: <20250308230917.18907-4-philmd@linaro.org> | ||
18 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-6-philmd@linaro.org | ||
14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 19 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
15 | --- | 20 | --- |
16 | hw/vfio/migration.c | 19 +++++++++++++------ | 21 | hw/vfio/migration.c | 1 - |
17 | 1 file changed, 13 insertions(+), 6 deletions(-) | 22 | hw/vfio/meson.build | 13 ++++++++----- |
23 | 2 files changed, 8 insertions(+), 6 deletions(-) | ||
18 | 24 | ||
19 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | 25 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c |
20 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/vfio/migration.c | 27 | --- a/hw/vfio/migration.c |
22 | +++ b/hw/vfio/migration.c | 28 | +++ b/hw/vfio/migration.c |
23 | @@ -XXX,XX +XXX,XX @@ static void vfio_save_cleanup(void *opaque) | 29 | @@ -XXX,XX +XXX,XX @@ |
24 | VFIODevice *vbasedev = opaque; | 30 | #include "qapi/error.h" |
25 | VFIOMigration *migration = vbasedev->migration; | 31 | #include "qapi/qapi-events-vfio.h" |
26 | 32 | #include "exec/ramlist.h" | |
27 | + /* | 33 | -#include "exec/ram_addr.h" |
28 | + * Changing device state from STOP_COPY to STOP can take time. Do it here, | 34 | #include "pci.h" |
29 | + * after migration has completed, so it won't increase downtime. | 35 | #include "trace.h" |
30 | + */ | 36 | #include "hw/hw.h" |
31 | + if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) { | 37 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build |
32 | + /* | 38 | index XXXXXXX..XXXXXXX 100644 |
33 | + * If setting the device in STOP state fails, the device should be | 39 | --- a/hw/vfio/meson.build |
34 | + * reset. To do so, use ERROR state as a recover state. | 40 | +++ b/hw/vfio/meson.build |
35 | + */ | 41 | @@ -XXX,XX +XXX,XX @@ |
36 | + vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, | 42 | vfio_ss = ss.source_set() |
37 | + VFIO_DEVICE_STATE_ERROR); | 43 | vfio_ss.add(files( |
38 | + } | 44 | - 'helpers.c', |
45 | 'common.c', | ||
46 | - 'container-base.c', | ||
47 | 'container.c', | ||
48 | - 'migration.c', | ||
49 | - 'migration-multifd.c', | ||
50 | - 'cpr.c', | ||
51 | )) | ||
52 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) | ||
53 | vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( | ||
54 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) | ||
55 | vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) | ||
56 | |||
57 | specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) | ||
39 | + | 58 | + |
40 | g_free(migration->data_buffer); | 59 | +system_ss.add(when: 'CONFIG_VFIO', if_true: files( |
41 | migration->data_buffer = NULL; | 60 | + 'helpers.c', |
42 | migration->precopy_init_size = 0; | 61 | + 'container-base.c', |
43 | @@ -XXX,XX +XXX,XX @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) | 62 | + 'migration.c', |
44 | return ret; | 63 | + 'migration-multifd.c', |
45 | } | 64 | + 'cpr.c', |
46 | 65 | +)) | |
47 | - /* | ||
48 | - * If setting the device in STOP state fails, the device should be reset. | ||
49 | - * To do so, use ERROR state as a recover state. | ||
50 | - */ | ||
51 | - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, | ||
52 | - VFIO_DEVICE_STATE_ERROR); | ||
53 | trace_vfio_save_complete_precopy(vbasedev->name, ret); | ||
54 | |||
55 | return ret; | ||
56 | -- | 66 | -- |
57 | 2.41.0 | 67 | 2.48.1 |
58 | 68 | ||
59 | 69 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | These files depend on the VFIO symbol in their Kconfig | ||
4 | definition. They don't rely on target-specific definitions, so | ||
5 | move them to system_ss[] to build them once. | ||
6 | |||
7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
11 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
12 | Message-Id: <20250308230917.18907-5-philmd@linaro.org> | ||
13 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-7-philmd@linaro.org | ||
14 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
15 | --- | ||
16 | hw/vfio/meson.build | 4 ++-- | ||
17 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
18 | |||
19 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/hw/vfio/meson.build | ||
22 | +++ b/hw/vfio/meson.build | ||
23 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( | ||
24 | )) | ||
25 | vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c')) | ||
26 | vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c')) | ||
27 | -vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) | ||
28 | -vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) | ||
29 | vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) | ||
30 | vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) | ||
31 | |||
32 | specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) | ||
33 | |||
34 | +system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) | ||
35 | +system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) | ||
36 | system_ss.add(when: 'CONFIG_VFIO', if_true: files( | ||
37 | 'helpers.c', | ||
38 | 'container-base.c', | ||
39 | -- | ||
40 | 2.48.1 | ||
41 | |||
42 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | Removing the unused "exec/ram_addr.h" header allows compiling | ||
4 | iommufd.c once for all targets. | ||
5 | |||
6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Message-Id: <20250308230917.18907-6-philmd@linaro.org> | ||
12 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-8-philmd@linaro.org | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
14 | --- | ||
15 | hw/vfio/iommufd.c | 1 - | ||
16 | hw/vfio/meson.build | 6 +++--- | ||
17 | 2 files changed, 3 insertions(+), 4 deletions(-) | ||
18 | |||
19 | diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/hw/vfio/iommufd.c | ||
22 | +++ b/hw/vfio/iommufd.c | ||
23 | @@ -XXX,XX +XXX,XX @@ | ||
24 | #include "qemu/cutils.h" | ||
25 | #include "qemu/chardev_open.h" | ||
26 | #include "pci.h" | ||
27 | -#include "exec/ram_addr.h" | ||
28 | |||
29 | static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, | ||
30 | ram_addr_t size, void *vaddr, bool readonly) | ||
31 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/hw/vfio/meson.build | ||
34 | +++ b/hw/vfio/meson.build | ||
35 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(files( | ||
36 | 'container.c', | ||
37 | )) | ||
38 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) | ||
39 | -vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( | ||
40 | - 'iommufd.c', | ||
41 | -)) | ||
42 | vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( | ||
43 | 'display.c', | ||
44 | 'pci-quirks.c', | ||
45 | @@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( | ||
46 | 'migration-multifd.c', | ||
47 | 'cpr.c', | ||
48 | )) | ||
49 | +system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( | ||
50 | + 'iommufd.c', | ||
51 | +)) | ||
52 | -- | ||
53 | 2.48.1 | ||
54 | |||
55 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
1 | 2 | ||
3 | display.c doesn't rely on target specific definitions, | ||
4 | move it to system_ss[] to build it once. | ||
5 | |||
6 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | ||
10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Message-Id: <20250308230917.18907-8-philmd@linaro.org> | ||
12 | Link: https://lore.kernel.org/qemu-devel/20250311085743.21724-9-philmd@linaro.org | ||
13 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | ||
14 | --- | ||
15 | hw/vfio/meson.build | 4 +++- | ||
16 | 1 file changed, 3 insertions(+), 1 deletion(-) | ||
17 | |||
18 | diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/hw/vfio/meson.build | ||
21 | +++ b/hw/vfio/meson.build | ||
22 | @@ -XXX,XX +XXX,XX @@ vfio_ss.add(files( | ||
23 | )) | ||
24 | vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) | ||
25 | vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( | ||
26 | - 'display.c', | ||
27 | 'pci-quirks.c', | ||
28 | 'pci.c', | ||
29 | )) | ||
30 | @@ -XXX,XX +XXX,XX @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( | ||
31 | system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( | ||
32 | 'iommufd.c', | ||
33 | )) | ||
34 | +system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( | ||
35 | + 'display.c', | ||
36 | +)) | ||
37 | -- | ||
38 | 2.48.1 | ||
39 | |||
40 | diff view generated by jsdifflib |
1 | From: Avihai Horon <avihaih@nvidia.com> | 1 | From: Vasilis Liaskovitis <vliaskovitis@suse.com> |
---|---|---|---|
2 | 2 | ||
3 | Now that P2P support has been added to VFIO migration, allow migration | 3 | The ATI BAR4 quirk is targeting an ioport BAR. Older devices may |
4 | of multiple devices if all of them support P2P migration. | 4 | have a BAR4 which is not an ioport, causing a segfault here. Test |
5 | the BAR type to skip these devices. | ||
5 | 6 | ||
6 | Single device migration is allowed regardless of P2P migration support. | 7 | Similar to |
8 | "8f419c5b: vfio/pci-quirks: Exclude non-ioport BAR from NVIDIA quirk" | ||
7 | 9 | ||
8 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | 10 | Untested, as I don't have the card to test. |
9 | Signed-off-by: Joao Martins <joao.m.martins@oracle.com> | 11 | |
10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 12 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2856 |
11 | Tested-by: YangHang Liu <yanghliu@redhat.com> | 13 | Signed-off-by: Vasilis Liaskovitis <vliaskovitis@suse.com> |
14 | Reviewed-by: Alex Williamson <alex.williamson@redhat.com> | ||
15 | Link: https://lore.kernel.org/qemu-devel/20250310235833.41026-1-vliaskovitis@suse.com | ||
12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 16 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
13 | --- | 17 | --- |
14 | hw/vfio/common.c | 26 ++++++++++++++++++-------- | 18 | hw/vfio/pci-quirks.c | 2 +- |
15 | 1 file changed, 18 insertions(+), 8 deletions(-) | 19 | 1 file changed, 1 insertion(+), 1 deletion(-) |
16 | 20 | ||
17 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c | 21 | diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c |
18 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/hw/vfio/common.c | 23 | --- a/hw/vfio/pci-quirks.c |
20 | +++ b/hw/vfio/common.c | 24 | +++ b/hw/vfio/pci-quirks.c |
21 | @@ -XXX,XX +XXX,XX @@ bool vfio_mig_active(void) | 25 | @@ -XXX,XX +XXX,XX @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) |
22 | 26 | ||
23 | static Error *multiple_devices_migration_blocker; | 27 | /* This windows doesn't seem to be used except by legacy VGA code */ |
24 | 28 | if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || | |
25 | -static unsigned int vfio_migratable_device_num(void) | 29 | - !vdev->vga || nr != 4) { |
26 | +/* | 30 | + !vdev->vga || nr != 4 || !vdev->bars[4].ioport) { |
27 | + * Multiple devices migration is allowed only if all devices support P2P | ||
28 | + * migration. Single device migration is allowed regardless of P2P migration | ||
29 | + * support. | ||
30 | + */ | ||
31 | +static bool vfio_multiple_devices_migration_is_supported(void) | ||
32 | { | ||
33 | VFIOGroup *group; | ||
34 | VFIODevice *vbasedev; | ||
35 | unsigned int device_num = 0; | ||
36 | + bool all_support_p2p = true; | ||
37 | |||
38 | QLIST_FOREACH(group, &vfio_group_list, next) { | ||
39 | QLIST_FOREACH(vbasedev, &group->device_list, next) { | ||
40 | if (vbasedev->migration) { | ||
41 | device_num++; | ||
42 | + | ||
43 | + if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) { | ||
44 | + all_support_p2p = false; | ||
45 | + } | ||
46 | } | ||
47 | } | ||
48 | } | ||
49 | |||
50 | - return device_num; | ||
51 | + return all_support_p2p || device_num <= 1; | ||
52 | } | ||
53 | |||
54 | int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) | ||
55 | @@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) | ||
56 | int ret; | ||
57 | |||
58 | if (multiple_devices_migration_blocker || | ||
59 | - vfio_migratable_device_num() <= 1) { | ||
60 | + vfio_multiple_devices_migration_is_supported()) { | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { | ||
65 | - error_setg(errp, "Migration is currently not supported with multiple " | ||
66 | - "VFIO devices"); | ||
67 | + error_setg(errp, "Multiple VFIO devices migration is supported only if " | ||
68 | + "all of them support P2P migration"); | ||
69 | return -EINVAL; | ||
70 | } | ||
71 | |||
72 | error_setg(&multiple_devices_migration_blocker, | ||
73 | - "Migration is currently not supported with multiple " | ||
74 | - "VFIO devices"); | ||
75 | + "Multiple VFIO devices migration is supported only if all of " | ||
76 | + "them support P2P migration"); | ||
77 | ret = migrate_add_blocker(multiple_devices_migration_blocker, errp); | ||
78 | if (ret < 0) { | ||
79 | error_free(multiple_devices_migration_blocker); | ||
80 | @@ -XXX,XX +XXX,XX @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) | ||
81 | void vfio_unblock_multiple_devices_migration(void) | ||
82 | { | ||
83 | if (!multiple_devices_migration_blocker || | ||
84 | - vfio_migratable_device_num() > 1) { | ||
85 | + !vfio_multiple_devices_migration_is_supported()) { | ||
86 | return; | 31 | return; |
87 | } | 32 | } |
88 | 33 | ||
89 | -- | 34 | -- |
90 | 2.41.0 | 35 | 2.48.1 |
91 | 36 | ||
92 | 37 | diff view generated by jsdifflib |
1 | From: Joao Martins <joao.m.martins@oracle.com> | 1 | From: Joao Martins <joao.m.martins@oracle.com> |
---|---|---|---|
2 | 2 | ||
3 | Move the PRE_COPY and RUNNING state checks to helper functions. | 3 | The intent behind the x-device-dirty-page-tracking option is twofold: |
4 | 4 | ||
5 | This is in preparation for adding P2P VFIO migration support, where | 5 | 1) development/testing in the presence of VFs with VF dirty page tracking |
6 | these helpers will also test for PRE_COPY_P2P and RUNNING_P2P states. | 6 | |
7 | 2) deliberately choosing platform dirty tracker over the VF one. | ||
8 | |||
9 | Item 2) scenario is useful when VF dirty tracker is not as fast as | ||
10 | IOMMU, or there are some limitations around it (e.g. number of them is | ||
11 | limited; aggregated address space under tracking is limited), | ||
12 | efficiency/scalability (e.g. 1 pagetable in IOMMU dirty tracker to scan | ||
13 | vs N VFs) or just troubleshooting. Given item 2 it is not restricted to | ||
14 | debugging, hence drop the debug parenthesis from the option description. | ||
7 | 15 | ||
8 | Signed-off-by: Joao Martins <joao.m.martins@oracle.com> | 16 | Signed-off-by: Joao Martins <joao.m.martins@oracle.com> |
9 | Signed-off-by: Avihai Horon <avihaih@nvidia.com> | ||
10 | Reviewed-by: Cédric Le Goater <clg@redhat.com> | 17 | Reviewed-by: Cédric Le Goater <clg@redhat.com> |
11 | Tested-by: YangHang Liu <yanghliu@redhat.com> | 18 | Link: https://lore.kernel.org/qemu-devel/20250311174807.79825-1-joao.m.martins@oracle.com |
19 | [ clg: Fixed subject spelling ] | ||
12 | Signed-off-by: Cédric Le Goater <clg@redhat.com> | 20 | Signed-off-by: Cédric Le Goater <clg@redhat.com> |
13 | --- | 21 | --- |
14 | include/hw/vfio/vfio-common.h | 2 ++ | 22 | hw/vfio/pci.c | 2 +- |
15 | hw/vfio/common.c | 22 ++++++++++++++++++---- | 23 | 1 file changed, 1 insertion(+), 1 deletion(-) |
16 | hw/vfio/migration.c | 10 ++++------ | ||
17 | 3 files changed, 24 insertions(+), 10 deletions(-) | ||
18 | 24 | ||
19 | diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h | 25 | diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c |
20 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/include/hw/vfio/vfio-common.h | 27 | --- a/hw/vfio/pci.c |
22 | +++ b/include/hw/vfio/vfio-common.h | 28 | +++ b/hw/vfio/pci.c |
23 | @@ -XXX,XX +XXX,XX @@ void vfio_unblock_multiple_devices_migration(void); | 29 | @@ -XXX,XX +XXX,XX @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) |
24 | bool vfio_viommu_preset(VFIODevice *vbasedev); | 30 | object_class_property_set_description(klass, /* 9.1 */ |
25 | int64_t vfio_mig_bytes_transferred(void); | 31 | "x-device-dirty-page-tracking", |
26 | void vfio_reset_bytes_transferred(void); | 32 | "Disable device dirty page tracking and use " |
27 | +bool vfio_device_state_is_running(VFIODevice *vbasedev); | 33 | - "container-based dirty page tracking (DEBUG)"); |
28 | +bool vfio_device_state_is_precopy(VFIODevice *vbasedev); | 34 | + "container-based dirty page tracking"); |
29 | 35 | object_class_property_set_description(klass, /* 9.1 */ | |
30 | #ifdef CONFIG_LINUX | 36 | "migration-events", |
31 | int vfio_get_region_info(VFIODevice *vbasedev, int index, | 37 | "Emit VFIO migration QAPI event when a VFIO device " |
32 | diff --git a/hw/vfio/common.c b/hw/vfio/common.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/hw/vfio/common.c | ||
35 | +++ b/hw/vfio/common.c | ||
36 | @@ -XXX,XX +XXX,XX @@ static void vfio_set_migration_error(int err) | ||
37 | } | ||
38 | } | ||
39 | |||
40 | +bool vfio_device_state_is_running(VFIODevice *vbasedev) | ||
41 | +{ | ||
42 | + VFIOMigration *migration = vbasedev->migration; | ||
43 | + | ||
44 | + return migration->device_state == VFIO_DEVICE_STATE_RUNNING; | ||
45 | +} | ||
46 | + | ||
47 | +bool vfio_device_state_is_precopy(VFIODevice *vbasedev) | ||
48 | +{ | ||
49 | + VFIOMigration *migration = vbasedev->migration; | ||
50 | + | ||
51 | + return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; | ||
52 | +} | ||
53 | + | ||
54 | static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) | ||
55 | { | ||
56 | VFIOGroup *group; | ||
57 | @@ -XXX,XX +XXX,XX @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) | ||
58 | } | ||
59 | |||
60 | if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && | ||
61 | - (migration->device_state == VFIO_DEVICE_STATE_RUNNING || | ||
62 | - migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { | ||
63 | + (vfio_device_state_is_running(vbasedev) || | ||
64 | + vfio_device_state_is_precopy(vbasedev))) { | ||
65 | return false; | ||
66 | } | ||
67 | } | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) | ||
69 | return false; | ||
70 | } | ||
71 | |||
72 | - if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || | ||
73 | - migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { | ||
74 | + if (vfio_device_state_is_running(vbasedev) || | ||
75 | + vfio_device_state_is_precopy(vbasedev)) { | ||
76 | continue; | ||
77 | } else { | ||
78 | return false; | ||
79 | diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c | ||
80 | index XXXXXXX..XXXXXXX 100644 | ||
81 | --- a/hw/vfio/migration.c | ||
82 | +++ b/hw/vfio/migration.c | ||
83 | @@ -XXX,XX +XXX,XX @@ static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, | ||
84 | VFIODevice *vbasedev = opaque; | ||
85 | VFIOMigration *migration = vbasedev->migration; | ||
86 | |||
87 | - if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { | ||
88 | + if (!vfio_device_state_is_precopy(vbasedev)) { | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, | ||
93 | vfio_query_stop_copy_size(vbasedev, &stop_copy_size); | ||
94 | *must_precopy += stop_copy_size; | ||
95 | |||
96 | - if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { | ||
97 | + if (vfio_device_state_is_precopy(vbasedev)) { | ||
98 | vfio_query_precopy_size(migration); | ||
99 | |||
100 | *must_precopy += | ||
101 | @@ -XXX,XX +XXX,XX @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, | ||
102 | static bool vfio_is_active_iterate(void *opaque) | ||
103 | { | ||
104 | VFIODevice *vbasedev = opaque; | ||
105 | - VFIOMigration *migration = vbasedev->migration; | ||
106 | |||
107 | - return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; | ||
108 | + return vfio_device_state_is_precopy(vbasedev); | ||
109 | } | ||
110 | |||
111 | static int vfio_save_iterate(QEMUFile *f, void *opaque) | ||
112 | @@ -XXX,XX +XXX,XX @@ static const SaveVMHandlers savevm_vfio_handlers = { | ||
113 | static void vfio_vmstate_change(void *opaque, bool running, RunState state) | ||
114 | { | ||
115 | VFIODevice *vbasedev = opaque; | ||
116 | - VFIOMigration *migration = vbasedev->migration; | ||
117 | enum vfio_device_mig_state new_state; | ||
118 | int ret; | ||
119 | |||
120 | @@ -XXX,XX +XXX,XX @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) | ||
121 | new_state = VFIO_DEVICE_STATE_RUNNING; | ||
122 | } else { | ||
123 | new_state = | ||
124 | - (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && | ||
125 | + (vfio_device_state_is_precopy(vbasedev) && | ||
126 | (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? | ||
127 | VFIO_DEVICE_STATE_STOP_COPY : | ||
128 | VFIO_DEVICE_STATE_STOP; | ||
129 | -- | 38 | -- |
130 | 2.41.0 | 39 | 2.48.1 |
131 | 40 | ||
132 | 41 | diff view generated by jsdifflib |