[PULL 14/47] vfio/pci: migration: Skip config space check for Vendor Specific Information in VSC during restore/load

Cédric Le Goater posted 47 patches 6 months ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Thomas Huth <thuth@redhat.com>, Tony Krowiak <akrowiak@linux.ibm.com>, Halil Pasic <pasic@linux.ibm.com>, Jason Herne <jjherne@linux.ibm.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>, Nicholas Piggin <npiggin@gmail.com>, Daniel Henrique Barboza <danielhb413@gmail.com>, David Gibson <david@gibson.dropbear.id.au>, Harsh Prateek Bora <harshpb@linux.ibm.com>, "Michael S. Tsirkin" <mst@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>, David Hildenbrand <david@redhat.com>, Fabiano Rosas <farosas@suse.de>, Markus Armbruster <armbru@redhat.com>, Michael Roth <michael.roth@amd.com>, Eric Blake <eblake@redhat.com>
There is a newer version of this series
[PULL 14/47] vfio/pci: migration: Skip config space check for Vendor Specific Information in VSC during restore/load
Posted by Cédric Le Goater 6 months ago
From: Vinayak Kale <vkale@nvidia.com>

During the restore phase of a migration, QEMU compares the config space of the
PCI device against the config space captured in the migration stream during the
save operation. If the config space data does not match, the restore operation
fails.

The config space check is done in get_pci_config_device(). By default, the VSC
(vendor-specific capability) in config space is included in that check.

Because of QEMU's config space check for the VSC, live migration is broken
across NVIDIA vGPU devices when the source and destination host drivers differ.
In this situation, the Vendor Specific Information in the VSC varies on the
destination to ensure that the vGPU feature capabilities exposed to the guest
driver are compatible with the destination host.

If a vfio-pci device is migration capable and the vfio-pci vendor driver is OK
with volatile Vendor Specific Info in the VSC, then QEMU should exempt the
Vendor Specific Info from the config space check. It is the vendor driver's
responsibility to ensure that the VSC is consistent across migration. Here,
consistency could mean that the VSC format should be the same on source and
destination, even though the actual Vendor Specific Info may not be
byte-for-byte identical.

This patch skips the check for Vendor Specific Information in the VSC of a
VFIO-PCI device by clearing the corresponding pdev->cmask[] offsets. QEMU skips
the config space check for any offset whose cmask[] byte is not set. The check
is still enforced for the 3-byte VSC header.

The VSC check is skipped for machine types >= 9.1. It remains enforced on
older machine types (<= 9.0).

Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Vinayak Kale <vkale@nvidia.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 hw/vfio/pci.h     |  1 +
 hw/core/machine.c |  1 +
 hw/vfio/pci.c     | 26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+)

diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 6e64a2654e690af11b72710530a41135b726e96f..92cd62d1159dbd47d878454f201f9c18112a7692 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -177,6 +177,7 @@ struct VFIOPCIDevice {
     OnOffAuto ramfb_migrate;
     bool defer_kvm_irq_routing;
     bool clear_parent_atomics_on_exit;
+    bool skip_vsc_check;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
 };
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c7ceb1150147eb2bdd5f7ef583d00cee88f306cd..3442f31f9458c4e1bdde366aa40bfe3ad8b321a8 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -35,6 +35,7 @@
 
 GlobalProperty hw_compat_9_0[] = {
     {"arm-cpu", "backcompat-cntfrq", "true" },
+    {"vfio-pci", "skip-vsc-check", "false" },
 };
 const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0);
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index fc6e54e871508bb0e2a3ac9079a195c086531f21..4789d43c0f9cc7ef94b73adc815377f7222d8c57 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2134,6 +2134,28 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+static int vfio_add_vendor_specific_cap(VFIOPCIDevice *vdev, int pos,
+                                        uint8_t size, Error **errp)
+{
+    PCIDevice *pdev = &vdev->pdev;
+
+    pos = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, size, errp);
+    if (pos < 0) {
+        return pos;
+    }
+
+    /*
+     * A cleared cmask[] byte is exempt from the config space check done on
+     * restore/load, so clear the mask for the Vendor Specific Information.
+     * The 3 byte VSC header stays checked; nothing to clear if size <= 3.
+     */
+    if (vdev->skip_vsc_check && size > 3) {
+        memset(pdev->cmask + pos + 3, 0, size - 3);
+    }
+
+    return pos;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
 {
     ERRP_GUARD();
@@ -2202,6 +2224,9 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
         vfio_check_af_flr(vdev, pos);
         ret = pci_add_capability(pdev, cap_id, pos, size, errp);
         break;
+    case PCI_CAP_ID_VNDR:
+        ret = vfio_add_vendor_specific_cap(vdev, pos, size, errp);
+        break;
     default:
         ret = pci_add_capability(pdev, cap_id, pos, size, errp);
         break;
@@ -3391,6 +3416,7 @@ static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
                      TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
 #endif
+    DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.45.1