[PATCH 2/2] vfio/pci: Support multifunction atomic ops capability

Manojlo Pekovic posted 2 patches 6 days, 4 hours ago
There is a newer version of this series
[PATCH 2/2] vfio/pci: Support multifunction atomic ops capability
Posted by Manojlo Pekovic 6 days, 4 hours ago
From: Manojlo Pekovic <mpekovic@example.com>

Extend PCIe Atomic Ops completer support to multifunction vfio-pci
devices by calculating the common subset of atomic capabilities across
all functions.

For multifunction devices, we now:
1. Calculate the intersection of atomic capabilities across all functions
2. Only enable capabilities that ALL functions with atomic support share
3. Functions without atomic support are skipped (not penalized)

During vfio_realize(), other functions in a multifunction device may not
be realized yet, making it impossible to enumerate all functions. We
defer atomic ops configuration to machine_done time for multifunction
devices, ensuring all functions are realized before calculating common
capabilities.

This extends commit c00aac6f14

Signed-off-by: Manojlo Pekovic <mpekovic@amd.com>
---
 hw/vfio/pci.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h |   1 +
 2 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6a6c8f1807..89f171f431 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1931,6 +1931,67 @@ static uint32_t vfio_get_atomic_cap(VFIOPCIDevice *vdev)
     return mask;
 }
 
+static void vfio_pci_machine_done(Notifier *notifier, void *data);
+
+/*
+ * This function iterates through all possible functions (0-7)
+ * at the same slot and returns the intersection of their capabilities.
+ *
+ * Returns: Bitmask of atomic capabilities supported by functions on slot
+ *          (PCI_EXP_DEVCAP2_ATOMIC_COMP32/64/128), or 0 if:
+ *          - Mixed device types found (vfio-pci + emulated)
+ *          - No common capabilities exist
+ */
+static uint32_t vfio_get_multifunction_atomic_cap(VFIOPCIDevice *vdev,
+                                                    PCIBus *bus)
+{
+    PCIDevice *func_dev;
+    VFIOPCIDevice *func_vdev;
+    int slot = PCI_SLOT(vdev->pdev.devfn);
+    int target_devfn;
+    uint32_t func_cap;
+    uint32_t common_mask = PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+                           PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+                           PCI_EXP_DEVCAP2_ATOMIC_COMP128;
+
+    /* Iterate through all possible functions at this slot */
+    for (int fn = 0; fn < PCI_FUNC_MAX; fn++) {
+        target_devfn = PCI_DEVFN(slot, fn);
+        func_dev = pci_find_device(bus, pci_bus_num(bus), target_devfn);
+
+        if (!func_dev) {
+            continue;
+        }
+
+        /*
+         * We can only calculate atomic caps for vfio-pci devices. If we
+         * encounter a mixed multifunction device (e.g., vfio-pci function 0
+         * + emulated e1000 function 1), we cannot safely determine the atomic
+         * capabilities. Taking the conservative approach: disable all atomic
+         * ops for mixed configurations.
+         */
+
+        func_vdev = (VFIOPCIDevice *)object_dynamic_cast(OBJECT(func_dev),
+                                                        TYPE_VFIO_PCI);
+        if (!func_vdev) {
+            return 0;
+        }
+
+        /*
+         * A function with no atomic completer support simply cannot
+         * receive and execute atomic operations.
+         * This does NOT mean other functions in the same multifunction
+         * device should be prevented from supporting atomics.
+         */
+        func_cap = vfio_get_atomic_cap(func_vdev);
+        if (func_cap != 0) {
+            common_mask &= func_cap;
+        }
+    }
+
+    return common_mask;
+}
+
 static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
 {
     PCIBus *bus = pci_get_bus(&vdev->pdev);
@@ -1948,8 +2009,7 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
     if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
         pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
         pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
-        vdev->pdev.devfn ||
-        vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+        vdev->pdev.devfn) {
         return;
     }
 
@@ -1962,6 +2022,25 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
         return;
     }
 
+    /*
+     * Single-function: All device info is available now, configure immediately
+     * Multifunction: Other functions may not be realized yet, defer to
+     *                machine_done time when all devices guaranteed to exist
+     */
+    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+        /*
+         * Register machine_done notifier if not already registered.
+         * The notifier will be called after all devices are realized, at which
+         * point we can safely enumerate all functions and calculate the common
+         * atomic capabilities.
+         */
+        if (!vdev->machine_done.notify) {
+            vdev->machine_done.notify = vfio_pci_machine_done;
+            qemu_add_machine_init_done_notifier(&vdev->machine_done);
+        }
+        return;  /* Actual work deferred to vfio_pci_machine_done() */
+    }
+
     mask = vfio_get_atomic_cap(vdev);
 
     if (!mask) {
@@ -1972,6 +2051,51 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
     vdev->clear_parent_atomics_on_exit = true;
 }
 
+/*
+ * Called at machine_done time, after ALL devices are realized
+ * but before the VM starts running.
+ *
+ * For multifunction devices, we need to calculate atomic capabilities
+ * across all functions. During realize of function 0, the other functions
+ * (1-7) may not exist yet. By deferring to machine_done time, we ensure
+ * all functions are realized and can be safely enumerated.
+ */
+static void vfio_pci_machine_done(Notifier *notifier, void *data)
+{
+    VFIOPCIDevice *vdev = container_of(notifier, VFIOPCIDevice, machine_done);
+    PCIBus *bus = pci_get_bus(&vdev->pdev);
+    PCIDevice *parent = pci_bridge_get_device(bus);
+    uint32_t mask;
+    uint8_t *pos;
+
+    /*
+     * Sanity checks: Should always pass since vfio_pci_enable_rp_atomics()
+     * already validated these conditions before registering this notifier.
+     */
+    if (!parent || !parent->exp.exp_cap) {
+        return;
+    }
+
+    pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+    /* Abort if there'a already an Atomic Ops configuration on the root port */
+
+    if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+                             PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+                             PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
+        return;
+    }
+
+    mask = vfio_get_multifunction_atomic_cap(vdev, bus);
+
+    if (!mask) {
+        return;
+    }
+
+    pci_long_test_and_set_mask(pos, mask);
+    vdev->clear_parent_atomics_on_exit = true;
+}
+
 static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
 {
     if (vdev->clear_parent_atomics_on_exit) {
@@ -3281,6 +3405,16 @@ static void vfio_exitfn(PCIDevice *pdev)
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
     VFIODevice *vbasedev = &vdev->vbasedev;
 
+    /*
+     * Remove machine_done notifier if it was registered.
+     * This happens for multifunction devices where function 0 registered
+     * a notifier to defer atomic ops configuration to machine_done time.
+     */
+    if (vdev->machine_done.notify) {
+        qemu_remove_machine_init_done_notifier(&vdev->machine_done);
+        vdev->machine_done.notify = NULL;
+    }
+
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 5ad090a229..9182ab0053 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -180,6 +180,7 @@ struct VFIOPCIDevice {
     bool skip_vsc_check;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
+    Notifier machine_done;
 };
 
 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
-- 
2.43.0