Dynamically enable Atomic Ops completer support around realize/exit of
vfio-pci devices reporting host support for these accesses and adhering
to a minimal configuration standard. While the Atomic Ops completer
bits in the root port device capabilities2 register are read-only, the
PCIe spec does allow RO bits to change to reflect hardware state. We
take advantage of that here around the realize and exit functions of
the vfio-pci device.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/pci.h | 1 +
2 files changed, 79 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index bf27a3990564..d8a0fd595560 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1826,6 +1826,81 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos,
vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
}
+static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
+{
+ struct vfio_device_info_cap_pci_atomic_comp *cap;
+ g_autofree struct vfio_device_info *info = NULL;
+ PCIBus *bus = pci_get_bus(&vdev->pdev);
+ PCIDevice *parent = bus->parent_dev;
+ struct vfio_info_cap_header *hdr;
+ uint32_t mask = 0;
+ uint8_t *pos;
+
+ /*
+ * PCIe Atomic Ops completer support is only added automatically for single
+ * function devices downstream of a root port supporting DEVCAP2. Support
+ * is added during realize and, if added, removed during device exit. The
+ * single function requirement avoids conflicting requirements should a
+ * slot be composed of multiple devices with differing capabilities.
+ */
+ if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
+ pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
+ pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
+ vdev->pdev.devfn ||
+ vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ return;
+ }
+
+ pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+ /* Abort if there's already an Atomic Ops configuration on the root port */
+ if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
+ return;
+ }
+
+ info = vfio_get_device_info(vdev->vbasedev.fd);
+ if (!info) {
+ return;
+ }
+
+ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
+ if (!hdr) {
+ return;
+ }
+
+ cap = (void *)hdr;
+ if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
+ mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
+ }
+ if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
+ mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
+ }
+ if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
+ mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
+ }
+
+ if (!mask) {
+ return;
+ }
+
+ pci_long_test_and_set_mask(pos, mask);
+ vdev->clear_parent_atomics_on_exit = true;
+}
+
+static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
+{
+ if (vdev->clear_parent_atomics_on_exit) {
+ PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
+ uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+ pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP128);
+ }
+}
+
static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
Error **errp)
{
@@ -1929,6 +2004,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
}
+
+ vfio_pci_enable_rp_atomics(vdev);
}
/*
@@ -3265,6 +3342,7 @@ static void vfio_exitfn(PCIDevice *pdev)
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
+ vfio_pci_disable_rp_atomics(vdev);
vfio_bars_exit(vdev);
vfio_migration_exit(&vdev->vbasedev);
}
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 2674476d6c77..a2771b9ff3cc 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -174,6 +174,7 @@ struct VFIOPCIDevice {
bool no_vfio_ioeventfd;
bool enable_ramfb;
bool defer_kvm_irq_routing;
+ bool clear_parent_atomics_on_exit;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
};
--
2.39.2
On 27/5/23 01:15, Alex Williamson wrote:
> Dynamically enable Atomic Ops completer support around realize/exit of
> vfio-pci devices reporting host support for these accesses and adhering
> to a minimal configuration standard. While the Atomic Ops completer
> bits in the root port device capabilities2 register are read-only, the
> PCIe spec does allow RO bits to change to reflect hardware state. We
> take advantage of that here around the realize and exit functions of
> the vfio-pci device.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
> hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/vfio/pci.h | 1 +
> 2 files changed, 79 insertions(+)
> +static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> + struct vfio_device_info_cap_pci_atomic_comp *cap;
> + g_autofree struct vfio_device_info *info = NULL;
> + PCIBus *bus = pci_get_bus(&vdev->pdev);
> + PCIDevice *parent = bus->parent_dev;
> + struct vfio_info_cap_header *hdr;
> + uint32_t mask = 0;
> + uint8_t *pos;
> +
> + /*
> + * PCIe Atomic Ops completer support is only added automatically for single
> + * function devices downstream of a root port supporting DEVCAP2. Support
> + * is added during realize and, if added, removed during device exit. The
> + * single function requirement avoids conflicting requirements should a
> + * slot be composed of multiple devices with differing capabilities.
> + */
> + if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
> + pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
> + pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
> + vdev->pdev.devfn ||
> + vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> + return;
> + }
> +
> + pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> + /* Abort if there'a already an Atomic Ops configuration on the root port */
Optional here: add a trace event logging pci_get_long(pos).
> + if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
> + return;
> + }
> +
> + info = vfio_get_device_info(vdev->vbasedev.fd);
> + if (!info) {
> + return;
> + }
> +
> + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
> + if (!hdr) {
> + return;
> + }
> +
> + cap = (void *)hdr;
> + if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
> + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
> + }
> + if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
> + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
> + }
> + if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
> + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
> + }
> +
> + if (!mask) {
> + return;
> + }
Similarly optional: add a trace event logging (cap->flags, mask).
> +
> + pci_long_test_and_set_mask(pos, mask);
> + vdev->clear_parent_atomics_on_exit = true;
> +}
> +
> +static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> + if (vdev->clear_parent_atomics_on_exit) {
> + PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
> + uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> + pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP128);
> + }
> +}
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
On 5/27/23 01:15, Alex Williamson wrote:
> Dynamically enable Atomic Ops completer support around realize/exit of
> vfio-pci devices reporting host support for these accesses and adhering
> to a minimal configuration standard. While the Atomic Ops completer
> bits in the root port device capabilities2 register are read-only, the
> PCIe spec does allow RO bits to change to reflect hardware state. We
> take advantage of that here around the realize and exit functions of
> the vfio-pci device.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Robin Voetter <robin@streamhpc.com>
Tested-by: Robin Voetter <robin@streamhpc.com>
Kind regards,
Robin Voetter
On 5/27/23 01:15, Alex Williamson wrote:
> Dynamically enable Atomic Ops completer support around realize/exit of
> vfio-pci devices reporting host support for these accesses and adhering
> to a minimal configuration standard. While the Atomic Ops completer
> bits in the root port device capabilities2 register are read-only, the
> PCIe spec does allow RO bits to change to reflect hardware state. We
> take advantage of that here around the realize and exit functions of
> the vfio-pci device.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
LGTM. I am not sure about the single function restriction; maybe that's
worth a warning?
Thanks,
C.
> ---
> hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/vfio/pci.h | 1 +
> 2 files changed, 79 insertions(+)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index bf27a3990564..d8a0fd595560 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1826,6 +1826,81 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos,
> vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
> }
>
> +static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> + struct vfio_device_info_cap_pci_atomic_comp *cap;
> + g_autofree struct vfio_device_info *info = NULL;
> + PCIBus *bus = pci_get_bus(&vdev->pdev);
> + PCIDevice *parent = bus->parent_dev;
> + struct vfio_info_cap_header *hdr;
> + uint32_t mask = 0;
> + uint8_t *pos;
> +
> + /*
> + * PCIe Atomic Ops completer support is only added automatically for single
> + * function devices downstream of a root port supporting DEVCAP2. Support
> + * is added during realize and, if added, removed during device exit. The
> + * single function requirement avoids conflicting requirements should a
> + * slot be composed of multiple devices with differing capabilities.
> + */
> + if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
> + pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
> + pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
> + vdev->pdev.devfn ||
> + vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> + return;
> + }
> +
> + pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> + /* Abort if there'a already an Atomic Ops configuration on the root port */
> + if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
> + return;
> + }
> +
> + info = vfio_get_device_info(vdev->vbasedev.fd);
> + if (!info) {
> + return;
> + }
> +
> + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
> + if (!hdr) {
> + return;
> + }
> +
> + cap = (void *)hdr;
> + if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
> + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
> + }
> + if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
> + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
> + }
> + if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
> + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
> + }
> +
> + if (!mask) {
> + return;
> + }
> +
> + pci_long_test_and_set_mask(pos, mask);
> + vdev->clear_parent_atomics_on_exit = true;
> +}
> +
> +static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> + if (vdev->clear_parent_atomics_on_exit) {
> + PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
> + uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> + pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> + PCI_EXP_DEVCAP2_ATOMIC_COMP128);
> + }
> +}
> +
> static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
> Error **errp)
> {
> @@ -1929,6 +2004,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
> QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
> vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
> }
> +
> + vfio_pci_enable_rp_atomics(vdev);
> }
>
> /*
> @@ -3265,6 +3342,7 @@ static void vfio_exitfn(PCIDevice *pdev)
> timer_free(vdev->intx.mmap_timer);
> }
> vfio_teardown_msi(vdev);
> + vfio_pci_disable_rp_atomics(vdev);
> vfio_bars_exit(vdev);
> vfio_migration_exit(&vdev->vbasedev);
> }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 2674476d6c77..a2771b9ff3cc 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -174,6 +174,7 @@ struct VFIOPCIDevice {
> bool no_vfio_ioeventfd;
> bool enable_ramfb;
> bool defer_kvm_irq_routing;
> + bool clear_parent_atomics_on_exit;
> VFIODisplay *dpy;
> Notifier irqchip_change_notifier;
> };
© 2016 - 2025 Red Hat, Inc.