Hi Shameer,
On 10/31/25 11:50 AM, Shameer Kolothum wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> If user wants to expose PASID capability in vIOMMU, then VFIO would also
need to report?
> report the PASID cap for this device if the underlying hardware supports
> it as well.
>
> As a start, this chooses to put the vPASID cap in the last 8 bytes of the
> vconfig space. This is a choice in the good hope of no conflict with any
> existing cap or hidden registers. For the devices that has hidden registers,
> user should figure out a proper offset for the vPASID cap. This may require
> an option for user to config it. Here we leave it as a future extension.
> There are more discussions on the mechanism of finding the proper offset.
>
> https://lore.kernel.org/kvm/BN9PR11MB5276318969A212AD0649C7BE8CBE2@BN9PR11MB5276.namprd11.prod.outlook.com/
>
> Since we add a check to ensure the vIOMMU supports PASID, only devices
> under those vIOMMUs can synthesize the vPASID capability. This gives
> users control over which devices expose vPASID.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/vfio/pci.c | 37 +++++++++++++++++++++++++++++++++++++
> include/hw/iommu.h | 1 +
> 2 files changed, 38 insertions(+)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 06b06afc2b..2054eac897 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -24,6 +24,7 @@
> #include <sys/ioctl.h>
>
> #include "hw/hw.h"
> +#include "hw/iommu.h"
> #include "hw/pci/msi.h"
> #include "hw/pci/msix.h"
> #include "hw/pci/pci_bridge.h"
> @@ -2500,7 +2501,12 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos)
>
> static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
> {
> + HostIOMMUDevice *hiod = vdev->vbasedev.hiod;
> + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
> PCIDevice *pdev = PCI_DEVICE(vdev);
> + uint64_t max_pasid_log2 = 0;
> + bool pasid_cap_added = false;
> + uint64_t hw_caps;
> uint32_t header;
> uint16_t cap_id, next, size;
> uint8_t cap_ver;
> @@ -2578,12 +2584,43 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
> pcie_add_capability(pdev, cap_id, cap_ver, next, size);
> }
> break;
> + case PCI_EXT_CAP_ID_PASID:
> + pasid_cap_added = true;
> + /* fallthrough */
> default:
> pcie_add_capability(pdev, cap_id, cap_ver, next, size);
> }
>
> }
>
> +#ifdef CONFIG_IOMMUFD
> + /*
> + * Although we check for PCI_EXT_CAP_ID_PASID above, the Linux VFIO
> + * framework currently hides this capability. Try to retrieve it
> + * through alternative kernel interfaces (e.g. IOMMUFD APIs).
I don't catch this sentence . When are you supposed to read above
PCI_EXT_CAP_ID_PASID cap id then?
> + */
> + if (!pasid_cap_added && hiodc->get_cap) {
> + hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_GENERIC_HW, &hw_caps, NULL);
> + hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_MAX_PASID_LOG2,
> + &max_pasid_log2, NULL);
> + }
> +
> + /*
> + * If supported, adds the PASID capability in the end of the PCIe config
> + * space. TODO: Add option for enabling pasid at a safe offset.
> + */
> + if (max_pasid_log2 && (pci_device_get_viommu_flags(pdev) &
> + VIOMMU_FLAG_PASID_SUPPORTED)) {
> + bool exec_perm = (hw_caps & IOMMU_HW_CAP_PCI_PASID_EXEC) ? true : false;
can't you direct set exec_perm = (hw_caps & IOMMU_HW_CAP_PCI_PASID_EXEC);
> + bool priv_mod = (hw_caps & IOMMU_HW_CAP_PCI_PASID_PRIV) ? true : false;
> +
> + pcie_pasid_init(pdev, PCIE_CONFIG_SPACE_SIZE - PCI_EXT_CAP_PASID_SIZEOF,
> + max_pasid_log2, exec_perm, priv_mod);
> + /* PASID capability is fully emulated by QEMU */
> + memset(vdev->emulated_config_bits + pdev->exp.pasid_cap, 0xff, 8);
> + }
> +#endif
> +
> /* Cleanup chain head ID if necessary */
> if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
> pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
> diff --git a/include/hw/iommu.h b/include/hw/iommu.h
> index 9b8bb94fc2..9635770bee 100644
> --- a/include/hw/iommu.h
> +++ b/include/hw/iommu.h
> @@ -20,6 +20,7 @@
> enum viommu_flags {
> /* vIOMMU needs nesting parent HWPT to create nested HWPT */
> VIOMMU_FLAG_WANT_NESTING_PARENT = BIT_ULL(0),
> + VIOMMU_FLAG_PASID_SUPPORTED = BIT_ULL(1),
> };
>
> #endif /* HW_IOMMU_H */
Thanks
Eric