[PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps

Zhenzhong Duan posted 5 patches 7 months, 1 week ago
[PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Zhenzhong Duan 7 months, 1 week ago
The saved caps copy can be used to check dirty tracking capability.

The capabilities are obtained through the IOMMUFD interface, so define a
new structure HostIOMMUDeviceIOMMUFDCaps, which contains the vendor
caps as raw data, in "include/system/iommufd.h".

This is preparatory work for moving .realize() after .attach_device().

Suggested-by: Cédric Le Goater <clg@redhat.com>
Suggested-by: Eric Auger <eric.auger@redhat.com>
Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/vfio/vfio-device.h |  1 +
 include/system/iommufd.h      | 22 ++++++++++++++++++++++
 hw/vfio/iommufd.c             | 10 +++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 66797b4c92..09a7af891a 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -77,6 +77,7 @@ typedef struct VFIODevice {
     bool dirty_tracking; /* Protected by BQL */
     bool iommu_dirty_tracking;
     HostIOMMUDevice *hiod;
+    HostIOMMUDeviceIOMMUFDCaps caps;
     int devid;
     IOMMUFDBackend *iommufd;
     VFIOIOASHwpt *hwpt;
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index cbab75bfbf..0f337585c9 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -18,6 +18,9 @@
 #include "exec/hwaddr.h"
 #include "exec/cpu-common.h"
 #include "system/host_iommu_device.h"
+#ifdef CONFIG_LINUX
+#include <linux/iommufd.h>
+#endif
 
 #define TYPE_IOMMUFD_BACKEND "iommufd"
 OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
@@ -63,4 +66,23 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
                                       Error **errp);
 
 #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
+
+typedef union VendorCaps {
+    struct iommu_hw_info_vtd vtd;
+    struct iommu_hw_info_arm_smmuv3 smmuv3;
+} VendorCaps;
+
+/**
+ * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device capabilities.
+ *
+ * @type: host platform IOMMU type.
+ *
+ * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
+ *           the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
+ */
+typedef struct HostIOMMUDeviceIOMMUFDCaps {
+    uint32_t type;
+    uint64_t hw_caps;
+    VendorCaps vendor_caps;
+} HostIOMMUDeviceIOMMUFDCaps;
 #endif
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 48db105422..530cde6740 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -324,7 +324,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
      * vfio_migration_realize() may decide to use VF dirty tracking
      * instead.
      */
-    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
+    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
         flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
     }
 
@@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
     int ret, devfd;
     uint32_t ioas_id;
     Error *err = NULL;
+    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
     const VFIOIOMMUClass *iommufd_vioc =
         VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
 
@@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
         goto err_alloc_ioas;
     }
 
+    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
+                                         &caps->type, &caps->vendor_caps,
+                                         sizeof(VendorCaps), &caps->hw_caps,
+                                         errp)) {
+        goto err_alloc_ioas;
+    }
+
     /* try to attach to an existing container in this space */
     QLIST_FOREACH(bcontainer, &space->containers, next) {
         container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
-- 
2.34.1


Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Eric Auger 6 months, 2 weeks ago
Hi Zhenzhong,

On 4/11/25 12:17 PM, Zhenzhong Duan wrote:
> The saved caps copy can be used to check dirty tracking capability.
>
> The capabilities is gotten through IOMMUFD interface, so define a
> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
> caps raw data in "include/system/iommufd.h".
>
> This is a prepare work for moving .realize() after .attach_device().
>
> Suggested-by: Cédric Le Goater <clg@redhat.com>
> Suggested-by: Eric Auger <eric.auger@redhat.com>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/vfio/vfio-device.h |  1 +
>  include/system/iommufd.h      | 22 ++++++++++++++++++++++
>  hw/vfio/iommufd.c             | 10 +++++++++-
>  3 files changed, 32 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 66797b4c92..09a7af891a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>      bool dirty_tracking; /* Protected by BQL */
>      bool iommu_dirty_tracking;
>      HostIOMMUDevice *hiod;
> +    HostIOMMUDeviceIOMMUFDCaps caps;
>      int devid;
>      IOMMUFDBackend *iommufd;
>      VFIOIOASHwpt *hwpt;
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index cbab75bfbf..0f337585c9 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -18,6 +18,9 @@
>  #include "exec/hwaddr.h"
>  #include "exec/cpu-common.h"
>  #include "system/host_iommu_device.h"
> +#ifdef CONFIG_LINUX
> +#include <linux/iommufd.h>
> +#endif
>  
>  #define TYPE_IOMMUFD_BACKEND "iommufd"
>  OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
> @@ -63,4 +66,23 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>                                        Error **errp);
>  
>  #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> +
> +typedef union VendorCaps {
> +    struct iommu_hw_info_vtd vtd;
> +    struct iommu_hw_info_arm_smmuv3 smmuv3;
> +} VendorCaps;
> +
> +/**
> + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device capabilities.
> + *
> + * @type: host platform IOMMU type.
> + *
> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
> + *           the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
> + */
> +typedef struct HostIOMMUDeviceIOMMUFDCaps {
> +    uint32_t type;
> +    uint64_t hw_caps;
> +    VendorCaps vendor_caps;
can't we store the raw data in the caps and let the vIOMMU code
interpret it via a PCIIOMMUOps callback?

If my understanding is correct this is also Nicolin's initial
suggestion, no?

Eric
> +} HostIOMMUDeviceIOMMUFDCaps;
>  #endif
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 48db105422..530cde6740 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -324,7 +324,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>       * vfio_migration_realize() may decide to use VF dirty tracking
>       * instead.
>       */
> -    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
> +    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>      }
>  
> @@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>      int ret, devfd;
>      uint32_t ioas_id;
>      Error *err = NULL;
> +    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
>      const VFIOIOMMUClass *iommufd_vioc =
>          VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>  
> @@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>          goto err_alloc_ioas;
>      }
>  
> +    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> +                                         &caps->type, &caps->vendor_caps,
> +                                         sizeof(VendorCaps), &caps->hw_caps,
> +                                         errp)) {
> +        goto err_alloc_ioas;
> +    }
> +
>      /* try to attach to an existing container in this space */
>      QLIST_FOREACH(bcontainer, &space->containers, next) {
>          container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);


Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Nicolin Chen 6 months, 1 week ago
On Mon, May 05, 2025 at 06:38:17PM +0200, Eric Auger wrote:
> > +/**
> > + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device capabilities.
> > + *
> > + * @type: host platform IOMMU type.
> > + *
> > + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
> > + *           the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
> > + */
> > +typedef struct HostIOMMUDeviceIOMMUFDCaps {
> > +    uint32_t type;
> > +    uint64_t hw_caps;
> > +    VendorCaps vendor_caps;

> can't we store the raw data in the caps and let the vIOMMU code
> interpret it via a PCIIOMMUOps callback?
> 
> If my understanding is correct this is also Nicolin's initial
> suggestion, no?

It was, until Cedric suggested to do a further isolation between
the iommufd uAPIs/structures and vIOMMU code, so vIOMMU wouldn't
need to deal with any iommufd uAPIs/structures.

So, what Zhenzhong did is kinda creating another vIOMMU specific
iommufd driver(s) in backend/iommufd, which for now only unpacks
the hw_caps and vendor_caps, and likely will further forward the
caps via another non-iommufd structure (?) to vIOMMU.

It's slightly different than what we do in the kernel, where all
the vendor data isn't touched by the core, but still makes sense
in QEMU world I think?

Thanks
Nicolin
Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Eric Auger 6 months, 2 weeks ago

On 4/11/25 12:17 PM, Zhenzhong Duan wrote:
> The saved caps copy can be used to check dirty tracking capability.
>
> The capabilities is gotten through IOMMUFD interface, so define a
> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
> caps raw data in "include/system/iommufd.h".
>
> This is a prepare work for moving .realize() after .attach_device().
>
> Suggested-by: Cédric Le Goater <clg@redhat.com>
> Suggested-by: Eric Auger <eric.auger@redhat.com>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/vfio/vfio-device.h |  1 +
>  include/system/iommufd.h      | 22 ++++++++++++++++++++++
>  hw/vfio/iommufd.c             | 10 +++++++++-
>  3 files changed, 32 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 66797b4c92..09a7af891a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>      bool dirty_tracking; /* Protected by BQL */
>      bool iommu_dirty_tracking;
>      HostIOMMUDevice *hiod;
> +    HostIOMMUDeviceIOMMUFDCaps caps;
>      int devid;
>      IOMMUFDBackend *iommufd;
>      VFIOIOASHwpt *hwpt;
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index cbab75bfbf..0f337585c9 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -18,6 +18,9 @@
>  #include "exec/hwaddr.h"
>  #include "exec/cpu-common.h"
>  #include "system/host_iommu_device.h"
> +#ifdef CONFIG_LINUX
> +#include <linux/iommufd.h>
> +#endif
>  
>  #define TYPE_IOMMUFD_BACKEND "iommufd"
>  OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
> @@ -63,4 +66,23 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>                                        Error **errp);
>  
>  #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> +
> +typedef union VendorCaps {
> +    struct iommu_hw_info_vtd vtd;
> +    struct iommu_hw_info_arm_smmuv3 smmuv3;
> +} VendorCaps;
> +
> +/**
> + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device capabilities.
> + *
> + * @type: host platform IOMMU type.
> + *
> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
> + *           the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
> + */
> +typedef struct HostIOMMUDeviceIOMMUFDCaps {
> +    uint32_t type;
> +    uint64_t hw_caps;
> +    VendorCaps vendor_caps;
> +} HostIOMMUDeviceIOMMUFDCaps;
Why can't we extend the existing HostIOMMUDeviceCaps in host_iommu_device.h?

Eric
>  #endif
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 48db105422..530cde6740 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -324,7 +324,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>       * vfio_migration_realize() may decide to use VF dirty tracking
>       * instead.
>       */
> -    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
> +    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>      }
>  
> @@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>      int ret, devfd;
>      uint32_t ioas_id;
>      Error *err = NULL;
> +    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
>      const VFIOIOMMUClass *iommufd_vioc =
>          VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>  
> @@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>          goto err_alloc_ioas;
>      }
>  
> +    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> +                                         &caps->type, &caps->vendor_caps,
> +                                         sizeof(VendorCaps), &caps->hw_caps,
> +                                         errp)) {
> +        goto err_alloc_ioas;
> +    }
> +
>      /* try to attach to an existing container in this space */
>      QLIST_FOREACH(bcontainer, &space->containers, next) {
>          container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);


Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Cédric Le Goater 7 months, 1 week ago
On 4/11/25 12:17, Zhenzhong Duan wrote:
> The saved caps copy can be used to check dirty tracking capability.
> 
> The capabilities is gotten through IOMMUFD interface, so define a
> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
> caps raw data in "include/system/iommufd.h".
> 
> This is a prepare work for moving .realize() after .attach_device().
> 
> Suggested-by: Cédric Le Goater <clg@redhat.com>
> Suggested-by: Eric Auger <eric.auger@redhat.com>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/vfio/vfio-device.h |  1 +
>   include/system/iommufd.h      | 22 ++++++++++++++++++++++
>   hw/vfio/iommufd.c             | 10 +++++++++-
>   3 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 66797b4c92..09a7af891a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>       bool dirty_tracking; /* Protected by BQL */
>       bool iommu_dirty_tracking;
>       HostIOMMUDevice *hiod;
> +    HostIOMMUDeviceIOMMUFDCaps caps;

IMO, these capabilities belong to HostIOMMUDevice and not VFIODevice.

I would simply call iommufd_backend_get_device_info() twice where needed :
iommufd_cdev_autodomains_get() and  hiod_iommufd_vfio_realize()


Thanks,

C.



>       int devid;
>       IOMMUFDBackend *iommufd;
>       VFIOIOASHwpt *hwpt;
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index cbab75bfbf..0f337585c9 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -18,6 +18,9 @@
>   #include "exec/hwaddr.h"
>   #include "exec/cpu-common.h"
>   #include "system/host_iommu_device.h"
> +#ifdef CONFIG_LINUX
> +#include <linux/iommufd.h>
> +#endif
>   
>   #define TYPE_IOMMUFD_BACKEND "iommufd"
>   OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
> @@ -63,4 +66,23 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>                                         Error **errp);
>   
>   #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> +
> +typedef union VendorCaps {
> +    struct iommu_hw_info_vtd vtd;
> +    struct iommu_hw_info_arm_smmuv3 smmuv3;
> +} VendorCaps;
> +
> +/**
> + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device capabilities.
> + *
> + * @type: host platform IOMMU type.
> + *
> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
> + *           the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
> + */
> +typedef struct HostIOMMUDeviceIOMMUFDCaps {
> +    uint32_t type;
> +    uint64_t hw_caps;
> +    VendorCaps vendor_caps;
> +} HostIOMMUDeviceIOMMUFDCaps;
>   #endif
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 48db105422..530cde6740 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -324,7 +324,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>        * vfio_migration_realize() may decide to use VF dirty tracking
>        * instead.
>        */
> -    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
> +    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>           flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>       }
>   
> @@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>       int ret, devfd;
>       uint32_t ioas_id;
>       Error *err = NULL;
> +    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
>       const VFIOIOMMUClass *iommufd_vioc =
>           VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>   
> @@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>           goto err_alloc_ioas;
>       }
>   
> +    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> +                                         &caps->type, &caps->vendor_caps,
> +                                         sizeof(VendorCaps), &caps->hw_caps,
> +                                         errp)) {
> +        goto err_alloc_ioas;
> +    }
> +
>       /* try to attach to an existing container in this space */
>       QLIST_FOREACH(bcontainer, &space->containers, next) {
>           container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);


Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Eric Auger 6 months, 2 weeks ago
Hi Zhenzhong,

On 4/11/25 1:28 PM, Cédric Le Goater wrote:
> On 4/11/25 12:17, Zhenzhong Duan wrote:
>> The saved caps copy can be used to check dirty tracking capability.
>>
>> The capabilities is gotten through IOMMUFD interface, so define a
>> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
>> caps raw data in "include/system/iommufd.h".
>>
>> This is a prepare work for moving .realize() after .attach_device().
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.com>
>> Suggested-by: Eric Auger <eric.auger@redhat.com>
>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/vfio/vfio-device.h |  1 +
>>   include/system/iommufd.h      | 22 ++++++++++++++++++++++
>>   hw/vfio/iommufd.c             | 10 +++++++++-
>>   3 files changed, 32 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/vfio/vfio-device.h
>> b/include/hw/vfio/vfio-device.h
>> index 66797b4c92..09a7af891a 100644
>> --- a/include/hw/vfio/vfio-device.h
>> +++ b/include/hw/vfio/vfio-device.h
>> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>>       bool dirty_tracking; /* Protected by BQL */
>>       bool iommu_dirty_tracking;
>>       HostIOMMUDevice *hiod;
>> +    HostIOMMUDeviceIOMMUFDCaps caps;
>
> IMO, these capabilities belong to HostIOMMUDevice and not VFIODevice.
I do agree with Cédric that this looks like the wrong place to put these caps. I
feel this somehow breaks the abstraction layering.

Now that "[PATCH v2 0/5] vfio: Move realize after attach_dev" has landed, I
think it would help if you could respin with a clear functional goal
such as the one targeted in "[PATCH v2 0/5] Check host IOMMU compatilibity
with vIOMMU"
<https://lore.kernel.org/all/20240408084404.1111628-1-zhenzhong.duan@intel.com/>

Thanks

Eric
>
> I would simply call iommufd_backend_get_device_info() twice where
> needed :
> iommufd_cdev_autodomains_get() and  hiod_iommufd_vfio_realize()
>
>
> Thanks,
>
> C.
>
>
>
>>       int devid;
>>       IOMMUFDBackend *iommufd;
>>       VFIOIOASHwpt *hwpt;
>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
>> index cbab75bfbf..0f337585c9 100644
>> --- a/include/system/iommufd.h
>> +++ b/include/system/iommufd.h
>> @@ -18,6 +18,9 @@
>>   #include "exec/hwaddr.h"
>>   #include "exec/cpu-common.h"
>>   #include "system/host_iommu_device.h"
>> +#ifdef CONFIG_LINUX
>> +#include <linux/iommufd.h>
>> +#endif
>>     #define TYPE_IOMMUFD_BACKEND "iommufd"
>>   OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass,
>> IOMMUFD_BACKEND)
>> @@ -63,4 +66,23 @@ bool
>> iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>>                                         Error **errp);
>>     #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE
>> "-iommufd"
>> +
>> +typedef union VendorCaps {
>> +    struct iommu_hw_info_vtd vtd;
>> +    struct iommu_hw_info_arm_smmuv3 smmuv3;
>> +} VendorCaps;
>> +
>> +/**
>> + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device
>> capabilities.
>> + *
>> + * @type: host platform IOMMU type.
>> + *
>> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this
>> represents
>> + *           the @out_capabilities value returned from
>> IOMMU_GET_HW_INFO ioctl)
>> + */
>> +typedef struct HostIOMMUDeviceIOMMUFDCaps {
>> +    uint32_t type;
>> +    uint64_t hw_caps;
>> +    VendorCaps vendor_caps;
>> +} HostIOMMUDeviceIOMMUFDCaps;
>>   #endif
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 48db105422..530cde6740 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -324,7 +324,7 @@ static bool
>> iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>        * vfio_migration_realize() may decide to use VF dirty tracking
>>        * instead.
>>        */
>> -    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>> +    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>>           flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>       }
>>   @@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char
>> *name, VFIODevice *vbasedev,
>>       int ret, devfd;
>>       uint32_t ioas_id;
>>       Error *err = NULL;
>> +    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
>>       const VFIOIOMMUClass *iommufd_vioc =
>>          
>> VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>   @@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char
>> *name, VFIODevice *vbasedev,
>>           goto err_alloc_ioas;
>>       }
>>   +    if (!iommufd_backend_get_device_info(vbasedev->iommufd,
>> vbasedev->devid,
>> +                                         &caps->type,
>> &caps->vendor_caps,
>> +                                         sizeof(VendorCaps),
>> &caps->hw_caps,
>> +                                         errp)) {
>> +        goto err_alloc_ioas;
>> +    }
>> +
>>       /* try to attach to an existing container in this space */
>>       QLIST_FOREACH(bcontainer, &space->containers, next) {
>>           container = container_of(bcontainer, VFIOIOMMUFDContainer,
>> bcontainer);
>


RE: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Duan, Zhenzhong 6 months, 1 week ago
Hi Eric,

>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Sent: Tuesday, May 6, 2025 12:15 AM
>To: Cédric Le Goater <clg@redhat.com>; Duan, Zhenzhong
><zhenzhong.duan@intel.com>; qemu-devel@nongnu.org
>Cc: alex.williamson@redhat.com; nicolinc@nvidia.com;
>joao.m.martins@oracle.com; Peng, Chao P <chao.p.peng@intel.com>; Liu, Yi L
><yi.l.liu@intel.com>
>Subject: Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in
>VFIODevice.caps
>
>Hi Zhenzhong,
>
>On 4/11/25 1:28 PM, Cédric Le Goater wrote:
>> On 4/11/25 12:17, Zhenzhong Duan wrote:
>>> The saved caps copy can be used to check dirty tracking capability.
>>>
>>> The capabilities is gotten through IOMMUFD interface, so define a
>>> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
>>> caps raw data in "include/system/iommufd.h".
>>>
>>> This is a prepare work for moving .realize() after .attach_device().
>>>
>>> Suggested-by: Cédric Le Goater <clg@redhat.com>
>>> Suggested-by: Eric Auger <eric.auger@redhat.com>
>>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>   include/hw/vfio/vfio-device.h |  1 +
>>>   include/system/iommufd.h      | 22 ++++++++++++++++++++++
>>>   hw/vfio/iommufd.c             | 10 +++++++++-
>>>   3 files changed, 32 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/hw/vfio/vfio-device.h
>>> b/include/hw/vfio/vfio-device.h
>>> index 66797b4c92..09a7af891a 100644
>>> --- a/include/hw/vfio/vfio-device.h
>>> +++ b/include/hw/vfio/vfio-device.h
>>> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>>>       bool dirty_tracking; /* Protected by BQL */
>>>       bool iommu_dirty_tracking;
>>>       HostIOMMUDevice *hiod;
>>> +    HostIOMMUDeviceIOMMUFDCaps caps;
>>
>> IMO, these capabilities belong to HostIOMMUDevice and not VFIODevice.
>I do agree with Cédric that it looks a wrong place to put this caps. I
>feel this somehow breaks the abstraction layering.

This change was dropped in "[PATCH v2 0/5] vfio: Move realize after attach_dev".

>
>Now "[PATCH v2 0/5] vfio: Move realize after attach_dev" has landed, I
>think it would help if you could respin with a clear functional goal
>such as the one targeted in[PATCH v2 0/5] Check host IOMMU compatilibity
>with vIOMMU
><https://lore.kernel.org/all/20240408084404.1111628-1-
>zhenzhong.duan@intel.com/>

See a rfcv3 candidate at link https://github.com/yiliu1765/qemu/commits/zhenzhong/iommufd_nesting_rfcv3.wip/ for example implementation.

In this example, I used the .get_cap() interface for the vIOMMU to get the caps.
The vIOMMU could also access HostIOMMUDevice::HostIOMMUDeviceCaps
directly, as it's passed from VFIO to the vIOMMU along with HostIOMMUDevice.

Thanks
Zhenzhong
RE: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Duan, Zhenzhong 7 months ago

>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in
>VFIODevice.caps
>
>On 4/11/25 12:17, Zhenzhong Duan wrote:
>> The saved caps copy can be used to check dirty tracking capability.
>>
>> The capabilities is gotten through IOMMUFD interface, so define a
>> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
>> caps raw data in "include/system/iommufd.h".
>>
>> This is a prepare work for moving .realize() after .attach_device().
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.com>
>> Suggested-by: Eric Auger <eric.auger@redhat.com>
>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/vfio/vfio-device.h |  1 +
>>   include/system/iommufd.h      | 22 ++++++++++++++++++++++
>>   hw/vfio/iommufd.c             | 10 +++++++++-
>>   3 files changed, 32 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
>> index 66797b4c92..09a7af891a 100644
>> --- a/include/hw/vfio/vfio-device.h
>> +++ b/include/hw/vfio/vfio-device.h
>> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>>       bool dirty_tracking; /* Protected by BQL */
>>       bool iommu_dirty_tracking;
>>       HostIOMMUDevice *hiod;
>> +    HostIOMMUDeviceIOMMUFDCaps caps;
>
>IMO, these capabilities belong to HostIOMMUDevice and not VFIODevice.

This was trying to address the suggestions in [1]. The caps are generated by the IOMMUFD backend
and are only used by hiod_iommufd_get_cap(); hiod_legacy_vfio_get_cap() never
checks them. By putting them in VFIODevice, I can save the vendor caps in a union, in raw
data format; hiod_iommufd_get_cap() recognizes the raw data format and can
check it for cap support.

If we keep the caps in HostIOMMUDevice, I can think of a change like the one below to address Eric and Nicolin's suggestion:
https://github.com/yiliu1765/qemu/commit/e05f91b2a724cefa8356969cb43284f7c3ec11d1
https://github.com/yiliu1765/qemu/commit/e05f91b2a724cefa8356969cb43284f7c3ec11d1

Does the change make sense for you?

[1] https://lists.gnu.org/archive/html/qemu-devel/2025-03/msg01552.html

>
>I would simply call iommufd_backend_get_device_info() twice where needed :
>iommufd_cdev_autodomains_get() and  hiod_iommufd_vfio_realize()

OK, will do, that's simpler than current change.

Thanks
Zhenzhong
Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Joao Martins 7 months, 1 week ago
On 11/04/2025 11:17, Zhenzhong Duan wrote:
> The saved caps copy can be used to check dirty tracking capability.
> 
> The capabilities is gotten through IOMMUFD interface, so define a
> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
> caps raw data in "include/system/iommufd.h".
> 
> This is a prepare work for moving .realize() after .attach_device().
> 
> Suggested-by: Cédric Le Goater <clg@redhat.com>
> Suggested-by: Eric Auger <eric.auger@redhat.com>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/vfio/vfio-device.h |  1 +
>  include/system/iommufd.h      | 22 ++++++++++++++++++++++
>  hw/vfio/iommufd.c             | 10 +++++++++-
>  3 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 66797b4c92..09a7af891a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>      bool dirty_tracking; /* Protected by BQL */
>      bool iommu_dirty_tracking;
>      HostIOMMUDevice *hiod;
> +    HostIOMMUDeviceIOMMUFDCaps caps;
>      int devid;
>      IOMMUFDBackend *iommufd;
>      VFIOIOASHwpt *hwpt;
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index cbab75bfbf..0f337585c9 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -18,6 +18,9 @@
>  #include "exec/hwaddr.h"
>  #include "exec/cpu-common.h"
>  #include "system/host_iommu_device.h"
> +#ifdef CONFIG_LINUX
> +#include <linux/iommufd.h>
> +#endif
>  
>  #define TYPE_IOMMUFD_BACKEND "iommufd"
>  OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
> @@ -63,4 +66,23 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>                                        Error **errp);
>  
>  #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> +
> +typedef union VendorCaps {
> +    struct iommu_hw_info_vtd vtd;
> +    struct iommu_hw_info_arm_smmuv3 smmuv3;
> +} VendorCaps;
> +
> +/**
> + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device capabilities.
> + *
> + * @type: host platform IOMMU type.
> + *
> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
> + *           the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
> + */
> +typedef struct HostIOMMUDeviceIOMMUFDCaps {
> +    uint32_t type;
> +    uint64_t hw_caps;
> +    VendorCaps vendor_caps;
> +} HostIOMMUDeviceIOMMUFDCaps;
>  #endif
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 48db105422..530cde6740 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -324,7 +324,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>       * vfio_migration_realize() may decide to use VF dirty tracking
>       * instead.
>       */
> -    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
> +    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>      }
>  
> @@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>      int ret, devfd;
>      uint32_t ioas_id;
>      Error *err = NULL;
> +    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
>      const VFIOIOMMUClass *iommufd_vioc =
>          VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>  
> @@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
>          goto err_alloc_ioas;
>      }
>  
> +    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> +                                         &caps->type, &caps->vendor_caps,
> +                                         sizeof(VendorCaps), &caps->hw_caps,
> +                                         errp)) {
> +        goto err_alloc_ioas;
> +    }
> +

I think this will fail on mdev (and thus mistakenly fail the attachment, as
there's no IOMMUFDDevice with mdev)? In case it fails, you can just do:

	if (!vbasedev->mdev && !iommufd_backend_get_device_info(...)) {


RE: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in VFIODevice.caps
Posted by Duan, Zhenzhong 7 months ago

>-----Original Message-----
>From: Joao Martins <joao.m.martins@oracle.com>
>Subject: Re: [PATCH 1/5] vfio/iommufd: Save host iommu capabilities in
>VFIODevice.caps
>
>On 11/04/2025 11:17, Zhenzhong Duan wrote:
>> The saved caps copy can be used to check dirty tracking capability.
>>
>> The capabilities is gotten through IOMMUFD interface, so define a
>> new structure HostIOMMUDeviceIOMMUFDCaps which contains vendor
>> caps raw data in "include/system/iommufd.h".
>>
>> This is a prepare work for moving .realize() after .attach_device().
>>
>> Suggested-by: Cédric Le Goater <clg@redhat.com>
>> Suggested-by: Eric Auger <eric.auger@redhat.com>
>> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/vfio/vfio-device.h |  1 +
>>  include/system/iommufd.h      | 22 ++++++++++++++++++++++
>>  hw/vfio/iommufd.c             | 10 +++++++++-
>>  3 files changed, 32 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
>> index 66797b4c92..09a7af891a 100644
>> --- a/include/hw/vfio/vfio-device.h
>> +++ b/include/hw/vfio/vfio-device.h
>> @@ -77,6 +77,7 @@ typedef struct VFIODevice {
>>      bool dirty_tracking; /* Protected by BQL */
>>      bool iommu_dirty_tracking;
>>      HostIOMMUDevice *hiod;
>> +    HostIOMMUDeviceIOMMUFDCaps caps;
>>      int devid;
>>      IOMMUFDBackend *iommufd;
>>      VFIOIOASHwpt *hwpt;
>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
>> index cbab75bfbf..0f337585c9 100644
>> --- a/include/system/iommufd.h
>> +++ b/include/system/iommufd.h
>> @@ -18,6 +18,9 @@
>>  #include "exec/hwaddr.h"
>>  #include "exec/cpu-common.h"
>>  #include "system/host_iommu_device.h"
>> +#ifdef CONFIG_LINUX
>> +#include <linux/iommufd.h>
>> +#endif
>>
>>  #define TYPE_IOMMUFD_BACKEND "iommufd"
>>  OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass,
>IOMMUFD_BACKEND)
>> @@ -63,4 +66,23 @@ bool
>iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>>                                        Error **errp);
>>
>>  #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD
>TYPE_HOST_IOMMU_DEVICE "-iommufd"
>> +
>> +typedef union VendorCaps {
>> +    struct iommu_hw_info_vtd vtd;
>> +    struct iommu_hw_info_arm_smmuv3 smmuv3;
>> +} VendorCaps;
>> +
>> +/**
>> + * struct HostIOMMUDeviceIOMMUFDCaps - Define host IOMMU device
>capabilities.
>> + *
>> + * @type: host platform IOMMU type.
>> + *
>> + * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this
>represents
>> + *           the @out_capabilities value returned from IOMMU_GET_HW_INFO
>ioctl)
>> + */
>> +typedef struct HostIOMMUDeviceIOMMUFDCaps {
>> +    uint32_t type;
>> +    uint64_t hw_caps;
>> +    VendorCaps vendor_caps;
>> +} HostIOMMUDeviceIOMMUFDCaps;
>>  #endif
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 48db105422..530cde6740 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -324,7 +324,7 @@ static bool
>iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>>       * vfio_migration_realize() may decide to use VF dirty tracking
>>       * instead.
>>       */
>> -    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>> +    if (vbasedev->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
>>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>>      }
>>
>> @@ -475,6 +475,7 @@ static bool iommufd_cdev_attach(const char *name,
>VFIODevice *vbasedev,
>>      int ret, devfd;
>>      uint32_t ioas_id;
>>      Error *err = NULL;
>> +    HostIOMMUDeviceIOMMUFDCaps *caps = &vbasedev->caps;
>>      const VFIOIOMMUClass *iommufd_vioc =
>>
>VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
>>
>> @@ -505,6 +506,13 @@ static bool iommufd_cdev_attach(const char *name,
>VFIODevice *vbasedev,
>>          goto err_alloc_ioas;
>>      }
>>
>> +    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev-
>>devid,
>> +                                         &caps->type, &caps->vendor_caps,
>> +                                         sizeof(VendorCaps), &caps->hw_caps,
>> +                                         errp)) {
>> +        goto err_alloc_ioas;
>> +    }
>> +
>
>I think this will fail on mdev (and thus fail the attachment mistakengly as
>there's no IOMMUFDDevice with mdev) ? In case it fails, you can just do:
>
>	if (!vbasedev->mdev && !iommufd_backend_get_device_info(...)) {

Indeed, thanks for catching this.

BRs,
Zhenzhong