[PATCH v7 08/23] vfio/iommufd: Force creating nesting parent HWPT

Zhenzhong Duan posted 23 patches 3 days, 2 hours ago
[PATCH v7 08/23] vfio/iommufd: Force creating nesting parent HWPT
Posted by Zhenzhong Duan 3 days, 2 hours ago
Call pci_device_get_viommu_flags() to check whether the vIOMMU sets
VIOMMU_FLAG_WANT_NESTING_PARENT.

If yes, create a nesting parent HWPT and add it to the container's hwpt_list,
letting this parent HWPT cover the entire second stage mappings (GPA=>HPA).

This allows a VFIO passthrough device to directly attach to this default HWPT
and then to use the system address space and its listener.

Introduce a vfio_device_get_viommu_flags_want_nesting() helper to facilitate
this implementation.

It is safe to do so because a vIOMMU can still fail the set_iommu_device()
call if something else related to the VFIO device or vIOMMU isn't compatible.

Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
---
 include/hw/vfio/vfio-device.h |  2 ++
 hw/vfio/device.c              | 12 ++++++++++++
 hw/vfio/iommufd.c             |  9 +++++++++
 3 files changed, 23 insertions(+)

diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index a0b8fc2eb6..48d00c7bc4 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -267,6 +267,8 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
 
 void vfio_device_unprepare(VFIODevice *vbasedev);
 
+bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
+
 int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
                                 struct vfio_region_info **info);
 int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 5ed3103e72..be94947623 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -23,6 +23,7 @@
 
 #include "hw/vfio/vfio-device.h"
 #include "hw/vfio/pci.h"
+#include "hw/iommu.h"
 #include "hw/hw.h"
 #include "trace.h"
 #include "qapi/error.h"
@@ -521,6 +522,17 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
     vbasedev->bcontainer = NULL;
 }
 
+bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
+{
+    VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
+
+    if (vdev) {
+        return !!(pci_device_get_viommu_flags(&vdev->parent_obj) &
+                  VIOMMU_FLAG_WANT_NESTING_PARENT);
+    }
+    return false;
+}
+
 /*
  * Traditional ioctl() based io
  */
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 8de765c769..f9d0926274 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -404,6 +404,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
         flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
     }
 
+    /*
+     * If vIOMMU requests VFIO's cooperation to create nesting parent HWPT,
+     * force to create it so that it could be reused by vIOMMU to create
+     * nested HWPT.
+     */
+    if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
+        flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
+    }
+
     if (cpr_is_incoming()) {
         hwpt_id = vbasedev->cpr.hwpt_id;
         goto skip_alloc;
-- 
2.47.1
Re: [PATCH v7 08/23] vfio/iommufd: Force creating nesting parent HWPT
Posted by Cédric Le Goater 2 days, 18 hours ago
On 10/24/25 10:43, Zhenzhong Duan wrote:
> Call pci_device_get_viommu_flags() to get if vIOMMU supports
> VIOMMU_FLAG_WANT_NESTING_PARENT.
> 
> If yes, create a nesting parent HWPT and add it to the container's hwpt_list,
> letting this parent HWPT cover the entire second stage mappings (GPA=>HPA).
> 
> This allows a VFIO passthrough device to directly attach to this default HWPT
> and then to use the system address space and its listener.
> 
> Introduce a vfio_device_get_viommu_flags_want_nesting() helper to facilitate
> this implementation.
> 
> It is safe to do so because a vIOMMU will be able to fail in set_iommu_device()
> call, if something else related to the VFIO device or vIOMMU isn't compatible.
> 
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Reviewed-by: Yi Liu <yi.l.liu@intel.com>
> ---
>   include/hw/vfio/vfio-device.h |  2 ++
>   hw/vfio/device.c              | 12 ++++++++++++
>   hw/vfio/iommufd.c             |  9 +++++++++
>   3 files changed, 23 insertions(+)
> 
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index a0b8fc2eb6..48d00c7bc4 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -267,6 +267,8 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
>   
>   void vfio_device_unprepare(VFIODevice *vbasedev);
>   
> +bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
> +
>   int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>                                   struct vfio_region_info **info);
>   int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 5ed3103e72..be94947623 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -23,6 +23,7 @@
>   
>   #include "hw/vfio/vfio-device.h"
>   #include "hw/vfio/pci.h"
> +#include "hw/iommu.h"
>   #include "hw/hw.h"
>   #include "trace.h"
>   #include "qapi/error.h"
> @@ -521,6 +522,17 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>       vbasedev->bcontainer = NULL;
>   }
>   
> +bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
> +{
> +    VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
> +
> +    if (vdev) {
> +        return !!(pci_device_get_viommu_flags(&vdev->parent_obj) &

Using PCI_DEVICE(vdev) would be more appropriate. It can come later.

Thanks,

C.



> +                  VIOMMU_FLAG_WANT_NESTING_PARENT);
> +    }
> +    return false;
> +}
> +
>   /*
>    * Traditional ioctl() based io
>    */
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 8de765c769..f9d0926274 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -404,6 +404,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>           flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>       }
>   
> +    /*
> +     * If vIOMMU requests VFIO's cooperation to create nesting parent HWPT,
> +     * force to create it so that it could be reused by vIOMMU to create
> +     * nested HWPT.
> +     */
> +    if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
> +        flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
> +    }
> +
>       if (cpr_is_incoming()) {
>           hwpt_id = vbasedev->cpr.hwpt_id;
>           goto skip_alloc;