[PATCH v6 08/22] vfio/iommufd: Force creating nesting parent HWPT

Zhenzhong Duan posted 22 patches 1 month, 3 weeks ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Fabiano Rosas <farosas@suse.de>, Laurent Vivier <lvivier@redhat.com>
There is a newer version of this series
[PATCH v6 08/22] vfio/iommufd: Force creating nesting parent HWPT
Posted by Zhenzhong Duan 1 month, 3 weeks ago
Call pci_device_get_viommu_flags() to check whether the vIOMMU sets
VIOMMU_FLAG_WANT_NESTING_PARENT.

If yes, create a nesting parent HWPT and add it to the container's hwpt_list,
letting this parent HWPT cover the entire second stage mappings (GPA=>HPA).

This allows a VFIO passthrough device to directly attach to this default HWPT
and then to use the system address space and its listener.

Introduce a vfio_device_get_viommu_flags_want_nesting() helper to facilitate
this implementation.

It is safe to do so because the vIOMMU can still fail the set_iommu_device()
call if something else related to the VFIO device or vIOMMU isn't compatible.

Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
---
 include/hw/vfio/vfio-device.h |  2 ++
 hw/vfio/device.c              | 12 ++++++++++++
 hw/vfio/iommufd.c             |  9 +++++++++
 3 files changed, 23 insertions(+)

diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index e7e6243e2d..a964091135 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -257,6 +257,8 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
 
 void vfio_device_unprepare(VFIODevice *vbasedev);
 
+bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
+
 int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
                                 struct vfio_region_info **info);
 int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 08f12ac31f..620cc78b77 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -23,6 +23,7 @@
 
 #include "hw/vfio/vfio-device.h"
 #include "hw/vfio/pci.h"
+#include "hw/iommu.h"
 #include "hw/hw.h"
 #include "trace.h"
 #include "qapi/error.h"
@@ -504,6 +505,17 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
     vbasedev->bcontainer = NULL;
 }
 
+bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
+{
+    VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
+
+    if (vdev) {
+        return !!(pci_device_get_viommu_flags(&vdev->parent_obj) &
+                  VIOMMU_FLAG_WANT_NESTING_PARENT);
+    }
+    return false;
+}
+
 /*
  * Traditional ioctl() based io
  */
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 8c27222f75..f1684a39b7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -379,6 +379,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
         flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
     }
 
+    /*
+     * If vIOMMU requests VFIO's cooperation to create nesting parent HWPT,
+     * force to create it so that it could be reused by vIOMMU to create
+     * nested HWPT.
+     */
+    if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
+        flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
+    }
+
     if (cpr_is_incoming()) {
         hwpt_id = vbasedev->cpr.hwpt_id;
         goto skip_alloc;
-- 
2.47.1
Re: [PATCH v6 08/22] vfio/iommufd: Force creating nesting parent HWPT
Posted by Yi Liu 1 month ago
On 2025/9/18 16:57, Zhenzhong Duan wrote:
> Call pci_device_get_viommu_flags() to get if vIOMMU supports
> VIOMMU_FLAG_WANT_NESTING_PARENT.
> 
> If yes, create a nesting parent HWPT and add it to the container's hwpt_list,
> letting this parent HWPT cover the entire second stage mappings (GPA=>HPA).
> 
> This allows a VFIO passthrough device to directly attach to this default HWPT
> and then to use the system address space and its listener.
> 
> Introduce a vfio_device_get_viommu_flags_want_nesting() helper to facilitate
> this implementation.
> 
> It is safe to do so because a vIOMMU will be able to fail in set_iommu_device()
> call, if something else related to the VFIO device or vIOMMU isn't compatible.
> 
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>   include/hw/vfio/vfio-device.h |  2 ++
>   hw/vfio/device.c              | 12 ++++++++++++
>   hw/vfio/iommufd.c             |  9 +++++++++
>   3 files changed, 23 insertions(+)
>

Reviewed-by: Yi Liu <yi.l.liu@intel.com>

> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index e7e6243e2d..a964091135 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -257,6 +257,8 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>   
>   void vfio_device_unprepare(VFIODevice *vbasedev);
>   
> +bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
> +
>   int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>                                   struct vfio_region_info **info);
>   int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 08f12ac31f..620cc78b77 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -23,6 +23,7 @@
>   
>   #include "hw/vfio/vfio-device.h"
>   #include "hw/vfio/pci.h"
> +#include "hw/iommu.h"
>   #include "hw/hw.h"
>   #include "trace.h"
>   #include "qapi/error.h"
> @@ -504,6 +505,17 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>       vbasedev->bcontainer = NULL;
>   }
>   
> +bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
> +{
> +    VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
> +
> +    if (vdev) {
> +        return !!(pci_device_get_viommu_flags(&vdev->parent_obj) &
> +                  VIOMMU_FLAG_WANT_NESTING_PARENT);
> +    }
> +    return false;
> +}
> +
>   /*
>    * Traditional ioctl() based io
>    */
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 8c27222f75..f1684a39b7 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -379,6 +379,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>           flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>       }
>   
> +    /*
> +     * If vIOMMU requests VFIO's cooperation to create nesting parent HWPT,
> +     * force to create it so that it could be reused by vIOMMU to create
> +     * nested HWPT.
> +     */
> +    if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
> +        flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
> +    }
> +
>       if (cpr_is_incoming()) {
>           hwpt_id = vbasedev->cpr.hwpt_id;
>           goto skip_alloc;
Re: [PATCH v6 08/22] vfio/iommufd: Force creating nesting parent HWPT
Posted by Eric Auger 1 month, 2 weeks ago

On 9/18/25 10:57 AM, Zhenzhong Duan wrote:
> Call pci_device_get_viommu_flags() to get if vIOMMU supports
> VIOMMU_FLAG_WANT_NESTING_PARENT.
>
> If yes, create a nesting parent HWPT and add it to the container's hwpt_list,
> letting this parent HWPT cover the entire second stage mappings (GPA=>HPA).
>
> This allows a VFIO passthrough device to directly attach to this default HWPT
> and then to use the system address space and its listener.
>
> Introduce a vfio_device_get_viommu_flags_want_nesting() helper to facilitate
> this implementation.
>
> It is safe to do so because a vIOMMU will be able to fail in set_iommu_device()
> call, if something else related to the VFIO device or vIOMMU isn't compatible.
>
> Suggested-by: Nicolin Chen <nicolinc@nvidia.com>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Eric
> ---
>  include/hw/vfio/vfio-device.h |  2 ++
>  hw/vfio/device.c              | 12 ++++++++++++
>  hw/vfio/iommufd.c             |  9 +++++++++
>  3 files changed, 23 insertions(+)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index e7e6243e2d..a964091135 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -257,6 +257,8 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>  
>  void vfio_device_unprepare(VFIODevice *vbasedev);
>  
> +bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev);
> +
>  int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>                                  struct vfio_region_info **info);
>  int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 08f12ac31f..620cc78b77 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -23,6 +23,7 @@
>  
>  #include "hw/vfio/vfio-device.h"
>  #include "hw/vfio/pci.h"
> +#include "hw/iommu.h"
>  #include "hw/hw.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> @@ -504,6 +505,17 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>      vbasedev->bcontainer = NULL;
>  }
>  
> +bool vfio_device_get_viommu_flags_want_nesting(VFIODevice *vbasedev)
> +{
> +    VFIOPCIDevice *vdev = vfio_pci_from_vfio_device(vbasedev);
> +
> +    if (vdev) {
> +        return !!(pci_device_get_viommu_flags(&vdev->parent_obj) &
> +                  VIOMMU_FLAG_WANT_NESTING_PARENT);
> +    }
> +    return false;
> +}
> +
>  /*
>   * Traditional ioctl() based io
>   */
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 8c27222f75..f1684a39b7 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -379,6 +379,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>          flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>      }
>  
> +    /*
> +     * If vIOMMU requests VFIO's cooperation to create nesting parent HWPT,
> +     * force to create it so that it could be reused by vIOMMU to create
> +     * nested HWPT.
> +     */
> +    if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
> +        flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
> +    }
> +
>      if (cpr_is_incoming()) {
>          hwpt_id = vbasedev->cpr.hwpt_id;
>          goto skip_alloc;