[PATCH v5 14/21] intel_iommu: Stick to system MR for IOMMUFD backed host device when x-flts=on

Zhenzhong Duan posted 21 patches 2 months, 3 weeks ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, Yi Liu <yi.l.liu@intel.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>
[PATCH v5 14/21] intel_iommu: Stick to system MR for IOMMUFD backed host device when x-flts=on
Posted by Zhenzhong Duan 2 months, 3 weeks ago
When the guest is in scalable mode and x-flts=on, we stick to the system MR for
an IOMMUFD-backed host device. Its default hwpt then contains the GPA->HPA
mappings, which are used directly if PGTT=PT and as the nested parent if
PGTT=FLT. Otherwise, fall back to the original processing.

Suggested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index af384ce7f0..15582977b8 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1773,6 +1773,28 @@ static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce,
 
 }
 
+static VTDHostIOMMUDevice *vtd_find_hiod_iommufd(IntelIOMMUState *s,
+                                                 VTDAddressSpace *as)
+{
+    struct vtd_as_key key = {
+        .bus = as->bus,
+        .devfn = as->devfn,
+    };
+    VTDHostIOMMUDevice *vtd_hiod = g_hash_table_lookup(s->vtd_host_iommu_dev,
+                                                       &key);
+
+    if (vtd_hiod && vtd_hiod->hiod &&
+        object_dynamic_cast(OBJECT(vtd_hiod->hiod),
+                            TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+        return vtd_hiod;
+    }
+    return NULL;
+}
+
+/*
+ * vtd_switch_address_space() calls vtd_as_pt_enabled() to determine which
+ * MR to switch to. Switch to system MR if return true, iommu MR otherwise.
+ */
 static bool vtd_as_pt_enabled(VTDAddressSpace *as)
 {
     IntelIOMMUState *s;
@@ -1781,6 +1803,18 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
     assert(as);
 
     s = as->iommu_state;
+
+    /*
+     * When guest in scalable mode and x-flts=on, we stick to system MR
+     * for IOMMUFD backed host device. Then its default hwpt contains
+     * GPA->HPA mappings which is used directly if PGTT=PT and used as
+     * nested parent if PGTT=FLT. Otherwise fallback to original
+     * processing.
+     */
+    if (s->root_scalable && s->flts && vtd_find_hiod_iommufd(s, as)) {
+        return true;
+    }
+
     if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn,
                                  &ce)) {
         /*
-- 
2.47.1
Re: [PATCH v5 14/21] intel_iommu: Stick to system MR for IOMMUFD backed host device when x-flts=on
Posted by Yi Liu 2 months, 2 weeks ago
On 2025/8/22 14:40, Zhenzhong Duan wrote:
> When guest in scalable mode and x-flts=on, we stick to system MR for IOMMUFD
> backed host device. Then its default hwpt contains GPA->HPA mappings which is
> used directly if PGTT=PT and used as nested parent if PGTT=FLT. Otherwise
> fallback to original processing.
> 
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu.c | 34 ++++++++++++++++++++++++++++++++++
>   1 file changed, 34 insertions(+)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index af384ce7f0..15582977b8 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1773,6 +1773,28 @@ static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce,
>   
>   }
>   
> +static VTDHostIOMMUDevice *vtd_find_hiod_iommufd(IntelIOMMUState *s,
> +                                                 VTDAddressSpace *as)
> +{
> +    struct vtd_as_key key = {
> +        .bus = as->bus,
> +        .devfn = as->devfn,
> +    };
> +    VTDHostIOMMUDevice *vtd_hiod = g_hash_table_lookup(s->vtd_host_iommu_dev,
> +                                                       &key);
> +
> +    if (vtd_hiod && vtd_hiod->hiod &&
> +        object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> +                            TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> +        return vtd_hiod;
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * vtd_switch_address_space() calls vtd_as_pt_enabled() to determine which
> + * MR to switch to. Switch to system MR if return true, iommu MR otherwise.
> + */
>   static bool vtd_as_pt_enabled(VTDAddressSpace *as)
>   {
>       IntelIOMMUState *s;
> @@ -1781,6 +1803,18 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
>       assert(as);
>   
>       s = as->iommu_state;
> +
> +    /*
> +     * When guest in scalable mode and x-flts=on, we stick to system MR
> +     * for IOMMUFD backed host device. Then its default hwpt contains
> +     * GPA->HPA mappings which is used directly if PGTT=PT and used as
> +     * nested parent if PGTT=FLT. Otherwise fallback to original
> +     * processing.
> +     */
> +    if (s->root_scalable && s->flts && vtd_find_hiod_iommufd(s, as)) {
> +        return true;
> +    }
> +

I think you should add this logic in vtd_switch_address_space() instead,
since the return value of this helper is meant to reflect whether the guest
has enabled PT. Changing it here may break the logic on the caller side.

Regards,
Yi Liu

>       if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn,
>                                    &ce)) {
>           /*
Re: [PATCH v5 14/21] intel_iommu: Stick to system MR for IOMMUFD backed host device when x-flts=on
Posted by Eric Auger 2 months, 2 weeks ago
Hi Zhenzhong,

On 8/22/25 8:40 AM, Zhenzhong Duan wrote:
> When guest in scalable mode and x-flts=on, we stick to system MR for IOMMUFD
When x-flts is set on the IOMMU and the guest uses scalable mode, we don't
want to use the IOMMU MR but rather continue using the system MR, or
something along those lines.

To me this deserves more explanation about why we don't want the IOMMU MR
anymore, from a QEMU infrastructure point of view.
> backed host device. Then its default hwpt contains GPA->HPA mappings which is
> used directly if PGTT=PT and used as nested parent if PGTT=FLT. Otherwise
> fallback to original processing.
>
> Suggested-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  hw/i386/intel_iommu.c | 34 ++++++++++++++++++++++++++++++++++
>  1 file changed, 34 insertions(+)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index af384ce7f0..15582977b8 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1773,6 +1773,28 @@ static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce,
>  
>  }
>  
> +static VTDHostIOMMUDevice *vtd_find_hiod_iommufd(IntelIOMMUState *s,
> +                                                 VTDAddressSpace *as)
> +{
> +    struct vtd_as_key key = {
> +        .bus = as->bus,
> +        .devfn = as->devfn,
> +    };
> +    VTDHostIOMMUDevice *vtd_hiod = g_hash_table_lookup(s->vtd_host_iommu_dev,
> +                                                       &key);
> +
> +    if (vtd_hiod && vtd_hiod->hiod &&
> +        object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> +                            TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> +        return vtd_hiod;
> +    }
> +    return NULL;
> +}
> +
> +/*
> + * vtd_switch_address_space() calls vtd_as_pt_enabled() to determine which
> + * MR to switch to. Switch to system MR if return true, iommu MR otherwise.
I would use a proper doc comment and refer to this function first
> + */
>  static bool vtd_as_pt_enabled(VTDAddressSpace *as)
>  {
>      IntelIOMMUState *s;
> @@ -1781,6 +1803,18 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
>      assert(as);
>  
>      s = as->iommu_state;
> +
> +    /*
> +     * When guest in scalable mode and x-flts=on, we stick to system MR
> +     * for IOMMUFD backed host device. Then its default hwpt contains
> +     * GPA->HPA mappings which is used directly if PGTT=PT and used as
> +     * nested parent if PGTT=FLT. Otherwise fallback to original
> +     * processing.
> +     */
> +    if (s->root_scalable && s->flts && vtd_find_hiod_iommufd(s, as)) {
> +        return true;
> +    }
> +
>      if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn,
>                                   &ce)) {
>          /*
Thanks

Eric