[PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback

Shameer Kolothum posted 27 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback
Posted by Shameer Kolothum 1 month, 2 weeks ago
For accelerated SMMUv3, we need nested parent domain creation. Add the
callback support so that VFIO can create a nested parent.

In the accelerated SMMUv3 case, the host SMMUv3 is configured in nested
mode (S1 + S2), and the guest owns the Stage-1 page table. Therefore, we
expose only Stage-1 to the guest to ensure it uses the correct page-table
format.

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
 hw/arm/smmuv3-accel.c | 13 +++++++++++++
 hw/arm/virt.c         | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 44410cfb2a..6b0e512d86 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -10,6 +10,7 @@
 #include "qemu/error-report.h"
 
 #include "hw/arm/smmuv3.h"
+#include "hw/iommu.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci-host/gpex.h"
 #include "hw/vfio/pci.h"
@@ -106,8 +107,20 @@ static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
     }
 }
 
+static uint64_t smmuv3_accel_get_viommu_flags(void *opaque)
+{
+    /*
+     * We return VIOMMU_FLAG_WANT_NESTING_PARENT to inform VFIO core to create a
+     * nesting parent which is required for accelerated SMMUv3 support.
+     * The real HW nested support should be reported from host SMMUv3 and if
+     * it doesn't, the nesting parent allocation will fail anyway in VFIO core.
+     */
+    return VIOMMU_FLAG_WANT_NESTING_PARENT;
+}
+
 static const PCIIOMMUOps smmuv3_accel_ops = {
     .get_address_space = smmuv3_accel_find_add_as,
+    .get_viommu_flags = smmuv3_accel_get_viommu_flags,
 };
 
 void smmuv3_accel_init(SMMUv3State *s)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 02209fadcf..b533b0556e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3073,6 +3073,19 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                 return;
             }
 
+            if (object_property_get_bool(OBJECT(dev), "accel", &error_abort)) {
+                char *stage;
+
+                stage = object_property_get_str(OBJECT(dev), "stage",
+                                                &error_fatal);
+                /* If no stage specified, SMMUv3 will default to stage 1 */
+                if (*stage && strcmp("1", stage)) {
+                    error_setg(errp, "Only stage1 is supported for SMMUV3 with "
+                               "accel=on");
+                    return;
+                }
+            }
+
             create_smmuv3_dev_dtb(vms, dev, bus);
         }
     }
-- 
2.43.0
Re: [PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback
Posted by Eric Auger 1 month, 2 weeks ago
Hi Shameer,

On 9/29/25 3:36 PM, Shameer Kolothum wrote:
> For accelerated SMMUv3, we need nested parent domain creation. Add the
> callback support so that VFIO can create a nested parent.
>
> In the accelerated SMMUv3 case, the host SMMUv3 is configured in nested
> mode (S1 + S2), and the guest owns the Stage-1 page table. Therefore, we
> expose only Stage-1 to the guest to ensure it uses the correct page-table
> format.
>
> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
Wonder if you shall keep both. I don't know the usage though but worth
to check.
> ---
>  hw/arm/smmuv3-accel.c | 13 +++++++++++++
>  hw/arm/virt.c         | 13 +++++++++++++
>  2 files changed, 26 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 44410cfb2a..6b0e512d86 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -10,6 +10,7 @@
>  #include "qemu/error-report.h"
>  
>  #include "hw/arm/smmuv3.h"
> +#include "hw/iommu.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/pci-host/gpex.h"
>  #include "hw/vfio/pci.h"
> @@ -106,8 +107,20 @@ static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
>      }
>  }
>  
> +static uint64_t smmuv3_accel_get_viommu_flags(void *opaque)
> +{
> +    /*
> +     * We return VIOMMU_FLAG_WANT_NESTING_PARENT to inform VFIO core to create a
> +     * nesting parent which is required for accelerated SMMUv3 support.
> +     * The real HW nested support should be reported from host SMMUv3 and if
> +     * it doesn't, the nesting parent allocation will fail anyway in VFIO core.
> +     */
> +    return VIOMMU_FLAG_WANT_NESTING_PARENT;
> +}
> +
>  static const PCIIOMMUOps smmuv3_accel_ops = {
>      .get_address_space = smmuv3_accel_find_add_as,
> +    .get_viommu_flags = smmuv3_accel_get_viommu_flags,
>  };
>  
>  void smmuv3_accel_init(SMMUv3State *s)
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 02209fadcf..b533b0556e 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -3073,6 +3073,19 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>                  return;
>              }
>  
> +            if (object_property_get_bool(OBJECT(dev), "accel", &error_abort)) {
This looks unrelated to the get_viommu_flags() addition and to me this
shall be put in a separate patch of squashed in the patch that exposes
the accel prop Thanks Eric
> +                char *stage;
> +
> +                stage = object_property_get_str(OBJECT(dev), "stage",
> +                                                &error_fatal);
> +                /* If no stage specified, SMMUv3 will default to stage 1 */
> +                if (*stage && strcmp("1", stage)) {
> +                    error_setg(errp, "Only stage1 is supported for SMMUV3 with "
> +                               "accel=on");
> +                    return;
> +                }
> +            }
> +
>              create_smmuv3_dev_dtb(vms, dev, bus);
>          }
>      }
Re: [PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback
Posted by Jonathan Cameron via 1 month, 1 week ago
On Wed, 1 Oct 2025 19:36:47 +0200
Eric Auger <eric.auger@redhat.com> wrote:

> Hi Shameer,
> 
> On 9/29/25 3:36 PM, Shameer Kolothum wrote:
> > For accelerated SMMUv3, we need nested parent domain creation. Add the
> > callback support so that VFIO can create a nested parent.
> >
> > In the accelerated SMMUv3 case, the host SMMUv3 is configured in nested
> > mode (S1 + S2), and the guest owns the Stage-1 page table. Therefore, we
> > expose only Stage-1 to the guest to ensure it uses the correct page-table
> > format.
> >
> > Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>  
> Wonder if you shall keep both. I don't know the usage though but worth
> to check.

I've never found any clear guidance on what to do in this case.
Given cost of a bonus SoB that is valid given the "Nvidia" Shameer
has taken the code from the "Huawei" Shameer and moved it forwards
I'd be tempted to keep it unless anyone feels strongly about it.

Jonathan
RE: [PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback
Posted by Shameer Kolothum 1 month, 1 week ago

> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 01 October 2025 18:37
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Jason Gunthorpe <jgg@nvidia.com>; Nicolin
> Chen <nicolinc@nvidia.com>; ddutile@redhat.com; berrange@redhat.com;
> Nathan Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
> smostafa@google.com; wangzhou1@hisilicon.com;
> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
> shameerkolothum@gmail.com
> Subject: Re: [PATCH v4 07/27] hw/arm/smmuv3: Implement
> get_viommu_cap() callback
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi Shameer,
> 
> On 9/29/25 3:36 PM, Shameer Kolothum wrote:
> > For accelerated SMMUv3, we need nested parent domain creation. Add the
> > callback support so that VFIO can create a nested parent.
> >
> > In the accelerated SMMUv3 case, the host SMMUv3 is configured in nested
> > mode (S1 + S2), and the guest owns the Stage-1 page table. Therefore, we
> > expose only Stage-1 to the guest to ensure it uses the correct page-table
> > format.
> >
> > Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> Wonder if you shall keep both. I don't know the usage though but worth
> to check.

Hmm.. I don't know either for sure. What I followed here(I will double check)
is all the patches I had previously(v3) I kept all the S-by tags. That seems to be
a right thing to do and IIRC I have seen that previously as well.

> > ---
> >  hw/arm/smmuv3-accel.c | 13 +++++++++++++
> >  hw/arm/virt.c         | 13 +++++++++++++
> >  2 files changed, 26 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index 44410cfb2a..6b0e512d86 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -10,6 +10,7 @@
> >  #include "qemu/error-report.h"
> >
> >  #include "hw/arm/smmuv3.h"
> > +#include "hw/iommu.h"
> >  #include "hw/pci/pci_bridge.h"
> >  #include "hw/pci-host/gpex.h"
> >  #include "hw/vfio/pci.h"
> > @@ -106,8 +107,20 @@ static AddressSpace
> *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
> >      }
> >  }
> >
> > +static uint64_t smmuv3_accel_get_viommu_flags(void *opaque)
> > +{
> > +    /*
> > +     * We return VIOMMU_FLAG_WANT_NESTING_PARENT to inform VFIO
> core to create a
> > +     * nesting parent which is required for accelerated SMMUv3 support.
> > +     * The real HW nested support should be reported from host SMMUv3
> and if
> > +     * it doesn't, the nesting parent allocation will fail anyway in VFIO core.
> > +     */
> > +    return VIOMMU_FLAG_WANT_NESTING_PARENT;
> > +}
> > +
> >  static const PCIIOMMUOps smmuv3_accel_ops = {
> >      .get_address_space = smmuv3_accel_find_add_as,
> > +    .get_viommu_flags = smmuv3_accel_get_viommu_flags,
> >  };
> >
> >  void smmuv3_accel_init(SMMUv3State *s)
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 02209fadcf..b533b0556e 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -3073,6 +3073,19 @@ static void
> virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> >                  return;
> >              }
> >
> > +            if (object_property_get_bool(OBJECT(dev), "accel", &error_abort)) {
> This looks unrelated to the get_viommu_flags() addition and to me this
> shall be put in a separate patch of squashed in the patch that exposes
> the accel prop Thanks Eric

But my thought process was, without this we can't say the vIOMMU will support
the nesting parent. But then the flag seems to be indicating that vIOMMU "want"
nesting parent. So I guess we can move it for later.

Thanks,
Shameer
Re: [PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback
Posted by Eric Auger 1 month, 1 week ago

On 10/2/25 11:38 AM, Shameer Kolothum wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: 01 October 2025 18:37
>> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; Jason Gunthorpe <jgg@nvidia.com>; Nicolin
>> Chen <nicolinc@nvidia.com>; ddutile@redhat.com; berrange@redhat.com;
>> Nathan Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
>> smostafa@google.com; wangzhou1@hisilicon.com;
>> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
>> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
>> shameerkolothum@gmail.com
>> Subject: Re: [PATCH v4 07/27] hw/arm/smmuv3: Implement
>> get_viommu_cap() callback
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 9/29/25 3:36 PM, Shameer Kolothum wrote:
>>> For accelerated SMMUv3, we need nested parent domain creation. Add the
>>> callback support so that VFIO can create a nested parent.
>>>
>>> In the accelerated SMMUv3 case, the host SMMUv3 is configured in nested
>>> mode (S1 + S2), and the guest owns the Stage-1 page table. Therefore, we
>>> expose only Stage-1 to the guest to ensure it uses the correct page-table
>>> format.
>>>
>>> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Shameer Kolothum
>> <shameerali.kolothum.thodi@huawei.com>
>>> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
>> Wonder if you shall keep both. I don't know the usage though but worth
>> to check.
> Hmm.. I don't know either for sure. What I followed here(I will double check)
> is all the patches I had previously(v3) I kept all the S-by tags. That seems to be
> a right thing to do and IIRC I have seen that previously as well.
>
>>> ---
>>>  hw/arm/smmuv3-accel.c | 13 +++++++++++++
>>>  hw/arm/virt.c         | 13 +++++++++++++
>>>  2 files changed, 26 insertions(+)
>>>
>>> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
>>> index 44410cfb2a..6b0e512d86 100644
>>> --- a/hw/arm/smmuv3-accel.c
>>> +++ b/hw/arm/smmuv3-accel.c
>>> @@ -10,6 +10,7 @@
>>>  #include "qemu/error-report.h"
>>>
>>>  #include "hw/arm/smmuv3.h"
>>> +#include "hw/iommu.h"
>>>  #include "hw/pci/pci_bridge.h"
>>>  #include "hw/pci-host/gpex.h"
>>>  #include "hw/vfio/pci.h"
>>> @@ -106,8 +107,20 @@ static AddressSpace
>> *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
>>>      }
>>>  }
>>>
>>> +static uint64_t smmuv3_accel_get_viommu_flags(void *opaque)
>>> +{
>>> +    /*
>>> +     * We return VIOMMU_FLAG_WANT_NESTING_PARENT to inform VFIO
>> core to create a
>>> +     * nesting parent which is required for accelerated SMMUv3 support.
>>> +     * The real HW nested support should be reported from host SMMUv3
>> and if
>>> +     * it doesn't, the nesting parent allocation will fail anyway in VFIO core.
>>> +     */
>>> +    return VIOMMU_FLAG_WANT_NESTING_PARENT;
>>> +}
>>> +
>>>  static const PCIIOMMUOps smmuv3_accel_ops = {
>>>      .get_address_space = smmuv3_accel_find_add_as,
>>> +    .get_viommu_flags = smmuv3_accel_get_viommu_flags,
>>>  };
>>>
>>>  void smmuv3_accel_init(SMMUv3State *s)
>>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>>> index 02209fadcf..b533b0556e 100644
>>> --- a/hw/arm/virt.c
>>> +++ b/hw/arm/virt.c
>>> @@ -3073,6 +3073,19 @@ static void
>> virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>>>                  return;
>>>              }
>>>
>>> +            if (object_property_get_bool(OBJECT(dev), "accel", &error_abort)) {
>> This looks unrelated to the get_viommu_flags() addition and to me this
>> shall be put in a separate patch of squashed in the patch that exposes
>> the accel prop Thanks Eric
> But my thought process was, without this we can't say the vIOMMU will support
> the nesting parent. But then the flag seems to be indicating that vIOMMU "want"
> nesting parent. So I guess we can move it for later.
Yes that's my understanding too

Eric
>
> Thanks,
> Shameer
Re: [PATCH v4 07/27] hw/arm/smmuv3: Implement get_viommu_cap() callback
Posted by Jonathan Cameron via 1 month, 2 weeks ago
On Mon, 29 Sep 2025 14:36:23 +0100
Shameer Kolothum <skolothumtho@nvidia.com> wrote:

> For accelerated SMMUv3, we need nested parent domain creation. Add the
> callback support so that VFIO can create a nested parent.
> 
> In the accelerated SMMUv3 case, the host SMMUv3 is configured in nested
> mode (S1 + S2), and the guest owns the Stage-1 page table. Therefore, we
> expose only Stage-1 to the guest to ensure it uses the correct page-table
> format.
> 
> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>