[RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add set/unset_iommu_device callback

Shameer Kolothum via posted 20 patches 3 weeks, 4 days ago
[RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add set/unset_iommu_device callback
Posted by Shameer Kolothum via 3 weeks, 4 days ago
From: Nicolin Chen <nicolinc@nvidia.com>

Implement a set_iommu_device callback:
 -Find an existing S2 hwpt to test attach() or allocate a new one
   (Devices behind the same physical SMMU should share an S2 HWPT.)
 -Attach the device to the S2 hwp
 -Allocate a viommu with the returned s2 hwpt.
 -Allocate bypass and abort hwpt and attach bypass hwpt.
 -and add it to its device list

Also add an unset_iommu_device doing the opposite cleanup routine.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 hw/arm/meson.build            |   2 +-
 hw/arm/smmuv3-accel.c         | 183 ++++++++++++++++++++++++++++++++++
 hw/arm/trace-events           |   4 +
 include/hw/arm/smmuv3-accel.h |  23 +++++
 include/system/iommufd.h      |   6 ++
 5 files changed, 217 insertions(+), 1 deletion(-)

diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index e8593363b0..dd41a86619 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -55,7 +55,7 @@ arm_ss.add(when: 'CONFIG_MUSCA', if_true: files('musca.c'))
 arm_ss.add(when: 'CONFIG_ARMSSE', if_true: files('armsse.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre.c'))
 arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c'))
-arm_ss.add(when: 'CONFIG_ARM_SMMUV3_ACCEL', if_true: files('smmuv3-accel.c'))
+arm_ss.add(when: ['CONFIG_ARM_SMMUV3_ACCEL', 'CONFIG_IOMMUFD'], if_true: files('smmuv3-accel.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c'))
 arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
 arm_ss.add(when: 'CONFIG_XEN', if_true: files(
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 6610ebe4be..1c696649d5 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -7,6 +7,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "trace.h"
+#include "qemu/error-report.h"
 
 #include "hw/arm/smmuv3-accel.h"
 #include "hw/pci/pci_bridge.h"
@@ -30,6 +32,185 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
     return accel_dev;
 }
 
+static bool
+smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
+                               HostIOMMUDeviceIOMMUFD *idev, Error **errp)
+{
+    struct iommu_hwpt_arm_smmuv3 bypass_data = {
+        .ste = { 0x9ULL, 0x0ULL },
+    };
+    struct iommu_hwpt_arm_smmuv3 abort_data = {
+        .ste = { 0x1ULL, 0x0ULL },
+    };
+    SMMUDevice *sdev = &accel_dev->sdev;
+    SMMUState *s = sdev->smmu;
+    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
+    SMMUS2Hwpt *s2_hwpt;
+    SMMUViommu *viommu;
+    uint32_t s2_hwpt_id;
+    uint32_t viommu_id;
+
+    if (s_accel->viommu) {
+        accel_dev->viommu = s_accel->viommu;
+        return host_iommu_device_iommufd_attach_hwpt(
+                       idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
+    }
+
+    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id,
+                                    IOMMU_HWPT_ALLOC_NEST_PARENT,
+                                    IOMMU_HWPT_DATA_NONE, 0, NULL,
+                                    &s2_hwpt_id, errp)) {
+        return false;
+    }
+
+    /* Attach to S2 for MSI cookie */
+    if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
+        goto free_s2_hwpt;
+    }
+
+    if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
+                                      IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
+                                      s2_hwpt_id, &viommu_id, errp)) {
+        goto detach_s2_hwpt;
+    }
+
+    viommu = g_new0(SMMUViommu, 1);
+    viommu->core.viommu_id = viommu_id;
+    viommu->core.s2_hwpt_id = s2_hwpt_id;
+    viommu->core.iommufd = idev->iommufd;
+
+    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+                                    viommu->core.viommu_id, 0,
+                                    IOMMU_HWPT_DATA_ARM_SMMUV3,
+                                    sizeof(abort_data), &abort_data,
+                                    &viommu->abort_hwpt_id, errp)) {
+        error_report("failed to allocate an abort pagetable");
+        goto free_viommu;
+    }
+
+    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+                                    viommu->core.viommu_id, 0,
+                                    IOMMU_HWPT_DATA_ARM_SMMUV3,
+                                    sizeof(bypass_data), &bypass_data,
+                                    &viommu->bypass_hwpt_id, errp)) {
+        error_report("failed to allocate a bypass pagetable");
+        goto free_abort_hwpt;
+    }
+
+    /*
+     * Attach the bypass STE which means S1 bypass and S2 translate.
+     * This is to make sure that the vIOMMU object is now associated
+     * with the device and has this STE installed in the host SMMUV3.
+     */
+    if (!host_iommu_device_iommufd_attach_hwpt(
+                idev, viommu->bypass_hwpt_id, errp)) {
+        error_report("failed to attach the bypass pagetable");
+        goto free_bypass_hwpt;
+    }
+
+    s2_hwpt = g_new0(SMMUS2Hwpt, 1);
+    s2_hwpt->iommufd = idev->iommufd;
+    s2_hwpt->hwpt_id = s2_hwpt_id;
+    s2_hwpt->ioas_id = idev->ioas_id;
+
+    viommu->iommufd = idev->iommufd;
+    viommu->s2_hwpt = s2_hwpt;
+
+    s_accel->viommu = viommu;
+    accel_dev->viommu = viommu;
+    return true;
+
+free_bypass_hwpt:
+    iommufd_backend_free_id(idev->iommufd, viommu->bypass_hwpt_id);
+free_abort_hwpt:
+    iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id);
+free_viommu:
+    iommufd_backend_free_id(idev->iommufd, viommu->core.viommu_id);
+    g_free(viommu);
+detach_s2_hwpt:
+    host_iommu_device_iommufd_attach_hwpt(idev, accel_dev->idev->ioas_id, errp);
+free_s2_hwpt:
+    iommufd_backend_free_id(idev->iommufd, s2_hwpt_id);
+    return false;
+}
+
+static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+                                          HostIOMMUDevice *hiod, Error **errp)
+{
+    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+    SMMUState *s = opaque;
+    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
+    SMMUPciBus *sbus = smmu_get_sbus(s, bus);
+    SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(s, sbus, bus, devfn);
+    SMMUDevice *sdev = &accel_dev->sdev;
+
+    if (!idev) {
+        return true;
+    }
+
+    if (accel_dev->idev) {
+        if (accel_dev->idev != idev) {
+            error_report("Device 0x%x already ha an associated idev",
+                         smmu_get_sid(sdev));
+            return false;
+        } else {
+            return true;
+        }
+    }
+
+    if (!smmuv3_accel_dev_attach_viommu(accel_dev, idev, errp)) {
+        error_report("Unable to attach viommu");
+        return false;
+    }
+
+    accel_dev->idev = idev;
+    QLIST_INSERT_HEAD(&s_accel->viommu->device_list, accel_dev, next);
+    trace_smmuv3_accel_set_iommu_device(devfn, smmu_get_sid(sdev));
+    return true;
+}
+
+static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
+                                            int devfn)
+{
+    SMMUDevice *sdev;
+    SMMUv3AccelDevice *accel_dev;
+    SMMUViommu *viommu;
+    SMMUState *s = opaque;
+    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
+    SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
+
+    if (!sbus) {
+        return;
+    }
+
+    sdev = sbus->pbdev[devfn];
+    if (!sdev) {
+        return;
+    }
+
+    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
+    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
+                                               accel_dev->idev->ioas_id,
+                                               NULL)) {
+        error_report("Unable to attach dev to the default HW pagetable");
+    }
+
+
+    accel_dev->idev = NULL;
+    QLIST_REMOVE(accel_dev, next);
+    trace_smmuv3_accel_unset_iommu_device(devfn, smmu_get_sid(sdev));
+
+    viommu = s_accel->viommu;
+    if (QLIST_EMPTY(&viommu->device_list)) {
+        iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
+        iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
+        iommufd_backend_free_id(viommu->iommufd, viommu->core.viommu_id);
+        iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id);
+        g_free(viommu->s2_hwpt);
+        g_free(viommu);
+        s_accel->viommu = NULL;
+    }
+}
 static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
                                               int devfn)
 {
@@ -77,6 +258,8 @@ static void smmu_accel_realize(DeviceState *d, Error **errp)
         return;
     }
     bs->get_address_space = smmuv3_accel_find_add_as;
+    bs->set_iommu_device = smmuv3_accel_set_iommu_device;
+    bs->unset_iommu_device = smmuv3_accel_unset_iommu_device;
 }
 
 static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 7790db780e..17960794bf 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -58,6 +58,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s
 smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
 smmu_reset_exit(void) ""
 
+#smmuv3-accel.c
+smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
+smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x"
+
 # strongarm.c
 strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
 strongarm_ssp_read_underrun(void) "SSP rx underrun"
diff --git a/include/hw/arm/smmuv3-accel.h b/include/hw/arm/smmuv3-accel.h
index 86c0523063..aca6838dca 100644
--- a/include/hw/arm/smmuv3-accel.h
+++ b/include/hw/arm/smmuv3-accel.h
@@ -12,16 +12,39 @@
 #include "hw/arm/smmu-common.h"
 #include "hw/arm/smmuv3.h"
 #include "qom/object.h"
+#include "system/iommufd.h"
+
+#include <linux/iommufd.h>
 
 #define TYPE_ARM_SMMUV3_ACCEL   "arm-smmuv3-accel"
 OBJECT_DECLARE_TYPE(SMMUv3AccelState, SMMUv3AccelClass, ARM_SMMUV3_ACCEL)
 
+typedef struct SMMUS2Hwpt {
+    IOMMUFDBackend *iommufd;
+    uint32_t hwpt_id;
+    uint32_t ioas_id;
+} SMMUS2Hwpt;
+
+typedef struct SMMUViommu {
+    IOMMUFDBackend *iommufd;
+    IOMMUFDViommu core;
+    SMMUS2Hwpt *s2_hwpt;
+    uint32_t bypass_hwpt_id;
+    uint32_t abort_hwpt_id;
+    QLIST_HEAD(, SMMUv3AccelDevice) device_list;
+    QLIST_ENTRY(SMMUViommu) next;
+} SMMUViommu;
+
 typedef struct SMMUv3AccelDevice {
     SMMUDevice  sdev;
+    HostIOMMUDeviceIOMMUFD *idev;
+    SMMUViommu *viommu;
+    QLIST_ENTRY(SMMUv3AccelDevice) next;
 } SMMUv3AccelDevice;
 
 struct SMMUv3AccelState {
     SMMUv3State smmuv3_state;
+    SMMUViommu *viommu;
 };
 
 struct SMMUv3AccelClass {
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index 53920bae5b..9c106ea078 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -37,6 +37,12 @@ struct IOMMUFDBackend {
     /*< public >*/
 };
 
+typedef struct IOMMUFDViommu {
+    IOMMUFDBackend *iommufd;
+    uint32_t s2_hwpt_id;
+    uint32_t viommu_id;
+} IOMMUFDViommu;
+
 bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
 void iommufd_backend_disconnect(IOMMUFDBackend *be);
 
-- 
2.34.1
Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add set/unset_iommu_device callback
Posted by Eric Auger 3 weeks, 3 days ago
Hi Shameer,


On 3/11/25 3:10 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Implement a set_iommu_device callback:
>  -Find an existing S2 hwpt to test attach() or allocate a new one
>    (Devices behind the same physical SMMU should share an S2 HWPT.)
>  -Attach the device to the S2 hwp
>  -Allocate a viommu with the returned s2 hwpt.
>  -Allocate bypass and abort hwpt and attach bypass hwpt.
>  -and add it to its device list
>
> Also add an unset_iommu_device doing the opposite cleanup routine.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  hw/arm/meson.build            |   2 +-
>  hw/arm/smmuv3-accel.c         | 183 ++++++++++++++++++++++++++++++++++
>  hw/arm/trace-events           |   4 +
>  include/hw/arm/smmuv3-accel.h |  23 +++++
>  include/system/iommufd.h      |   6 ++
>  5 files changed, 217 insertions(+), 1 deletion(-)
>
> diff --git a/hw/arm/meson.build b/hw/arm/meson.build
> index e8593363b0..dd41a86619 100644
> --- a/hw/arm/meson.build
> +++ b/hw/arm/meson.build
> @@ -55,7 +55,7 @@ arm_ss.add(when: 'CONFIG_MUSCA', if_true: files('musca.c'))
>  arm_ss.add(when: 'CONFIG_ARMSSE', if_true: files('armsse.c'))
>  arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre.c'))
>  arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c'))
> -arm_ss.add(when: 'CONFIG_ARM_SMMUV3_ACCEL', if_true: files('smmuv3-accel.c'))
> +arm_ss.add(when: ['CONFIG_ARM_SMMUV3_ACCEL', 'CONFIG_IOMMUFD'], if_true: files('smmuv3-accel.c'))
I guess we could set from patch 3 onwards?
>  arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c'))
>  arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
>  arm_ss.add(when: 'CONFIG_XEN', if_true: files(
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 6610ebe4be..1c696649d5 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -7,6 +7,8 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "trace.h"
> +#include "qemu/error-report.h"
>  
>  #include "hw/arm/smmuv3-accel.h"
>  #include "hw/pci/pci_bridge.h"
> @@ -30,6 +32,185 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
>      return accel_dev;
>  }
>  
> +static bool
> +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> +                               HostIOMMUDeviceIOMMUFD *idev, Error **errp)
> +{
> +    struct iommu_hwpt_arm_smmuv3 bypass_data = {
> +        .ste = { 0x9ULL, 0x0ULL },
I would suggest to use defines.
> +    };
> +    struct iommu_hwpt_arm_smmuv3 abort_data = {
> +        .ste = { 0x1ULL, 0x0ULL },
same
> +    };
> +    SMMUDevice *sdev = &accel_dev->sdev;
> +    SMMUState *s = sdev->smmu;
> +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> +    SMMUS2Hwpt *s2_hwpt;
> +    SMMUViommu *viommu;
> +    uint32_t s2_hwpt_id;
> +    uint32_t viommu_id;
> +
> +    if (s_accel->viommu) {
> +        accel_dev->viommu = s_accel->viommu;
> +        return host_iommu_device_iommufd_attach_hwpt(
> +                       idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
> +    }
> +
> +    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id,
> +                                    IOMMU_HWPT_ALLOC_NEST_PARENT,
> +                                    IOMMU_HWPT_DATA_NONE, 0, NULL,
> +                                    &s2_hwpt_id, errp)) {
> +        return false;
> +    }
> +
> +    /* Attach to S2 for MSI cookie */
> +    if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
> +        goto free_s2_hwpt;
> +    }
> +
> +    if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> +                                      IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> +                                      s2_hwpt_id, &viommu_id, errp)) {
> +        goto detach_s2_hwpt;
> +    }
> +
> +    viommu = g_new0(SMMUViommu, 1);
> +    viommu->core.viommu_id = viommu_id;
> +    viommu->core.s2_hwpt_id = s2_hwpt_id;
> +    viommu->core.iommufd = idev->iommufd;
> +
> +    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> +                                    viommu->core.viommu_id, 0,
> +                                    IOMMU_HWPT_DATA_ARM_SMMUV3,
> +                                    sizeof(abort_data), &abort_data,
> +                                    &viommu->abort_hwpt_id, errp)) {
> +        error_report("failed to allocate an abort pagetable");
is that error_report needed as we have the error handle already?
> +        goto free_viommu;
> +    }
> +
> +    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> +                                    viommu->core.viommu_id, 0,
> +                                    IOMMU_HWPT_DATA_ARM_SMMUV3,
> +                                    sizeof(bypass_data), &bypass_data,
> +                                    &viommu->bypass_hwpt_id, errp)) {
> +        error_report("failed to allocate a bypass pagetable");
same
> +        goto free_abort_hwpt;
> +    }
> +
> +    /*
> +     * Attach the bypass STE which means S1 bypass and S2 translate.
> +     * This is to make sure that the vIOMMU object is now associated
> +     * with the device and has this STE installed in the host SMMUV3.
> +     */
> +    if (!host_iommu_device_iommufd_attach_hwpt(
> +                idev, viommu->bypass_hwpt_id, errp)) {
> +        error_report("failed to attach the bypass pagetable");
same
if you prefer you can add error "hint"
> +        goto free_bypass_hwpt;
> +    }
> +
> +    s2_hwpt = g_new0(SMMUS2Hwpt, 1);
> +    s2_hwpt->iommufd = idev->iommufd;
> +    s2_hwpt->hwpt_id = s2_hwpt_id;
> +    s2_hwpt->ioas_id = idev->ioas_id;
> +
> +    viommu->iommufd = idev->iommufd;
> +    viommu->s2_hwpt = s2_hwpt;
> +
> +    s_accel->viommu = viommu;
> +    accel_dev->viommu = viommu;
> +    return true;
> +
> +free_bypass_hwpt:
> +    iommufd_backend_free_id(idev->iommufd, viommu->bypass_hwpt_id);
> +free_abort_hwpt:
> +    iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id);
> +free_viommu:
> +    iommufd_backend_free_id(idev->iommufd, viommu->core.viommu_id);
> +    g_free(viommu);
> +detach_s2_hwpt:
> +    host_iommu_device_iommufd_attach_hwpt(idev, accel_dev->idev->ioas_id, errp);
This should be detach.

Eric
> +free_s2_hwpt:
> +    iommufd_backend_free_id(idev->iommufd, s2_hwpt_id);
> +    return false;
> +}
> +
> +static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> +                                          HostIOMMUDevice *hiod, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
> +    SMMUState *s = opaque;
> +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> +    SMMUPciBus *sbus = smmu_get_sbus(s, bus);
> +    SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(s, sbus, bus, devfn);
> +    SMMUDevice *sdev = &accel_dev->sdev;
> +
> +    if (!idev) {
> +        return true;
> +    }
> +
> +    if (accel_dev->idev) {
> +        if (accel_dev->idev != idev) {
> +            error_report("Device 0x%x already ha an associated idev",
> +                         smmu_get_sid(sdev));
> +            return false;
> +        } else {
> +            return true;
> +        }
> +    }
> +
> +    if (!smmuv3_accel_dev_attach_viommu(accel_dev, idev, errp)) {
> +        error_report("Unable to attach viommu");
> +        return false;
> +    }
> +
> +    accel_dev->idev = idev;
> +    QLIST_INSERT_HEAD(&s_accel->viommu->device_list, accel_dev, next);
> +    trace_smmuv3_accel_set_iommu_device(devfn, smmu_get_sid(sdev));
> +    return true;
> +}
> +
> +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> +                                            int devfn)
> +{
> +    SMMUDevice *sdev;
> +    SMMUv3AccelDevice *accel_dev;
> +    SMMUViommu *viommu;
> +    SMMUState *s = opaque;
> +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> +    SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
> +
> +    if (!sbus) {
> +        return;
> +    }
> +
> +    sdev = sbus->pbdev[devfn];
> +    if (!sdev) {
> +        return;
> +    }
> +
> +    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> +    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> +                                               accel_dev->idev->ioas_id,
> +                                               NULL)) {
> +        error_report("Unable to attach dev to the default HW pagetable");
> +    }
> +
> +
> +    accel_dev->idev = NULL;
> +    QLIST_REMOVE(accel_dev, next);
> +    trace_smmuv3_accel_unset_iommu_device(devfn, smmu_get_sid(sdev));
> +
> +    viommu = s_accel->viommu;
> +    if (QLIST_EMPTY(&viommu->device_list)) {
> +        iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
> +        iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
> +        iommufd_backend_free_id(viommu->iommufd, viommu->core.viommu_id);
> +        iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id);
> +        g_free(viommu->s2_hwpt);
> +        g_free(viommu);
> +        s_accel->viommu = NULL;
> +    }
> +}
>  static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
>                                                int devfn)
>  {
> @@ -77,6 +258,8 @@ static void smmu_accel_realize(DeviceState *d, Error **errp)
>          return;
>      }
>      bs->get_address_space = smmuv3_accel_find_add_as;
> +    bs->set_iommu_device = smmuv3_accel_set_iommu_device;
> +    bs->unset_iommu_device = smmuv3_accel_unset_iommu_device;
>  }
>  
>  static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index 7790db780e..17960794bf 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -58,6 +58,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s
>  smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
>  smmu_reset_exit(void) ""
>  
> +#smmuv3-accel.c
> +smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
> +smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x"
> +
>  # strongarm.c
>  strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
>  strongarm_ssp_read_underrun(void) "SSP rx underrun"
> diff --git a/include/hw/arm/smmuv3-accel.h b/include/hw/arm/smmuv3-accel.h
> index 86c0523063..aca6838dca 100644
> --- a/include/hw/arm/smmuv3-accel.h
> +++ b/include/hw/arm/smmuv3-accel.h
> @@ -12,16 +12,39 @@
>  #include "hw/arm/smmu-common.h"
>  #include "hw/arm/smmuv3.h"
>  #include "qom/object.h"
> +#include "system/iommufd.h"
> +
> +#include <linux/iommufd.h>
>  
>  #define TYPE_ARM_SMMUV3_ACCEL   "arm-smmuv3-accel"
>  OBJECT_DECLARE_TYPE(SMMUv3AccelState, SMMUv3AccelClass, ARM_SMMUV3_ACCEL)
>  
> +typedef struct SMMUS2Hwpt {
> +    IOMMUFDBackend *iommufd;
> +    uint32_t hwpt_id;
> +    uint32_t ioas_id;
> +} SMMUS2Hwpt;
> +
> +typedef struct SMMUViommu {
> +    IOMMUFDBackend *iommufd;
> +    IOMMUFDViommu core;
> +    SMMUS2Hwpt *s2_hwpt;
> +    uint32_t bypass_hwpt_id;
> +    uint32_t abort_hwpt_id;
> +    QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> +    QLIST_ENTRY(SMMUViommu) next;
> +} SMMUViommu;
> +
>  typedef struct SMMUv3AccelDevice {
>      SMMUDevice  sdev;
> +    HostIOMMUDeviceIOMMUFD *idev;
> +    SMMUViommu *viommu;
> +    QLIST_ENTRY(SMMUv3AccelDevice) next;
>  } SMMUv3AccelDevice;
>  
>  struct SMMUv3AccelState {
>      SMMUv3State smmuv3_state;
> +    SMMUViommu *viommu;
>  };
>  
>  struct SMMUv3AccelClass {
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index 53920bae5b..9c106ea078 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -37,6 +37,12 @@ struct IOMMUFDBackend {
>      /*< public >*/
>  };
>  
> +typedef struct IOMMUFDViommu {
> +    IOMMUFDBackend *iommufd;
> +    uint32_t s2_hwpt_id;
> +    uint32_t viommu_id;
> +} IOMMUFDViommu;
> +
>  bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
>  void iommufd_backend_disconnect(IOMMUFDBackend *be);
>
Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add set/unset_iommu_device callback
Posted by Nicolin Chen 3 weeks, 4 days ago
On Tue, Mar 11, 2025 at 02:10:34PM +0000, Shameer Kolothum wrote:
> @@ -30,6 +32,185 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
>      return accel_dev;
>  }
>  
> +static bool
> +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> +                               HostIOMMUDeviceIOMMUFD *idev, Error **errp)

With vEVENTQ v9, vDEVICE (vSID) is required to attach a device
to a proxy NESTED hwpt (applicable to bypass/abort HWPTs too).
So, host_iommu_device_iommufd_attach_hwpt() would fail in this
function because vSID isn't ready at this stage. So all those
calls should be moved out of the function, then this should be
likely "smmuv3_accel_dev_alloc_viommu"?

That being said, I don't know when QEMU actually prepare a BDF
number for a vfio-pci device. The only place that I see it is
ready is at guest-level SMMU installing the Stream Table, i.e.
in smmuv3_accel_install_nested_ste().

> +{
> +    struct iommu_hwpt_arm_smmuv3 bypass_data = {
> +        .ste = { 0x9ULL, 0x0ULL },
> +    };
> +    struct iommu_hwpt_arm_smmuv3 abort_data = {
> +        .ste = { 0x1ULL, 0x0ULL },
> +    };
> +    SMMUDevice *sdev = &accel_dev->sdev;
> +    SMMUState *s = sdev->smmu;
> +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> +    SMMUS2Hwpt *s2_hwpt;
> +    SMMUViommu *viommu;
> +    uint32_t s2_hwpt_id;
> +    uint32_t viommu_id;
> +
> +    if (s_accel->viommu) {
> +        accel_dev->viommu = s_accel->viommu;
> +        return host_iommu_device_iommufd_attach_hwpt(
> +                       idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);

Yea, here is my bad. We shouldn't attach a device to s2_hwpt,
since eventually s2_hwpt would be a shared hwpt across SMMUs.

> +    /* Attach to S2 for MSI cookie */
> +    if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
> +        goto free_s2_hwpt;
> +    }

With the merged sw_msi series, we don't need this anymore.

> +    /*
> +     * Attach the bypass STE which means S1 bypass and S2 translate.
> +     * This is to make sure that the vIOMMU object is now associated
> +     * with the device and has this STE installed in the host SMMUV3.
> +     */
> +    if (!host_iommu_device_iommufd_attach_hwpt(
> +                idev, viommu->bypass_hwpt_id, errp)) {
> +        error_report("failed to attach the bypass pagetable");
> +        goto free_bypass_hwpt;
> +    }

Ditto. We have to postpone this until vdevice is allocated.

> +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> +                                            int devfn)
> +{
> +    SMMUDevice *sdev;
> +    SMMUv3AccelDevice *accel_dev;
> +    SMMUViommu *viommu;
> +    SMMUState *s = opaque;
> +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> +    SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
> +
> +    if (!sbus) {
> +        return;
> +    }
> +
> +    sdev = sbus->pbdev[devfn];
> +    if (!sdev) {
> +        return;
> +    }
> +
> +    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> +    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> +                                               accel_dev->idev->ioas_id,
> +                                               NULL)) {
> +        error_report("Unable to attach dev to the default HW pagetable");
> +    }
> +
> +

Could drop the extra line.

Thanks
Nicolin
RE: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add set/unset_iommu_device callback
Posted by Shameerali Kolothum Thodi via 2 weeks, 5 days ago
Hi Nicolin,

> -----Original Message-----
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, March 11, 2025 9:08 PM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> eric.auger@redhat.com; peter.maydell@linaro.org; jgg@nvidia.com;
> ddutile@redhat.com; berrange@redhat.com; nathanc@nvidia.com;
> mochs@nvidia.com; smostafa@google.com; Linuxarm
> <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
> jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org
> Subject: Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add
> set/unset_iommu_device callback
> 
> On Tue, Mar 11, 2025 at 02:10:34PM +0000, Shameer Kolothum wrote:
> > @@ -30,6 +32,185 @@ static SMMUv3AccelDevice
> *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
> >      return accel_dev;
> >  }
> >
> > +static bool
> > +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> > +                               HostIOMMUDeviceIOMMUFD *idev, Error **errp)
> 
> With vEVENTQ v9, vDEVICE (vSID) is required to attach a device
> to a proxy NESTED hwpt (applicable to bypass/abort HWPTs too).
> So, host_iommu_device_iommufd_attach_hwpt() would fail in this
> function because vSID isn't ready at this stage. So all those
> calls should be moved out of the function, then this should be
> likely "smmuv3_accel_dev_alloc_viommu"?
> 
> That being said, I don't know when QEMU actually prepare a BDF
> number for a vfio-pci device. The only place that I see it is
> ready is at guest-level SMMU installing the Stream Table, i.e.
> in smmuv3_accel_install_nested_ste().
> 
> > +{
> > +    struct iommu_hwpt_arm_smmuv3 bypass_data = {
> > +        .ste = { 0x9ULL, 0x0ULL },
> > +    };
> > +    struct iommu_hwpt_arm_smmuv3 abort_data = {
> > +        .ste = { 0x1ULL, 0x0ULL },
> > +    };
> > +    SMMUDevice *sdev = &accel_dev->sdev;
> > +    SMMUState *s = sdev->smmu;
> > +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> > +    SMMUS2Hwpt *s2_hwpt;
> > +    SMMUViommu *viommu;
> > +    uint32_t s2_hwpt_id;
> > +    uint32_t viommu_id;
> > +
> > +    if (s_accel->viommu) {
> > +        accel_dev->viommu = s_accel->viommu;
> > +        return host_iommu_device_iommufd_attach_hwpt(
> > +                       idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
> 
> Yea, here is my bad. We shouldn't attach a device to s2_hwpt,
> since eventually s2_hwpt would be a shared hwpt across SMMUs.
> 
> > +    /* Attach to S2 for MSI cookie */
> > +    if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id,
> errp)) {
> > +        goto free_s2_hwpt;
> > +    }
> 
> With the merged sw_msi series, we don't need this anymore.
> 
> > +    /*
> > +     * Attach the bypass STE which means S1 bypass and S2 translate.
> > +     * This is to make sure that the vIOMMU object is now associated
> > +     * with the device and has this STE installed in the host SMMUV3.
> > +     */
> > +    if (!host_iommu_device_iommufd_attach_hwpt(
> > +                idev, viommu->bypass_hwpt_id, errp)) {
> > +        error_report("failed to attach the bypass pagetable");
> > +        goto free_bypass_hwpt;
> > +    }
> 
> Ditto. We have to postpone this until vdevice is allocated.

Ok.  I will take a look based on the  vEVENTQ v9 series.
I guess this Qemu branch of yours is a more representative of the changes described
above?
https://github.com/nicolinc/qemu/tree/wip/for_iommufd_veventq-v9

Right?

Thanks,
Shameer

> > +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void
> *opaque,
> > +                                            int devfn)
> > +{
> > +    SMMUDevice *sdev;
> > +    SMMUv3AccelDevice *accel_dev;
> > +    SMMUViommu *viommu;
> > +    SMMUState *s = opaque;
> > +    SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> > +    SMMUPciBus *sbus = g_hash_table_lookup(s-
> >smmu_pcibus_by_busptr, bus);
> > +
> > +    if (!sbus) {
> > +        return;
> > +    }
> > +
> > +    sdev = sbus->pbdev[devfn];
> > +    if (!sdev) {
> > +        return;
> > +    }
> > +
> > +    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> > +    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> > +                                               accel_dev->idev->ioas_id,
> > +                                               NULL)) {
> > +        error_report("Unable to attach dev to the default HW pagetable");
> > +    }
> > +
> > +
> 
> Could drop the extra line.
> 
> Thanks
> Nicolin
Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add set/unset_iommu_device callback
Posted by Nicolin Chen 2 weeks, 5 days ago
On Mon, Mar 17, 2025 at 08:38:23AM +0000, Shameerali Kolothum Thodi wrote:
> Hi Nicolin,
> 
> > -----Original Message-----
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, March 11, 2025 9:08 PM
> > To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> > Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> > eric.auger@redhat.com; peter.maydell@linaro.org; jgg@nvidia.com;
> > ddutile@redhat.com; berrange@redhat.com; nathanc@nvidia.com;
> > mochs@nvidia.com; smostafa@google.com; Linuxarm
> > <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
> > jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
> > <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org
> > Subject: Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add
> > set/unset_iommu_device callback
> > 
> > On Tue, Mar 11, 2025 at 02:10:34PM +0000, Shameer Kolothum wrote:
> > > @@ -30,6 +32,185 @@ static SMMUv3AccelDevice
> > *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
> > >      return accel_dev;
> > >  }
> > >
> > > +static bool
> > > +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> > > +                               HostIOMMUDeviceIOMMUFD *idev, Error **errp)
> > 
> > With vEVENTQ v9, vDEVICE (vSID) is required to attach a device
> > to a proxy NESTED hwpt (applicable to bypass/abort HWPTs too).
> > So, host_iommu_device_iommufd_attach_hwpt() would fail in this
> > function because vSID isn't ready at this stage. So all those
> > calls should be moved out of the function, then this should be
> > likely "smmuv3_accel_dev_alloc_viommu"?
> > 
> > That being said, I don't know when QEMU actually prepare a BDF
> > number for a vfio-pci device. The only place that I see it is
> > ready is at guest-level SMMU installing the Stream Table, i.e.
> > in smmuv3_accel_install_nested_ste().

> > > +    /*
> > > +     * Attach the bypass STE which means S1 bypass and S2 translate.
> > > +     * This is to make sure that the vIOMMU object is now associated
> > > +     * with the device and has this STE installed in the host SMMUV3.
> > > +     */
> > > +    if (!host_iommu_device_iommufd_attach_hwpt(
> > > +                idev, viommu->bypass_hwpt_id, errp)) {
> > > +        error_report("failed to attach the bypass pagetable");
> > > +        goto free_bypass_hwpt;
> > > +    }
> > 
> > Ditto. We have to postpone this until vdevice is allocated.
> 
> Ok.  I will take a look based on the  vEVENTQ v9 series.
> I guess this Qemu branch of yours is a more representative of the changes described
> above?
> https://github.com/nicolinc/qemu/commits/wip/for_iommufd_veventq-v9/

Yes. 

Mainly this change:
https://github.com/nicolinc/qemu/commit/d8f496eaf528f1c397f2374a999b8b23fd55c75b

Thanks
Nicolin