[PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE install/uninstall support

Shameer Kolothum posted 27 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE install/uninstall support
Posted by Shameer Kolothum 1 month, 2 weeks ago
From: Nicolin Chen <nicolinc@nvidia.com>

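Allocate an S1 HWPT for the guest's stage 1 and attach it to the
pass-through VFIO device. This is invoked when the guest issues
SMMU_CMD_CFGI_STE or SMMU_CMD_CFGI_STE_RANGE. If the guest STE is invalid
or has stage 1 disabled, the S1 HWPT is freed instead and the device is
re-attached to the vIOMMU's bypass or abort HWPT.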

While at it, we are also exporting both smmu_find_ste() and
smmuv3_flush_config() from smmuv3.c for use here.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
 hw/arm/smmuv3-accel.c    | 164 +++++++++++++++++++++++++++++++++++++++
 hw/arm/smmuv3-accel.h    |  22 ++++++
 hw/arm/smmuv3-internal.h |   3 +
 hw/arm/smmuv3.c          |  18 ++++-
 hw/arm/trace-events      |   1 +
 5 files changed, 205 insertions(+), 3 deletions(-)

diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 81fa738f6f..5c3825cecd 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -17,10 +17,174 @@
 #include "hw/vfio/pci.h"
 
 #include "smmuv3-accel.h"
+#include "smmuv3-internal.h"
 
 #define SMMU_STE_VALID      (1ULL << 0)
 #define SMMU_STE_CFG_BYPASS (1ULL << 3)
 
+/*
+ * Guest STE fields that are copied into the nested HWPT allocation data.
+ * Each mask covers one field within a 64-bit STE dword; STE0_MASK and
+ * STE1_MASK select everything that is kept from dword 0 and dword 1.
+ */
+#define STE0_V       MAKE_64BIT_MASK(0, 1)
+#define STE0_CONFIG  MAKE_64BIT_MASK(1, 3)
+#define STE0_S1FMT   MAKE_64BIT_MASK(4, 2)
+#define STE0_CTXPTR  MAKE_64BIT_MASK(6, 50)
+#define STE0_S1CDMAX MAKE_64BIT_MASK(59, 5)
+#define STE0_MASK    (STE0_S1CDMAX | STE0_CTXPTR | STE0_S1FMT | STE0_CONFIG | \
+                      STE0_V)
+
+#define STE1_S1DSS    MAKE_64BIT_MASK(0, 2)
+#define STE1_S1CIR    MAKE_64BIT_MASK(2, 2)
+#define STE1_S1COR    MAKE_64BIT_MASK(4, 2)
+#define STE1_S1CSH    MAKE_64BIT_MASK(6, 2)
+#define STE1_S1STALLD MAKE_64BIT_MASK(27, 1)
+/* Bits [29:28] of dword 1 hold the STE.EATS field */
+#define STE1_EATS     MAKE_64BIT_MASK(28, 2)
+#define STE1_MASK     (STE1_EATS | STE1_S1STALLD | STE1_S1CSH | STE1_S1COR | \
+                       STE1_S1CIR | STE1_S1DSS)
+
+/*
+ * Detach @accel_dev from its guest stage-1 HWPT and release that HWPT.
+ *
+ * The device is first re-attached to one of the vIOMMU's fallback HWPTs:
+ * the abort HWPT when @abort is true, the bypass HWPT otherwise. The
+ * per-device S1 HWPT is only freed once that attach has succeeded.
+ *
+ * Returns true on success, including when there is nothing to uninstall
+ * (no S1 HWPT installed, or no vIOMMU). Returns false if the re-attach
+ * fails, in which case the S1 HWPT is left untouched and @errp carries
+ * whatever the attach helper reported.
+ */
+static bool
+smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort,
+                                      Error **errp)
+{
+    SMMUS1Hwpt *old_hwpt = accel_dev->s1_hwpt;
+    SMMUViommu *viommu = accel_dev->viommu;
+    uint32_t fallback_id;
+
+    if (!old_hwpt || !viommu) {
+        return true;
+    }
+
+    fallback_id = abort ? viommu->abort_hwpt_id : viommu->bypass_hwpt_id;
+    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev, fallback_id,
+                                               errp)) {
+        return false;
+    }
+
+    /* The device no longer references the S1 HWPT: release it */
+    iommufd_backend_free_id(old_hwpt->iommufd, old_hwpt->hwpt_id);
+    g_free(old_hwpt);
+    accel_dev->s1_hwpt = NULL;
+    return true;
+}
+
+/*
+ * Allocate a HWPT from the guest STE data and attach @accel_dev to it.
+ *
+ * @data_type, @data_len and @data are passed through unchanged to
+ * iommufd_backend_alloc_hwpt(). Any S1 HWPT that is already installed is
+ * uninstalled first (parking the device on the abort HWPT) before the new
+ * one is allocated and attached.
+ *
+ * Returns true on success, with accel_dev->s1_hwpt tracking the new HWPT.
+ * Returns false on failure; the tracking structure allocated here is
+ * freed on every failure path and accel_dev->s1_hwpt is not set.
+ */
+static bool
+smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
+                                    uint32_t data_type, uint32_t data_len,
+                                    void *data, Error **errp)
+{
+    SMMUViommu *viommu = accel_dev->viommu;
+    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
+    SMMUS1Hwpt *s1_hwpt;
+    uint32_t flags = 0;
+
+    if (!idev || !viommu) {
+        error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
+                   smmu_get_sid(&accel_dev->sdev));
+        return false;
+    }
+
+    /* Replace, rather than stack on top of, a previously installed HWPT */
+    if (accel_dev->s1_hwpt &&
+        !smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
+        return false;
+    }
+
+    s1_hwpt = g_new0(SMMUS1Hwpt, 1);
+    s1_hwpt->iommufd = idev->iommufd;
+    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+                                    viommu->core.viommu_id, flags, data_type,
+                                    data_len, data, &s1_hwpt->hwpt_id, errp)) {
+        goto err_free;
+    }
+
+    if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, errp)) {
+        iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
+        goto err_free;
+    }
+    accel_dev->s1_hwpt = s1_hwpt;
+    return true;
+
+err_free:
+    /* Nothing else references the tracking structure yet: do not leak it */
+    g_free(s1_hwpt);
+    return false;
+}
+
+/*
+ * Re-read the guest STE for @sid and mirror its stage-1 configuration to
+ * the host for the accelerated device behind @sdev.
+ *
+ * - Nothing is done (and true is returned) when acceleration is off, when
+ *   the device has no vIOMMU, or when the STE cannot be fetched.
+ * - An invalid STE, or one without stage 1 enabled, uninstalls any nested
+ *   HWPT and falls back to the abort or bypass HWPT.
+ * - Otherwise the relevant STE fields are handed to the host and a nested
+ *   HWPT is installed.
+ *
+ * Returns true on success, false with @errp set on failure.
+ */
+bool
+smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
+                                Error **errp)
+{
+    ERRP_GUARD();
+    SMMUv3AccelDevice *accel_dev;
+    SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
+                           .inval_ste_allowed = true};
+    struct iommu_hwpt_arm_smmuv3 nested_data = {};
+    uint64_t ste_0, ste_1;
+    uint32_t config;
+    STE ste;
+
+    if (!s->accel) {
+        return true;
+    }
+
+    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
+    if (!accel_dev->viommu) {
+        return true;
+    }
+
+    /*
+     * A failed STE lookup is deliberately not a command failure, so this
+     * path reports success. @errp must therefore stay unset: callers only
+     * consume it when false is returned, and setting it here would leak
+     * the Error (or trip an assertion on the next error_setg()).
+     * NOTE(review): presumably the entry may legitimately be missing at
+     * this point (e.g. stream table not fully populated) - confirm.
+     */
+    if (smmu_find_ste(sdev->smmu, sid, &ste, &event)) {
+        return true;
+    }
+
+    config = STE_CONFIG(&ste);
+    if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
+        /*
+         * The Config field is meaningless when V=0: an invalid STE must
+         * terminate transactions, so always fall back to the abort HWPT.
+         */
+        bool abort = !STE_VALID(&ste) || STE_CFG_ABORT(config);
+
+        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, abort, errp)) {
+            return false;
+        }
+        smmuv3_flush_config(sdev);
+        return true;
+    }
+
+    /* Keep host-endian copies for diagnostics; the uAPI takes LE values */
+    ste_0 = ((uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32) & STE0_MASK;
+    ste_1 = ((uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32) & STE1_MASK;
+    nested_data.ste[0] = cpu_to_le64(ste_0);
+    nested_data.ste[1] = cpu_to_le64(ste_1);
+
+    if (!smmuv3_accel_dev_install_nested_ste(accel_dev,
+                                             IOMMU_HWPT_DATA_ARM_SMMUV3,
+                                             sizeof(nested_data),
+                                             &nested_data, errp)) {
+        /* @errp is already set by the callee: add context, don't re-set */
+        error_prepend(errp, "Unable to install nested STE for sid=0x%x, "
+                      "STE=0x%" PRIx64 ":0x%" PRIx64 ": ", sid, ste_1, ste_0);
+        return false;
+    }
+    trace_smmuv3_accel_install_nested_ste(sid, ste_1, ste_0);
+    return true;
+}
+
+/*
+ * Run smmuv3_accel_install_nested_ste() for every device on the vIOMMU's
+ * device list whose stream ID lies within [range->start, range->end]
+ * (both bounds inclusive).
+ *
+ * Returns true when there is no accel state or vIOMMU, or when every
+ * matching device was processed successfully. Stops at the first failure
+ * and returns false, with @errp as set by the per-device call.
+ */
+bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
+                                           Error **errp)
+{
+    SMMUv3AccelState *accel_state = s->s_accel;
+    SMMUv3AccelDevice *dev;
+    SMMUViommu *viommu;
+
+    if (!accel_state) {
+        return true;
+    }
+
+    viommu = accel_state->viommu;
+    if (!viommu) {
+        return true;
+    }
+
+    QLIST_FOREACH(dev, &viommu->device_list, next) {
+        uint32_t dev_sid = smmu_get_sid(&dev->sdev);
+
+        /* Skip devices outside the invalidated stream ID range */
+        if (dev_sid < range->start || dev_sid > range->end) {
+            continue;
+        }
+        if (!smmuv3_accel_install_nested_ste(s, &dev->sdev, dev_sid, errp)) {
+            return false;
+        }
+    }
+    return true;
+}
+
 static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
                                                PCIBus *bus, int devfn)
 {
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index 3c8506d1e6..f631443b09 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -22,9 +22,15 @@ typedef struct SMMUViommu {
     QLIST_HEAD(, SMMUv3AccelDevice) device_list;
 } SMMUViommu;
 
+typedef struct SMMUS1Hwpt { /* HWPT installed for one device's guest S1 */
+    IOMMUFDBackend *iommufd; /* backend used to free hwpt_id */
+    uint32_t hwpt_id; /* ID filled in by iommufd_backend_alloc_hwpt() */
+} SMMUS1Hwpt;
+
 typedef struct SMMUv3AccelDevice {
     SMMUDevice  sdev;
     HostIOMMUDeviceIOMMUFD *idev;
+    SMMUS1Hwpt *s1_hwpt;
     SMMUViommu *viommu;
     QLIST_ENTRY(SMMUv3AccelDevice) next;
 } SMMUv3AccelDevice;
@@ -35,10 +41,26 @@ typedef struct SMMUv3AccelState {
 
 #ifdef CONFIG_ARM_SMMUV3_ACCEL
 void smmuv3_accel_init(SMMUv3State *s);
+bool smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
+                                     Error **errp);
+bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
+                                           Error **errp);
 #else
 static inline void smmuv3_accel_init(SMMUv3State *s)
 {
 }
+static inline bool
+smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
+                                Error **errp)
+{
+    return true; /* CONFIG_ARM_SMMUV3_ACCEL not set: nothing to install */
+}
+static inline bool
+smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
+                                      Error **errp)
+{
+    return true; /* CONFIG_ARM_SMMUV3_ACCEL not set: nothing to install */
+}
 #endif
 
 #endif /* HW_ARM_SMMUV3_ACCEL_H */
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index b6b7399347..b0dfa9465c 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -547,6 +547,9 @@ typedef struct CD {
     uint32_t word[16];
 } CD;
 
+int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event);
+void smmuv3_flush_config(SMMUDevice *sdev);
+
 /* STE fields */
 
 #define STE_VALID(x)   extract32((x)->word[0], 0, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ef991cb7d8..1fd8aaa0c7 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -630,8 +630,7 @@ bad_ste:
  * Supports linear and 2-level stream table
  * Return 0 on success, -EINVAL otherwise
  */
-static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
-                         SMMUEventInfo *event)
+int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event)
 {
     dma_addr_t addr, strtab_base;
     uint32_t log2size;
@@ -900,7 +899,7 @@ static SMMUTransCfg *smmuv3_get_config(SMMUDevice *sdev, SMMUEventInfo *event)
     return cfg;
 }
 
-static void smmuv3_flush_config(SMMUDevice *sdev)
+void smmuv3_flush_config(SMMUDevice *sdev)
 {
     SMMUv3State *s = sdev->smmu;
     SMMUState *bc = &s->smmu_state;
@@ -1330,6 +1329,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
         {
             uint32_t sid = CMD_SID(&cmd);
             SMMUDevice *sdev = smmu_find_sdev(bs, sid);
+            Error *local_err = NULL;
 
             if (CMD_SSEC(&cmd)) {
                 cmd_error = SMMU_CERROR_ILL;
@@ -1341,6 +1341,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
             }
 
             trace_smmuv3_cmdq_cfgi_ste(sid);
+            if (!smmuv3_accel_install_nested_ste(s, sdev, sid, &local_err)) {
+                error_report_err(local_err);
+                cmd_error = SMMU_CERROR_ILL;
+                break;
+            }
             smmuv3_flush_config(sdev);
 
             break;
@@ -1350,6 +1355,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
             uint32_t sid = CMD_SID(&cmd), mask;
             uint8_t range = CMD_STE_RANGE(&cmd);
             SMMUSIDRange sid_range;
+            Error *local_err = NULL;
 
             if (CMD_SSEC(&cmd)) {
                 cmd_error = SMMU_CERROR_ILL;
@@ -1361,6 +1367,12 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
             sid_range.end = sid_range.start + mask;
 
             trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
+            if (!smmuv3_accel_install_nested_ste_range(s, &sid_range,
+                                                       &local_err)) {
+                error_report_err(local_err);
+                cmd_error = SMMU_CERROR_ILL;
+                break;
+            }
             smmu_configs_inv_sid_range(bs, sid_range);
             break;
         }
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 86370d448a..3b1e9bf083 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -69,6 +69,7 @@ smmu_reset_exit(void) ""
 #smmuv3-accel.c
 smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
 smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
+smmuv3_accel_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=0x%x ste=0x%"PRIx64":0x%"PRIx64
 
 # strongarm.c
 strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
-- 
2.43.0
Re: [PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE install/uninstall support
Posted by Eric Auger 1 month, 1 week ago
Hi Shameer,

On 9/29/25 3:36 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Allocates a s1 HWPT for the Guest s1 stage and attaches that to the
> pass-through vfio device. This will be invoked when Guest issues
> SMMU_CMD_CFGI_STE/STE_RANGE.
On set, both allocation and attachment are done. On unset, you should
explain the gymnastics related to the config/abort hwpt. Those are S1 hwpts,
right? I think this should be reflected in the names to make it clearer. In
the previous patch I didn't really understand that.
> While at it, we are also exporting both smmu_find_ste() and
> smmuv3_flush_config() from smmuv3.c for use here.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
>  hw/arm/smmuv3-accel.c    | 164 +++++++++++++++++++++++++++++++++++++++
>  hw/arm/smmuv3-accel.h    |  22 ++++++
>  hw/arm/smmuv3-internal.h |   3 +
>  hw/arm/smmuv3.c          |  18 ++++-
>  hw/arm/trace-events      |   1 +
>  5 files changed, 205 insertions(+), 3 deletions(-)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 81fa738f6f..5c3825cecd 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -17,10 +17,174 @@
>  #include "hw/vfio/pci.h"
>  
>  #include "smmuv3-accel.h"
> +#include "smmuv3-internal.h"
>  
>  #define SMMU_STE_VALID      (1ULL << 0)
>  #define SMMU_STE_CFG_BYPASS (1ULL << 3)
>  
> +#define STE0_V       MAKE_64BIT_MASK(0, 1)
> +#define STE0_CONFIG  MAKE_64BIT_MASK(1, 3)
> +#define STE0_S1FMT   MAKE_64BIT_MASK(4, 2)
> +#define STE0_CTXPTR  MAKE_64BIT_MASK(6, 50)
> +#define STE0_S1CDMAX MAKE_64BIT_MASK(59, 5)
> +#define STE0_MASK    (STE0_S1CDMAX | STE0_CTXPTR | STE0_S1FMT | STE0_CONFIG | \
> +                      STE0_V)
> +
> +#define STE1_S1DSS    MAKE_64BIT_MASK(0, 2)
> +#define STE1_S1CIR    MAKE_64BIT_MASK(2, 2)
> +#define STE1_S1COR    MAKE_64BIT_MASK(4, 2)
> +#define STE1_S1CSH    MAKE_64BIT_MASK(6, 2)
> +#define STE1_S1STALLD MAKE_64BIT_MASK(27, 1)
> +#define STE1_ETS      MAKE_64BIT_MASK(28, 2)
this is EATS
> +#define STE1_MASK     (STE1_ETS | STE1_S1STALLD | STE1_S1CSH | STE1_S1COR | \
> +                       STE1_S1CIR | STE1_S1DSS)
I would move all that stuff in smmuv3-internal.h too
> +
> +static bool
> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort,
> +                                      Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> +    SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> +    uint32_t hwpt_id;
> +
> +    if (!s1_hwpt || !accel_dev->viommu) {
> +        return true;
> +    }
> +
> +    if (abort) {
> +        hwpt_id = accel_dev->viommu->abort_hwpt_id;
> +    } else {
> +        hwpt_id = accel_dev->viommu->bypass_hwpt_id;
> +    }
> +
> +    if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> +        return false;
> +    }
I think you should add a trace point for uninstall and specify which hwpt
we use (abort or bypass). This might be useful for debugging.
> +
> +    iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
> +    accel_dev->s1_hwpt = NULL;
> +    g_free(s1_hwpt);
> +    return true;
> +}
> +
> +static bool
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> +                                    uint32_t data_type, uint32_t data_len,
> +                                    void *data, Error **errp)
> +{
> +    SMMUViommu *viommu = accel_dev->viommu;
> +    SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> +    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> +    uint32_t flags = 0;
> +
> +    if (!idev || !viommu) {
> +        error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
> +                   smmu_get_sid(&accel_dev->sdev));
> +        return false;
> +    }
> +
> +    if (s1_hwpt) {
> +        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> +            return false;
> +        }
> +    }
> +
> +    s1_hwpt = g_new0(SMMUS1Hwpt, 1);
> +    s1_hwpt->iommufd = idev->iommufd;
> +    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> +                                    viommu->core.viommu_id, flags, data_type,
> +                                    data_len, data, &s1_hwpt->hwpt_id, errp)) {
> +        return false;
> +    }
> +
> +    if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, errp)) {
> +        iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
> +        return false;
> +    }
> +    accel_dev->s1_hwpt = s1_hwpt;
> +    return true;
> +}
> +
> +bool
> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> +                                Error **errp)
> +{
> +    SMMUv3AccelDevice *accel_dev;
> +    SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
> +                           .inval_ste_allowed = true};
> +    struct iommu_hwpt_arm_smmuv3 nested_data = {};
> +    uint64_t ste_0, ste_1;
> +    uint32_t config;
> +    STE ste;
> +    int ret;
> +
> +    if (!s->accel) {
> +        return true;
> +    }
> +
> +    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> +    if (!accel_dev->viommu) {
> +        return true;
> +    }
> +
> +    ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
> +    if (ret) {
> +        error_setg(errp, "Failed to find STE for Device 0x%x", sid);
> +        return true;
> +    }
> +
> +    config = STE_CONFIG(&ste);
> +    if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> +        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> +                                                   STE_CFG_ABORT(config),
> +                                                   errp)) {
> +            return false;
> +        }
> +        smmuv3_flush_config(sdev);
> +        return true;
> +    }
> +
> +    ste_0 = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
> +    ste_1 = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
> +    nested_data.ste[0] = cpu_to_le64(ste_0 & STE0_MASK);
> +    nested_data.ste[1] = cpu_to_le64(ste_1 & STE1_MASK);
> +
> +    if (!smmuv3_accel_dev_install_nested_ste(accel_dev,
> +                                             IOMMU_HWPT_DATA_ARM_SMMUV3,
> +                                             sizeof(nested_data),
> +                                             &nested_data, errp)) {
> +        error_setg(errp, "Unable to install nested STE=%16LX:%16LX, sid=0x%x,"
don't you need to use PRIx64 instead?
also I suggest to put the SID first.
> +                   "ret=%d", nested_data.ste[1], nested_data.ste[0], sid, ret);
> +        return false;
> +    }
> +    trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
> +                                          nested_data.ste[0]);
> +    return true;
> +}
> +
> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> +                                           Error **errp)
> +{
> +    SMMUv3AccelState *s_accel = s->s_accel;
> +    SMMUv3AccelDevice *accel_dev;
> +
> +    if (!s_accel || !s_accel->viommu) {
> +        return true;
> +    }
> +
> +    QLIST_FOREACH(accel_dev, &s_accel->viommu->device_list, next) {
> +        uint32_t sid = smmu_get_sid(&accel_dev->sdev);
> +
> +        if (sid >= range->start && sid <= range->end) {
> +            if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
> +                                                 sid, errp)) {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}
> +
>  static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
>                                                 PCIBus *bus, int devfn)
>  {
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 3c8506d1e6..f631443b09 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -22,9 +22,15 @@ typedef struct SMMUViommu {
>      QLIST_HEAD(, SMMUv3AccelDevice) device_list;
>  } SMMUViommu;
>  
> +typedef struct SMMUS1Hwpt {
> +    IOMMUFDBackend *iommufd;
> +    uint32_t hwpt_id;
> +} SMMUS1Hwpt;
> +
>  typedef struct SMMUv3AccelDevice {
>      SMMUDevice  sdev;
>      HostIOMMUDeviceIOMMUFD *idev;
> +    SMMUS1Hwpt *s1_hwpt;
>      SMMUViommu *viommu;
>      QLIST_ENTRY(SMMUv3AccelDevice) next;
>  } SMMUv3AccelDevice;
> @@ -35,10 +41,26 @@ typedef struct SMMUv3AccelState {
>  
>  #ifdef CONFIG_ARM_SMMUV3_ACCEL
>  void smmuv3_accel_init(SMMUv3State *s);
> +bool smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> +                                     Error **errp);
> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> +                                           Error **errp);
>  #else
>  static inline void smmuv3_accel_init(SMMUv3State *s)
>  {
>  }
> +static inline bool
> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> +                                Error **errp)
> +{
> +    return true;
> +}
> +static inline bool
> +smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> +                                      Error **errp)
> +{
> +    return true;
> +}
>  #endif
>  
>  #endif /* HW_ARM_SMMUV3_ACCEL_H */
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index b6b7399347..b0dfa9465c 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -547,6 +547,9 @@ typedef struct CD {
>      uint32_t word[16];
>  } CD;
>  
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event);
> +void smmuv3_flush_config(SMMUDevice *sdev);
> +
>  /* STE fields */
>  
>  #define STE_VALID(x)   extract32((x)->word[0], 0, 1)
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index ef991cb7d8..1fd8aaa0c7 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -630,8 +630,7 @@ bad_ste:
>   * Supports linear and 2-level stream table
>   * Return 0 on success, -EINVAL otherwise
>   */
> -static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> -                         SMMUEventInfo *event)
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event)
>  {
>      dma_addr_t addr, strtab_base;
>      uint32_t log2size;
> @@ -900,7 +899,7 @@ static SMMUTransCfg *smmuv3_get_config(SMMUDevice *sdev, SMMUEventInfo *event)
>      return cfg;
>  }
>  
> -static void smmuv3_flush_config(SMMUDevice *sdev)
> +void smmuv3_flush_config(SMMUDevice *sdev)
>  {
>      SMMUv3State *s = sdev->smmu;
>      SMMUState *bc = &s->smmu_state;
> @@ -1330,6 +1329,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>          {
>              uint32_t sid = CMD_SID(&cmd);
>              SMMUDevice *sdev = smmu_find_sdev(bs, sid);
> +            Error *local_err = NULL;
>  
>              if (CMD_SSEC(&cmd)) {
>                  cmd_error = SMMU_CERROR_ILL;
> @@ -1341,6 +1341,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>              }
>  
>              trace_smmuv3_cmdq_cfgi_ste(sid);
> +            if (!smmuv3_accel_install_nested_ste(s, sdev, sid, &local_err)) {
> +                error_report_err(local_err);
> +                cmd_error = SMMU_CERROR_ILL;
> +                break;
> +            }
>              smmuv3_flush_config(sdev);
>  
>              break;
> @@ -1350,6 +1355,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>              uint32_t sid = CMD_SID(&cmd), mask;
>              uint8_t range = CMD_STE_RANGE(&cmd);
>              SMMUSIDRange sid_range;
> +            Error *local_err = NULL;
>  
>              if (CMD_SSEC(&cmd)) {
>                  cmd_error = SMMU_CERROR_ILL;
> @@ -1361,6 +1367,12 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>              sid_range.end = sid_range.start + mask;
>  
>              trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
> +            if (!smmuv3_accel_install_nested_ste_range(s, &sid_range,
> +                                                       &local_err)) {
> +                error_report_err(local_err);
> +                cmd_error = SMMU_CERROR_ILL;
> +                break;
> +            }
>              smmu_configs_inv_sid_range(bs, sid_range);
>              break;
>          }
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index 86370d448a..3b1e9bf083 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -69,6 +69,7 @@ smmu_reset_exit(void) ""
>  #smmuv3-accel.c
>  smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
>  smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
> +smmuv3_accel_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64
>  
>  # strongarm.c
>  strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
Thanks

Eric
RE: [PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE install/uninstall support
Posted by Shameer Kolothum 1 month, 1 week ago

> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 02 October 2025 11:05
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Jason Gunthorpe <jgg@nvidia.com>; Nicolin
> Chen <nicolinc@nvidia.com>; ddutile@redhat.com; berrange@redhat.com;
> Nathan Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
> smostafa@google.com; wangzhou1@hisilicon.com;
> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
> shameerkolothum@gmail.com
> Subject: Re: [PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE
> install/uninstall support
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi Shameer,
> 
> On 9/29/25 3:36 PM, Shameer Kolothum wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> >
> > Allocates a s1 HWPT for the Guest s1 stage and attaches that to the
> > pass-through vfio device. This will be invoked when Guest issues
> > SMMU_CMD_CFGI_STE/STE_RANGE.
> ON set both alloc + attachment are done. On unset you shall explain the
> gym related to config/abort hwpt. Those are S1 hwpt, right? I think this
> shall be reflected in the name to make it clearer? In the previous patch
> I didn't really understand that.

Ok. There are three HWPTs in play here.

BYPASS HWPT
ABORT HWPT
S1 HWPT --> This is when Guest has a valid S1 (STE_VALID && STE_CFG_S1_ENABLED)

In the previous patch we allocate a common BYPASS HWPT and a common ABORT
HWPT for all devices in a vIOMMU. We reuse them here in this patch and attach
one of them when the Guest requests S1 bypass or abort.

The S1 HWPT is allocated as and when the Guest has a valid STE with context
descriptor and use that for attachment.

Whether we can call them S1 HWPT only, I am not sure. Because, I think,
during alloc() call the kernel allocates a Nested HWPT(IOMMU_DOMAIN_NESTED)
which uses a Guest S1 nested on a S2 HWPT.

Anyway, I will rephrase the comments and variable names to make it clear.

Thanks,
Shameer
Re: [PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE install/uninstall support
Posted by Eric Auger 1 month, 1 week ago
Hi Shameer,

On 10/2/25 2:08 PM, Shameer Kolothum wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: 02 October 2025 11:05
>> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; Jason Gunthorpe <jgg@nvidia.com>; Nicolin
>> Chen <nicolinc@nvidia.com>; ddutile@redhat.com; berrange@redhat.com;
>> Nathan Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
>> smostafa@google.com; wangzhou1@hisilicon.com;
>> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
>> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
>> shameerkolothum@gmail.com
>> Subject: Re: [PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE
>> install/uninstall support
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 9/29/25 3:36 PM, Shameer Kolothum wrote:
>>> From: Nicolin Chen <nicolinc@nvidia.com>
>>>
>>> Allocates a s1 HWPT for the Guest s1 stage and attaches that to the
>>> pass-through vfio device. This will be invoked when Guest issues
>>> SMMU_CMD_CFGI_STE/STE_RANGE.
>> ON set both alloc + attachment are done. On unset you shall explain the
>> gym related to config/abort hwpt. Those are S1 hwpt, right? I think this
>> shall be reflected in the name to make it clearer? In the previous patch
>> I didn't really understand that.
> Ok. There are three HWPTs in play here.
>
> BYPASS HWPT
> ABORT HWPT
> S1 HWPT --> This is when Guest has a valid S1 (STE_VALID && STE_CFG_S1_ENABLED)
>
> In previous patch we allocate a common BYPASS and ABORT HWPT for all devices
> in a vIOMMU. We reuse that here in this patch and attach if Guest request a S1
> bypass or abort case.
>
> The S1 HWPT is allocated as and when the Guest has a valid STE with context
> descriptor and use that for attachment.
>
> Whether we can call them S1 HWPT only, I am not sure. Because, I think,
> during alloc() call the kernel allocates a Nested HWPT(IOMMU_DOMAIN_NESTED)
> which uses a Guest S1 nested on a S2 HWPT.
The role of the BYPASS HWPT and ABORT HWPT must be better explained, I think.
Same in the previous patch. I understand they abstract stage 1 in abort or
bypass. I think we should better explain what HWPT hierarchy we are
putting in place, referring to the kernel uapi (and not the kernel internal
implementation). Thanks, Eric
>
> Anyway, I will rephrase the comments and variable names to make it clear.
>
> Thanks,
> Shameer
Re: [PATCH v4 09/27] hw/arm/smmuv3-accel: Support nested STE install/uninstall support
Posted by Jonathan Cameron via 1 month, 2 weeks ago
On Mon, 29 Sep 2025 14:36:25 +0100
Shameer Kolothum <skolothumtho@nvidia.com> wrote:

> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> Allocates a s1 HWPT for the Guest s1 stage and attaches that to the

S1

> pass-through vfio device. This will be invoked when Guest issues
> SMMU_CMD_CFGI_STE/STE_RANGE.
> 
> While at it, we are also exporting both smmu_find_ste() and
> smmuv3_flush_config() from smmuv3.c for use here.
> 
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>

Whilst I'm getting a bit out of my comfort zone for review
and don't have time to dig into the details / specs, the code is in a good
state, so

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>