From: Nicolin Chen <nicolinc@nvidia.com>
Allocates a s1 HWPT for the Guest s1 stage and attaches that
to the dev. This will be invoked when Guest issues
SMMU_CMD_CFGI_STE/STE_RANGE.
While at it, we are also exporting both smmu_find_ste() and
smmuv3_flush_config() from smmuv3.c for use here.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3-accel.c | 130 +++++++++++++++++++++++++++++++++++++++
hw/arm/smmuv3-accel.h | 17 +++++
hw/arm/smmuv3-internal.h | 4 ++
hw/arm/smmuv3.c | 8 ++-
hw/arm/trace-events | 1 +
5 files changed, 157 insertions(+), 3 deletions(-)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index fe90d48675..74bf20cfaf 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -18,9 +18,139 @@
#include "smmuv3-accel.h"
+#include "smmuv3-internal.h"
+
#define SMMU_STE_VALID (1ULL << 0)
#define SMMU_STE_CFG_BYPASS (1ULL << 3)
+static void
+smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort)
+{
+ HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
+ SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
+ uint32_t hwpt_id;
+
+ if (!s1_hwpt || !accel_dev->viommu) {
+ return;
+ }
+
+ if (abort) {
+ hwpt_id = accel_dev->viommu->abort_hwpt_id;
+ } else {
+ hwpt_id = accel_dev->viommu->bypass_hwpt_id;
+ }
+
+ host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, &error_abort);
+ iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
+ accel_dev->s1_hwpt = NULL;
+ g_free(s1_hwpt);
+}
+
+static int
+smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
+ uint32_t data_type, uint32_t data_len,
+ void *data)
+{
+ SMMUViommu *viommu = accel_dev->viommu;
+ SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
+ HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
+ uint32_t flags = 0;
+
+ if (!idev || !viommu) {
+ return -ENOENT;
+ }
+
+ if (s1_hwpt) {
+ smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true);
+ }
+
+ s1_hwpt = g_new0(SMMUS1Hwpt, 1);
+ s1_hwpt->iommufd = idev->iommufd;
+ iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ viommu->core.viommu_id, flags, data_type,
+ data_len, data, &s1_hwpt->hwpt_id, &error_abort);
+ host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, &error_abort);
+ accel_dev->s1_hwpt = s1_hwpt;
+ return 0;
+}
+
+void smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid)
+{
+ SMMUv3AccelDevice *accel_dev;
+ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
+ .inval_ste_allowed = true};
+ struct iommu_hwpt_arm_smmuv3 nested_data = {};
+ uint32_t config;
+ STE ste;
+ int ret;
+
+ if (!bs->accel) {
+ return;
+ }
+
+ accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
+ if (!accel_dev->viommu) {
+ return;
+ }
+
+ ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
+ if (ret) {
+ error_report("failed to find STE for sid 0x%x", sid);
+ return;
+ }
+
+ config = STE_CONFIG(&ste);
+ if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
+ smmuv3_accel_dev_uninstall_nested_ste(accel_dev, STE_CFG_ABORT(config));
+ smmuv3_flush_config(sdev);
+ return;
+ }
+
+ nested_data.ste[0] = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
+ nested_data.ste[1] = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
+ /* V | CONFIG | S1FMT | S1CTXPTR | S1CDMAX */
+ nested_data.ste[0] &= 0xf80fffffffffffffULL;
+ /* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */
+ nested_data.ste[1] &= 0x380000ffULL;
+ ret = smmuv3_accel_dev_install_nested_ste(accel_dev,
+ IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(nested_data),
+ &nested_data);
+ if (ret) {
+ error_report("Unable to install nested STE=%16LX:%16LX, sid=0x%x,"
+ "ret=%d", nested_data.ste[1], nested_data.ste[0],
+ sid, ret);
+ }
+
+ trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
+ nested_data.ste[0]);
+}
+
+static void
+smmuv3_accel_ste_range(gpointer key, gpointer value, gpointer user_data)
+{
+ SMMUDevice *sdev = (SMMUDevice *)key;
+ uint32_t sid = smmu_get_sid(sdev);
+ SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
+
+ if (sid >= sid_range->start && sid <= sid_range->end) {
+ SMMUv3State *s = sdev->smmu;
+ SMMUState *bs = &s->smmu_state;
+
+ smmuv3_accel_install_nested_ste(bs, sdev, sid);
+ }
+}
+
+void
+smmuv3_accel_install_nested_ste_range(SMMUState *bs, SMMUSIDRange *range)
+{
+ if (!bs->accel) {
+ return;
+ }
+
+ g_hash_table_foreach(bs->configs, smmuv3_accel_ste_range, range);
+}
+
static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
PCIBus *bus, int devfn)
{
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index 55a6a353fc..06e81b630d 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -29,10 +29,16 @@ typedef struct SMMUViommu {
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
} SMMUViommu;
+typedef struct SMMUS1Hwpt {
+ IOMMUFDBackend *iommufd;
+ uint32_t hwpt_id;
+} SMMUS1Hwpt;
+
typedef struct SMMUv3AccelDevice {
SMMUDevice sdev;
AddressSpace as_sysmem;
HostIOMMUDeviceIOMMUFD *idev;
+ SMMUS1Hwpt *s1_hwpt;
SMMUViommu *viommu;
QLIST_ENTRY(SMMUv3AccelDevice) next;
} SMMUv3AccelDevice;
@@ -45,10 +51,21 @@ typedef struct SMMUv3AccelState {
#if defined(CONFIG_ARM_SMMUV3) && defined(CONFIG_IOMMUFD)
void smmuv3_accel_init(SMMUv3State *s);
+void smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid);
+void smmuv3_accel_install_nested_ste_range(SMMUState *bs,
+ SMMUSIDRange *range);
#else
static inline void smmuv3_accel_init(SMMUv3State *d)
{
}
+static inline void
+smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid)
+{
+}
+static inline void
+smmuv3_accel_install_nested_ste_range(SMMUState *bs, SMMUSIDRange *range)
+{
+}
#endif
#endif /* HW_ARM_SMMUV3_ACCEL_H */
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index b6b7399347..738061c6ad 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -547,6 +547,10 @@ typedef struct CD {
uint32_t word[16];
} CD;
+int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
+ SMMUEventInfo *event);
+void smmuv3_flush_config(SMMUDevice *sdev);
+
/* STE fields */
#define STE_VALID(x) extract32((x)->word[0], 0, 1)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2f5a8157dd..c94bfe6564 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -630,8 +630,8 @@ bad_ste:
* Supports linear and 2-level stream table
* Return 0 on success, -EINVAL otherwise
*/
-static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
- SMMUEventInfo *event)
+int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
+ SMMUEventInfo *event)
{
dma_addr_t addr, strtab_base;
uint32_t log2size;
@@ -900,7 +900,7 @@ static SMMUTransCfg *smmuv3_get_config(SMMUDevice *sdev, SMMUEventInfo *event)
return cfg;
}
-static void smmuv3_flush_config(SMMUDevice *sdev)
+void smmuv3_flush_config(SMMUDevice *sdev)
{
SMMUv3State *s = sdev->smmu;
SMMUState *bc = &s->smmu_state;
@@ -1342,6 +1342,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
trace_smmuv3_cmdq_cfgi_ste(sid);
smmuv3_flush_config(sdev);
+ smmuv3_accel_install_nested_ste(bs, sdev, sid);
break;
}
@@ -1361,6 +1362,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
sid_range.end = sid_range.start + mask;
trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
+ smmuv3_accel_install_nested_ste_range(bs, &sid_range);
smmu_configs_inv_sid_range(bs, sid_range);
break;
}
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index c4537ca1d6..7d232ca17c 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -69,6 +69,7 @@ smmu_reset_exit(void) ""
#smmuv3-accel.c
smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x"
+smmuv3_accel_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64
# strongarm.c
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
--
2.34.1
Hi Shameer,
On 7/14/25 5:59 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Allocates a s1 HWPT for the Guest s1 stage and attaches that
> to the dev. This will be invoked when Guest issues
dev: I think you shall be more precise because there are so many now ;-)
> SMMU_CMD_CFGI_STE/STE_RANGE.
>
> While at it, we are also exporting both smmu_find_ste() and
> smmuv3_flush_config() from smmuv3.c for use here.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
> hw/arm/smmuv3-accel.c | 130 +++++++++++++++++++++++++++++++++++++++
> hw/arm/smmuv3-accel.h | 17 +++++
> hw/arm/smmuv3-internal.h | 4 ++
> hw/arm/smmuv3.c | 8 ++-
> hw/arm/trace-events | 1 +
> 5 files changed, 157 insertions(+), 3 deletions(-)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index fe90d48675..74bf20cfaf 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -18,9 +18,139 @@
>
> #include "smmuv3-accel.h"
>
> +#include "smmuv3-internal.h"
> +
> #define SMMU_STE_VALID (1ULL << 0)
> #define SMMU_STE_CFG_BYPASS (1ULL << 3)
>
> +static void
> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort)
> +{
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + uint32_t hwpt_id;
> +
> + if (!s1_hwpt || !accel_dev->viommu) {
> + return;
> + }
> +
> + if (abort) {
> + hwpt_id = accel_dev->viommu->abort_hwpt_id;
> + } else {
> + hwpt_id = accel_dev->viommu->bypass_hwpt_id;
> + }
> +
> + host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, &error_abort);
> + iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
> + accel_dev->s1_hwpt = NULL;
> + g_free(s1_hwpt);
> +}
> +
> +static int
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> + uint32_t data_type, uint32_t data_len,
> + void *data)
> +{
> + SMMUViommu *viommu = accel_dev->viommu;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + uint32_t flags = 0;
> +
> + if (!idev || !viommu) {
> + return -ENOENT;
> + }
> +
> + if (s1_hwpt) {
> + smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true);
> + }
> +
> + s1_hwpt = g_new0(SMMUS1Hwpt, 1);
> + s1_hwpt->iommufd = idev->iommufd;
> + iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + viommu->core.viommu_id, flags, data_type,
> + data_len, data, &s1_hwpt->hwpt_id, &error_abort);
> + host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, &error_abort);
We don't want error_abort here in the prospect to support hotplug. Also
I think you should properly cascade any error through Error handles, at
least on the install path
> + accel_dev->s1_hwpt = s1_hwpt;
> + return 0;
> +}
> +
> +void smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid)
return bool and pass Error handle
> +{
> + SMMUv3AccelDevice *accel_dev;
> + SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
> + .inval_ste_allowed = true};
> + struct iommu_hwpt_arm_smmuv3 nested_data = {};
> + uint32_t config;
> + STE ste;
> + int ret;
> +
> + if (!bs->accel) {
> + return;
> + }
> +
> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> + if (!accel_dev->viommu) {
> + return;
> + }
> +
> + ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
> + if (ret) {
> + error_report("failed to find STE for sid 0x%x", sid);
> + return;
> + }
> +
> + config = STE_CONFIG(&ste);
> + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> + smmuv3_accel_dev_uninstall_nested_ste(accel_dev, STE_CFG_ABORT(config));
> + smmuv3_flush_config(sdev);
> + return;
> + }
> +
> + nested_data.ste[0] = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
> + nested_data.ste[1] = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
> + /* V | CONFIG | S1FMT | S1CTXPTR | S1CDMAX */
use bitmasks here and below?
> + nested_data.ste[0] &= 0xf80fffffffffffffULL;
> + /* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */
> + nested_data.ste[1] &= 0x380000ffULL;
> + ret = smmuv3_accel_dev_install_nested_ste(accel_dev,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(nested_data),
> + &nested_data);
> + if (ret) {
> + error_report("Unable to install nested STE=%16LX:%16LX, sid=0x%x,"
> + "ret=%d", nested_data.ste[1], nested_data.ste[0],
> + sid, ret);
error_setg everywhere
> + }
> +
> + trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
> + nested_data.ste[0]);
> +}
> +
> +static void
> +smmuv3_accel_ste_range(gpointer key, gpointer value, gpointer user_data)
> +{
> + SMMUDevice *sdev = (SMMUDevice *)key;
> + uint32_t sid = smmu_get_sid(sdev);
> + SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
> +
> + if (sid >= sid_range->start && sid <= sid_range->end) {
> + SMMUv3State *s = sdev->smmu;
> + SMMUState *bs = &s->smmu_state;
> +
> + smmuv3_accel_install_nested_ste(bs, sdev, sid);
> + }
> +}
> +
> +void
> +smmuv3_accel_install_nested_ste_range(SMMUState *bs, SMMUSIDRange *range)
> +{
> + if (!bs->accel) {
> + return;
> + }
> +
> + g_hash_table_foreach(bs->configs, smmuv3_accel_ste_range, range);
> +}
> +
> static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
> PCIBus *bus, int devfn)
> {
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 55a6a353fc..06e81b630d 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -29,10 +29,16 @@ typedef struct SMMUViommu {
> QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> } SMMUViommu;
>
> +typedef struct SMMUS1Hwpt {
> + IOMMUFDBackend *iommufd;
> + uint32_t hwpt_id;
> +} SMMUS1Hwpt;
> +
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
> AddressSpace as_sysmem;
> HostIOMMUDeviceIOMMUFD *idev;
> + SMMUS1Hwpt *s1_hwpt;
> SMMUViommu *viommu;
> QLIST_ENTRY(SMMUv3AccelDevice) next;
> } SMMUv3AccelDevice;
> @@ -45,10 +51,21 @@ typedef struct SMMUv3AccelState {
>
> #if defined(CONFIG_ARM_SMMUV3) && defined(CONFIG_IOMMUFD)
> void smmuv3_accel_init(SMMUv3State *s);
> +void smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid);
> +void smmuv3_accel_install_nested_ste_range(SMMUState *bs,
> + SMMUSIDRange *range);
to me should return an int or bool and convey Error handle
> #else
> static inline void smmuv3_accel_init(SMMUv3State *d)
> {
> }
> +static inline void
> +smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid)
> +{
> +}
> +static inline void
> +smmuv3_accel_install_nested_ste_range(SMMUState *bs, SMMUSIDRange *range)
> +{
> +}
> #endif
>
> #endif /* HW_ARM_SMMUV3_ACCEL_H */
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index b6b7399347..738061c6ad 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -547,6 +547,10 @@ typedef struct CD {
> uint32_t word[16];
> } CD;
>
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> + SMMUEventInfo *event);
> +void smmuv3_flush_config(SMMUDevice *sdev);
> +
> /* STE fields */
>
> #define STE_VALID(x) extract32((x)->word[0], 0, 1)
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 2f5a8157dd..c94bfe6564 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -630,8 +630,8 @@ bad_ste:
> * Supports linear and 2-level stream table
> * Return 0 on success, -EINVAL otherwise
> */
> -static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> - SMMUEventInfo *event)
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> + SMMUEventInfo *event)
> {
> dma_addr_t addr, strtab_base;
> uint32_t log2size;
> @@ -900,7 +900,7 @@ static SMMUTransCfg *smmuv3_get_config(SMMUDevice *sdev, SMMUEventInfo *event)
> return cfg;
> }
>
> -static void smmuv3_flush_config(SMMUDevice *sdev)
> +void smmuv3_flush_config(SMMUDevice *sdev)
> {
> SMMUv3State *s = sdev->smmu;
> SMMUState *bc = &s->smmu_state;
> @@ -1342,6 +1342,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>
> trace_smmuv3_cmdq_cfgi_ste(sid);
> smmuv3_flush_config(sdev);
> + smmuv3_accel_install_nested_ste(bs, sdev, sid);
>
> break;
> }
> @@ -1361,6 +1362,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> sid_range.end = sid_range.start + mask;
>
> trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
> + smmuv3_accel_install_nested_ste_range(bs, &sid_range);
> smmu_configs_inv_sid_range(bs, sid_range);
> break;
> }
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index c4537ca1d6..7d232ca17c 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -69,6 +69,7 @@ smmu_reset_exit(void) ""
> #smmuv3-accel.c
> smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
> smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x"
> +smmuv3_accel_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64
>
> # strongarm.c
> strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
Thanks
Eric
On Mon, Jul 14, 2025 at 04:59:35PM +0100, Shameer Kolothum wrote:
> +static void
> +smmuv3_accel_ste_range(gpointer key, gpointer value, gpointer user_data)
> +{
> + SMMUDevice *sdev = (SMMUDevice *)key;
> + uint32_t sid = smmu_get_sid(sdev);
> + SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
> +
> + if (sid >= sid_range->start && sid <= sid_range->end) {
> + SMMUv3State *s = sdev->smmu;
> + SMMUState *bs = &s->smmu_state;
> +
> + smmuv3_accel_install_nested_ste(bs, sdev, sid);
> + }
> +}
> +
> +void
> +smmuv3_accel_install_nested_ste_range(SMMUState *bs, SMMUSIDRange *range)
> +{
> + if (!bs->accel) {
> + return;
> + }
> +
> + g_hash_table_foreach(bs->configs, smmuv3_accel_ste_range, range);
This will not work correctly?
The bs->configs is a cache that gets an entry inserted to when a
config is fetched via smmuv3_get_config(), which gets invoked by
smmuv3_notify_iova() and smmuv3_translate() only.
But CMDQ_OP_CFGI_ALL can actually happen very early, e.g. Linux
driver does that in the probe() right after SMMU CMDQ is enabled,
at which point neither smmuv3_notify_iova nor smmuv3_translate
could ever get invoked, meaning that the g_hash_table is empty.
Without the acceleration, this foreach works because vSMMU does
not need to do anything since the cache is indeed empty.
But, with accel, it must call smmuv3_accel_install_nested_ste().
So, I think this should foreach the viommu->device_list instead.
Thanks
Nicolin
> -----Original Message-----
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Wednesday, July 16, 2025 12:13 AM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> eric.auger@redhat.com; peter.maydell@linaro.org; jgg@nvidia.com;
> ddutile@redhat.com; berrange@redhat.com; nathanc@nvidia.com;
> mochs@nvidia.com; smostafa@google.com; Linuxarm
> <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
> jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org;
> zhenzhong.duan@intel.com; shameerkolothum@gmail.com
> Subject: Re: [RFC PATCH v3 09/15] hw/arm/smmuv3-accel: Support nested
> STE install/uninstall support
>
> On Mon, Jul 14, 2025 at 04:59:35PM +0100, Shameer Kolothum wrote:
> > +static void
> > +smmuv3_accel_ste_range(gpointer key, gpointer value, gpointer
> user_data)
> > +{
> > + SMMUDevice *sdev = (SMMUDevice *)key;
> > + uint32_t sid = smmu_get_sid(sdev);
> > + SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
> > +
> > + if (sid >= sid_range->start && sid <= sid_range->end) {
> > + SMMUv3State *s = sdev->smmu;
> > + SMMUState *bs = &s->smmu_state;
> > +
> > + smmuv3_accel_install_nested_ste(bs, sdev, sid);
> > + }
> > +}
> > +
> > +void
> > +smmuv3_accel_install_nested_ste_range(SMMUState *bs,
> SMMUSIDRange *range)
> > +{
> > + if (!bs->accel) {
> > + return;
> > + }
> > +
> > + g_hash_table_foreach(bs->configs, smmuv3_accel_ste_range, range);
>
> This will not work correctly?
>
> The bs->configs is a cache that gets an entry inserted to when a
> config is fetched via smmuv3_get_config(), which gets invoked by
> smmuv3_notify_iova() and smmuv3_translate() only.
>
> But CMDQ_OP_CFGI_ALL can actually happen very early, e.g. Linux
> driver does that in the probe() right after SMMU CMDQ is enabled,
> at which point neither smmuv3_notify_iova nor smmuv3_translate
> could ever get invoked, meaning that the g_hash_table is empty.
>
> Without the acceleration, this foreach works because vSMMU does
> not need to do anything since the cache is indeed empty.
>
> But, with accel, it must call smmuv3_accel_install_nested_ste().
Ok. The only place I can see CMDQ_OP_CFGI_ALL get invoked by Linux
kernel is during arm_smmu_device_reset() and that is to clear all.
But I am not sure we will have any valid STEs at that time. Just curious,
are you seeing any issues with this at the moment?
> So, I think this should foreach the viommu->device_list instead.
But agree. Using device_list is more appropriate unless we cache the
configs during each install_nested_ste() path.
Thanks,
Shameer
On 7/16/25 10:36 AM, Shameerali Kolothum Thodi wrote:
>
>> -----Original Message-----
>> From: Nicolin Chen <nicolinc@nvidia.com>
>> Sent: Wednesday, July 16, 2025 12:13 AM
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
>> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
>> eric.auger@redhat.com; peter.maydell@linaro.org; jgg@nvidia.com;
>> ddutile@redhat.com; berrange@redhat.com; nathanc@nvidia.com;
>> mochs@nvidia.com; smostafa@google.com; Linuxarm
>> <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
>> jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
>> <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org;
>> zhenzhong.duan@intel.com; shameerkolothum@gmail.com
>> Subject: Re: [RFC PATCH v3 09/15] hw/arm/smmuv3-accel: Support nested
>> STE install/uninstall support
>>
>> On Mon, Jul 14, 2025 at 04:59:35PM +0100, Shameer Kolothum wrote:
>>> +static void
>>> +smmuv3_accel_ste_range(gpointer key, gpointer value, gpointer
>> user_data)
>>> +{
>>> + SMMUDevice *sdev = (SMMUDevice *)key;
>>> + uint32_t sid = smmu_get_sid(sdev);
>>> + SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
>>> +
>>> + if (sid >= sid_range->start && sid <= sid_range->end) {
>>> + SMMUv3State *s = sdev->smmu;
>>> + SMMUState *bs = &s->smmu_state;
>>> +
>>> + smmuv3_accel_install_nested_ste(bs, sdev, sid);
>>> + }
>>> +}
>>> +
>>> +void
>>> +smmuv3_accel_install_nested_ste_range(SMMUState *bs,
>> SMMUSIDRange *range)
>>> +{
>>> + if (!bs->accel) {
>>> + return;
>>> + }
>>> +
>>> + g_hash_table_foreach(bs->configs, smmuv3_accel_ste_range, range);
>> This will not work correctly?
>>
>> The bs->configs is a cache that gets an entry inserted to when a
>> config is fetched via smmuv3_get_config(), which gets invoked by
>> smmuv3_notify_iova() and smmuv3_translate() only.
>>
>> But CMDQ_OP_CFGI_ALL can actually happen very early, e.g. Linux
>> driver does that in the probe() right after SMMU CMDQ is enabled,
>> at which point neither smmuv3_notify_iova nor smmuv3_translate
>> could ever get invoked, meaning that the g_hash_table is empty.
>>
>> Without the acceleration, this foreach works because vSMMU does
>> not need to do anything since the cache is indeed empty.
>>
>> But, with accel, it must call smmuv3_accel_install_nested_ste().
> Ok. The only place I can see CMDQ_OP_CFGI_ALL get invoked by Linux
Easy to say but I think we shall rather look at what the spec mandates
and not what the Linux driver does ;-)
Thanks
Eric
> kernel is during arm_smmu_device_reset() and that is to clear all.
> But I am not sure we will have any valid STEs at that time. Just curious,
> are you seeing any issues with this at the moment?
>
>> So, I think this should foreach the viommu->device_list instead.
> But agree. Using device_list is more appropriate unless we cache the
> configs during each install_nested_ste() path.
>
> Thanks,
> Shameer
>
On Wed, Jul 16, 2025 at 08:36:38AM +0000, Shameerali Kolothum Thodi wrote:
> > > +    g_hash_table_foreach(bs->configs, smmuv3_accel_ste_range, range);
> >
> > This will not work correctly?
> >
> > The bs->configs is a cache that gets an entry inserted to when a
> > config is fetched via smmuv3_get_config(), which gets invoked by
> > smmuv3_notify_iova() and smmuv3_translate() only.
> >
> > But CMDQ_OP_CFGI_ALL can actually happen very early, e.g. Linux
> > driver does that in the probe() right after SMMU CMDQ is enabled,
> > at which point neither smmuv3_notify_iova nor smmuv3_translate
> > could ever get invoked, meaning that the g_hash_table is empty.
> >
> > Without the acceleration, this foreach works because vSMMU does
> > not need to do anything since the cache is indeed empty.
> >
> > But, with accel, it must call smmuv3_accel_install_nested_ste().
>
> Ok. The only place I can see CMDQ_OP_CFGI_ALL get invoked by Linux
> kernel is during arm_smmu_device_reset() and that is to clear all.
> But I am not sure we will have any valid STEs at that time. Just curious,
> are you seeing any issues with this at the moment?

I recall that (not for this series) I hit some issue with a guest
having "iommu.passthrough=y" string in its bootcmd. The guest OS
initialized all SIDs to a Config.Bypass mode accordingly. But that
was not handled correctly by QEMU so the host was not getting any
request to program a stage-1 bypass STE to the HW.

So, I think there would be a similar issue here.

Nicolin
On Mon, Jul 14, 2025 at 04:59:35PM +0100, Shameer Kolothum wrote:
> +static int
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> + uint32_t data_type, uint32_t data_len,
> + void *data)
> +{
> + SMMUViommu *viommu = accel_dev->viommu;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + uint32_t flags = 0;
> +
> + if (!idev || !viommu) {
> + return -ENOENT;
> + }
> +
> + if (s1_hwpt) {
> + smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true);
> + }
> +
> + s1_hwpt = g_new0(SMMUS1Hwpt, 1);
> + s1_hwpt->iommufd = idev->iommufd;
> + iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + viommu->core.viommu_id, flags, data_type,
> + data_len, data, &s1_hwpt->hwpt_id, &error_abort);
Let's check the return value.
> + host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, &error_abort);
> + accel_dev->s1_hwpt = s1_hwpt;
> + return 0;
> +}
> +
> +void smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid)
> +{
> + SMMUv3AccelDevice *accel_dev;
> + SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
> + .inval_ste_allowed = true};
> + struct iommu_hwpt_arm_smmuv3 nested_data = {};
> + uint32_t config;
> + STE ste;
> + int ret;
> +
> + if (!bs->accel) {
> + return;
> + }
> +
> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> + if (!accel_dev->viommu) {
> + return;
> + }
> +
> + ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
> + if (ret) {
> + error_report("failed to find STE for sid 0x%x", sid);
> + return;
> + }
> +
> + config = STE_CONFIG(&ste);
> + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> + smmuv3_accel_dev_uninstall_nested_ste(accel_dev, STE_CFG_ABORT(config));
> + smmuv3_flush_config(sdev);
> + return;
> + }
> +
> + nested_data.ste[0] = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
> + nested_data.ste[1] = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
> + /* V | CONFIG | S1FMT | S1CTXPTR | S1CDMAX */
> + nested_data.ste[0] &= 0xf80fffffffffffffULL;
> + /* S1DSS | S1CIR | S1COR | S1CSH | S1STALLD | EATS */
> + nested_data.ste[1] &= 0x380000ffULL;
Likely we need to make sure that values here are little endians, in
alignment with the kernel uABI.
> + ret = smmuv3_accel_dev_install_nested_ste(accel_dev,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(nested_data),
> + &nested_data);
> + if (ret) {
> + error_report("Unable to install nested STE=%16LX:%16LX, sid=0x%x,"
> + "ret=%d", nested_data.ste[1], nested_data.ste[0],
> + sid, ret);
> + }
> +
> + trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
> + nested_data.ste[0]);
> +}
> +
> +static void
> +smmuv3_accel_ste_range(gpointer key, gpointer value, gpointer user_data)
> +{
> + SMMUDevice *sdev = (SMMUDevice *)key;
> + uint32_t sid = smmu_get_sid(sdev);
> + SMMUSIDRange *sid_range = (SMMUSIDRange *)user_data;
> +
> + if (sid >= sid_range->start && sid <= sid_range->end) {
> + SMMUv3State *s = sdev->smmu;
> + SMMUState *bs = &s->smmu_state;
Can we use ARM_SMMU and ARM_SMMUV3 macros?
> +
> + smmuv3_accel_install_nested_ste(bs, sdev, sid);
> + }
> +}
> +
> +void
> +smmuv3_accel_install_nested_ste_range(SMMUState *bs, SMMUSIDRange *range)
Fits in one line.
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
> AddressSpace as_sysmem;
> HostIOMMUDeviceIOMMUFD *idev;
> + SMMUS1Hwpt *s1_hwpt;
No need of an extra space.
> SMMUViommu *viommu;
> QLIST_ENTRY(SMMUv3AccelDevice) next;
> } SMMUv3AccelDevice;
> @@ -45,10 +51,21 @@ typedef struct SMMUv3AccelState {
>
> #if defined(CONFIG_ARM_SMMUV3) && defined(CONFIG_IOMMUFD)
> void smmuv3_accel_init(SMMUv3State *s);
> +void smmuv3_accel_install_nested_ste(SMMUState *bs, SMMUDevice *sdev, int sid);
> +void smmuv3_accel_install_nested_ste_range(SMMUState *bs,
> + SMMUSIDRange *range);
Fits in one line.
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index b6b7399347..738061c6ad 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -547,6 +547,10 @@ typedef struct CD {
> uint32_t word[16];
> } CD;
>
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> + SMMUEventInfo *event);
Ditto
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 2f5a8157dd..c94bfe6564 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -630,8 +630,8 @@ bad_ste:
> * Supports linear and 2-level stream table
> * Return 0 on success, -EINVAL otherwise
> */
> -static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> - SMMUEventInfo *event)
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> + SMMUEventInfo *event)
Ditto
Thanks
Nicolin
© 2016 - 2025 Red Hat, Inc.