From: Nicolin Chen <nicolinc@nvidia.com>
A device placed behind a vSMMU instance must have corresponding vSTEs
(bypass, abort, or translate) installed. The bypass and abort proxy nested
HWPTs are pre-allocated.
For the translate HWPT, a vDEVICE object is allocated and associated with the
vIOMMU for each guest device. This allows the host kernel to establish a
virtual SID to physical SID mapping, which is required for handling
invalidations and event reporting.
A translate HWPT is allocated based on the guest STE configuration and
attached to the device when the guest issues SMMU_CMD_CFGI_STE or
SMMU_CMD_CFGI_STE_RANGE, provided the STE enables S1 translation.
If the guest STE is invalid or S1 translation is disabled, the device is
attached to one of the pre-allocated ABORT or BYPASS HWPTs instead.
While at it, export both smmu_find_ste() and smmuv3_flush_config() for
use here.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-accel.c | 193 +++++++++++++++++++++++++++++++++++++++
hw/arm/smmuv3-accel.h | 23 +++++
hw/arm/smmuv3-internal.h | 20 ++++
hw/arm/smmuv3.c | 18 +++-
hw/arm/trace-events | 2 +
5 files changed, 253 insertions(+), 3 deletions(-)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index d4d65299a8..c74e95a0ea 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -28,6 +28,191 @@ MemoryRegion root;
MemoryRegion sysmem;
static AddressSpace *shared_as_sysmem;
+static bool
+smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error **errp)
+{
+ SMMUViommu *vsmmu = accel_dev->vsmmu;
+ IOMMUFDVdev *vdev;
+ uint32_t vdevice_id;
+
+ if (!accel_dev->idev || accel_dev->vdev) {
+ return true;
+ }
+
+ if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev->devid,
+ vsmmu->viommu.viommu_id, sid,
+ &vdevice_id, errp)) {
+ return false;
+ }
+ if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
+ vsmmu->bypass_hwpt_id, errp)) {
+ iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
+ return false;
+ }
+
+ vdev = g_new(IOMMUFDVdev, 1);
+ vdev->vdevice_id = vdevice_id;
+ vdev->virt_id = sid;
+ accel_dev->vdev = vdev;
+ return true;
+}
+
+static bool
+smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort,
+ Error **errp)
+{
+ HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
+ SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
+ uint32_t hwpt_id;
+
+ if (!s1_hwpt || !accel_dev->vsmmu) {
+ return true;
+ }
+
+ if (abort) {
+ hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
+ } else {
+ hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
+ }
+
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
+ return false;
+ }
+ trace_smmuv3_accel_uninstall_nested_ste(smmu_get_sid(&accel_dev->sdev),
+ abort ? "abort" : "bypass",
+ hwpt_id);
+
+ iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
+ accel_dev->s1_hwpt = NULL;
+ g_free(s1_hwpt);
+ return true;
+}
+
+static bool
+smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
+ uint32_t data_type, uint32_t data_len,
+ void *data, Error **errp)
+{
+ SMMUViommu *vsmmu = accel_dev->vsmmu;
+ SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
+ HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
+ uint32_t flags = 0;
+
+ if (!idev || !vsmmu) {
+ error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
+ smmu_get_sid(&accel_dev->sdev));
+ return false;
+ }
+
+ if (s1_hwpt) {
+ if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
+ return false;
+ }
+ }
+
+ s1_hwpt = g_new0(SMMUS1Hwpt, 1);
+ s1_hwpt->iommufd = idev->iommufd;
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ vsmmu->viommu.viommu_id, flags,
+ data_type, data_len, data,
+ &s1_hwpt->hwpt_id, errp)) {
+ return false;
+ }
+
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, errp)) {
+ iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
+ return false;
+ }
+ accel_dev->s1_hwpt = s1_hwpt;
+ return true;
+}
+
+bool
+smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
+ Error **errp)
+{
+ SMMUv3AccelDevice *accel_dev;
+ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
+ .inval_ste_allowed = true};
+ struct iommu_hwpt_arm_smmuv3 nested_data = {};
+ uint64_t ste_0, ste_1;
+ uint32_t config;
+ STE ste;
+ int ret;
+
+ if (!s->accel) {
+ return true;
+ }
+
+ accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
+ if (!accel_dev->vsmmu) {
+ return true;
+ }
+
+ if (!smmuv3_accel_alloc_vdev(accel_dev, sid, errp)) {
+ return false;
+ }
+
+ ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
+ if (ret) {
+ error_setg(errp, "Failed to find STE for Device 0x%x", sid);
+ return true;
+ }
+
+ config = STE_CONFIG(&ste);
+ if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
+ if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
+ STE_CFG_ABORT(config),
+ errp)) {
+ return false;
+ }
+ smmuv3_flush_config(sdev);
+ return true;
+ }
+
+ ste_0 = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
+ ste_1 = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
+ nested_data.ste[0] = cpu_to_le64(ste_0 & STE0_MASK);
+ nested_data.ste[1] = cpu_to_le64(ste_1 & STE1_MASK);
+
+ if (!smmuv3_accel_dev_install_nested_ste(accel_dev,
+ IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(nested_data),
+ &nested_data, errp)) {
+ error_append_hint(errp, "Unable to install sid=0x%x nested STE="
+ "0x%"PRIx64":=0x%"PRIx64"", sid,
+ (uint64_t)le64_to_cpu(nested_data.ste[1]),
+ (uint64_t)le64_to_cpu(nested_data.ste[0]));
+ return false;
+ }
+ trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
+ nested_data.ste[0]);
+ return true;
+}
+
+bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
+ Error **errp)
+{
+ SMMUv3AccelState *s_accel = s->s_accel;
+ SMMUv3AccelDevice *accel_dev;
+
+ if (!s_accel || !s_accel->vsmmu) {
+ return true;
+ }
+
+ QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
+ uint32_t sid = smmu_get_sid(&accel_dev->sdev);
+
+ if (sid >= range->start && sid <= range->end) {
+ if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
+ sid, errp)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
PCIBus *bus, int devfn)
{
@@ -154,6 +339,7 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
SMMUv3State *s = ARM_SMMUV3(bs);
SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus);
SMMUv3AccelDevice *accel_dev;
+ IOMMUFDVdev *vdev;
SMMUViommu *vsmmu;
SMMUDevice *sdev;
uint16_t sid;
@@ -182,6 +368,13 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
trace_smmuv3_accel_unset_iommu_device(devfn, sid);
vsmmu = s->s_accel->vsmmu;
+ vdev = accel_dev->vdev;
+ if (vdev) {
+ iommufd_backend_free_id(vsmmu->iommufd, vdev->vdevice_id);
+ g_free(vdev);
+ accel_dev->vdev = NULL;
+ }
+
if (QLIST_EMPTY(&vsmmu->device_list)) {
iommufd_backend_free_id(vsmmu->iommufd, vsmmu->bypass_hwpt_id);
iommufd_backend_free_id(vsmmu->iommufd, vsmmu->abort_hwpt_id);
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index d81f90c32c..73b44cd7be 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -27,9 +27,16 @@ typedef struct SMMUViommu {
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
} SMMUViommu;
+typedef struct SMMUS1Hwpt {
+ IOMMUFDBackend *iommufd;
+ uint32_t hwpt_id;
+} SMMUS1Hwpt;
+
typedef struct SMMUv3AccelDevice {
SMMUDevice sdev;
HostIOMMUDeviceIOMMUFD *idev;
+ SMMUS1Hwpt *s1_hwpt;
+ IOMMUFDVdev *vdev;
SMMUViommu *vsmmu;
QLIST_ENTRY(SMMUv3AccelDevice) next;
} SMMUv3AccelDevice;
@@ -40,10 +47,26 @@ typedef struct SMMUv3AccelState {
#ifdef CONFIG_ARM_SMMUV3_ACCEL
void smmuv3_accel_init(SMMUv3State *s);
+bool smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
+ Error **errp);
+bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
+ Error **errp);
#else
static inline void smmuv3_accel_init(SMMUv3State *s)
{
}
+static inline bool
+smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
+ Error **errp)
+{
+ return true;
+}
+static inline bool
+smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
+ Error **errp)
+{
+ return true;
+}
#endif
#endif /* HW_ARM_SMMUV3_ACCEL_H */
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 03d86cfc5c..5fd88b4257 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -547,6 +547,9 @@ typedef struct CD {
uint32_t word[16];
} CD;
+int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event);
+void smmuv3_flush_config(SMMUDevice *sdev);
+
/* STE fields */
#define STE_VALID(x) extract32((x)->word[0], 0, 1)
@@ -586,6 +589,23 @@ typedef struct CD {
#define SMMU_STE_VALID (1ULL << 0)
#define SMMU_STE_CFG_BYPASS (1ULL << 3)
+#define STE0_V MAKE_64BIT_MASK(0, 1)
+#define STE0_CONFIG MAKE_64BIT_MASK(1, 3)
+#define STE0_S1FMT MAKE_64BIT_MASK(4, 2)
+#define STE0_CTXPTR MAKE_64BIT_MASK(6, 50)
+#define STE0_S1CDMAX MAKE_64BIT_MASK(59, 5)
+#define STE0_MASK (STE0_S1CDMAX | STE0_CTXPTR | STE0_S1FMT | STE0_CONFIG | \
+ STE0_V)
+
+#define STE1_S1DSS MAKE_64BIT_MASK(0, 2)
+#define STE1_S1CIR MAKE_64BIT_MASK(2, 2)
+#define STE1_S1COR MAKE_64BIT_MASK(4, 2)
+#define STE1_S1CSH MAKE_64BIT_MASK(6, 2)
+#define STE1_S1STALLD MAKE_64BIT_MASK(27, 1)
+#define STE1_EATS MAKE_64BIT_MASK(28, 2)
+#define STE1_MASK (STE1_EATS | STE1_S1STALLD | STE1_S1CSH | STE1_S1COR | \
+ STE1_S1CIR | STE1_S1DSS)
+
#define SMMU_GBPA_ABORT (1UL << 20)
static inline int oas2bits(int oas_field)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ef991cb7d8..1fd8aaa0c7 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -630,8 +630,7 @@ bad_ste:
* Supports linear and 2-level stream table
* Return 0 on success, -EINVAL otherwise
*/
-static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
- SMMUEventInfo *event)
+int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event)
{
dma_addr_t addr, strtab_base;
uint32_t log2size;
@@ -900,7 +899,7 @@ static SMMUTransCfg *smmuv3_get_config(SMMUDevice *sdev, SMMUEventInfo *event)
return cfg;
}
-static void smmuv3_flush_config(SMMUDevice *sdev)
+void smmuv3_flush_config(SMMUDevice *sdev)
{
SMMUv3State *s = sdev->smmu;
SMMUState *bc = &s->smmu_state;
@@ -1330,6 +1329,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
{
uint32_t sid = CMD_SID(&cmd);
SMMUDevice *sdev = smmu_find_sdev(bs, sid);
+ Error *local_err = NULL;
if (CMD_SSEC(&cmd)) {
cmd_error = SMMU_CERROR_ILL;
@@ -1341,6 +1341,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
}
trace_smmuv3_cmdq_cfgi_ste(sid);
+ if (!smmuv3_accel_install_nested_ste(s, sdev, sid, &local_err)) {
+ error_report_err(local_err);
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
smmuv3_flush_config(sdev);
break;
@@ -1350,6 +1355,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
uint32_t sid = CMD_SID(&cmd), mask;
uint8_t range = CMD_STE_RANGE(&cmd);
SMMUSIDRange sid_range;
+ Error *local_err = NULL;
if (CMD_SSEC(&cmd)) {
cmd_error = SMMU_CERROR_ILL;
@@ -1361,6 +1367,12 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
sid_range.end = sid_range.start + mask;
trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
+ if (!smmuv3_accel_install_nested_ste_range(s, &sid_range,
+ &local_err)) {
+ error_report_err(local_err);
+ cmd_error = SMMU_CERROR_ILL;
+ break;
+ }
smmu_configs_inv_sid_range(bs, sid_range);
break;
}
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 49c0460f30..2e0b1f8f6f 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -69,6 +69,8 @@ smmu_reset_exit(void) ""
#smmuv3-accel.c
smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (idev devid=0x%x)"
smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (idev devid=0x%x)"
+smmuv3_accel_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64
+smmuv3_accel_uninstall_nested_ste(uint32_t sid, const char *ste_cfg, uint32_t hwpt_id) "sid=%d attached %s hwpt_id=%u"
# strongarm.c
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
--
2.43.0
Hi Shameer,
On 10/31/25 11:49 AM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> A device placed behind a vSMMU instance must have corresponding vSTEs
> (bypass, abort, or translate) installed. The bypass and abort proxy nested
> HWPTs are pre-allocated.
>
> For translat HWPT, a vDEVICE object is allocated and associated with the
> vIOMMU for each guest device. This allows the host kernel to establish a
> virtual SID to physical SID mapping, which is required for handling
> invalidations and event reporting.
>
> An translate HWPT is allocated based on the guest STE configuration and
> attached to the device when the guest issues SMMU_CMD_CFGI_STE or
> SMMU_CMD_CFGI_STE_RANGE, provided the STE enables S1 translation.
>
> If the guest STE is invalid or S1 translation is disabled, the device is
> attached to one of the pre-allocated ABORT or BYPASS HWPTs instead.
>
> While at it, export both smmu_find_ste() and smmuv3_flush_config() for
> use here.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/arm/smmuv3-accel.c | 193 +++++++++++++++++++++++++++++++++++++++
> hw/arm/smmuv3-accel.h | 23 +++++
> hw/arm/smmuv3-internal.h | 20 ++++
> hw/arm/smmuv3.c | 18 +++-
> hw/arm/trace-events | 2 +
> 5 files changed, 253 insertions(+), 3 deletions(-)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index d4d65299a8..c74e95a0ea 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -28,6 +28,191 @@ MemoryRegion root;
> MemoryRegion sysmem;
> static AddressSpace *shared_as_sysmem;
>
> +static bool
> +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error **errp)
> +{
> + SMMUViommu *vsmmu = accel_dev->vsmmu;
> + IOMMUFDVdev *vdev;
> + uint32_t vdevice_id;
> +
> + if (!accel_dev->idev || accel_dev->vdev) {
> + return true;
> + }
> +
> + if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev->devid,
> + vsmmu->viommu.viommu_id, sid,
> + &vdevice_id, errp)) {
> + return false;
> + }
> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> + vsmmu->bypass_hwpt_id, errp)) {
> + iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
> + return false;
> + }
> +
> + vdev = g_new(IOMMUFDVdev, 1);
> + vdev->vdevice_id = vdevice_id;
> + vdev->virt_id = sid;
> + accel_dev->vdev = vdev;
> + return true;
> +}
> +
> +static bool
> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort,
> + Error **errp)+{
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + uint32_t hwpt_id;
> +
> + if (!s1_hwpt || !accel_dev->vsmmu) {
> + return true;
> + }
> +
> + if (abort) {
> + hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> + } else {
> + hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> + }
> +
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> + return false;
> + }
> + trace_smmuv3_accel_uninstall_nested_ste(smmu_get_sid(&accel_dev->sdev),
> + abort ? "abort" : "bypass",
> + hwpt_id);
> +
> + iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
> + accel_dev->s1_hwpt = NULL;
> + g_free(s1_hwpt);
> + return true;
> +}
> +
> +static bool
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> + uint32_t data_type, uint32_t data_len,
> + void *data, Error **errp)
the name is very close to the caller function, ie.
smmuv3_accel_install_nested_ste which also takes a sdev.
I would rename to smmuv3_accel_install_hwpt() or something alike
> +{
> + SMMUViommu *vsmmu = accel_dev->vsmmu;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + uint32_t flags = 0;
> +
> + if (!idev || !vsmmu) {
> + error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
> + smmu_get_sid(&accel_dev->sdev));
> + return false;
> + }
> +
> + if (s1_hwpt) {
> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> + return false;
> + }
> + }
> +
> + s1_hwpt = g_new0(SMMUS1Hwpt, 1);
> + s1_hwpt->iommufd = idev->iommufd;
> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + vsmmu->viommu.viommu_id, flags,
> + data_type, data_len, data,
> + &s1_hwpt->hwpt_id, errp)) {
> + return false;
> + }
> +
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt->hwpt_id, errp)) {
> + iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
> + return false;
> + }
> + accel_dev->s1_hwpt = s1_hwpt;
> + return true;
> +}
> +
> +bool
> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> + Error **errp)
> +{
> + SMMUv3AccelDevice *accel_dev;
> + SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
> + .inval_ste_allowed = true};
> + struct iommu_hwpt_arm_smmuv3 nested_data = {};
> + uint64_t ste_0, ste_1;
> + uint32_t config;
> + STE ste;
> + int ret;
> +
> + if (!s->accel) {
don't you want to check !s->vsmmu as well done in
smmuv3_accel_install_nested_ste_range()
> + return true;
> + }
> +
> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> + if (!accel_dev->vsmmu) {
> + return true;
> + }
> +
> + if (!smmuv3_accel_alloc_vdev(accel_dev, sid, errp)) {
> + return false;
> + }
> +
> + ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
> + if (ret) {
> + error_setg(errp, "Failed to find STE for Device 0x%x", sid);
> + return true;
returning true while setting errp looks wrong to me.
> + }
> +
> + config = STE_CONFIG(&ste);
> + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> + STE_CFG_ABORT(config),
> + errp)) {
> + return false;
> + }
> + smmuv3_flush_config(sdev);
> + return true;
> + }
> +
> + ste_0 = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
> + ste_1 = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
> + nested_data.ste[0] = cpu_to_le64(ste_0 & STE0_MASK);
> + nested_data.ste[1] = cpu_to_le64(ste_1 & STE1_MASK);
> +
> + if (!smmuv3_accel_dev_install_nested_ste(accel_dev,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(nested_data),
> + &nested_data, errp)) {
> + error_append_hint(errp, "Unable to install sid=0x%x nested STE="
> + "0x%"PRIx64":=0x%"PRIx64"", sid,
nit: why ":=" between both 64b?
> + (uint64_t)le64_to_cpu(nested_data.ste[1]),
> + (uint64_t)le64_to_cpu(nested_data.ste[0]));
> + return false;
in case of various failure cases, do we need to free the vdev?
> + }
> + trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
> + nested_data.ste[0]);
> + return true;
> +}
> +
> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> + Error **errp)
> +{
> + SMMUv3AccelState *s_accel = s->s_accel;
> + SMMUv3AccelDevice *accel_dev;
> +
> + if (!s_accel || !s_accel->vsmmu) {
> + return true;
> + }
> +
> + QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
> + uint32_t sid = smmu_get_sid(&accel_dev->sdev);
> +
> + if (sid >= range->start && sid <= range->end) {
> + if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
> + sid, errp)) {
> + return false;
> + }
> + }
> + }
> + return true;
> +}
> +
> static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
> PCIBus *bus, int devfn)
> {
> @@ -154,6 +339,7 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> SMMUv3State *s = ARM_SMMUV3(bs);
> SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus);
> SMMUv3AccelDevice *accel_dev;
> + IOMMUFDVdev *vdev;
> SMMUViommu *vsmmu;
> SMMUDevice *sdev;
> uint16_t sid;
> @@ -182,6 +368,13 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> trace_smmuv3_accel_unset_iommu_device(devfn, sid);
>
> vsmmu = s->s_accel->vsmmu;
> + vdev = accel_dev->vdev;
> + if (vdev) {
> + iommufd_backend_free_id(vsmmu->iommufd, vdev->vdevice_id);
> + g_free(vdev);
> + accel_dev->vdev = NULL;
> + }
> +
> if (QLIST_EMPTY(&vsmmu->device_list)) {
> iommufd_backend_free_id(vsmmu->iommufd, vsmmu->bypass_hwpt_id);
> iommufd_backend_free_id(vsmmu->iommufd, vsmmu->abort_hwpt_id);
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index d81f90c32c..73b44cd7be 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -27,9 +27,16 @@ typedef struct SMMUViommu {
> QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> } SMMUViommu;
>
> +typedef struct SMMUS1Hwpt {
> + IOMMUFDBackend *iommufd;
> + uint32_t hwpt_id;
> +} SMMUS1Hwpt;
> +
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
> HostIOMMUDeviceIOMMUFD *idev;
> + SMMUS1Hwpt *s1_hwpt;
> + IOMMUFDVdev *vdev;
> SMMUViommu *vsmmu;
> QLIST_ENTRY(SMMUv3AccelDevice) next;
> } SMMUv3AccelDevice;
> @@ -40,10 +47,26 @@ typedef struct SMMUv3AccelState {
>
> #ifdef CONFIG_ARM_SMMUV3_ACCEL
> void smmuv3_accel_init(SMMUv3State *s);
> +bool smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> + Error **errp);
> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> + Error **errp);
> #else
> static inline void smmuv3_accel_init(SMMUv3State *s)
> {
> }
> +static inline bool
> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
> + Error **errp)
> +{
> + return true;
> +}
> +static inline bool
> +smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> + Error **errp)
> +{
> + return true;
> +}
> #endif
>
> #endif /* HW_ARM_SMMUV3_ACCEL_H */
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index 03d86cfc5c..5fd88b4257 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -547,6 +547,9 @@ typedef struct CD {
> uint32_t word[16];
> } CD;
>
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event);
> +void smmuv3_flush_config(SMMUDevice *sdev);
> +
> /* STE fields */
>
> #define STE_VALID(x) extract32((x)->word[0], 0, 1)
> @@ -586,6 +589,23 @@ typedef struct CD {
> #define SMMU_STE_VALID (1ULL << 0)
> #define SMMU_STE_CFG_BYPASS (1ULL << 3)
>
> +#define STE0_V MAKE_64BIT_MASK(0, 1)
> +#define STE0_CONFIG MAKE_64BIT_MASK(1, 3)
> +#define STE0_S1FMT MAKE_64BIT_MASK(4, 2)
> +#define STE0_CTXPTR MAKE_64BIT_MASK(6, 50)
> +#define STE0_S1CDMAX MAKE_64BIT_MASK(59, 5)
> +#define STE0_MASK (STE0_S1CDMAX | STE0_CTXPTR | STE0_S1FMT | STE0_CONFIG | \
> + STE0_V)
> +
> +#define STE1_S1DSS MAKE_64BIT_MASK(0, 2)
> +#define STE1_S1CIR MAKE_64BIT_MASK(2, 2)
> +#define STE1_S1COR MAKE_64BIT_MASK(4, 2)
> +#define STE1_S1CSH MAKE_64BIT_MASK(6, 2)
> +#define STE1_S1STALLD MAKE_64BIT_MASK(27, 1)
> +#define STE1_EATS MAKE_64BIT_MASK(28, 2)
> +#define STE1_MASK (STE1_EATS | STE1_S1STALLD | STE1_S1CSH | STE1_S1COR | \
> + STE1_S1CIR | STE1_S1DSS)
> +
> #define SMMU_GBPA_ABORT (1UL << 20)
>
> static inline int oas2bits(int oas_field)
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index ef991cb7d8..1fd8aaa0c7 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -630,8 +630,7 @@ bad_ste:
> * Supports linear and 2-level stream table
> * Return 0 on success, -EINVAL otherwise
> */
> -static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
> - SMMUEventInfo *event)
> +int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, SMMUEventInfo *event)
> {
> dma_addr_t addr, strtab_base;
> uint32_t log2size;
> @@ -900,7 +899,7 @@ static SMMUTransCfg *smmuv3_get_config(SMMUDevice *sdev, SMMUEventInfo *event)
> return cfg;
> }
>
> -static void smmuv3_flush_config(SMMUDevice *sdev)
> +void smmuv3_flush_config(SMMUDevice *sdev)
> {
> SMMUv3State *s = sdev->smmu;
> SMMUState *bc = &s->smmu_state;
> @@ -1330,6 +1329,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> {
> uint32_t sid = CMD_SID(&cmd);
> SMMUDevice *sdev = smmu_find_sdev(bs, sid);
> + Error *local_err = NULL;
>
> if (CMD_SSEC(&cmd)) {
> cmd_error = SMMU_CERROR_ILL;
> @@ -1341,6 +1341,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> }
>
> trace_smmuv3_cmdq_cfgi_ste(sid);
> + if (!smmuv3_accel_install_nested_ste(s, sdev, sid, &local_err)) {
> + error_report_err(local_err);
> + cmd_error = SMMU_CERROR_ILL;
> + break;
> + }
> smmuv3_flush_config(sdev);
>
> break;
> @@ -1350,6 +1355,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> uint32_t sid = CMD_SID(&cmd), mask;
> uint8_t range = CMD_STE_RANGE(&cmd);
> SMMUSIDRange sid_range;
> + Error *local_err = NULL;
>
> if (CMD_SSEC(&cmd)) {
> cmd_error = SMMU_CERROR_ILL;
> @@ -1361,6 +1367,12 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> sid_range.end = sid_range.start + mask;
>
> trace_smmuv3_cmdq_cfgi_ste_range(sid_range.start, sid_range.end);
> + if (!smmuv3_accel_install_nested_ste_range(s, &sid_range,
> + &local_err)) {
> + error_report_err(local_err);
> + cmd_error = SMMU_CERROR_ILL;
> + break;
> + }
> smmu_configs_inv_sid_range(bs, sid_range);
> break;
> }
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index 49c0460f30..2e0b1f8f6f 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -69,6 +69,8 @@ smmu_reset_exit(void) ""
> #smmuv3-accel.c
> smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (idev devid=0x%x)"
> smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (idev devid=0x%x)"
> +smmuv3_accel_install_nested_ste(uint32_t sid, uint64_t ste_1, uint64_t ste_0) "sid=%d ste=%"PRIx64":%"PRIx64
> +smmuv3_accel_uninstall_nested_ste(uint32_t sid, const char *ste_cfg, uint32_t hwpt_id) "sid=%d attached %s hwpt_id=%u"
>
> # strongarm.c
> strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
Eric
Hi Eric,
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 04 November 2025 11:06
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Jason Gunthorpe <jgg@nvidia.com>; Nicolin
> Chen <nicolinc@nvidia.com>; ddutile@redhat.com; berrange@redhat.com;
> Nathan Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
> smostafa@google.com; wangzhou1@hisilicon.com;
> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
> Krishnakant Jaju <kjaju@nvidia.com>
> Subject: Re: [PATCH v5 13/32] hw/arm/smmuv3-accel: Add nested vSTE
> install/uninstall support
>
> External email: Use caution opening links or attachments
>
>
> Hi Shameer,
>
> On 10/31/25 11:49 AM, Shameer Kolothum wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> >
> > A device placed behind a vSMMU instance must have corresponding vSTEs
> > (bypass, abort, or translate) installed. The bypass and abort proxy
> > nested HWPTs are pre-allocated.
> >
> > For translat HWPT, a vDEVICE object is allocated and associated with
> > the vIOMMU for each guest device. This allows the host kernel to
> > establish a virtual SID to physical SID mapping, which is required for
> > handling invalidations and event reporting.
> >
> > An translate HWPT is allocated based on the guest STE configuration
> > and attached to the device when the guest issues SMMU_CMD_CFGI_STE or
> > SMMU_CMD_CFGI_STE_RANGE, provided the STE enables S1 translation.
> >
> > If the guest STE is invalid or S1 translation is disabled, the device
> > is attached to one of the pre-allocated ABORT or BYPASS HWPTs instead.
> >
> > While at it, export both smmu_find_ste() and smmuv3_flush_config() for
> > use here.
> >
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> > Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> > hw/arm/smmuv3-accel.c | 193
> +++++++++++++++++++++++++++++++++++++++
> > hw/arm/smmuv3-accel.h | 23 +++++
> > hw/arm/smmuv3-internal.h | 20 ++++
> > hw/arm/smmuv3.c | 18 +++-
> > hw/arm/trace-events | 2 +
> > 5 files changed, 253 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c index
> > d4d65299a8..c74e95a0ea 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -28,6 +28,191 @@ MemoryRegion root; MemoryRegion sysmem;
> static
> > AddressSpace *shared_as_sysmem;
> >
> > +static bool
> > +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error
> > +**errp) {
> > + SMMUViommu *vsmmu = accel_dev->vsmmu;
> > + IOMMUFDVdev *vdev;
> > + uint32_t vdevice_id;
> > +
> > + if (!accel_dev->idev || accel_dev->vdev) {
> > + return true;
> > + }
> > +
> > + if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev-
> >devid,
> > + vsmmu->viommu.viommu_id, sid,
> > + &vdevice_id, errp)) {
> > + return false;
> > + }
> > + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> > + vsmmu->bypass_hwpt_id, errp)) {
> > + iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
> > + return false;
> > + }
> > +
> > + vdev = g_new(IOMMUFDVdev, 1);
> > + vdev->vdevice_id = vdevice_id;
> > + vdev->virt_id = sid;
> > + accel_dev->vdev = vdev;
> > + return true;
> > +}
> > +
> > +static bool
> > +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev,
> bool abort,
> > + Error **errp)+{
> > + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> > + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> > + uint32_t hwpt_id;
> > +
> > + if (!s1_hwpt || !accel_dev->vsmmu) {
> > + return true;
> > + }
> > +
> > + if (abort) {
> > + hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> > + } else {
> > + hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> > + }
> > +
> > + if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> > + return false;
> > + }
> > + trace_smmuv3_accel_uninstall_nested_ste(smmu_get_sid(&accel_dev-
> >sdev),
> > + abort ? "abort" : "bypass",
> > + hwpt_id);
> > +
> > + iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
> > + accel_dev->s1_hwpt = NULL;
> > + g_free(s1_hwpt);
> > + return true;
> > +}
> > +
> > +static bool
> > +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> > + uint32_t data_type, uint32_t data_len,
> > + void *data, Error **errp)
> the name is very close to the caller function, ie.
> smmuv3_accel_install_nested_ste which also takes a sdev.
> I would rename to smmuv3_accel_install_hwpt() or something alike
This one is going to change a bit based on Nicolin's feedback on taking
care of SMMUEN/GBPA values.
https://lore.kernel.org/all/aQVLzfaxxSfw1HBL@Asurada-Nvidia/
Probably smmuv3_accel_attach_nested_hwpt() suits better considering
that’s what it finally ends up doing.
> > +{
> > + SMMUViommu *vsmmu = accel_dev->vsmmu;
> > + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> > + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> > + uint32_t flags = 0;
> > +
> > + if (!idev || !vsmmu) {
> > + error_setg(errp, "Device 0x%x has no associated IOMMU dev or
> vIOMMU",
> > + smmu_get_sid(&accel_dev->sdev));
> > + return false;
> > + }
> > +
> > + if (s1_hwpt) {
> > + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> > + return false;
> > + }
> > + }
> > +
> > + s1_hwpt = g_new0(SMMUS1Hwpt, 1);
> > + s1_hwpt->iommufd = idev->iommufd;
> > + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> > + vsmmu->viommu.viommu_id, flags,
> > + data_type, data_len, data,
> > + &s1_hwpt->hwpt_id, errp)) {
> > + return false;
> > + }
> > +
> > + if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt-
> >hwpt_id, errp)) {
> > + iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
> > + return false;
> > + }
> > + accel_dev->s1_hwpt = s1_hwpt;
> > + return true;
> > +}
> > +
> > +bool
> > +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev,
> int sid,
> > + Error **errp) {
> > + SMMUv3AccelDevice *accel_dev;
> > + SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
> > + .inval_ste_allowed = true};
> > + struct iommu_hwpt_arm_smmuv3 nested_data = {};
> > + uint64_t ste_0, ste_1;
> > + uint32_t config;
> > + STE ste;
> > + int ret;
> > +
> > + if (!s->accel) {
> don't you want to check !s->vsmmu as well done in
> smmuv3_accel_install_nested_ste_range()
Nicolin has a suggestion to merge struct SMMUViommu and
SMMUv3AccelState into one and avoid the extra layering. I will
attempt that and all these checking might change as a result.
> > + return true;
> > + }
> > +
> > + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> > + if (!accel_dev->vsmmu) {
> > + return true;
> > + }
> > +
> > + if (!smmuv3_accel_alloc_vdev(accel_dev, sid, errp)) {
> > + return false;
> > + }
> > +
> > + ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
> > + if (ret) {
> > + error_setg(errp, "Failed to find STE for Device 0x%x", sid);
> > + return true;
> returning true while setting errp looks wrong to me.
Right, will just return true from here. I am not sure under what circumstances
we will hit here though.
> + }
> +
> + config = STE_CONFIG(&ste);
> + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> + STE_CFG_ABORT(config),
> + errp)) {
> + return false;
> + }
> + smmuv3_flush_config(sdev);
> + return true;
> + }
> +
> + ste_0 = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
> + ste_1 = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
> + nested_data.ste[0] = cpu_to_le64(ste_0 & STE0_MASK);
> + nested_data.ste[1] = cpu_to_le64(ste_1 & STE1_MASK);
> +
> + if (!smmuv3_accel_dev_install_nested_ste(accel_dev,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(nested_data),
> + &nested_data, errp)) {
> + error_append_hint(errp, "Unable to install sid=0x%x nested STE="
> + "0x%"PRIx64":=0x%"PRIx64"", sid,
nit: why ":=" between both 64b?
> + (uint64_t)le64_to_cpu(nested_data.ste[1]),
> + (uint64_t)le64_to_cpu(nested_data.ste[0]));
> + return false;
in case of various failure cases, do we need to free the vdev?
I don't think we need to free the vdev corresponding to this vSID on failures
here. I think the association between vSID and host SID can remain
intact even if the nested HWPT alloc/attach fails for whatever reason.
Thanks,
Shameer
On Tue, Nov 04, 2025 at 04:26:56AM -0800, Shameer Kolothum wrote: > > > +static bool > > > +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev, > > > + uint32_t data_type, uint32_t data_len, > > > + void *data, Error **errp) > > the name is very close to the caller function, ie. > > smmuv3_accel_install_nested_ste which also takes a sdev. > > I would rename to smmuv3_accel_install_hwpt() or something alike > > This one is going to change a bit based on Nicolin's feedback on taking > care of SMMUEN/GBPA values. > > Probably smmuv3_accel_attach_nested_hwpt() suits better considering > that’s what it finally ends up doing. Eric is right, because the current version passes in hwpt data for allocation. Yet, my new version passes in STE, so naming could keep "ste" IMHO. Thanks Nicolin
On 11/4/25 1:26 PM, Shameer Kolothum wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: 04 November 2025 11:06
>> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; Jason Gunthorpe <jgg@nvidia.com>; Nicolin
>> Chen <nicolinc@nvidia.com>; ddutile@redhat.com; berrange@redhat.com;
>> Nathan Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
>> smostafa@google.com; wangzhou1@hisilicon.com;
>> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
>> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
>> Krishnakant Jaju <kjaju@nvidia.com>
>> Subject: Re: [PATCH v5 13/32] hw/arm/smmuv3-accel: Add nested vSTE
>> install/uninstall support
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 10/31/25 11:49 AM, Shameer Kolothum wrote:
>>> From: Nicolin Chen <nicolinc@nvidia.com>
>>>
>>> A device placed behind a vSMMU instance must have corresponding vSTEs
>>> (bypass, abort, or translate) installed. The bypass and abort proxy
>>> nested HWPTs are pre-allocated.
>>>
>>> For a translate HWPT, a vDEVICE object is allocated and associated with
>>> the vIOMMU for each guest device. This allows the host kernel to
>>> establish a virtual SID to physical SID mapping, which is required for
>>> handling invalidations and event reporting.
>>>
>>> A translate HWPT is allocated based on the guest STE configuration
>>> and attached to the device when the guest issues SMMU_CMD_CFGI_STE or
>>> SMMU_CMD_CFGI_STE_RANGE, provided the STE enables S1 translation.
>>>
>>> If the guest STE is invalid or S1 translation is disabled, the device
>>> is attached to one of the pre-allocated ABORT or BYPASS HWPTs instead.
>>>
>>> While at it, export both smmu_find_ste() and smmuv3_flush_config() for
>>> use here.
>>>
>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Shameer Kolothum
>> <shameerali.kolothum.thodi@huawei.com>
>>> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
>>> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
>>> ---
>>> hw/arm/smmuv3-accel.c | 193
>> +++++++++++++++++++++++++++++++++++++++
>>> hw/arm/smmuv3-accel.h | 23 +++++
>>> hw/arm/smmuv3-internal.h | 20 ++++
>>> hw/arm/smmuv3.c | 18 +++-
>>> hw/arm/trace-events | 2 +
>>> 5 files changed, 253 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c index
>>> d4d65299a8..c74e95a0ea 100644
>>> --- a/hw/arm/smmuv3-accel.c
>>> +++ b/hw/arm/smmuv3-accel.c
>>> @@ -28,6 +28,191 @@ MemoryRegion root; MemoryRegion sysmem;
>> static
>>> AddressSpace *shared_as_sysmem;
>>>
>>> +static bool
>>> +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error
>>> +**errp) {
>>> + SMMUViommu *vsmmu = accel_dev->vsmmu;
>>> + IOMMUFDVdev *vdev;
>>> + uint32_t vdevice_id;
>>> +
>>> + if (!accel_dev->idev || accel_dev->vdev) {
>>> + return true;
>>> + }
>>> +
>>> + if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev-
>>> devid,
>>> + vsmmu->viommu.viommu_id, sid,
>>> + &vdevice_id, errp)) {
>>> + return false;
>>> + }
>>> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
>>> + vsmmu->bypass_hwpt_id, errp)) {
>>> + iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
>>> + return false;
>>> + }
>>> +
>>> + vdev = g_new(IOMMUFDVdev, 1);
>>> + vdev->vdevice_id = vdevice_id;
>>> + vdev->virt_id = sid;
>>> + accel_dev->vdev = vdev;
>>> + return true;
>>> +}
>>> +
>>> +static bool
>>> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev,
>> bool abort,
>>> + Error **errp)+{
>>> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
>>> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
>>> + uint32_t hwpt_id;
>>> +
>>> + if (!s1_hwpt || !accel_dev->vsmmu) {
>>> + return true;
>>> + }
>>> +
>>> + if (abort) {
>>> + hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
>>> + } else {
>>> + hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
>>> + }
>>> +
>>> + if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
>>> + return false;
>>> + }
>>> + trace_smmuv3_accel_uninstall_nested_ste(smmu_get_sid(&accel_dev-
>>> sdev),
>>> + abort ? "abort" : "bypass",
>>> + hwpt_id);
>>> +
>>> + iommufd_backend_free_id(s1_hwpt->iommufd, s1_hwpt->hwpt_id);
>>> + accel_dev->s1_hwpt = NULL;
>>> + g_free(s1_hwpt);
>>> + return true;
>>> +}
>>> +
>>> +static bool
>>> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
>>> + uint32_t data_type, uint32_t data_len,
>>> + void *data, Error **errp)
>> the name is very close to the caller function, ie.
>> smmuv3_accel_install_nested_ste which also takes a sdev.
>> I would rename to smmuv3_accel_install_hwpt() or something alike
> This one is going to change a bit based on Nicolin's feedback on taking
> care of SMMUEN/GBPA values.
> https://lore.kernel.org/all/aQVLzfaxxSfw1HBL@Asurada-Nvidia/
OK
>
> Probably smmuv3_accel_attach_nested_hwpt() suits better considering
> that’s what it finally ends up doing.
>
>>> +{
>>> + SMMUViommu *vsmmu = accel_dev->vsmmu;
>>> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
>>> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
>>> + uint32_t flags = 0;
>>> +
>>> + if (!idev || !vsmmu) {
>>> + error_setg(errp, "Device 0x%x has no associated IOMMU dev or
>> vIOMMU",
>>> + smmu_get_sid(&accel_dev->sdev));
>>> + return false;
>>> + }
>>> +
>>> + if (s1_hwpt) {
>>> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
>>> + return false;
>>> + }
>>> + }
>>> +
>>> + s1_hwpt = g_new0(SMMUS1Hwpt, 1);
>>> + s1_hwpt->iommufd = idev->iommufd;
>>> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
>>> + vsmmu->viommu.viommu_id, flags,
>>> + data_type, data_len, data,
>>> + &s1_hwpt->hwpt_id, errp)) {
>>> + return false;
>>> + }
>>> +
>>> + if (!host_iommu_device_iommufd_attach_hwpt(idev, s1_hwpt-
>>> hwpt_id, errp)) {
>>> + iommufd_backend_free_id(idev->iommufd, s1_hwpt->hwpt_id);
>>> + return false;
>>> + }
>>> + accel_dev->s1_hwpt = s1_hwpt;
>>> + return true;
>>> +}
>>> +
>>> +bool
>>> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev,
>> int sid,
>>> + Error **errp) {
>>> + SMMUv3AccelDevice *accel_dev;
>>> + SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid,
>>> + .inval_ste_allowed = true};
>>> + struct iommu_hwpt_arm_smmuv3 nested_data = {};
>>> + uint64_t ste_0, ste_1;
>>> + uint32_t config;
>>> + STE ste;
>>> + int ret;
>>> +
>>> + if (!s->accel) {
>> don't you want to check !s->vsmmu as well done in
>> smmuv3_accel_install_nested_ste_range()
> Nicolin has a suggestion to merge struct SMMUViommu and
> SMMUv3AccelState into one and avoid the extra layering. I will
> attempt that and all these checking might change as a result.
interesting idea indeed.
>
>>> + return true;
>>> + }
>>> +
>>> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
>>> + if (!accel_dev->vsmmu) {
>>> + return true;
>>> + }
>>> +
>>> + if (!smmuv3_accel_alloc_vdev(accel_dev, sid, errp)) {
>>> + return false;
>>> + }
>>> +
>>> + ret = smmu_find_ste(sdev->smmu, sid, &ste, &event);
>>> + if (ret) {
>>> + error_setg(errp, "Failed to find STE for Device 0x%x", sid);
>>> + return true;
>> returning true while setting errp looks wrong to me.
> Right, will just return true from here. I am not sure under what circumstances
> we will hit here though.
>
>> + }
>> +
>> + config = STE_CONFIG(&ste);
>> + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
>> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
>> + STE_CFG_ABORT(config),
>> + errp)) {
>> + return false;
>> + }
>> + smmuv3_flush_config(sdev);
>> + return true;
>> + }
>> +
>> + ste_0 = (uint64_t)ste.word[0] | (uint64_t)ste.word[1] << 32;
>> + ste_1 = (uint64_t)ste.word[2] | (uint64_t)ste.word[3] << 32;
>> + nested_data.ste[0] = cpu_to_le64(ste_0 & STE0_MASK);
>> + nested_data.ste[1] = cpu_to_le64(ste_1 & STE1_MASK);
>> +
>> + if (!smmuv3_accel_dev_install_nested_ste(accel_dev,
>> + IOMMU_HWPT_DATA_ARM_SMMUV3,
>> + sizeof(nested_data),
>> + &nested_data, errp)) {
>> + error_append_hint(errp, "Unable to install sid=0x%x nested STE="
>> + "0x%"PRIx64":=0x%"PRIx64"", sid,
> nit: why ":=" between both 64b?
>> + (uint64_t)le64_to_cpu(nested_data.ste[1]),
>> + (uint64_t)le64_to_cpu(nested_data.ste[0]));
>> + return false;
> in case of various failure cases, do we need to free the vdev?
>
> I don't think we need to free the vdev corresponding to this vSID on failures
> here. I think the association between vSID and host SID can remain
> intact even if the nested HWPT alloc/attach fails for whatever reason.
OK then worth a comment
Eric
>
> Thanks,
> Shameer
On Fri, Oct 31, 2025 at 10:49:46AM +0000, Shameer Kolothum wrote:
> +static bool
> +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error **errp)
> +{
> + SMMUViommu *vsmmu = accel_dev->vsmmu;
> + IOMMUFDVdev *vdev;
> + uint32_t vdevice_id;
> +
> + if (!accel_dev->idev || accel_dev->vdev) {
> + return true;
> + }
We probably don't need to check !accel_dev->dev. It should have
been blocked by its caller, which does block !accel_dev->vsmmu.
Once we fix the missing "accel_dev->vsmmu NULL", it should work.
> +
> + if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev->devid,
> + vsmmu->viommu.viommu_id, sid,
> + &vdevice_id, errp)) {
> + return false;
> + }
> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> + vsmmu->bypass_hwpt_id, errp)) {
> + iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
> + return false;
> + }
This should check SMMUEN bit?
Linux driver (as an example) seems to set CMDQEN and install all
the default bypass STEs, before SMMUEN=1.
In this case, the target hwpt here should follow guest's GBPA.
> +static bool
> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool abort,
> + Error **errp)
> +{
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + uint32_t hwpt_id;
> +
> + if (!s1_hwpt || !accel_dev->vsmmu) {
> + return true;
> + }
> +
> + if (abort) {
> + hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> + } else {
> + hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> + }
This should probably check SMMUEN/GBPA as well.
Likely we need "enabled" and "gbpa_abort" flags in SMMUState.
> +static bool
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> + uint32_t data_type, uint32_t data_len,
> + void *data, Error **errp)
> +{
> + SMMUViommu *vsmmu = accel_dev->vsmmu;
> + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> + uint32_t flags = 0;
> +
> + if (!idev || !vsmmu) {
> + error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
> + smmu_get_sid(&accel_dev->sdev));
> + return false;
> + }
> +
> + if (s1_hwpt) {
> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> + return false;
> + }
> + }
I think we could have some improvements here.
The current flow is:
(attached to s1_hwpt1)
attach to bypass/abort_hwpt // no issue though.
free s1_hwpt1
alloc s2_hwpt2
attach to s2_hwpt2
It could have been a flow like replace() in the kernel:
(attached to s1_hwpt1)
alloc s2_hwpt2
attach to s2_hwpt2 /* skipping bypass/abort */
free s1_hwpt
> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,\
[...]
> + config = STE_CONFIG(&ste);
> + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> + STE_CFG_ABORT(config),
This smmuv3_accel_uninstall_nested_ste() feels a bit redundant now.
Perhaps we could try something like this:
#define accel_dev_to_smmuv3(dev) ARM_SMMUV3(&dev->sdev.smmu)
static bool smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
int sid, STE *ste)
{
SMMUv3State *s = accel_dev_to_smmuv3(accel_dev);
HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
uint32_t config = STE_CONFIG(ste);
SMMUS1Hwpt *s1_hwpt = NULL;
uint64_t ste_0, ste_1;
uint32_t hwpt_id = 0;
if (!s->enabled) {
if (s->gbpa_abort) {
hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
} else {
hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
}
} else {
if (!STE_VALID(ste) || STE_CFG_ABORT(config)) {
hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
} else if (STE_CFG_BYPASS(config))
hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
} else {
// FIXME handle STE_CFG_S2_ENABLED()
}
}
if (!hwpt_id) {
uint64_t ste_0 = (uint64_t)ste->word[0] | (uint64_t)ste->word[1] << 32;
uint64_t ste_1 = (uint64_t)ste->word[2] | (uint64_t)ste->word[3] << 32;
struct iommu_hwpt_arm_smmuv3 nested_data = {
.ste[2] = {
cpu_to_le64(ste_0 & STE0_MASK),
cpu_to_le64(ste_1 & STE1_MASK),
},
};
trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
nested_data.ste[0]);
s1_hwpt = g_new0(SMMUS1Hwpt, 1);
[...]
iommufd_backend_alloc_hwpt(..., &s1_hwpt->hwpt_id);
hwpt_id = s1_hwpt->hwpt_id;
}
host_iommu_device_iommufd_attach_hwpt(.., hwpt_id);
if (accel_dev->s1_hwpt) {
iommufd_backend_free_id(idev->iommufd, accel_dev->s1_hwpt->hwpt_id);
}
accel_dev->s1_hwpt = s1_hwpt;
return true;
}
> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange *range,
> + Error **errp)
> +{
> + SMMUv3AccelState *s_accel = s->s_accel;
> + SMMUv3AccelDevice *accel_dev;
> +
> + if (!s_accel || !s_accel->vsmmu) {
> + return true;
> + }
> +
> + QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
> + uint32_t sid = smmu_get_sid(&accel_dev->sdev);
> +
> + if (sid >= range->start && sid <= range->end) {
> + if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
> + sid, errp)) {
> + return false;
> + }
> + }
This is a bit tricky..
I think CFGI_STE_RANGE shouldn't stop in the middle, if one of the
STEs fails.
That being said, HW doesn't seem to propagate C_BAD_STE during a
CFGI_STE or CFGI_STE_RANGE, IIUIC. It reports C_BAD_STE event when
a transaction starts. If we want to perfectly mimic the hardware,
we'd have to set up a bad STE down to the HW, which will trigger a
C_BAD_STE vevent to be forwarded by vEVENTQ.
Nicolin
> -----Original Message-----
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: 31 October 2025 23:53
> To: Shameer Kolothum <skolothumtho@nvidia.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> eric.auger@redhat.com; peter.maydell@linaro.org; Jason Gunthorpe
> <jgg@nvidia.com>; ddutile@redhat.com; berrange@redhat.com; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>;
> smostafa@google.com; wangzhou1@hisilicon.com;
> jiangkunkun@huawei.com; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; yi.l.liu@intel.com;
> Krishnakant Jaju <kjaju@nvidia.com>
> Subject: Re: [PATCH v5 13/32] hw/arm/smmuv3-accel: Add nested vSTE
> install/uninstall support
>
> On Fri, Oct 31, 2025 at 10:49:46AM +0000, Shameer Kolothum wrote:
> > +static bool
> > +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error
> **errp)
> > +{
> > + SMMUViommu *vsmmu = accel_dev->vsmmu;
> > + IOMMUFDVdev *vdev;
> > + uint32_t vdevice_id;
> > +
> > + if (!accel_dev->idev || accel_dev->vdev) {
> > + return true;
> > + }
>
> We probably don't need to check !accel_dev->dev. It should have
> been blocked by its caller, which does block !accel_dev->vsmmu.
> Once we fix the missing "accel_dev->vsmmu NULL", it should work.
Ok.
>
> > +
> > + if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev-
> >devid,
> > + vsmmu->viommu.viommu_id, sid,
> > + &vdevice_id, errp)) {
> > + return false;
> > + }
> > + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> > + vsmmu->bypass_hwpt_id, errp)) {
> > + iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
> > + return false;
> > + }
>
> This should check SMMUEN bit?
>
> Linux driver (as an example) seems to set CMDQEN and install all
> the default bypass STEs, before SMMUEN=1.
Yeah. For RMR I think.
> In this case, the target hwpt here should follow guest's GBPA.
>
> > +static bool
> > +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev,
> bool abort,
> > + Error **errp)
> > +{
> > + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> > + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> > + uint32_t hwpt_id;
> > +
> > + if (!s1_hwpt || !accel_dev->vsmmu) {
> > + return true;
> > + }
> > +
> > + if (abort) {
> > + hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> > + } else {
> > + hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> > + }
>
> This should probably check SMMUEN/GBPA as well.
>
> Likely we need "enabled" and "gbpa_abort" flags in SMMUState.
>
> > +static bool
> > +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> > + uint32_t data_type, uint32_t data_len,
> > + void *data, Error **errp)
> > +{
> > + SMMUViommu *vsmmu = accel_dev->vsmmu;
> > + SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> > + HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> > + uint32_t flags = 0;
> > +
> > + if (!idev || !vsmmu) {
> > + error_setg(errp, "Device 0x%x has no associated IOMMU dev or
> vIOMMU",
> > + smmu_get_sid(&accel_dev->sdev));
> > + return false;
> > + }
> > +
> > + if (s1_hwpt) {
> > + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> > + return false;
> > + }
> > + }
>
> I think we could have some improvements here.
>
> The current flow is:
> (attached to s1_hwpt1)
> attach to bypass/abort_hwpt // no issue though.
> free s1_hwpt1
> alloc s2_hwpt2
> attach to s2_hwpt2
>
> It could have been a flow like replace() in the kernel:
> (attached to s1_hwpt1)
> alloc s2_hwpt2
> attach to s2_hwpt2 /* skipping bypass/abort */
> free s1_hwpt
Not sure I get the above, you mean in this _install_nested_ste() path,
we have a case where we need to alloc a S2 HWPT and attach?
> > +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev,
> int sid,\
> [...]
> > + config = STE_CONFIG(&ste);
> > + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> > + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> > + STE_CFG_ABORT(config),
>
> This smmuv3_accel_uninstall_nested_ste() feels a bit redundant now.
Agree. It crossed my mind too.
>
> Perhaps we could try something like this:
>
> #define accel_dev_to_smmuv3(dev) ARM_SMMUV3(&dev->sdev.smmu)
>
> static bool smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice
> *accel_dev,
> int sid, STE *ste)
> {
> SMMUv3State *s = accel_dev_to_smmuv3(accel_dev);
> HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> uint32_t config = STE_CONFIG(ste);
> SMMUS1Hwpt *s1_hwpt = NULL;
> uint64_t ste_0, ste_1;
> uint32_t hwpt_id = 0;
>
> if (!s->enabled) {
> if (s->gbpa_abort) {
> hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> } else {
> hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> }
> } else {
> if (!STE_VALID(ste) || STE_CFG_ABORT(config)) {
> hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> } else if (STE_CFG_BYPASS(config))
> hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> } else {
> // FIXME handle STE_CFG_S2_ENABLED()
> }
> }
>
> if (!hwpt_id) {
> uint64_t ste_0 = (uint64_t)ste->word[0] | (uint64_t)ste->word[1] << 32;
> uint64_t ste_1 = (uint64_t)ste->word[2] | (uint64_t)ste->word[3] << 32;
> struct iommu_hwpt_arm_smmuv3 nested_data = {
> .ste[2] = {
> cpu_to_le64(ste_0 & STE0_MASK),
> cpu_to_le64(ste_1 & STE1_MASK),
> },
> };
>
> trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
> nested_data.ste[0]);
> s1_hwpt = g_new0(SMMUS1Hwpt, 1);
> [...]
> iommufd_backend_alloc_hwpt(..., &s1_hwpt->hwpt_id);
> hwpt_id = s1_hwpt->hwpt_id;
> }
>
> host_iommu_device_iommufd_attach_hwpt(.., hwpt_id);
>
> if (accel_dev->s1_hwpt) {
> iommufd_backend_free_id(idev->iommufd, accel_dev->s1_hwpt-
> >hwpt_id);
> }
> accel_dev->s1_hwpt = s1_hwpt;
> return true;
> }
Ok. I will take a look at this.
> > +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s,
> SMMUSIDRange *range,
> > + Error **errp)
> > +{
> > + SMMUv3AccelState *s_accel = s->s_accel;
> > + SMMUv3AccelDevice *accel_dev;
> > +
> > + if (!s_accel || !s_accel->vsmmu) {
> > + return true;
> > + }
> > +
> > + QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
> > + uint32_t sid = smmu_get_sid(&accel_dev->sdev);
> > +
> > + if (sid >= range->start && sid <= range->end) {
> > + if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
> > + sid, errp)) {
> > + return false;
> > + }
> > + }
>
> This is a bit tricky..
>
> I think CFGI_STE_RANGE shouldn't stop in the middle, if one of the
> STEs fails.
True.
> That being said, HW doesn't seem to propagate C_BAD_STE during a
> CFGI_STE or CFGI_STE_RANGE, IIUIC. It reports C_BAD_STE event when
> a transaction starts. If we want to perfectly mimic the hardware,
> we'd have to set up a bad STE down to the HW, which will trigger a
> C_BAD_STE vevent to be forwarded by vEVENTQ.
I don't think we need to mimic that behaviour. We could return an event
from here to Guest if required or just have error_report().
Thanks,
Shameer
On Mon, Nov 03, 2025 at 07:11:18AM -0800, Shameer Kolothum wrote:
> > > + if (s1_hwpt) {
> > > + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> > > + return false;
> > > + }
> > > + }
> >
> > I think we could have some improvements here.
> >
> > The current flow is:
> > (attached to s1_hwpt1)
> > attach to bypass/abort_hwpt // no issue though.
> > free s1_hwpt1
> > alloc s2_hwpt2
> > attach to s2_hwpt2
> >
> > It could have been a flow like replace() in the kernel:
> > (attached to s1_hwpt1)
> > alloc s2_hwpt2
> > attach to s2_hwpt2 /* skipping bypass/abort */
> > free s1_hwpt
>
> Not sure I get the above, you mean in this _install_nested_ste() path,
> we have a case where we need to alloc a S2 HWPT and attach?
Oh no. s1_hwpt1 and s1_hwpt2. The point is that we don't really
need that bypass/abort attachment (i.e. the uninstall function)
when switching between two nested hwpts. The sample code that I
shared should cover this already.
> > That being said, HW doesn't seem to propagate C_BAD_STE during a
> > CFGI_STE or CFGI_STE_RANGE, IIUIC. It reports C_BAD_STE event when
> > a transaction starts. If we want to perfectly mimic the hardware,
> > we'd have to set up a bad STE down to the HW, which will trigger a
> > C_BAD_STE vevent to be forwarded by vEVENTQ.
>
> I don't think we need to mimic that behaviour. We could return an event
> from here to Guest if required or just have error_report().
I am not sure about that. Reporting event on CMD_CFGI_STE doesn't
sound like a correct behavior following the HW spec.
error_report() would be fine. But we might need to leave a FIXME.
Nicolin
On Fri, Oct 31, 2025 at 04:52:50PM -0700, Nicolin Chen wrote:
> On Fri, Oct 31, 2025 at 10:49:46AM +0000, Shameer Kolothum wrote:
> > +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,\
> [...]
> > + config = STE_CONFIG(&ste);
> > + if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> > + if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> > + STE_CFG_ABORT(config),
>
> This smmuv3_accel_uninstall_nested_ste() feels a bit redundant now.
>
> Perhaps we could try something like this:
>
> #define accel_dev_to_smmuv3(dev) ARM_SMMUV3(&dev->sdev.smmu)
Oops. This should be:
#define accel_dev_to_smmuv3(dev) ARM_SMMUV3((SMMUState *)dev->sdev.smmu)
But it doesn't seem to be very useful anyway. So, feel free to
keep it inline or just drop it :)
Nicolin
© 2016 - 2025 Red Hat, Inc.