drivers/iommu/arm/arm-smmu/arm-smmu.c | 92 +++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 25 deletions(-)
Commit d4a44f0750bb ("iommu/arm-smmu: Invoke pm_runtime across the driver")
enabled pm_runtime for the arm-smmu device. On systems where the SMMU
sits in a power domain, all register accesses must be done while the
device is runtime active to avoid unclocked register reads and
potential NoC errors.
So far, this has not been an issue for most SMMU clients because
stall-on-fault is enabled by default. While a translation fault is
being handled, the SMMU stalls further translations for that context
bank, so the fault handler would not race with a powered-down SMMU.
Adreno SMMU now disables stall-on-fault in the presence of fault
storms to avoid saturating SMMU resources and hanging the GMU. With
stall-on-fault disabled, a fault interrupt can be serviced after the
SMMU's power domain has already been turned off, so the SMMU fault
handlers may perform unclocked accesses to the fault-status registers.
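Guard the context and global fault handlers with
arm_smmu_rpm_get_if_active() and arm_smmu_rpm_put() so that all SMMU
fault register accesses are done with the SMMU powered. To cover
implementation-specific handlers as well, always register
arm_smmu_context_fault() and arm_smmu_global_fault() and call
smmu->impl->context_fault() / smmu->impl->global_fault() from within
them, under the same guard. If the SMMU is not runtime active, the
fault can be safely ignored as arm_smmu_device_reset() clears fault
registers on resume.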
Additionally, disable fault reporting in arm_smmu_runtime_suspend()
before powering down. pm_runtime_get_if_active() returns 0 during
RPM_SUSPENDING, so the fault handlers return IRQ_NONE without clearing
the fault; without disabling fault reporting first, a level-triggered
fault interrupt would then cause an interrupt storm while the device
is being suspended. arm_smmu_device_reset() re-enables fault reporting
on resume.
Fixes: b13044092c1e ("drm/msm: Temporarily disable stall-on-fault after a page fault")
Co-developed-by: Pratyush Brahma <pratyush.brahma@oss.qualcomm.com>
Signed-off-by: Pratyush Brahma <pratyush.brahma@oss.qualcomm.com>
Signed-off-by: Prakash Gupta <prakash.gupta@oss.qualcomm.com>
---
Changes in v3:
- Add arm_smmu_rpm_get_if_active() wrapper that returns 1 when pm_runtime
is disabled, ensuring fault handlers work on non-pm_runtime systems
- Disable fault reporting in arm_smmu_runtime_suspend() before powering
down to prevent interrupt storms during RPM_SUSPENDING state
- Use pm_runtime_put_autosuspend() in arm_smmu_rpm_put() instead of
private __pm_runtime_put_autosuspend()
- Link to v2: https://patch.msgid.link/20260313-smmu-rpm-v2-1-8c2236b402b0@oss.qualcomm.com
Changes in v2:
- Switched from arm_smmu_rpm_get()/arm_smmu_rpm_put() wrappers to
pm_runtime_get_if_active()/pm_runtime_put_autosuspend() APIs
- Added support for smmu->impl->global_fault callback in global fault handler
- Remove threaded IRQ context fault restriction to allow modifying stall
mode for Adreno SMMU
- Link to v1: https://patch.msgid.link/20260127-smmu-rpm-v1-1-2ef2f4c85305@oss.qualcomm.com
---
drivers/iommu/arm/arm-smmu/arm-smmu.c | 92 +++++++++++++++++++++++++----------
1 file changed, 67 insertions(+), 25 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 0bd21d206eb3..045389e89484 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -79,11 +79,16 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
- if (pm_runtime_enabled(smmu->dev)) {
- pm_runtime_mark_last_busy(smmu->dev);
- __pm_runtime_put_autosuspend(smmu->dev);
+ if (pm_runtime_enabled(smmu->dev))
+ pm_runtime_put_autosuspend(smmu->dev);
+}
- }
+static inline int arm_smmu_rpm_get_if_active(struct arm_smmu_device *smmu)
+{
+ if (!pm_runtime_enabled(smmu->dev))
+ return 1;
+
+ return pm_runtime_get_if_active(smmu->dev);
}
static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
@@ -462,10 +467,20 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
int idx = smmu_domain->cfg.cbndx;
int ret;
+ if (!arm_smmu_rpm_get_if_active(smmu))
+ return IRQ_NONE;
+
+ if (smmu->impl && smmu->impl->context_fault) {
+ ret = smmu->impl->context_fault(irq, dev);
+ goto out_power_off;
+ }
+
arm_smmu_read_context_fault_info(smmu, idx, &cfi);
- if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
- return IRQ_NONE;
+ if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) {
+ ret = IRQ_NONE;
+ goto out_power_off;
+ }
ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
@@ -480,7 +495,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
}
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
+
+out_power_off:
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
@@ -489,14 +509,25 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
struct arm_smmu_device *smmu = dev;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
+ int ret;
+
+ if (!arm_smmu_rpm_get_if_active(smmu))
+ return IRQ_NONE;
+
+ if (smmu->impl && smmu->impl->global_fault) {
+ ret = smmu->impl->global_fault(irq, dev);
+ goto out_power_off;
+ }
gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
- if (!gfsr)
- return IRQ_NONE;
+ if (!gfsr) {
+ ret = IRQ_NONE;
+ goto out_power_off;
+ }
if (__ratelimit(&rs)) {
if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
@@ -513,7 +544,11 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
}
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
+
+out_power_off:
+ arm_smmu_rpm_put(smmu);
+ return ret;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
@@ -683,7 +718,6 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
enum io_pgtable_fmt fmt;
struct iommu_domain *domain = &smmu_domain->domain;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- irqreturn_t (*context_fault)(int irq, void *dev);
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -850,19 +884,14 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
*/
irq = smmu->irqs[cfg->irptndx];
- if (smmu->impl && smmu->impl->context_fault)
- context_fault = smmu->impl->context_fault;
- else
- context_fault = arm_smmu_context_fault;
-
if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
- context_fault,
+ arm_smmu_context_fault,
IRQF_ONESHOT | IRQF_SHARED,
"arm-smmu-context-fault",
smmu_domain);
else
- ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
+ ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, IRQF_SHARED,
"arm-smmu-context-fault", smmu_domain);
if (ret < 0) {
@@ -2125,7 +2154,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
int num_irqs, i, err;
u32 global_irqs, pmu_irqs;
- irqreturn_t (*global_fault)(int irq, void *dev);
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu) {
@@ -2205,18 +2233,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
smmu->num_context_irqs = smmu->num_context_banks;
}
- if (smmu->impl && smmu->impl->global_fault)
- global_fault = smmu->impl->global_fault;
- else
- global_fault = arm_smmu_global_fault;
-
for (i = 0; i < global_irqs; i++) {
int irq = platform_get_irq(pdev, i);
if (irq < 0)
return irq;
- err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
+ err = devm_request_irq(dev, irq, arm_smmu_global_fault, IRQF_SHARED,
"arm-smmu global fault", smmu);
if (err)
return dev_err_probe(dev, err,
@@ -2306,6 +2329,25 @@ static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ int i;
+ u32 reg;
+
+ /*
+ * Disable fault reporting before powering down: in RPM_SUSPENDING the
+ * fault handlers return IRQ_NONE without clearing the fault, so a
+ * level-triggered fault interrupt would otherwise storm.
+ * arm_smmu_device_reset() re-enables fault reporting on resume.
+ */
+ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
+ reg &= ~(ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
+ ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
+
+ for (i = 0; i < smmu->num_context_banks; i++) {
+ reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_SCTLR);
+ reg &= ~(ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE);
+ arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_SCTLR, reg);
+ }
clk_bulk_disable(smmu->num_clks, smmu->clks);
---
base-commit: ba3e43a9e601636f5edb54e259a74f96ca3b8fd8
change-id: 20251208-smmu-rpm-8bd67db93dca
Best regards,
--
Prakash Gupta <prakash.gupta@oss.qualcomm.com>
© 2016 - 2026 Red Hat, Inc.