[PATCH v2 6/7] iommu/arm-smmu-v3: Introduce master->ats_broken flag

Nicolin Chen posted 7 patches 2 weeks, 6 days ago
[PATCH v2 6/7] iommu/arm-smmu-v3: Introduce master->ats_broken flag
Posted by Nicolin Chen 2 weeks, 6 days ago
The flag will be set when the IOMMU cannot trust a device's ATS function,
e.g. when an ATC invalidation request to the device times out.

Once it is set, report ATS as unsupported for the device to prevent data
corruption, and skip further ATC invalidation commands to avoid new
timeouts.

Unset the flag when the device finishes a reset for recovery.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 +
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 28 +++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index cb83ea1f3407f..0a0a88bb60e65 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -941,6 +941,7 @@ struct arm_smmu_master {
 	/* Locked by the iommu core using the group mutex */
 	struct arm_smmu_ctx_desc_cfg	cd_table;
 	unsigned int			num_streams;
+	bool				ats_broken;
 	bool				ats_enabled : 1;
 	bool				ste_ats_enabled : 1;
 	bool				stall_enabled;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index aa42fe39d66b6..366d812668011 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2502,6 +2502,10 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
 	struct arm_smmu_cmdq_ent cmd;
 	struct arm_smmu_cmdq_batch cmds;
 
+	/* Do not issue ATC_INV that will definitely time out */
+	if (READ_ONCE(master->ats_broken))
+		return 0;
+
 	arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
 
 	arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
@@ -2708,11 +2712,17 @@ static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs,
 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 			break;
 		case INV_TYPE_ATS:
+			/* Do not issue ATC_INV that will definitely time out */
+			if (READ_ONCE(cur->master->ats_broken))
+				continue;
 			arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd);
 			cmd.atc.sid = cur->id;
 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 			break;
 		case INV_TYPE_ATS_FULL:
+			/* Do not issue ATC_INV that will definitely time out */
+			if (READ_ONCE(cur->master->ats_broken))
+				continue;
 			arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
 			cmd.atc.sid = cur->id;
 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
@@ -3048,6 +3058,15 @@ void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
 	}
 }
 
+static void arm_smmu_reset_device_done(struct device *dev)
+{
+	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+	if (WARN_ON(!master))
+		return;
+	WRITE_ONCE(master->ats_broken, false);
+}
+
 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
 {
 	struct device *dev = master->dev;
@@ -3060,6 +3079,14 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
 		return false;
 
+	/*
+	 * Reject any new ATS request because ATC invalidation was timed out.
+	 * The PCI device should go through a recovery (reset) and notify the
+	 * SMMUv3 driver via a reset_device_done callback.
+	 */
+	if (READ_ONCE(master->ats_broken))
+		return false;
+
 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
 }
 
@@ -4392,6 +4419,7 @@ static const struct iommu_ops arm_smmu_ops = {
 	.domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
 	.probe_device		= arm_smmu_probe_device,
 	.release_device		= arm_smmu_release_device,
+	.reset_device_done	= arm_smmu_reset_device_done,
 	.device_group		= arm_smmu_device_group,
 	.of_xlate		= arm_smmu_of_xlate,
 	.get_resv_regions	= arm_smmu_get_resv_regions,
-- 
2.43.0
RE: [PATCH v2 6/7] iommu/arm-smmu-v3: Introduce master->ats_broken flag
Posted by Tian, Kevin 2 weeks, 5 days ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Wednesday, March 18, 2026 3:16 AM
> 
> @@ -3060,6 +3079,14 @@ static bool arm_smmu_ats_supported(struct
> arm_smmu_master *master)
>  	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
>  		return false;
> 
> +	/*
> +	 * Reject any new ATS request because ATC invalidation was timed
> out.
> +	 * The PCI device should go through a recovery (reset) and notify the
> +	 * SMMUv3 driver via a reset_device_done callback.
> +	 */
> +	if (READ_ONCE(master->ats_broken))
> +		return false;
> +

"Reject any new ATS request" means any new request to enable ATS
on this device, instead of rejecting any new ATS translation request,
correct? next patch does the actual work to block ATS...
Re: [PATCH v2 6/7] iommu/arm-smmu-v3: Introduce master->ats_broken flag
Posted by Nicolin Chen 2 weeks, 5 days ago
On Wed, Mar 18, 2026 at 07:39:53AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Wednesday, March 18, 2026 3:16 AM
> > 
> > @@ -3060,6 +3079,14 @@ static bool arm_smmu_ats_supported(struct
> > arm_smmu_master *master)
> >  	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
> >  		return false;
> > 
> > +	/*
> > +	 * Reject any new ATS request because ATC invalidation was timed
> > out.
> > +	 * The PCI device should go through a recovery (reset) and notify the
> > +	 * SMMUv3 driver via a reset_device_done callback.
> > +	 */
> > +	if (READ_ONCE(master->ats_broken))
> > +		return false;
> > +
> 
> "Reject any new ATS request" means any new request to enable ATS
> on this device, instead of rejecting any new ATS translation request,
> correct? next patch does the actual work to block ATS...

Yes. We won't call pci_enable_ats() due to !arm_smmu_ats_supported.
So, there shouldn't be any new ATS request any more.

I will change it to:
	/*
	 * Do not enable ATS if master->ats_broken is set. The PCI device should
	 * go through a recovery (reset) that shall notify the SMMUv3 driver via
	 * a reset_device_done callback.
	 */

Thanks
Nicolin