When a device's default substream attaches to an identity domain, the SMMU
driver currently sets the device's STE between two modes:
Mode 1: Cfg=Translate, S1DSS=Bypass, EATS=1
Mode 2: Cfg=bypass (EATS is ignored by HW)
When there is an active PASID (non-default substream), mode 1 is used. And
when there is no PASID support or no active PASID, mode 2 is used.
The driver will also downgrade an STE from mode 1 to mode 2, when the last
active substream becomes inactive.
However, there are PCIe devices that demand ATS to be always on. For these
devices, their STEs have to use the mode 1 as HW ignores EATS with mode 2.
Change the driver accordingly:
- always use the mode 1
- never downgrade to mode 2
- allocate and retain a CD table (see note below)
Note that these devices might not support PASID, i.e. doing non-PASID ATS.
In such a case, the ssid_bits is set to 0. However, s1cdmax must be set to
a !0 value in order to keep the S1DSS field effective. Thus, when a master
requires ats_always_on, set its s1cdmax to minimal 1, meaning the CD table
will have a dummy entry (SSID=1) that will never be used.
Now, for these devices, arm_smmu_cdtab_allocated() will always return true,
vs. false prior to this change. When its default substream is attached to
an IDENTITY domain, its first CD is NULL in the table, which is a totally
valid case. Thus, drop the WARN_ON().
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 74 ++++++++++++++++++---
2 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index ae23aacc3840..2ed68f43347e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -850,6 +850,7 @@ struct arm_smmu_master {
bool ats_enabled : 1;
bool ste_ats_enabled : 1;
bool stall_enabled;
+ bool ats_always_on;
unsigned int ssid_bits;
unsigned int iopf_refcount;
};
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d16d35c78c06..5b7deb708636 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1422,7 +1422,7 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
if (!arm_smmu_cdtab_allocated(&master->cd_table))
return;
cdptr = arm_smmu_get_cd_ptr(master, ssid);
- if (WARN_ON(!cdptr))
+ if (!cdptr)
return;
arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}
@@ -1436,6 +1436,22 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
cd_table->s1cdmax = master->ssid_bits;
+
+ /*
+ * When a device doesn't support PASID (non default SSID), ssid_bits is
+ * set to 0. This also sets S1CDMAX to 0, which disables the substreams
+ * and ignores the S1DSS field.
+ *
+ * On the other hand, if a device demands ATS to be always on even when
+ * its default substream is IOMMU bypassed, it has to use EATS that is
+ * only effective with an STE (CFG=S1translate, S1DSS=Bypass). For such
+ * use cases, S1CDMAX has to be !0, in order to make use of S1DSS/EATS.
+ *
+ * Set S1CDMAX no lower than 1. This would add a dummy substream in the
+ * CD table but it should never be used by an actual CD.
+ */
+ if (master->ats_always_on)
+ cd_table->s1cdmax = max_t(u8, cd_table->s1cdmax, 1);
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
@@ -3189,7 +3205,8 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
* When the last user of the CD table goes away downgrade the STE back
* to a non-cd_table one, by re-attaching its sid_domain.
*/
- if (!arm_smmu_ssids_in_use(&master->cd_table)) {
+ if (!master->ats_always_on &&
+ !arm_smmu_ssids_in_use(&master->cd_table)) {
struct iommu_domain *sid_domain =
iommu_get_domain_for_dev(master->dev);
@@ -3205,7 +3222,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
struct iommu_domain *old_domain,
struct device *dev,
struct arm_smmu_ste *ste,
- unsigned int s1dss)
+ unsigned int s1dss, bool ats_always_on)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_attach_state state = {
@@ -3224,7 +3241,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
* If the CD table is not in use we can use the provided STE, otherwise
* we use a cdtable STE with the provided S1DSS.
*/
- if (arm_smmu_ssids_in_use(&master->cd_table)) {
+ if (ats_always_on || arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
* on because we have to assume a PASID is using ATS. For
@@ -3260,7 +3277,8 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
arm_smmu_master_clear_vmaster(master);
arm_smmu_make_bypass_ste(master->smmu, &ste);
arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
- STRTAB_STE_1_S1DSS_BYPASS);
+ STRTAB_STE_1_S1DSS_BYPASS,
+ master->ats_always_on);
return 0;
}
@@ -3283,7 +3301,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
arm_smmu_master_clear_vmaster(master);
arm_smmu_make_abort_ste(&ste);
arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
- STRTAB_STE_1_S1DSS_TERMINATE);
+ STRTAB_STE_1_S1DSS_TERMINATE, false);
return 0;
}
@@ -3521,6 +3539,40 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
kfree(master->streams);
}
+static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
+{
+ bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
+ unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
+ struct pci_dev *pdev = to_pci_dev(master->dev);
+ int ret;
+
+ if (!arm_smmu_ats_supported(master))
+ return 0;
+
+ if (!pci_ats_always_on(pdev))
+ goto out_prepare;
+
+ /*
+ * S1DSS is required for ATS to be always on for identity domain cases.
+ * However, the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
+ */
+ if (!s1p || !master->smmu->ssid_bits) {
+ dev_info_once(master->dev,
+ "SMMU doesn't support ATS to be always on\n");
+ goto out_prepare;
+ }
+
+ master->ats_always_on = true;
+
+ ret = arm_smmu_alloc_cd_tables(master);
+ if (ret)
+ return ret;
+
+out_prepare:
+ pci_prepare_ats(pdev, stu);
+ return 0;
+}
+
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int ret;
@@ -3569,14 +3621,14 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
- if (dev_is_pci(dev)) {
- unsigned int stu = __ffs(smmu->pgsize_bitmap);
-
- pci_prepare_ats(to_pci_dev(dev), stu);
- }
+ ret = arm_smmu_master_prepare_ats(master);
+ if (ret)
+ goto err_disable_pasid;
return &smmu->iommu;
+err_disable_pasid:
+ arm_smmu_disable_pasid(master);
err_free_master:
kfree(master);
return ERR_PTR(ret);
--
2.43.0
On Fri, Jan 16, 2026 at 08:56:42PM -0800, Nicolin Chen wrote:
> When a device's default substream attaches to an identity domain, the SMMU
> driver currently sets the device's STE between two modes:
>
> Mode 1: Cfg=Translate, S1DSS=Bypass, EATS=1
> Mode 2: Cfg=bypass (EATS is ignored by HW)
>
> When there is an active PASID (non-default substream), mode 1 is used. And
> when there is no PASID support or no active PASID, mode 2 is used.
>
> The driver will also downgrade an STE from mode 1 to mode 2, when the last
> active substream becomes inactive.
>
> However, there are PCIe devices that demand ATS to be always on. For these
> devices, their STEs have to use the mode 1 as HW ignores EATS with mode 2.
>
> Change the driver accordingly:
> - always use the mode 1
> - never downgrade to mode 2
> - allocate and retain a CD table (see note below)
>
> Note that these devices might not support PASID, i.e. doing non-PASID ATS.
> In such a case, the ssid_bits is set to 0. However, s1cdmax must be set to
> a !0 value in order to keep the S1DSS field effective. Thus, when a master
> requires ats_always_on, set its s1cdmax to minimal 1, meaning the CD table
> will have a dummy entry (SSID=1) that will be never used.
>
> Now, for these device, arm_smmu_cdtab_allocated() will always return true,
> v.s. false prior to this change. When its default substream is attached to
> an IDENTITY domain, its first CD is NULL in the table, which is a totally
> valid case. Thus, drop the WARN_ON().
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 74 ++++++++++++++++++---
> 2 files changed, 64 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index ae23aacc3840..2ed68f43347e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -850,6 +850,7 @@ struct arm_smmu_master {
> bool ats_enabled : 1;
> bool ste_ats_enabled : 1;
> bool stall_enabled;
> + bool ats_always_on;
> unsigned int ssid_bits;
> unsigned int iopf_refcount;
> };
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index d16d35c78c06..5b7deb708636 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1422,7 +1422,7 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
> if (!arm_smmu_cdtab_allocated(&master->cd_table))
> return;
> cdptr = arm_smmu_get_cd_ptr(master, ssid);
> - if (WARN_ON(!cdptr))
> + if (!cdptr)
> return;
Should we still warn if !master->ats_always_on?
> arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
> }
> @@ -1436,6 +1436,22 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
> struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
>
> cd_table->s1cdmax = master->ssid_bits;
> +
> + /*
> + * When a device doesn't support PASID (non default SSID), ssid_bits is
> + * set to 0. This also sets S1CDMAX to 0, which disables the substreams
> + * and ignores the S1DSS field.
> + *
> + * On the other hand, if a device demands ATS to be always on even when
> + * its default substream is IOMMU bypassed, it has to use EATS that is
> + * only effective with an STE (CFG=S1translate, S1DSS=Bypass). For such
> + * use cases, S1CDMAX has to be !0, in order to make use of S1DSS/EATS.
> + *
> + * Set S1CDMAX no lower than 1. This would add a dummy substream in the
> + * CD table but it should never be used by an actual CD.
> + */
> + if (master->ats_always_on)
> + cd_table->s1cdmax = max_t(u8, cd_table->s1cdmax, 1);
> max_contexts = 1 << cd_table->s1cdmax;
>
> if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
> @@ -3189,7 +3205,8 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
> * When the last user of the CD table goes away downgrade the STE back
> * to a non-cd_table one, by re-attaching its sid_domain.
> */
> - if (!arm_smmu_ssids_in_use(&master->cd_table)) {
> + if (!master->ats_always_on &&
> + !arm_smmu_ssids_in_use(&master->cd_table)) {
> struct iommu_domain *sid_domain =
> iommu_get_domain_for_dev(master->dev);
>
> @@ -3205,7 +3222,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
> struct iommu_domain *old_domain,
> struct device *dev,
> struct arm_smmu_ste *ste,
> - unsigned int s1dss)
> + unsigned int s1dss, bool ats_always_on)
> {
> struct arm_smmu_master *master = dev_iommu_priv_get(dev);
Can we avoid the 'bool' parameter if possible, please? They tend to make the
callsites pretty horrible to read and you're already passing the 'struct
device *' so you should have the master in hand?
> struct arm_smmu_attach_state state = {
> @@ -3224,7 +3241,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
> * If the CD table is not in use we can use the provided STE, otherwise
> * we use a cdtable STE with the provided S1DSS.
> */
> - if (arm_smmu_ssids_in_use(&master->cd_table)) {
> + if (ats_always_on || arm_smmu_ssids_in_use(&master->cd_table)) {
> /*
> * If a CD table has to be present then we need to run with ATS
> * on because we have to assume a PASID is using ATS. For
> @@ -3260,7 +3277,8 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
> arm_smmu_master_clear_vmaster(master);
> arm_smmu_make_bypass_ste(master->smmu, &ste);
> arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
> - STRTAB_STE_1_S1DSS_BYPASS);
> + STRTAB_STE_1_S1DSS_BYPASS,
> + master->ats_always_on);
> return 0;
> }
>
> @@ -3283,7 +3301,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
> arm_smmu_master_clear_vmaster(master);
> arm_smmu_make_abort_ste(&ste);
> arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
> - STRTAB_STE_1_S1DSS_TERMINATE);
> + STRTAB_STE_1_S1DSS_TERMINATE, false);
> return 0;
> }
>
> @@ -3521,6 +3539,40 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
> kfree(master->streams);
> }
>
> +static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
> +{
> + bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
> + unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
> + struct pci_dev *pdev = to_pci_dev(master->dev);
> + int ret;
> +
> + if (!arm_smmu_ats_supported(master))
> + return 0;
> +
> + if (!pci_ats_always_on(pdev))
> + goto out_prepare;
> +
> + /*
> + * S1DSS is required for ATS to be always on for identity domain cases.
> + * However, the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
> + */
> + if (!s1p || !master->smmu->ssid_bits) {
> + dev_info_once(master->dev,
> + "SMMU doesn't support ATS to be always on\n");
> + goto out_prepare;
> + }
> +
> + master->ats_always_on = true;
> +
> + ret = arm_smmu_alloc_cd_tables(master);
> + if (ret)
> + return ret;
Where do you allocate the second level entry for ssid 0 if we're using
2-level cd tables?
Will
On Mon, Jan 26, 2026 at 12:39:50PM +0000, Will Deacon wrote:
> On Fri, Jan 16, 2026 at 08:56:42PM -0800, Nicolin Chen wrote:
> > @@ -1422,7 +1422,7 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
> > if (!arm_smmu_cdtab_allocated(&master->cd_table))
> > return;
> > cdptr = arm_smmu_get_cd_ptr(master, ssid);
> > - if (WARN_ON(!cdptr))
> > + if (!cdptr)
> > return;
>
> Should we still warn if !master->ats_always_on?
Hmm, yes. I'll fix this.
> > @@ -3205,7 +3222,7 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
> > struct iommu_domain *old_domain,
> > struct device *dev,
> > struct arm_smmu_ste *ste,
> > - unsigned int s1dss)
> > + unsigned int s1dss, bool ats_always_on)
> > {
> > struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>
> Can we avoid the 'bool' parameter if possible, please? They tend to make the
> callsites pretty horrible to read and you're already passing the 'struct
> device *' so you should have the master in hand?
Trying to set ats_always_on=false for blocked domain here:
@@ -3260,7 +3277,8 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
STRTAB_STE_1_S1DSS_BYPASS,
master->ats_always_on);
@@ -3283,7 +3301,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
arm_smmu_attach_dev_ste(domain, old_domain, dev, &ste,
STRTAB_STE_1_S1DSS_TERMINATE, false);
But I think we could do that by combining master->ats_always_on
with the s1dss. I will drop the "bool".
Thanks
Nicolin
On Mon, Jan 26, 2026 at 12:39:50PM +0000, Will Deacon wrote: > > + ret = arm_smmu_alloc_cd_tables(master); > > + if (ret) > > + return ret; > > Were do you allocate the second level entry for ssid 0 if we're using > 2-level cd tables? I don't think we need to. The entire design here has a non-valid CD entry for SSID 0. The spec is really weird here: on one hand it explicitly says that with S1DSS the CD entry is ignored. On the other hand, you are also required to have a CD table pointer of at least size one for some reason. So, I think a CD table pointer to a fully invalid L1 table of at least size 1 should be OK? Or stated another way, why would it be OK to have a 1 level table with a non-valid CD table entry for SSID0 but not OK to have a 2 level table that returns non-valid at the first walk? Jason
On 2026-01-26 5:20 pm, Jason Gunthorpe wrote: > On Mon, Jan 26, 2026 at 12:39:50PM +0000, Will Deacon wrote: >>> + ret = arm_smmu_alloc_cd_tables(master); >>> + if (ret) >>> + return ret; >> >> Were do you allocate the second level entry for ssid 0 if we're using >> 2-level cd tables? > > I don't think we need to. The entire design here has a non-valid CD entry > for SSID 0. > > The spec is really weird here, on one hand it explicitly says that with > S1DSS the CD entry is ignored. > > On the other hand, you are also required to have a CD table pointer of > at least size one for some reason. Because it is not possible to enable 0 SubStreams, since that wouldn't make any sense, hence S1CDMax also acts as the "enable SubStreams" control (assuming SSIDSIZE > 0 and it does anything at all - note that strictly we cannot assume this bypass trick is *always* possible, since an SMMU is permitted to support ATS without supporting SubStreams). > So, I think a CD table pointer to a fully invalid L1 table of at least > size 1 should be OK? > > Or stated another way, why would ie be OK to have a 1 level table with > an non-valid CD table entry for SSID0 but not OK to have a 2 level > table that returns non-valid at the first walk? S1ContextPtr itself is reachable since S1 is enabled, so it cannot point to nonsense. But the S1DSS==Bypass behaviour does state: "Note: Such a transaction does not fetch a CD, and therefore does not report F_CD_FETCH, C_BAD_CD or a stage 2 Translation-related fault with CLASS == CD." So if we're not intending to actually allow traffic on the SubStream(s), then it should be fine to use either a 1-level table of invalid CDs, or a 2-level format with an empty L1CD table to gracefully terminate any config prefetches. Thanks, Robin.
On Mon, Jan 26, 2026 at 06:49:07PM +0000, Robin Murphy wrote: > (assuming SSIDSIZE > 0 and it does anything at all - note that strictly we > cannot assume this bypass trick is *always* possible, since an SMMU is > permitted to support ATS without supporting SubStreams). Yes, I think Nicolin has captured those conditions in computing it... We don't have a logic to disable bypass in that case though. > > So, I think a CD table pointer to a fully invalid L1 table of at least > > size 1 should be OK? > > > > Or stated another way, why would ie be OK to have a 1 level table with > > an non-valid CD table entry for SSID0 but not OK to have a 2 level > > table that returns non-valid at the first walk? > > S1ContextPtr itself is reachable since S1 is enabled, so it cannot point to > nonsense. But the S1DSS==Bypass behaviour does state: > "Note: Such a transaction does not fetch a CD, and therefore does not report > F_CD_FETCH, C_BAD_CD or a stage 2 Translation-related fault with CLASS == > CD." Yes However, taken together: * S1CDMax is set to substream 0 only * S1DSS is set such that "does not fetch a CD" for SSID = 0 * SSID >0 doesn't fetch CDs because of S1CDMax Then it seems to be saying that it will never use S1ContextPtr? ie it is IGNORED? > So if we're not intending to actually allow traffic on the SubStream(s), > then it should be fine to use either a 1-level table of invalid CDs, or a > 2-level format with an empty L1CD table to gracefully terminate any config > prefetches. Yes, so arm_smmu_alloc_cd_tables() is fine since it creates a valid value for S1ContextPtr such that any future use can happen without changing S1ContextPtr. Jason
On Mon, Jan 26, 2026 at 03:09:35PM -0400, Jason Gunthorpe wrote: > On Mon, Jan 26, 2026 at 06:49:07PM +0000, Robin Murphy wrote: > > (assuming SSIDSIZE > 0 and it does anything at all - note that strictly we > > cannot assume this bypass trick is *always* possible, since an SMMU is > > permitted to support ATS without supporting SubStreams). > > Yes, I think Nicolin has captured those conditions in computing > it... We don't have a logic to disable bypass in that case though. > > > > So, I think a CD table pointer to a fully invalid L1 table of at least > > > size 1 should be OK? > > > > > > Or stated another way, why would ie be OK to have a 1 level table with > > > an non-valid CD table entry for SSID0 but not OK to have a 2 level > > > table that returns non-valid at the first walk? > > > > S1ContextPtr itself is reachable since S1 is enabled, so it cannot point to > > nonsense. But the S1DSS==Bypass behaviour does state: > > > "Note: Such a transaction does not fetch a CD, and therefore does not report > > F_CD_FETCH, C_BAD_CD or a stage 2 Translation-related fault with CLASS == > > CD." > > Yes > > However, taken together: > * S1CDMax is set to substream 0 only > * S1DSS is set such that "does not fetch a CD" for SSID = 0 > * SSID >0 doesn't fetch CDs because of S1CDMax > > Then it seems to be saying that it will never use S1ContextPtr? ie it > is IGNORED? Right, I think the critical question is whether that setting of S1DSS (0b01) means that STE.S1ContextPtr is considered "invalid". The spec doesn't call this out explicitly but the "translation procedure charts" seem to indicate that it doesn't use the CD for anything... It would be good to get some clarification from Arm about this particular case. Will
On 2026-01-27 1:10 pm, Will Deacon wrote: > On Mon, Jan 26, 2026 at 03:09:35PM -0400, Jason Gunthorpe wrote: >> On Mon, Jan 26, 2026 at 06:49:07PM +0000, Robin Murphy wrote: >>> (assuming SSIDSIZE > 0 and it does anything at all - note that strictly we >>> cannot assume this bypass trick is *always* possible, since an SMMU is >>> permitted to support ATS without supporting SubStreams). >> >> Yes, I think Nicolin has captured those conditions in computing >> it... We don't have a logic to disable bypass in that case though. >> >>>> So, I think a CD table pointer to a fully invalid L1 table of at least >>>> size 1 should be OK? >>>> >>>> Or stated another way, why would ie be OK to have a 1 level table with >>>> an non-valid CD table entry for SSID0 but not OK to have a 2 level >>>> table that returns non-valid at the first walk? >>> >>> S1ContextPtr itself is reachable since S1 is enabled, so it cannot point to >>> nonsense. But the S1DSS==Bypass behaviour does state: >> >>> "Note: Such a transaction does not fetch a CD, and therefore does not report >>> F_CD_FETCH, C_BAD_CD or a stage 2 Translation-related fault with CLASS == >>> CD." >> >> Yes >> >> However, taken together: >> * S1CDMax is set to substream 0 only >> * S1DSS is set such that "does not fetch a CD" for SSID = 0 >> * SSID >0 doesn't fetch CDs because of S1CDMax >> >> Then it seems to be saying that it will never use S1ContextPtr? ie it >> is IGNORED? > > Right, I think the critical question is whether that setting of S1DSS > (0b01) means that STE.S1ContextPtr is considered "invalid". The spec > doesn't call this out explicitly but the "translation procedure charts" > seem to indicate that it doesn't use the CD for anything... > > It would be good to get some clarification from Arm about this > particular case. No, STE.S1ContextPtr itself is "valid" since S1 is enabled. 
No CD fetch will occur for no-SubStreamID transactions that are bypassed by S1DSS, but the SMMU is permitted to attempt to speculatively fetch CDs for the enabled SubStreamID(s). Those fetches do not have to reach a valid CD if the SubStream is not actually in use, much like we don't have to fully populate a 2-level Stream table for StreamID ranges we don't care about either. Don't confuse S1DSS==1 (bypass) with the S1DSS==2 behaviour we use in other cases - the latter is "Use CD 0 for no-SubstreamID traffic" which makes SubStreamID 0 invalid to use. However in the bypass case (and also S1DSS==0 where no-SubstreamID traffic is blocked entirely), SubStreamID 0 remains perfectly valid and usable (we just still won't ever use it in Linux due to the middle case). Thanks, Robin.
On Tue, Jan 27, 2026 at 01:26:02PM +0000, Robin Murphy wrote: > On 2026-01-27 1:10 pm, Will Deacon wrote: > > On Mon, Jan 26, 2026 at 03:09:35PM -0400, Jason Gunthorpe wrote: > > > On Mon, Jan 26, 2026 at 06:49:07PM +0000, Robin Murphy wrote: > > > > (assuming SSIDSIZE > 0 and it does anything at all - note that strictly we > > > > cannot assume this bypass trick is *always* possible, since an SMMU is > > > > permitted to support ATS without supporting SubStreams). > > > > > > Yes, I think Nicolin has captured those conditions in computing > > > it... We don't have a logic to disable bypass in that case though. > > > > > > > > So, I think a CD table pointer to a fully invalid L1 table of at least > > > > > size 1 should be OK? > > > > > > > > > > Or stated another way, why would ie be OK to have a 1 level table with > > > > > an non-valid CD table entry for SSID0 but not OK to have a 2 level > > > > > table that returns non-valid at the first walk? > > > > > > > > S1ContextPtr itself is reachable since S1 is enabled, so it cannot point to > > > > nonsense. But the S1DSS==Bypass behaviour does state: > > > > > > > "Note: Such a transaction does not fetch a CD, and therefore does not report > > > > F_CD_FETCH, C_BAD_CD or a stage 2 Translation-related fault with CLASS == > > > > CD." > > > > > > Yes > > > > > > However, taken together: > > > * S1CDMax is set to substream 0 only > > > * S1DSS is set such that "does not fetch a CD" for SSID = 0 > > > * SSID >0 doesn't fetch CDs because of S1CDMax > > > > > > Then it seems to be saying that it will never use S1ContextPtr? ie it > > > is IGNORED? > > > > Right, I think the critical question is whether that setting of S1DSS > > (0b01) means that STE.S1ContextPtr is considered "invalid". The spec > > doesn't call this out explicitly but the "translation procedure charts" > > seem to indicate that it doesn't use the CD for anything... 
> > > > It would be good to get some clarification from Arm about this > > particular case. > > No, STE.S1ContextPtr itself is "valid" since S1 is enabled. No CD fetch will > occur for no-SubStreamID transactions that are bypassed by S1DSS, but the > SMMU is permitted to attempt to speculatively fetch CDs for the enabled > SubStreamID(s). Those fetches do not have to reach a valid CD if the > SubStream is not actually in use, much like we don't have to fully populate > a 2-level Stream table for StreamID ranges we don't care about either. > > Don't confuse S1DSS==1 (bypass) with the S1DSS==2 behaviour we use in other > cases - the latter is "Use CD 0 for no-SubstreamID traffic" which makes > SubStreamID 0 invalid to use. However in the bypass case (and also S1DSS==0 > where no-SubstreamID traffic is blocked entirely), SubStreamID 0 remains > perfectly valid and usable (we just still won't ever use it in Linux due to > the middle case). Argh, I had conflated a transaction using SSID 0 vs a transaction without a substream at all. So I think this makes sense now... Thanks, Will
On Tue, Jan 27, 2026 at 01:50:54PM +0000, Will Deacon wrote: > Argh, I had conflated a transaction using SSID 0 vs a transaction > without a substream at all. So I think this makes sense now... Yeah, it is a bit subtle, but as a SW choice the iommu subsystem reserves PASID 0/SSID 0 as the "untagged" translation. Several HW's force this in their implementation (e.g. AMD). ARM, however, includes a "Substream Valid" in the input bus. Linux doesn't use the combination "Substream Valid, SSID=0", that should never occur. If it wrongly does happen then IDENTITY will generate a fault, either C_BAD_CD (due to it being non-valid) or C_BAD_SUBSTREAMID (due to S1CDMax disabling substreams). While PAGING will either fault with C_BAD_SUBSTREAMID (S2 paging domain) or succeed when S1DSS=b10. Jason
On Mon, Jan 26, 2026 at 01:20:20PM -0400, Jason Gunthorpe wrote: > On Mon, Jan 26, 2026 at 12:39:50PM +0000, Will Deacon wrote: > > > + ret = arm_smmu_alloc_cd_tables(master); > > > + if (ret) > > > + return ret; > > > > Were do you allocate the second level entry for ssid 0 if we're using > > 2-level cd tables? > > I don't think we need to. The entire design here has a non-valid CD entry > for SSID 0. Hmm, whether we allocate a 2-level cd table would actually depend on the "1 << cd_table->s1cdmax" vs. CTXDESC_L2_ENTRIES, right? If the device supports PASID and s1cdmax is large, we should prepare a 2-level cd table, even if only SSID0 is used at this moment since we have to support !0 pasids via potential SVA domains. In all other cases, we would prepare a linear one. Nicolin
On Mon, Jan 26, 2026 at 10:40:39AM -0800, Nicolin Chen wrote: > On Mon, Jan 26, 2026 at 01:20:20PM -0400, Jason Gunthorpe wrote: > > On Mon, Jan 26, 2026 at 12:39:50PM +0000, Will Deacon wrote: > > > > + ret = arm_smmu_alloc_cd_tables(master); > > > > + if (ret) > > > > + return ret; > > > > > > Were do you allocate the second level entry for ssid 0 if we're using > > > 2-level cd tables? > > > > I don't think we need to. The entire design here has a non-valid CD entry > > for SSID 0. > > Hmm, whether we allocate a 2-level cd table would actually depend on > the "1 << cd_table->s1cdmax" v.s. CTXDESC_L2_ENTRIES, right? > > If the device supports PASID and s1cdmax is large, we should prepare > a 2-level cd tables, even if only SSID0 is used at this moment since > we have to support !0 pasids via potential SVA domains. > > In all Other cases, we would prepare a linear one. Yes, this is what arm_smmu_alloc_cd_tables() is doing. I think Will was questioning if this needs to be arm_smmu_alloc_cd_ptr(master, 0), to ensure there is some memory under the SSID=0 case, but it seems we don't need that. Jason
On Fri, Jan 16, 2026 at 08:56:42PM -0800, Nicolin Chen wrote:
> +static int arm_smmu_master_prepare_ats(struct arm_smmu_master *master)
> +{
> + bool s1p = master->smmu->features & ARM_SMMU_FEAT_TRANS_S1;
> + unsigned int stu = __ffs(master->smmu->pgsize_bitmap);
> + struct pci_dev *pdev = to_pci_dev(master->dev);
> + int ret;
> +
> + if (!arm_smmu_ats_supported(master))
> + return 0;
> +
> + if (!pci_ats_always_on(pdev))
> + goto out_prepare;
> +
> + /*
> + * S1DSS is required for ATS to be always on for identity domain cases.
> + * However, the S1DSS field is ignored if !IDR0_S1P or !IDR1_SSIDSIZE.
> + */
> + if (!s1p || !master->smmu->ssid_bits) {
> + dev_info_once(master->dev,
> + "SMMU doesn't support ATS to be always on\n");
> + goto out_prepare;
> + }
It looks right, IDK if Will would prefer a formal ARM_SMMU_FEAT_S1DSS
though.
Jason
© 2016 - 2026 Red Hat, Inc.