If a master has only a default substream, it can skip CD/translation table
allocations when being attached to an IDENTITY domain, by simply setting
STE to the "bypass" mode (STE.Config[2:0] == 0b100).
If a master has multiple substreams, it will still need a CD table for the
non-default substreams when being attached to an IDENTITY domain, in which
case the STE.Config is set to the "stage-1 translate" mode while STE.S1DSS
field instead is set to the "bypass" mode (STE.S1DSS[1:0] == 0b01).
If a master is attached to a stage-2 domain, it does not need a CD table,
while the STE.Config is set to the "stage-2 translate" mode.
Add boolean bypass_ste and skip_cdtab flags in arm_smmu_attach_dev(), to
handle clearly the cases above, which also corrects the conditions at the
ats_enabled setting and arm_smmu_alloc_cd_tables() callback to cover the
second use case.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 ++++++++++++++++-----
1 file changed, 27 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index df6409017127..dbe11997b4b9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2381,6 +2381,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master *master;
+ bool byapss_ste, skip_cdtab;
if (!fwspec)
return -ENOENT;
@@ -2416,6 +2417,24 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
master->domain = smmu_domain;
+ /*
+ * When master attaches ARM_SMMU_DOMAIN_BYPASS to its single substream,
+ * set STE.Config to "bypass" and skip a CD table allocation. Otherwise,
+ * set STE.Config to "stage-1 translate" and allocate a CD table for its
+ * multiple stage-1 substream support, unless with a stage-2 domain in
+ * which case set STE.config to "stage-2 translate" and skip a CD table.
+ */
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS && !master->ssid_bits) {
+ byapss_ste = true;
+ skip_cdtab = true;
+ } else {
+ byapss_ste = false;
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+ skip_cdtab = true;
+ else
+ skip_cdtab = false;
+ }
+
/*
* The SMMU does not support enabling ATS with bypass. When the STE is
* in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
@@ -2423,22 +2442,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
* stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
* F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
*/
- if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
+ if (!byapss_ste)
master->ats_enabled = arm_smmu_ats_supported(master);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- if (!master->cd_table.cdtab) {
- ret = arm_smmu_alloc_cd_tables(master);
- if (ret) {
- master->domain = NULL;
- goto out_list_del;
- }
+ if (!skip_cdtab && !master->cd_table.cdtab) {
+ ret = arm_smmu_alloc_cd_tables(master);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
}
+ }
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
/*
* Prevent SVA from concurrently modifying the CD or writing to
* the CD entry
--
2.42.0
On Wed, Sep 20, 2023 at 01:52:03PM -0700, Nicolin Chen wrote:
> If a master has only a default substream, it can skip CD/translation table
> allocations when being attached to an IDENTITY domain, by simply setting
> STE to the "bypass" mode (STE.Config[2:0] == 0b100).
>
> If a master has multiple substreams, it will still need a CD table for the
> non-default substreams when being attached to an IDENTITY domain, in which
> case the STE.Config is set to the "stage-1 translate" mode while STE.S1DSS
> field instead is set to the "bypass" mode (STE.S1DSS[1:0] == 0b01).
>
> If a master is attached to a stage-2 domain, it does not need a CD table,
> while the STE.Config is set to the "stage-2 translate" mode.
>
> Add boolean bypass_ste and skip_cdtab flags in arm_smmu_attach_dev(), to
> handle clearly the cases above, which also corrects the conditions at the
> ats_enabled setting and arm_smmu_alloc_cd_tables() callback to cover the
> second use case.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 ++++++++++++++++-----
> 1 file changed, 27 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index df6409017127..dbe11997b4b9 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2381,6 +2381,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> struct arm_smmu_device *smmu;
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> struct arm_smmu_master *master;
> + bool byapss_ste, skip_cdtab;
>
> if (!fwspec)
> return -ENOENT;
> @@ -2416,6 +2417,24 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>
> master->domain = smmu_domain;
>
> + /*
> + * When master attaches ARM_SMMU_DOMAIN_BYPASS to its single substream,
> + * set STE.Config to "bypass" and skip a CD table allocation. Otherwise,
> + * set STE.Config to "stage-1 translate" and allocate a CD table for its
> + * multiple stage-1 substream support, unless with a stage-2 domain in
> + * which case set STE.config to "stage-2 translate" and skip a CD table.
> + */
It might be clearer like this:
static bool arm_smmu_domain_needs_cdtab(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_master *master)
{
switch (smmu_domain->stage) {
/*
* The SMMU can support IOMMU_DOMAIN_IDENTITY either by programming
* STE.Config to 0b100 (bypass) or by configuring STE.Config to 0b101
* (S1 translate) and setting STE.S1DSS[1:0] to 0b01 "bypass". The
* latter requires allocating a CD table.
*
* The 0b100 config has the drawback that ATS and PASID cannot be used,
* however it could be higher performance. Select the "S1 translation"
* option if we might need those features.
*/
case ARM_SMMU_DOMAIN_BYPASS:
return master->ssid_bits || arm_smmu_ats_supported(master);
case ARM_SMMU_DOMAIN_S1:
case ARM_SMMU_DOMAIN_NESTED:
return true;
case ARM_SMMU_DOMAIN_S2:
return false;
}
return false;
}
Then the below is
if (needs_cdtab || smm_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
master->ats_enabled = arm_smmu_ats_supported(master);
And the CD table should be sync'd to the result of arm_smmu_domain_needs_cdtab()..
It looks like there is still some kind of logic missing as we need to
know if there are any PASIDs using the cd table here:
if (!master->cd_table_empty && !needs_cdtab)
return -EBUSY;
if (needs_ctab && !master->cd_table.cdtab)
ret = arm_smmu_alloc_cd_tables(master);
if (!needs_ctab && master->cd_table.cdtab)
arm_smmu_dealloc_cd_tables(master);
And add master->cd_table_emty to the arm_smmu_domain_needs_cdtab bypass logic.
Also, are these patches are out of order, this should come last since
the arm_smmu_write_strtab_ent hasn't learned yet how to do
STRTAB_STE_1_S1DSS_BYPASS?
Jason
On Mon, Sep 25, 2023 at 02:57:08PM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 20, 2023 at 01:52:03PM -0700, Nicolin Chen wrote:
> > If a master has only a default substream, it can skip CD/translation table
> > allocations when being attached to an IDENTITY domain, by simply setting
> > STE to the "bypass" mode (STE.Config[2:0] == 0b100).
> >
> > If a master has multiple substreams, it will still need a CD table for the
> > non-default substreams when being attached to an IDENTITY domain, in which
> > case the STE.Config is set to the "stage-1 translate" mode while STE.S1DSS
> > field instead is set to the "bypass" mode (STE.S1DSS[1:0] == 0b01).
> >
> > If a master is attached to a stage-2 domain, it does not need a CD table,
> > while the STE.Config is set to the "stage-2 translate" mode.
> >
> > Add boolean bypass_ste and skip_cdtab flags in arm_smmu_attach_dev(), to
> > handle clearly the cases above, which also corrects the conditions at the
> > ats_enabled setting and arm_smmu_alloc_cd_tables() callback to cover the
> > second use case.
> >
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > ---
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 35 ++++++++++++++++-----
> > 1 file changed, 27 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index df6409017127..dbe11997b4b9 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -2381,6 +2381,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> > struct arm_smmu_device *smmu;
> > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > struct arm_smmu_master *master;
> > + bool byapss_ste, skip_cdtab;
> >
> > if (!fwspec)
> > return -ENOENT;
> > @@ -2416,6 +2417,24 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> >
> > master->domain = smmu_domain;
> >
> > + /*
> > + * When master attaches ARM_SMMU_DOMAIN_BYPASS to its single substream,
> > + * set STE.Config to "bypass" and skip a CD table allocation. Otherwise,
> > + * set STE.Config to "stage-1 translate" and allocate a CD table for its
> > + * multiple stage-1 substream support, unless with a stage-2 domain in
> > + * which case set STE.config to "stage-2 translate" and skip a CD table.
> > + */
>
> It might be clearer like this:
>
> static bool arm_smmu_domain_needs_cdtab(struct arm_smmu_domain *smmu_domain,
> struct arm_smmu_master *master)
> {
> switch (smmu_domain->stage) {
> /*
> * The SMMU can support IOMMU_DOMAIN_IDENTITY either by programming
> * STE.Config to 0b100 (bypass) or by configuring STE.Config to 0b101
> * (S1 translate) and setting STE.S1DSS[1:0] to 0b01 "bypass". The
> * latter requires allocating a CD table.
> *
> * The 0b100 config has the drawback that ATS and PASID cannot be used,
> * however it could be higher performance. Select the "S1 translation"
> * option if we might need those features.
> */
> case ARM_SMMU_DOMAIN_BYPASS:
> return master->ssid_bits || arm_smmu_ats_supported(master);
> case ARM_SMMU_DOMAIN_S1:
> case ARM_SMMU_DOMAIN_NESTED:
> return true;
> case ARM_SMMU_DOMAIN_S2:
> return false;
> }
> return false;
> }
>
> Then the below is
>
> if (needs_cdtab || smm_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
> master->ats_enabled = arm_smmu_ats_supported(master);
>
> And the CD table should be sync'd to the result of arm_smmu_domain_needs_cdtab()..
Ack.
> It looks like there is still some kind of logic missing as we need to
> know if there are any PASIDs using the cd table here:
>
> if (!master->cd_table_empty && !needs_cdtab)
> return -EBUSY;
>
> if (needs_ctab && !master->cd_table.cdtab)
> ret = arm_smmu_alloc_cd_tables(master);
>
> if (!needs_ctab && master->cd_table.cdtab)
> arm_smmu_dealloc_cd_tables(master);
>
> And add master->cd_table_emty to the arm_smmu_domain_needs_cdtab bypass logic.
OK. I will give it a try.
> Also, are these patches are out of order, this should come last since
> the arm_smmu_write_strtab_ent hasn't learned yet how to do
> STRTAB_STE_1_S1DSS_BYPASS?
Hmm. It could although it's a status quo IMHO -- in practical
only ARM_SMMU_DOMAIN_BYPASS would be changed by the 1st patch
to allocating a CD table that still doesn't work since this
STRTAB_STE_1_S1DSS_BYPASS is unset.
Thanks
Nic
© 2016 - 2025 Red Hat, Inc.