Remove struct arm_smmu_s1_cfg. This is really just a CD table with a
bit of extra information. Enhance the existing CD table structure,
struct arm_smmu_ctx_desc_cfg, with max_cds_bits and replace all usages
of arm_smmu_s1_cfg with arm_smmu_ctx_desc_cfg.
Compute the other values that were stored in s1cfg directly from
existing arm_smmu_ctx_desc_cfg.
For clarity, use the name "cd_table" for the variables pointing to
arm_smmu_ctx_desc_cfg in the new code instead of cdcfg. A later patch
will make this fully consistent.
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Michael Shavit <mshavit@google.com>
---
(no changes since v3)
Changes in v3:
- Updated commit messages again
- Replace more usages of cdcfg with cdtable (lines that were already
touched by this commit anyways).
Changes in v2:
- Updated commit message
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 45 +++++++++++----------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 10 ++---
2 files changed, 26 insertions(+), 29 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bb277ff86f65f..ded613aedbb04 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1033,9 +1033,9 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
- if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ if (!cdcfg->l1_desc)
return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
@@ -1071,7 +1071,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
bool cd_live;
__le64 *cdptr;
- if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+ if (WARN_ON(ssid >= (1 << smmu_domain->cd_table.max_cds_bits)))
return -E2BIG;
cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
@@ -1138,19 +1138,16 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
size_t l1size;
size_t max_contexts;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
- max_contexts = 1 << cfg->s1cdmax;
+ max_contexts = 1 << cdcfg->max_cds_bits;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
cdcfg->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
@@ -1186,7 +1183,7 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
int i;
size_t size, l1size;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
if (cdcfg->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
@@ -1276,7 +1273,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
struct arm_smmu_device *smmu = NULL;
- struct arm_smmu_s1_cfg *s1_cfg = NULL;
+ struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
@@ -1294,7 +1291,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu_domain) {
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1:
- s1_cfg = &smmu_domain->s1_cfg;
+ cd_table = &smmu_domain->cd_table;
break;
case ARM_SMMU_DOMAIN_S2:
case ARM_SMMU_DOMAIN_NESTED:
@@ -1325,7 +1322,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
val = STRTAB_STE_0_V;
/* Bypass/fault */
- if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+ if (!smmu_domain || !(cd_table || s2_cfg)) {
if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
@@ -1344,7 +1341,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
return;
}
- if (s1_cfg) {
+ if (cd_table) {
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
@@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
- FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
- FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
- FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
+ val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX,
+ cd_table->max_cds_bits) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT,
+ cd_table->l1_desc ?
+ STRTAB_STE_0_S1FMT_64K_L2 :
+ STRTAB_STE_0_S1FMT_LINEAR);
}
if (s2_cfg) {
@@ -2082,11 +2083,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
/* Free the CD and ASID, if we allocated them */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &smmu_domain->cd_table;
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
- if (cfg->cdcfg.cdtab)
+ if (cd_table->cdtab)
arm_smmu_free_cd_tables(smmu_domain);
arm_smmu_free_asid(&smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
@@ -2106,7 +2107,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
int ret;
u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &smmu_domain->cd_table;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
@@ -2119,7 +2120,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
if (ret)
goto out_unlock;
- cfg->s1cdmax = master->ssid_bits;
+ cd_table->max_cds_bits = master->ssid_bits;
smmu_domain->stall_enabled = master->stall_enabled;
@@ -2457,7 +2458,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+ master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index f841383a55a35..35a93e8858872 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -595,12 +595,8 @@ struct arm_smmu_ctx_desc_cfg {
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
-};
-
-struct arm_smmu_s1_cfg {
- struct arm_smmu_ctx_desc_cfg cdcfg;
- u8 s1fmt;
- u8 s1cdmax;
+ /* log2 of the maximum number of CDs supported by this table */
+ u8 max_cds_bits;
};
struct arm_smmu_s2_cfg {
@@ -725,7 +721,7 @@ struct arm_smmu_domain {
union {
struct {
struct arm_smmu_ctx_desc cd;
- struct arm_smmu_s1_cfg s1_cfg;
+ struct arm_smmu_ctx_desc_cfg cd_table;
};
struct arm_smmu_s2_cfg s2_cfg;
};
--
2.41.0.640.ga95def55d0-goog
On Wed, Aug 09, 2023 at 01:11:58AM +0800, Michael Shavit wrote: > Remove struct arm_smmu_s1_cfg. This is really just a CD table with a > bit of extra information. Enhance the existing CD table structure, > struct arm_smmu_ctx_desc_cfg, with max_cds_bits and replace all usages > of arm_smmu_s1_cfg with arm_smmu_ctx_desc_cfg. > > Compute the other values that were stored in s1cfg directly from > existing arm_smmu_ctx_desc_cfg. > > For clarity, use the name "cd_table" for the variables pointing to > arm_smmu_ctx_desc_cfg in the new code instead of cdcfg. A later patch > will make this fully consistent. > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > Reviewed-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Michael Shavit <mshavit@google.com> > --- Sorry, but I'm having a hard time seeing some of the benefits of this particular change. Most of the rest of the series looks good, but see below: > @@ -1071,7 +1071,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, > bool cd_live; > __le64 *cdptr; > > - if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) > + if (WARN_ON(ssid >= (1 << smmu_domain->cd_table.max_cds_bits))) > return -E2BIG; S1CDMAX is architectural terminology -- it's the name given to bits 63:59 of the STE structure. Why is "max_cds_bits" better? > cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); > @@ -1138,19 +1138,16 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) > size_t l1size; > size_t max_contexts; > struct arm_smmu_device *smmu = smmu_domain->smmu; > - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; > - struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; > + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table; > > - max_contexts = 1 << cfg->s1cdmax; > + max_contexts = 1 << cdcfg->max_cds_bits; > > if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || > max_contexts <= CTXDESC_L2_ENTRIES) { > - cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; > cdcfg->num_l1_ents = max_contexts; > > l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); > } else { > - cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; And here we're dropping the S1FMT setting from the code allocating the CD tables (i.e. the only code which should be aware of it's configuration) and now having the low-level STE writing logic here: > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > !master->stall_enabled) > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, > + cd_table->max_cds_bits) | > + FIELD_PREP(STRTAB_STE_0_S1FMT, > + cd_table->l1_desc ? > + STRTAB_STE_0_S1FMT_64K_L2 : > + STRTAB_STE_0_S1FMT_LINEAR); magically know that we're using 64k tables. Why is this an improvement to the driver? Will
On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote: > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > > !master->stall_enabled) > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, > > + cd_table->max_cds_bits) | > > + FIELD_PREP(STRTAB_STE_0_S1FMT, > > + cd_table->l1_desc ? > > + STRTAB_STE_0_S1FMT_64K_L2 : > > + STRTAB_STE_0_S1FMT_LINEAR); > > magically know that we're using 64k tables. > > Why is this an improvement to the driver? Put the above in a function arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste) And it makes more sense. We don't need the driver to precompute the "s1_cfg" parameters and store them in a redundant struct along side the ctx_desc_cfg when we can compute those same values on the fly with no cost. Jason
On Wed, Aug 09, 2023 at 10:59:33AM -0300, Jason Gunthorpe wrote: > On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote: > > > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > > > !master->stall_enabled) > > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > > > > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); > > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, > > > + cd_table->max_cds_bits) | > > > + FIELD_PREP(STRTAB_STE_0_S1FMT, > > > + cd_table->l1_desc ? > > > + STRTAB_STE_0_S1FMT_64K_L2 : > > > + STRTAB_STE_0_S1FMT_LINEAR); > > > > magically know that we're using 64k tables. > > > > Why is this an improvement to the driver? > > Put the above in a function > > arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste) > > And it makes more sense. Sorry, but I'm not seeing it :/ > We don't need the driver to precompute the "s1_cfg" parameters and > store them in a redundant struct along side the ctx_desc_cfg when we > can compute those same values on the fly with no cost. But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2 constant is hardcoded here. If we want to use 4k leaf tables in some cases, how would you add that? Such a change shouldn't need the low-level strtab creation code to change. Will
On Wed, Aug 09, 2023 at 03:55:43PM +0100, Will Deacon wrote: > On Wed, Aug 09, 2023 at 10:59:33AM -0300, Jason Gunthorpe wrote: > > On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote: > > > > > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > > > > !master->stall_enabled) > > > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > > > > > > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > > > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); > > > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, > > > > + cd_table->max_cds_bits) | > > > > + FIELD_PREP(STRTAB_STE_0_S1FMT, > > > > + cd_table->l1_desc ? > > > > + STRTAB_STE_0_S1FMT_64K_L2 : > > > > + STRTAB_STE_0_S1FMT_LINEAR); > > > > > > magically know that we're using 64k tables. > > > > > > Why is this an improvement to the driver? > > > > Put the above in a function > > > > arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste) > > > > And it makes more sense. > > Sorry, but I'm not seeing it :/ > > > We don't need the driver to precompute the "s1_cfg" parameters and > > store them in a redundant struct along side the ctx_desc_cfg when we > > can compute those same values on the fly with no cost. > > But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2 > constant is hardcoded here. So it would be hard coded in arm_smmu_get_cd_ste() because that reflects the current state of CD table code. > If we want to use 4k leaf tables in some cases, how would you add > that? Such a change shouldn't need the low-level strtab creation > code to change. You would modify arm_smmu_ctx_desc_cfg to teach it about the different format. It would gain some (enum?) member that specifies the CD table layout and geometry. arm_smmu_get_cd_ste() will interpret that member and generate the correct STE for the specifc cd table. It is a standard OOP sort of practice that the object self-describes and has accessors to export its internal definition. In this case the STE bits are part of/derived from the internal definition of the CD table. Jason
On Wed, Aug 09, 2023 at 12:08:02PM -0300, Jason Gunthorpe wrote: > On Wed, Aug 09, 2023 at 03:55:43PM +0100, Will Deacon wrote: > > On Wed, Aug 09, 2023 at 10:59:33AM -0300, Jason Gunthorpe wrote: > > > On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote: > > > > > > > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > > > > > !master->stall_enabled) > > > > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > > > > > > > > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > > > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > > > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > > > > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); > > > > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > > > > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > > > > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, > > > > > + cd_table->max_cds_bits) | > > > > > + FIELD_PREP(STRTAB_STE_0_S1FMT, > > > > > + cd_table->l1_desc ? > > > > > + STRTAB_STE_0_S1FMT_64K_L2 : > > > > > + STRTAB_STE_0_S1FMT_LINEAR); > > > > > > > > magically know that we're using 64k tables. > > > > > > > > Why is this an improvement to the driver? > > > > > > Put the above in a function > > > > > > arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste) > > > > > > And it makes more sense. > > > > Sorry, but I'm not seeing it :/ > > > > > We don't need the driver to precompute the "s1_cfg" parameters and > > > store them in a redundant struct along side the ctx_desc_cfg when we > > > can compute those same values on the fly with no cost. > > > > But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2 > > constant is hardcoded here. > > So it would be hard coded in arm_smmu_get_cd_ste() because that > reflects the current state of CD table code. > > > If we want to use 4k leaf tables in some cases, how would you add > > that? Such a change shouldn't need the low-level strtab creation > > code to change. > > You would modify arm_smmu_ctx_desc_cfg to teach it about the different > format. It would gain some (enum?) member that specifies the CD table > layout and geometry. arm_smmu_get_cd_ste() will interpret that member > and generate the correct STE for the specifc cd table. Sounds a lot like the existing s1fmt field. Can we keep it? Will
On Wed, Aug 09, 2023 at 05:22:54PM +0100, Will Deacon wrote: > > > If we want to use 4k leaf tables in some cases, how would you add > > > that? Such a change shouldn't need the low-level strtab creation > > > code to change. > > > > You would modify arm_smmu_ctx_desc_cfg to teach it about the different > > format. It would gain some (enum?) member that specifies the CD table > > layout and geometry. arm_smmu_get_cd_ste() will interpret that member > > and generate the correct STE for the specifc cd table. > > Sounds a lot like the existing s1fmt field. Can we keep it? If you are OK with the dead code, I don't object. But let's put it in the struct arm_smmu_ctx_desc_cfg. Jason
On Wed, Aug 09, 2023 at 01:26:41PM -0300, Jason Gunthorpe wrote: > On Wed, Aug 09, 2023 at 05:22:54PM +0100, Will Deacon wrote: > > > > > If we want to use 4k leaf tables in some cases, how would you add > > > > that? Such a change shouldn't need the low-level strtab creation > > > > code to change. > > > > > > You would modify arm_smmu_ctx_desc_cfg to teach it about the different > > > format. It would gain some (enum?) member that specifies the CD table > > > layout and geometry. arm_smmu_get_cd_ste() will interpret that member > > > and generate the correct STE for the specifc cd table. > > > > Sounds a lot like the existing s1fmt field. Can we keep it? > > If you are OK with the dead code, I don't object. But let's put it in > the struct arm_smmu_ctx_desc_cfg. Ok, we have a deal! Will
> > > Sounds a lot like the existing s1fmt field. Can we keep it? > > > > If you are OK with the dead code, I don't object. But let's put it in > > the struct arm_smmu_ctx_desc_cfg. > > Ok, we have a deal! What dead code? Is the deal here that we keep the field, but still infer the value to write from (cd_table->l1_desc == null) in arm_smmu_write_strtab_ent??
On Thu, Aug 10, 2023 at 05:33:53PM +0800, Michael Shavit wrote: > > > > Sounds a lot like the existing s1fmt field. Can we keep it? > > > > > > If you are OK with the dead code, I don't object. But let's put it in > > > the struct arm_smmu_ctx_desc_cfg. > > > > Ok, we have a deal! > > What dead code? Is the deal here that we keep the field, but still > infer the value to write from (cd_table->l1_desc == null) in > arm_smmu_write_strtab_ent?? Keep the field and write it directly when populating the ste (i.e. don't infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'. Will
On Thu, Aug 10, 2023 at 10:43:33AM +0100, Will Deacon wrote: > On Thu, Aug 10, 2023 at 05:33:53PM +0800, Michael Shavit wrote: > > > > > Sounds a lot like the existing s1fmt field. Can we keep it? > > > > > > > > If you are OK with the dead code, I don't object. But let's put it in > > > > the struct arm_smmu_ctx_desc_cfg. > > > > > > Ok, we have a deal! > > > > What dead code? Is the deal here that we keep the field, but still > > infer the value to write from (cd_table->l1_desc == null) in > > arm_smmu_write_strtab_ent?? > > Keep the field and write it directly when populating the ste (i.e. don't > infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'. Yes - the 'dead code' is that we introduce storage for a field that is always a known constant (STRTAB_STE_0_S1FMT_64K_L2). Jason
> > > What dead code? Is the deal here that we keep the field, but still > > > infer the value to write from (cd_table->l1_desc == null) in > > > arm_smmu_write_strtab_ent?? > > > > Keep the field and write it directly when populating the ste (i.e. don't > > infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'. > > Yes - the 'dead code' is that we introduce storage for a field that is > always a known constant (STRTAB_STE_0_S1FMT_64K_L2). I'm not sure we're on the same page here. s1fmt could contain either `STRTAB_STE_0_S1FMT_64K_L2` or `STRTAB_STE_0_S1FMT_LINEAR`, and this value will be directly copied in arm_smmu_write_strtab_ent.
On Fri, Aug 11, 2023 at 01:15:14AM +0800, Michael Shavit wrote: > > > > What dead code? Is the deal here that we keep the field, but still > > > > infer the value to write from (cd_table->l1_desc == null) in > > > > arm_smmu_write_strtab_ent?? > > > > > > Keep the field and write it directly when populating the ste (i.e. don't > > > infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'. > > > > Yes - the 'dead code' is that we introduce storage for a field that is > > always a known constant (STRTAB_STE_0_S1FMT_64K_L2). > > I'm not sure we're on the same page here. s1fmt could contain either > `STRTAB_STE_0_S1FMT_64K_L2` or `STRTAB_STE_0_S1FMT_LINEAR`, and this > value will be directly copied in arm_smmu_write_strtab_ent. Ah, I did not check this closely, Will said: > But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2 > constant is hardcoded here. So the nuanced answer is that computation is happening because today the format of the CD table (linear vs 64k) is encoded in l1_desc: + cd_table->l1_desc ? + STRTAB_STE_0_S1FMT_64K_L2 : + STRTAB_STE_0_S1FMT_LINEAR); So I would suggest that along with adding s1fmt to arm_smmu_ctx_desc_cfg you also go and normalize the usage: @@ -1030,7 +1030,7 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (!cd_table->l1_desc) + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS; Jason
© 2016 - 2025 Red Hat, Inc.