Now, each smmu_domain is built with an invs array that keeps all the IDs
(ASID/VMID) and its attached device SIDs, following the exact pattern of
all the existing invalidation functions.
Introduce a new arm_smmu_domain_inv helper iterating smmu_domain->invs,
to convert the invalidation array to commands. An invalidation request
with no size specified means a full flush rather than a range-based one.
ATC invalidations must be in sync with an attachment, especially when it
is an IOMMU_DOMAIN_BLOCKED that is used by the core for a device reset.
Add a has_ats flag in the invs structure and hold the read lock if it is
set, so that a concurrent attachment is blocked until any ATC
invalidation being sent to the command queue has finished.
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 16 ++
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 225 ++++++++++++++++++--
2 files changed, 228 insertions(+), 13 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index e4e0e066108cc..c73a94514c6d6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -675,9 +675,15 @@ struct arm_smmu_inv {
refcount_t users; /* users=0 to mark as a trash to be purged */
};
+static inline bool arm_smmu_inv_is_ats(struct arm_smmu_inv *inv)
+{
+ return inv->type == INV_TYPE_ATS || inv->type == INV_TYPE_ATS_FULL;
+}
+
/**
* struct arm_smmu_invs - Per-domain invalidation array
* @num_invs: number of invalidations in the flexible array
+ * @has_ats: flag if the array contains an INV_TYPE_ATS or INV_TYPE_ATS_FULL
* @old: flag to synchronize with reader
* @rwlock: optional rwlock to fench ATS operations
* @rcu: rcu head for kfree_rcu()
@@ -706,6 +712,7 @@ struct arm_smmu_inv {
struct arm_smmu_invs {
size_t num_invs;
rwlock_t rwlock;
+ u8 has_ats;
u8 old;
struct rcu_head rcu;
struct arm_smmu_inv inv[];
@@ -1072,6 +1079,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size);
+void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
+ unsigned long iova, size_t size,
+ unsigned int granule, bool leaf);
+
+static inline void arm_smmu_domain_inv(struct arm_smmu_domain *smmu_domain)
+{
+ arm_smmu_domain_inv_range(smmu_domain, 0, 0, 0, false);
+}
+
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq);
int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index ee779df1d78fb..c06d2dd893c11 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2479,23 +2479,19 @@ static void arm_smmu_tlb_inv_context(void *cookie)
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
}
-static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
- unsigned long iova, size_t size,
- size_t granule,
- struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *cmd,
+ unsigned long iova, size_t size,
+ size_t granule, size_t pgsize)
{
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- unsigned long end = iova + size, num_pages = 0, tg = 0;
+ unsigned long end = iova + size, num_pages = 0, tg = pgsize;
size_t inv_range = granule;
- struct arm_smmu_cmdq_batch cmds;
if (!size)
return;
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
- /* Get the leaf page size */
- tg = __ffs(smmu_domain->domain.pgsize_bitmap);
-
num_pages = size >> tg;
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
@@ -2515,8 +2511,6 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++;
}
- arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
-
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
/*
@@ -2544,9 +2538,26 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
}
cmd->tlbi.addr = iova;
- arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
+ arm_smmu_cmdq_batch_add(smmu, cmds, cmd);
iova += inv_range;
}
+}
+
+static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
+ unsigned long iova, size_t size,
+ size_t granule,
+ struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cmdq_batch cmds;
+ size_t pgsize;
+
+ /* Get the leaf page size */
+ pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
+
+ arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
+ arm_smmu_cmdq_batch_add_range(smmu, &cmds, cmd, iova, size, granule,
+ pgsize);
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
@@ -2602,6 +2613,194 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
}
+static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
+ size_t granule)
+{
+ size_t max_tlbi_ops;
+
+ /* A size of 0 or SIZE_MAX means invalidate all */
+ if (!size || size == SIZE_MAX)
+ return true;
+
+ if (smmu->features & ARM_SMMU_FEAT_RANGE_INV)
+ return false;
+
+ /*
+ * Borrowed from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h,
+ * this is used as a threshold to replace "size_opcode" commands with a
+ * single "nsize_opcode" command, when SMMU doesn't implement the range
+ * invalidation feature, where there can be too many per-granule TLBIs,
+ * resulting in a soft lockup.
+ */
+ max_tlbi_ops = 1 << (ilog2(granule) - 3);
+ return size >= max_tlbi_ops * granule;
+}
+
+/* Used by non INV_TYPE_ATS* invalidations; picks nsize_opcode or size_opcode */
+static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *cmd,
+ unsigned long iova, size_t size,
+ unsigned int granule)
+{
+ if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) {
+ cmd->opcode = inv->nsize_opcode;
+ /* nsize_opcode always needs a sync, no batching */
+ arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, cmd);
+ return;
+ }
+
+ cmd->opcode = inv->size_opcode;
+ arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule,
+ inv->pgsize);
+}
+
+static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur,
+ struct arm_smmu_inv *next)
+{
+ /* Changing smmu means changing command queue */
+ if (cur->smmu != next->smmu)
+ return true;
+ /* The batch for S2 TLBI must be done before nested S1 ASIDs */
+ if (cur->type != INV_TYPE_S2_VMID_S1_CLEAR &&
+ next->type == INV_TYPE_S2_VMID_S1_CLEAR)
+ return true;
+ /* ATS must be after a sync of the S1/S2 invalidations */
+ if (!arm_smmu_inv_is_ats(cur) && arm_smmu_inv_is_ats(next))
+ return true;
+ return false;
+}
+
+void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
+ unsigned long iova, size_t size,
+ unsigned int granule, bool leaf)
+{
+ struct arm_smmu_cmdq_batch cmds = {};
+ struct arm_smmu_invs *invs;
+ struct arm_smmu_inv *cur, *end;
+ bool locked;
+
+ /*
+ * An invalidation request must follow some IOPTE change and then load
+ * an invalidation array. In the meantime, a domain attachment mutates
+ * the array and then stores an STE/CD asking SMMU HW to acquire those
+ * changed IOPTEs. In other words, these two are interdependent and can
+ * race.
+ *
+ * In a race, the RCU design (with its underlying memory barriers) can
+ * ensure the invalidation array to always get updated before loaded.
+ *
+ * smp_mb() is used here, paired with the smp_mb() following the array
+ * update in a concurrent attach, to ensure:
+ * - HW sees the new IOPTEs if it walks after STE installation
+ * - Invalidation thread sees the updated array with the new ASID.
+ *
+ * [CPU0] | [CPU1]
+ * |
+ * change IOPTEs and TLB flush: |
+ * arm_smmu_domain_inv_range() { | arm_smmu_install_new_domain_invs {
+ * ... | rcu_assign_pointer(new_invs);
+ * smp_mb(); // ensure IOPTEs | smp_mb(); // ensure new_invs
+ * ... | kfree_rcu(old_invs, rcu);
+ * // load invalidation array | }
+ * invs = rcu_dereference(); | arm_smmu_install_ste_for_dev {
+ * | STE = TTB0 // read new IOPTEs
+ */
+ smp_mb();
+
+ rcu_read_lock();
+
+ while (true) {
+ invs = rcu_dereference(smmu_domain->invs);
+
+ /*
+ * Avoid locking unless ATS is being used. No ATS invalidate can
+ * be going on after a domain is detached.
+ */
+ locked = false;
+ if (invs->has_ats || READ_ONCE(invs->old)) {
+ read_lock(&invs->rwlock);
+ if (invs->old) {
+ read_unlock(&invs->rwlock);
+ continue; /* array was replaced: reload and retry */
+ }
+ locked = true;
+ }
+ break;
+ }
+
+ cur = invs->inv;
+ end = cur + READ_ONCE(invs->num_invs);
+ /* Skip any leading entry marked as a trash */
+ for (; cur != end; cur++)
+ if (refcount_read(&cur->users))
+ break;
+ while (cur != end) {
+ struct arm_smmu_device *smmu = cur->smmu;
+ struct arm_smmu_cmdq_ent cmd = {
+ /*
+ * Pick size_opcode to run arm_smmu_get_cmdq(). This can
+ * be changed to nsize_opcode, which would result in the
+ * same CMDQ pointer.
+ */
+ .opcode = cur->size_opcode,
+ };
+ struct arm_smmu_inv *next;
+
+ if (!cmds.num)
+ arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
+
+ switch (cur->type) {
+ case INV_TYPE_S1_ASID:
+ cmd.tlbi.asid = cur->id;
+ cmd.tlbi.leaf = leaf;
+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
+ granule);
+ break;
+ case INV_TYPE_S2_VMID:
+ cmd.tlbi.vmid = cur->id;
+ cmd.tlbi.leaf = leaf;
+ arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size,
+ granule);
+ break;
+ case INV_TYPE_S2_VMID_S1_CLEAR:
+ /* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */
+ if (arm_smmu_inv_size_too_big(cur->smmu, size, granule))
+ break; /* not continue: cur must still advance below */
+ cmd.tlbi.vmid = cur->id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ break;
+ case INV_TYPE_ATS:
+ arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd);
+ cmd.atc.sid = cur->id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ break;
+ case INV_TYPE_ATS_FULL:
+ arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
+ cmd.atc.sid = cur->id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break; /* skip the entry, but still step to the next one */
+ }
+
+ /* Skip any trash entry in-between */
+ for (next = cur + 1; next != end; next++)
+ if (refcount_read(&next->users))
+ break;
+
+ if (cmds.num && (next == end || arm_smmu_invs_end_batch(cur, next))) {
+ arm_smmu_cmdq_batch_submit(smmu, &cmds);
+ cmds.num = 0; /* NOTE(review): assumes submit keeps num; forces re-init */
+ }
+ cur = next;
+ }
+ if (locked)
+ read_unlock(&invs->rwlock);
+ rcu_read_unlock();
+}
+
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
--
2.43.0
© 2016 - 2025 Red Hat, Inc.