[PATCH v5 5/7] iommu/arm-smmu-v3: Populate smmu_domain->invs when attaching masters
Posted by Nicolin Chen 1 month, 1 week ago
Update the invs array with the invalidations required by each domain type
during attachment operations.

Only an SVA domain or a paging domain will have an invs array:
 a. SVA domain will add an INV_TYPE_S1_ASID per SMMU and an INV_TYPE_ATS
    per SID

 b. Non-nesting-parent paging domain with no ATS-enabled master will add
    a single INV_TYPE_S1_ASID or INV_TYPE_S2_VMID per SMMU

 c. Non-nesting-parent paging domain with ATS-enabled master(s) will do
    (b) and add an INV_TYPE_ATS per SID

 d. Nesting-parent paging domain will add an INV_TYPE_S2_VMID followed by
    an INV_TYPE_S2_VMID_S1_CLEAR per vSMMU. For an ATS-enabled master, it
    will add an INV_TYPE_ATS_FULL per SID

 Note that case #d prepares for a future implementation of VMID allocation,
 which requires a follow-up series for S2 domain sharing: when a nesting
 parent domain is attached through a vSMMU instance using a nested domain,
 the VMID will be allocated per vSMMU instance vs. currently per S2 domain.
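
 For example, case #c with an S1 paging domain, a single SMMU, and one
 ATS-enabled master owning two SIDs would conceptually end up with an invs
 array along these lines (a rough sketch reusing the entry layout from the
 earlier patch in this series; the opcode/pgsize fields and the exact sort
 order are omitted here):

	invs->inv[0] = { .smmu = smmu, .type = INV_TYPE_S1_ASID,
			 .id = asid };
	invs->inv[1] = { .smmu = smmu, .type = INV_TYPE_ATS,
			 .id = sid0, .ssid = ssid };
	invs->inv[2] = { .smmu = smmu, .type = INV_TYPE_ATS,
			 .id = sid1, .ssid = ssid };
	invs->num_invs = 3;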

The per-domain invalidation is not needed until the domain is attached to
a master (i.e. when it may start to use the TLB). This will make it possible
to attach the domain to multiple SMMUs and avoid unnecessary invalidation
overhead during teardown if no STEs/CDs refer to the domain. It also means
that when the last device is detached, the old domain must flush its ASID
or VMID, since any new iommu_unmap() call would not trigger invalidations
given an empty domain->invs array.

Introduce some arm_smmu_invs helper functions for building scratch arrays,
preparing and installing old/new domain's invalidation arrays.
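
With these helpers, the attach path sequences the updates as follows (a
simplified sketch of the call flow in this patch; error unwinding and the
re-attach/no-IOTLB cases are omitted):

	arm_smmu_attach_prepare()
	    arm_smmu_attach_prepare_invs()      /* may fail; merged array preallocated */
	    arm_smmu_install_new_domain_invs()  /* before the STE update */
	arm_smmu_install_ste_for_dev()
	arm_smmu_attach_commit()
	    arm_smmu_install_old_domain_invs()  /* after the STE update */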

Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  17 ++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 242 +++++++++++++++++++-
 2 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 7b81a82c0dfe4..5899429a514ab 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -1094,6 +1094,21 @@ static inline bool arm_smmu_master_canwbs(struct arm_smmu_master *master)
 	       IOMMU_FWSPEC_PCI_RC_CANWBS;
 }
 
+/**
+ * struct arm_smmu_inv_state - Per-domain invalidation array state
+ * @invs_ptr: points to the domain->invs (unwinding nesting/etc.) or is NULL if
+ *            no change should be made
+ * @old_invs: the original invs array
+ * @new_invs: for new domain, this is the new invs array to update domin->invs;
+ *            for old domain, this is the master->build_invs to pass in as the
+ *            to_unref argument to an arm_smmu_invs_unref() call
+ */
+struct arm_smmu_inv_state {
+	struct arm_smmu_invs __rcu **invs_ptr;
+	struct arm_smmu_invs *old_invs;
+	struct arm_smmu_invs *new_invs;
+};
+
 struct arm_smmu_attach_state {
 	/* Inputs */
 	struct iommu_domain *old_domain;
@@ -1103,6 +1118,8 @@ struct arm_smmu_attach_state {
 	ioasid_t ssid;
 	/* Resulting state */
 	struct arm_smmu_vmaster *vmaster;
+	struct arm_smmu_inv_state old_domain_invst;
+	struct arm_smmu_inv_state new_domain_invst;
 	bool ats_enabled;
 };
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 26b8492a13f20..22875a951488d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3058,6 +3058,97 @@ static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
 		iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
 }
 
+/*
+ * Use the preallocated scratch array at master->build_invs, to build a to_merge
+ * or to_unref array, to pass into a following arm_smmu_invs_merge/unref() call.
+ *
+ * Do not free the returned invs array. It is reused, and will be overwritten by
+ * the next arm_smmu_master_build_invs() call.
+ */
+static struct arm_smmu_invs *
+arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
+			   ioasid_t ssid, struct arm_smmu_domain *smmu_domain)
+{
+	const bool e2h = master->smmu->features & ARM_SMMU_FEAT_E2H;
+	struct arm_smmu_invs *build_invs = master->build_invs;
+	const bool nesting = smmu_domain->nest_parent;
+	struct arm_smmu_inv *cur;
+
+	iommu_group_mutex_assert(master->dev);
+
+	cur = build_invs->inv;
+
+	switch (smmu_domain->stage) {
+	case ARM_SMMU_DOMAIN_SVA:
+	case ARM_SMMU_DOMAIN_S1:
+		*cur = (struct arm_smmu_inv){
+			.smmu = master->smmu,
+			.type = INV_TYPE_S1_ASID,
+			.id = smmu_domain->cd.asid,
+			.size_opcode = e2h ? CMDQ_OP_TLBI_EL2_VA :
+					     CMDQ_OP_TLBI_NH_VA,
+			.nsize_opcode = e2h ? CMDQ_OP_TLBI_EL2_ASID :
+					      CMDQ_OP_TLBI_NH_ASID
+		};
+		break;
+	case ARM_SMMU_DOMAIN_S2:
+		*cur = (struct arm_smmu_inv){
+			.smmu = master->smmu,
+			.type = INV_TYPE_S2_VMID,
+			.id = smmu_domain->s2_cfg.vmid,
+			.size_opcode = CMDQ_OP_TLBI_S2_IPA,
+			.nsize_opcode = CMDQ_OP_TLBI_S12_VMALL,
+		};
+		break;
+	default:
+		WARN_ON(true);
+		return NULL;
+	}
+
+	/* Range-based invalidation requires the leaf pgsize for calculation */
+	if (master->smmu->features & ARM_SMMU_FEAT_RANGE_INV)
+		cur->pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
+	cur++;
+
+	/* All the nested S1 ASIDs have to be flushed when S2 parent changes */
+	if (nesting) {
+		*cur = (struct arm_smmu_inv){
+			.smmu = master->smmu,
+			.type = INV_TYPE_S2_VMID_S1_CLEAR,
+			.id = smmu_domain->s2_cfg.vmid,
+			.size_opcode = CMDQ_OP_TLBI_NH_ALL,
+			.nsize_opcode = CMDQ_OP_TLBI_NH_ALL,
+		};
+		cur++;
+	}
+
+	if (ats_enabled) {
+		size_t i;
+
+		for (i = 0; i < master->num_streams; i++) {
+			/*
+			 * If an S2 used as a nesting parent is changed we have
+			 * no option but to completely flush the ATC.
+			 */
+			*cur = (struct arm_smmu_inv){
+				.smmu = master->smmu,
+				.type = nesting ? INV_TYPE_ATS_FULL :
+						  INV_TYPE_ATS,
+				.id = master->streams[i].id,
+				.ssid = ssid,
+				.size_opcode = CMDQ_OP_ATC_INV,
+				.nsize_opcode = CMDQ_OP_ATC_INV,
+			};
+			cur++;
+		}
+	}
+
+	/* Note this build_invs must have been sorted */
+
+	build_invs->num_invs = cur - build_invs->inv;
+	return build_invs;
+}
+
 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
 					  struct iommu_domain *domain,
 					  ioasid_t ssid)
@@ -3087,6 +3178,146 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
 	kfree(master_domain);
 }
 
+/*
+ * During attachment, the updates of the two domain->invs arrays are sequenced:
+ *  1. new domain updates its invs array, merging master->build_invs
+ *  2. new domain starts to include the master during its invalidation
+ *  3. master updates its STE switching from the old domain to the new domain
+ *  4. old domain still includes the master during its invalidation
+ *  5. old domain updates its invs array, unreferencing master->build_invs
+ *
+ * For 1 and 5, prepare the two updated arrays in advance, handling any changes
+ * that can possibly fail, so the actual update of either 1 or 5 won't fail.
+ * arm_smmu_asid_lock ensures that the old invs in the domains are intact while
+ * we are sequencing to update them.
+ */
+static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,
+					struct arm_smmu_domain *new_smmu_domain)
+{
+	struct arm_smmu_domain *old_smmu_domain =
+		to_smmu_domain_devices(state->old_domain);
+	struct arm_smmu_master *master = state->master;
+	ioasid_t ssid = state->ssid;
+
+	/* A re-attach case doesn't need to update invs array */
+	if (new_smmu_domain == old_smmu_domain)
+		return 0;
+
+	/*
+	 * At this point a NULL domain indicates the domain doesn't use the
+	 * IOTLB, see to_smmu_domain_devices().
+	 */
+	if (new_smmu_domain) {
+		struct arm_smmu_inv_state *invst = &state->new_domain_invst;
+		struct arm_smmu_invs *build_invs;
+
+		invst->invs_ptr = &new_smmu_domain->invs;
+		invst->old_invs = rcu_dereference_protected(
+			new_smmu_domain->invs,
+			lockdep_is_held(&arm_smmu_asid_lock));
+		build_invs = arm_smmu_master_build_invs(
+			master, state->ats_enabled, ssid, new_smmu_domain);
+		if (!build_invs)
+			return -EINVAL;
+
+		invst->new_invs =
+			arm_smmu_invs_merge(invst->old_invs, build_invs);
+		if (IS_ERR(invst->new_invs))
+			return PTR_ERR(invst->new_invs);
+	}
+
+	if (old_smmu_domain) {
+		struct arm_smmu_inv_state *invst = &state->old_domain_invst;
+
+		invst->invs_ptr = &old_smmu_domain->invs;
+		invst->old_invs = rcu_dereference_protected(
+			old_smmu_domain->invs,
+			lockdep_is_held(&arm_smmu_asid_lock));
+		/* For old_smmu_domain, new_invs points to master->build_invs */
+		invst->new_invs = arm_smmu_master_build_invs(
+			master, master->ats_enabled, ssid, old_smmu_domain);
+	}
+
+	return 0;
+}
+
+/* Must be installed before arm_smmu_install_ste_for_dev() */
+static void
+arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
+{
+	struct arm_smmu_inv_state *invst = &state->new_domain_invst;
+
+	if (!invst->invs_ptr)
+		return;
+
+	rcu_assign_pointer(*invst->invs_ptr, invst->new_invs);
+	/*
+	 * We are committed to updating the STE. Ensure the invalidation array
+	 * is visable to concurrent map/unmap threads, and acquire any racying
+	 * IOPTE updates.
+	 */
+	smp_mb();
+	kfree_rcu(invst->old_invs, rcu);
+}
+
+/*
+ * When an array entry's users count reaches zero, it means the ASID/VMID is no
+ * longer being invalidated by map/unmap and must be cleaned. The rule is that
+ * all ASIDs/VMIDs not in an invalidation array are left cleared in the IOTLB.
+ */
+static void arm_smmu_invs_flush_iotlb_tags(struct arm_smmu_inv *inv)
+{
+	struct arm_smmu_cmdq_ent cmd = {};
+
+	switch (inv->type) {
+	case INV_TYPE_S1_ASID:
+		cmd.tlbi.asid = inv->id;
+		break;
+	case INV_TYPE_S2_VMID:
+		/* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
+		cmd.tlbi.vmid = inv->id;
+		break;
+	default:
+		return;
+	}
+
+	cmd.opcode = inv->nsize_opcode;
+	arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);
+}
+
+/* Should be installed after arm_smmu_install_ste_for_dev() */
+static void
+arm_smmu_install_old_domain_invs(struct arm_smmu_attach_state *state)
+{
+	struct arm_smmu_inv_state *invst = &state->old_domain_invst;
+	struct arm_smmu_invs *old_invs = invst->old_invs;
+	struct arm_smmu_invs *new_invs;
+	size_t num_trashes;
+
+	lockdep_assert_held(&arm_smmu_asid_lock);
+
+	if (!invst->invs_ptr)
+		return;
+
+	num_trashes = arm_smmu_invs_unref(old_invs, invst->new_invs,
+					  arm_smmu_invs_flush_iotlb_tags);
+	if (!num_trashes)
+		return;
+
+	new_invs = arm_smmu_invs_purge(old_invs, num_trashes);
+	if (!new_invs)
+		return;
+
+	rcu_assign_pointer(*invst->invs_ptr, new_invs);
+	/*
+	 * We are committed to updating the STE. Ensure the invalidation array
+	 * is visible to concurrent map/unmap threads, and acquire any racing
+	 * IOPTE updates.
+	 */
+	smp_mb();
+	kfree_rcu(old_invs, rcu);
+}
+
 /*
  * Start the sequence to attach a domain to a master. The sequence contains three
  * steps:
@@ -3144,12 +3375,16 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
 				     arm_smmu_ats_supported(master);
 	}
 
+	ret = arm_smmu_attach_prepare_invs(state, smmu_domain);
+	if (ret)
+		return ret;
+
 	if (smmu_domain) {
 		if (new_domain->type == IOMMU_DOMAIN_NESTED) {
 			ret = arm_smmu_attach_prepare_vmaster(
 				state, to_smmu_nested_domain(new_domain));
 			if (ret)
-				return ret;
+				goto err_unprepare_invs;
 		}
 
 		master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
@@ -3197,6 +3432,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
 			atomic_inc(&smmu_domain->nr_ats_masters);
 		list_add(&master_domain->devices_elm, &smmu_domain->devices);
 		spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+		arm_smmu_install_new_domain_invs(state);
 	}
 
 	if (!state->ats_enabled && master->ats_enabled) {
@@ -3216,6 +3453,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
 	kfree(master_domain);
 err_free_vmaster:
 	kfree(state->vmaster);
+err_unprepare_invs:
+	kfree(state->new_domain_invst.new_invs);
 	return ret;
 }
 
@@ -3247,6 +3486,7 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
 	}
 
 	arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
+	arm_smmu_install_old_domain_invs(state);
 	master->ats_enabled = state->ats_enabled;
 }
 
-- 
2.43.0
Re: [PATCH v5 5/7] iommu/arm-smmu-v3: Populate smmu_domain->invs when attaching masters
Posted by Will Deacon 3 weeks, 4 days ago
On Sat, Nov 08, 2025 at 12:08:06AM -0800, Nicolin Chen wrote:
> Update the invs array with the invalidations required by each domain type
> during attachment operations.
> 
> Only an SVA domain or a paging domain will have an invs array:
>  a. SVA domain will add an INV_TYPE_S1_ASID per SMMU and an INV_TYPE_ATS
>     per SID
> 
>  b. Non-nesting-parent paging domain with no ATS-enabled master will add
>     a single INV_TYPE_S1_ASID or INV_TYPE_S2_VMID per SMMU
> 
>  c. Non-nesting-parent paging domain with ATS-enabled master(s) will do
>     (b) and add an INV_TYPE_ATS per SID
> 
>  d. Nesting-parent paging domain will add an INV_TYPE_S2_VMID followed by
>     an INV_TYPE_S2_VMID_S1_CLEAR per vSMMU. For an ATS-enabled master, it
>     will add an INV_TYPE_ATS_FULL per SID
> 
>  Note that case #d prepares for a future implementation of VMID allocation
>  which requires a followup series for S2 domain sharing. So when a nesting
>  parent domain is attached through a vSMMU instance using a nested domain.
>  VMID will be allocated per vSMMU instance v.s. currectly per S2 domain.
> 
> The per-domain invalidation is not needed until the domain is attached to
> a master (when it starts to possibly use TLB). This will make it possible
> to attach the domain to multiple SMMUs and avoid unnecessary invalidation
> overhead during teardown if no STEs/CDs refer to the domain. It also means
> that when the last device is detached, the old domain must flush its ASID
> or VMID, since any new iommu_unmap() call would not trigger invalidations
> given an empty domain->invs array.
> 
> Introduce some arm_smmu_invs helper functions for building scratch arrays,
> preparing and installing old/new domain's invalidation arrays.
> 
> Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  17 ++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 242 +++++++++++++++++++-
>  2 files changed, 258 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 7b81a82c0dfe4..5899429a514ab 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -1094,6 +1094,21 @@ static inline bool arm_smmu_master_canwbs(struct arm_smmu_master *master)
>  	       IOMMU_FWSPEC_PCI_RC_CANWBS;
>  }
>  
> +/**
> + * struct arm_smmu_inv_state - Per-domain invalidation array state
> + * @invs_ptr: points to the domain->invs (unwinding nesting/etc.) or is NULL if
> + *            no change should be made
> + * @old_invs: the original invs array
> + * @new_invs: for new domain, this is the new invs array to update domin->invs;

nit: typo "domin"

> + *            for old domain, this is the master->build_invs to pass in as the
> + *            to_unref argument to an arm_smmu_invs_unref() call
> + */
> +struct arm_smmu_inv_state {
> +	struct arm_smmu_invs __rcu **invs_ptr;
> +	struct arm_smmu_invs *old_invs;
> +	struct arm_smmu_invs *new_invs;
> +};
> +
>  struct arm_smmu_attach_state {
>  	/* Inputs */
>  	struct iommu_domain *old_domain;
> @@ -1103,6 +1118,8 @@ struct arm_smmu_attach_state {
>  	ioasid_t ssid;
>  	/* Resulting state */
>  	struct arm_smmu_vmaster *vmaster;
> +	struct arm_smmu_inv_state old_domain_invst;
> +	struct arm_smmu_inv_state new_domain_invst;
>  	bool ats_enabled;
>  };
>  
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 26b8492a13f20..22875a951488d 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -3058,6 +3058,97 @@ static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
>  		iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
>  }
>  
> +/*
> + * Use the preallocated scratch array at master->build_invs, to build a to_merge
> + * or to_unref array, to pass into a following arm_smmu_invs_merge/unref() call.
> + *
> + * Do not free the returned invs array. It is reused, and will be overwritten by
> + * the next arm_smmu_master_build_invs() call.
> + */
> +static struct arm_smmu_invs *
> +arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
> +			   ioasid_t ssid, struct arm_smmu_domain *smmu_domain)
> +{
> +	const bool e2h = master->smmu->features & ARM_SMMU_FEAT_E2H;
> +	struct arm_smmu_invs *build_invs = master->build_invs;
> +	const bool nesting = smmu_domain->nest_parent;
> +	struct arm_smmu_inv *cur;
> +
> +	iommu_group_mutex_assert(master->dev);
> +
> +	cur = build_invs->inv;
> +
> +	switch (smmu_domain->stage) {
> +	case ARM_SMMU_DOMAIN_SVA:
> +	case ARM_SMMU_DOMAIN_S1:
> +		*cur = (struct arm_smmu_inv){
> +			.smmu = master->smmu,
> +			.type = INV_TYPE_S1_ASID,
> +			.id = smmu_domain->cd.asid,
> +			.size_opcode = e2h ? CMDQ_OP_TLBI_EL2_VA :
> +					     CMDQ_OP_TLBI_NH_VA,
> +			.nsize_opcode = e2h ? CMDQ_OP_TLBI_EL2_ASID :
> +					      CMDQ_OP_TLBI_NH_ASID
> +		};
> +		break;
> +	case ARM_SMMU_DOMAIN_S2:
> +		*cur = (struct arm_smmu_inv){
> +			.smmu = master->smmu,
> +			.type = INV_TYPE_S2_VMID,
> +			.id = smmu_domain->s2_cfg.vmid,
> +			.size_opcode = CMDQ_OP_TLBI_S2_IPA,
> +			.nsize_opcode = CMDQ_OP_TLBI_S12_VMALL,
> +		};
> +		break;

Having a helper to add an invalidation command would make this a little
more compact and you could also check against the size of the array.

> +	default:
> +		WARN_ON(true);
> +		return NULL;
> +	}
> +
> +	/* Range-based invalidation requires the leaf pgsize for calculation */
> +	if (master->smmu->features & ARM_SMMU_FEAT_RANGE_INV)
> +		cur->pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
> +	cur++;
> +
> +	/* All the nested S1 ASIDs have to be flushed when S2 parent changes */
> +	if (nesting) {
> +		*cur = (struct arm_smmu_inv){
> +			.smmu = master->smmu,
> +			.type = INV_TYPE_S2_VMID_S1_CLEAR,
> +			.id = smmu_domain->s2_cfg.vmid,
> +			.size_opcode = CMDQ_OP_TLBI_NH_ALL,
> +			.nsize_opcode = CMDQ_OP_TLBI_NH_ALL,
> +		};
> +		cur++;
> +	}
> +
> +	if (ats_enabled) {
> +		size_t i;
> +
> +		for (i = 0; i < master->num_streams; i++) {
> +			/*
> +			 * If an S2 used as a nesting parent is changed we have
> +			 * no option but to completely flush the ATC.
> +			 */
> +			*cur = (struct arm_smmu_inv){
> +				.smmu = master->smmu,
> +				.type = nesting ? INV_TYPE_ATS_FULL :
> +						  INV_TYPE_ATS,
> +				.id = master->streams[i].id,
> +				.ssid = ssid,
> +				.size_opcode = CMDQ_OP_ATC_INV,
> +				.nsize_opcode = CMDQ_OP_ATC_INV,
> +			};
> +			cur++;
> +		}
> +	}
> +
> +	/* Note this build_invs must have been sorted */
> +
> +	build_invs->num_invs = cur - build_invs->inv;
> +	return build_invs;
> +}
> +
>  static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
>  					  struct iommu_domain *domain,
>  					  ioasid_t ssid)
> @@ -3087,6 +3178,146 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
>  	kfree(master_domain);
>  }
>  
> +/*
> + * During attachment, the updates of the two domain->invs arrays are sequenced:
> + *  1. new domain updates its invs array, merging master->build_invs
> + *  2. new domain starts to include the master during its invalidation
> + *  3. master updates its STE switching from the old domain to the new domain
> + *  4. old domain still includes the master during its invalidation
> + *  5. old domain updates its invs array, unreferencing master->build_invs
> + *
> + * For 1 and 5, prepare the two updated arrays in advance, handling any changes
> + * that can possibly failure. So the actual update of either 1 or 5 won't fail.
> + * arm_smmu_asid_lock ensures that the old invs in the domains are intact while
> + * we are sequencing to update them.
> + */
> +static int arm_smmu_attach_prepare_invs(struct arm_smmu_attach_state *state,
> +					struct arm_smmu_domain *new_smmu_domain)
> +{
> +	struct arm_smmu_domain *old_smmu_domain =
> +		to_smmu_domain_devices(state->old_domain);
> +	struct arm_smmu_master *master = state->master;
> +	ioasid_t ssid = state->ssid;
> +
> +	/* A re-attach case doesn't need to update invs array */
> +	if (new_smmu_domain == old_smmu_domain)
> +		return 0;
> +
> +	/*
> +	 * At this point a NULL domain indicates the domain doesn't use the
> +	 * IOTLB, see to_smmu_domain_devices().
> +	 */
> +	if (new_smmu_domain) {
> +		struct arm_smmu_inv_state *invst = &state->new_domain_invst;
> +		struct arm_smmu_invs *build_invs;
> +
> +		invst->invs_ptr = &new_smmu_domain->invs;
> +		invst->old_invs = rcu_dereference_protected(
> +			new_smmu_domain->invs,
> +			lockdep_is_held(&arm_smmu_asid_lock));
> +		build_invs = arm_smmu_master_build_invs(
> +			master, state->ats_enabled, ssid, new_smmu_domain);
> +		if (!build_invs)
> +			return -EINVAL;
> +
> +		invst->new_invs =
> +			arm_smmu_invs_merge(invst->old_invs, build_invs);
> +		if (IS_ERR(invst->new_invs))
> +			return PTR_ERR(invst->new_invs);
> +	}
> +
> +	if (old_smmu_domain) {
> +		struct arm_smmu_inv_state *invst = &state->old_domain_invst;
> +
> +		invst->invs_ptr = &old_smmu_domain->invs;
> +		invst->old_invs = rcu_dereference_protected(
> +			old_smmu_domain->invs,
> +			lockdep_is_held(&arm_smmu_asid_lock));
> +		/* For old_smmu_domain, new_invs points to master->build_invs */
> +		invst->new_invs = arm_smmu_master_build_invs(
> +			master, master->ats_enabled, ssid, old_smmu_domain);
> +	}
> +
> +	return 0;
> +}
> +
> +/* Must be installed before arm_smmu_install_ste_for_dev() */
> +static void
> +arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
> +{
> +	struct arm_smmu_inv_state *invst = &state->new_domain_invst;
> +
> +	if (!invst->invs_ptr)
> +		return;
> +
> +	rcu_assign_pointer(*invst->invs_ptr, invst->new_invs);
> +	/*
> +	 * We are committed to updating the STE. Ensure the invalidation array
> +	 * is visable to concurrent map/unmap threads, and acquire any racying
> +	 * IOPTE updates.
> +	 */
> +	smp_mb();

Sorry, but the comment hasn't really helped me here. We're ordering the
publishing of the invalidation array above before ... what?

> +	kfree_rcu(invst->old_invs, rcu);
> +}
> +
> +/*
> + * When an array entry's users count reaches zero, it means the ASID/VMID is no
> + * longer being invalidated by map/unmap and must be cleaned. The rule is that
> + * all ASIDs/VMIDs not in an invalidation array are left cleared in the IOTLB.
> + */
> +static void arm_smmu_invs_flush_iotlb_tags(struct arm_smmu_inv *inv)
> +{
> +	struct arm_smmu_cmdq_ent cmd = {};
> +
> +	switch (inv->type) {
> +	case INV_TYPE_S1_ASID:
> +		cmd.tlbi.asid = inv->id;
> +		break;
> +	case INV_TYPE_S2_VMID:
> +		/* S2_VMID using nsize_opcode covers S2_VMID_S1_CLEAR */
> +		cmd.tlbi.vmid = inv->id;
> +		break;
> +	default:
> +		return;
> +	}
> +
> +	cmd.opcode = inv->nsize_opcode;
> +	arm_smmu_cmdq_issue_cmd_with_sync(inv->smmu, &cmd);
> +}
> +
> +/* Should be installed after arm_smmu_install_ste_for_dev() */
> +static void
> +arm_smmu_install_old_domain_invs(struct arm_smmu_attach_state *state)
> +{
> +	struct arm_smmu_inv_state *invst = &state->old_domain_invst;
> +	struct arm_smmu_invs *old_invs = invst->old_invs;
> +	struct arm_smmu_invs *new_invs;
> +	size_t num_trashes;
> +
> +	lockdep_assert_held(&arm_smmu_asid_lock);
> +
> +	if (!invst->invs_ptr)
> +		return;
> +
> +	num_trashes = arm_smmu_invs_unref(old_invs, invst->new_invs,
> +					  arm_smmu_invs_flush_iotlb_tags);
> +	if (!num_trashes)
> +		return;
> +
> +	new_invs = arm_smmu_invs_purge(old_invs, num_trashes);
> +	if (!new_invs)
> +		return;
> +
> +	rcu_assign_pointer(*invst->invs_ptr, new_invs);
> +	/*
> +	 * We are committed to updating the STE. Ensure the invalidation array
> +	 * is visable to concurrent map/unmap threads, and acquire any racying
> +	 * IOPTE updates.
> +	 */
> +	smp_mb();

(same here)

Will
Re: [PATCH v5 5/7] iommu/arm-smmu-v3: Populate smmu_domain->invs when attaching masters
Posted by Jason Gunthorpe 3 weeks, 3 days ago
On Mon, Nov 24, 2025 at 09:43:18PM +0000, Will Deacon wrote:
> > +	switch (smmu_domain->stage) {
> > +	case ARM_SMMU_DOMAIN_SVA:
> > +	case ARM_SMMU_DOMAIN_S1:
> > +		*cur = (struct arm_smmu_inv){
> > +			.smmu = master->smmu,
> > +			.type = INV_TYPE_S1_ASID,
> > +			.id = smmu_domain->cd.asid,
> > +			.size_opcode = e2h ? CMDQ_OP_TLBI_EL2_VA :
> > +					     CMDQ_OP_TLBI_NH_VA,
> > +			.nsize_opcode = e2h ? CMDQ_OP_TLBI_EL2_ASID :
> > +					      CMDQ_OP_TLBI_NH_ASID
> > +		};
> > +		break;
> > +	case ARM_SMMU_DOMAIN_S2:
> > +		*cur = (struct arm_smmu_inv){
> > +			.smmu = master->smmu,
> > +			.type = INV_TYPE_S2_VMID,
> > +			.id = smmu_domain->s2_cfg.vmid,
> > +			.size_opcode = CMDQ_OP_TLBI_S2_IPA,
> > +			.nsize_opcode = CMDQ_OP_TLBI_S12_VMALL,
> > +		};
> > +		break;
> 
> Having a helper to add an invalidation command would make this a little
> more compact and you could also check against the size of the array.

Yeah but it makes all the parameters positional instead of nicely
named..

> > +/* Must be installed before arm_smmu_install_ste_for_dev() */
> > +static void
> > +arm_smmu_install_new_domain_invs(struct arm_smmu_attach_state *state)
> > +{
> > +	struct arm_smmu_inv_state *invst = &state->new_domain_invst;
> > +
> > +	if (!invst->invs_ptr)
> > +		return;
> > +
> > +	rcu_assign_pointer(*invst->invs_ptr, invst->new_invs);
> > +	/*
> > +	 * We are committed to updating the STE. Ensure the invalidation array
> > +	 * is visable to concurrent map/unmap threads, and acquire any racying
> > +	 * IOPTE updates.
> > +	 */
> > +	smp_mb();
> 
> Sorry, but the comment hasn't really helped me here. We're ordering the
> publishing of the invalidation array above before ... what?

"concurrent map/unmap threads" means other threads calling
arm_smmu_iotlb_sync().. I think the matching comment describing this
and introducing the pair'd smp_mb() is in a later patch.

Either the HW observes the latest page table and needs no invalidation
or arm_smmu_iotlb_sync observes the invalidation list and issues
invalidation.

The smp_mb() is intended to make one of the two statements true prior
to storing the STE.
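
Roughly, the intended pairing is (just a sketch; the unmapper side and its
barrier/comment only show up in the later patch):

	attacher (this patch):
		rcu_assign_pointer(domain->invs, new_invs);
		smp_mb();		/* pairs with the one below */
		store the STE;

	unmapper (later patch):
		update the IOPTEs;
		smp_mb();		/* pairs with the one above */
		invs = rcu_dereference(domain->invs);
		issue the invalidations listed in invs;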

Nicolin maybe this barrier line should be moved to be added in the
patch that adds the pair'd barrier and description?

Jason
Re: [PATCH v5 5/7] iommu/arm-smmu-v3: Populate smmu_domain->invs when attaching masters
Posted by Nicolin Chen 3 weeks, 2 days ago
On Mon, Nov 24, 2025 at 07:13:41PM -0400, Jason Gunthorpe wrote:
> On Mon, Nov 24, 2025 at 09:43:18PM +0000, Will Deacon wrote:
> > > +	switch (smmu_domain->stage) {
> > > +	case ARM_SMMU_DOMAIN_SVA:
> > > +	case ARM_SMMU_DOMAIN_S1:
> > > +		*cur = (struct arm_smmu_inv){
> > > +			.smmu = master->smmu,
> > > +			.type = INV_TYPE_S1_ASID,
> > > +			.id = smmu_domain->cd.asid,
> > > +			.size_opcode = e2h ? CMDQ_OP_TLBI_EL2_VA :
> > > +					     CMDQ_OP_TLBI_NH_VA,
> > > +			.nsize_opcode = e2h ? CMDQ_OP_TLBI_EL2_ASID :
> > > +					      CMDQ_OP_TLBI_NH_ASID
> > > +		};
> > > +		break;
> > > +	case ARM_SMMU_DOMAIN_S2:
> > > +		*cur = (struct arm_smmu_inv){
> > > +			.smmu = master->smmu,
> > > +			.type = INV_TYPE_S2_VMID,
> > > +			.id = smmu_domain->s2_cfg.vmid,
> > > +			.size_opcode = CMDQ_OP_TLBI_S2_IPA,
> > > +			.nsize_opcode = CMDQ_OP_TLBI_S12_VMALL,
> > > +		};
> > > +		break;
> > 
> > Having a helper to add an invalidation command would make this a little
> > more compact and you could also check against the size of the array.
> 
> Yeah but it makes all the parameters positional instead of nicely
> named..

Will's remarks did raise an issue that __counted_by() requires a
max_invs vs. num_invs that can be smaller due to the removal of
trailing trash entries.

Then, it's probably nicer to add a check against the max_invs.

I did the following, which doesn't look too bad to me:

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b04936e76cfd..28765febf99f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3066,6 +3066,52 @@ static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
 		iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
 }
 
+static struct arm_smmu_inv *
+arm_smmu_master_build_inv(struct arm_smmu_master *master,
+			  enum arm_smmu_inv_type type, u32 id, ioasid_t ssid,
+			  size_t pgsize)
+{
+	struct arm_smmu_invs *build_invs = master->build_invs;
+	struct arm_smmu_inv *cur, inv = {
+		.smmu = master->smmu,
+		.type = type,
+		.id = id,
+		.ssid = ssid,
+		.pgsize = pgsize,
+	};
+
+	if (WARN_ON(build_invs->num_invs >= build_invs->max_invs))
+		return NULL;
+	cur = &build_invs->inv[build_invs->num_invs];
+	build_invs->num_invs++;
+
+	*cur = inv;
+	switch (type) {
+	case INV_TYPE_S1_ASID:
+		if (master->smmu->features & ARM_SMMU_FEAT_E2H) {
+			cur->size_opcode = CMDQ_OP_TLBI_EL2_VA;
+			cur->nsize_opcode = CMDQ_OP_TLBI_EL2_ASID;
+		} else {
+			cur->size_opcode = CMDQ_OP_TLBI_NH_VA;
+			cur->nsize_opcode = CMDQ_OP_TLBI_NH_ASID;
+		}
+		break;
+	case INV_TYPE_S2_VMID:
+		cur->size_opcode = CMDQ_OP_TLBI_S2_IPA;
+		cur->nsize_opcode = CMDQ_OP_TLBI_S12_VMALL;
+		break;
+	case INV_TYPE_S2_VMID_S1_CLEAR:
+		cur->size_opcode = cur->nsize_opcode = CMDQ_OP_TLBI_NH_ALL;
+		break;
+	case INV_TYPE_ATS:
+	case INV_TYPE_ATS_FULL:
+		cur->size_opcode = cur->nsize_opcode = CMDQ_OP_ATC_INV;
+		break;
+	}
+
+	return cur;
+}
+
 /*
  * Use the preallocated scratch array at master->build_invs, to build a to_merge
  * or to_unref array, to pass into a following arm_smmu_invs_merge/unref() call.
@@ -3077,84 +3122,58 @@ static struct arm_smmu_invs *
 arm_smmu_master_build_invs(struct arm_smmu_master *master, bool ats_enabled,
 			   ioasid_t ssid, struct arm_smmu_domain *smmu_domain)
 {
-	const bool e2h = master->smmu->features & ARM_SMMU_FEAT_E2H;
-	struct arm_smmu_invs *build_invs = master->build_invs;
 	const bool nesting = smmu_domain->nest_parent;
-	struct arm_smmu_inv *cur;
+	size_t pgsize = 0, i;
 
 	iommu_group_mutex_assert(master->dev);
 
-	cur = build_invs->inv;
+	master->build_invs->num_invs = 0;
+
+	/* Range-based invalidation requires the leaf pgsize for calculation */
+	if (master->smmu->features & ARM_SMMU_FEAT_RANGE_INV)
+		pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
 
 	switch (smmu_domain->stage) {
 	case ARM_SMMU_DOMAIN_SVA:
 	case ARM_SMMU_DOMAIN_S1:
-		*cur = (struct arm_smmu_inv){
-			.smmu = master->smmu,
-			.type = INV_TYPE_S1_ASID,
-			.id = smmu_domain->cd.asid,
-			.size_opcode = e2h ? CMDQ_OP_TLBI_EL2_VA :
-					     CMDQ_OP_TLBI_NH_VA,
-			.nsize_opcode = e2h ? CMDQ_OP_TLBI_EL2_ASID :
-					      CMDQ_OP_TLBI_NH_ASID
-		};
+		if (!arm_smmu_master_build_inv(master, INV_TYPE_S1_ASID,
+					       smmu_domain->cd.asid,
+					       IOMMU_NO_PASID, pgsize))
+			return NULL;
 		break;
 	case ARM_SMMU_DOMAIN_S2:
-		*cur = (struct arm_smmu_inv){
-			.smmu = master->smmu,
-			.type = INV_TYPE_S2_VMID,
-			.id = smmu_domain->s2_cfg.vmid,
-			.size_opcode = CMDQ_OP_TLBI_S2_IPA,
-			.nsize_opcode = CMDQ_OP_TLBI_S12_VMALL,
-		};
+		if (!arm_smmu_master_build_inv(master, INV_TYPE_S2_VMID,
+					       smmu_domain->s2_cfg.vmid,
+					       IOMMU_NO_PASID, pgsize))
+			return NULL;
 		break;
 	default:
 		WARN_ON(true);
 		return NULL;
 	}
 
-	/* Range-based invalidation requires the leaf pgsize for calculation */
-	if (master->smmu->features & ARM_SMMU_FEAT_RANGE_INV)
-		cur->pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
-	cur++;
-
 	/* All the nested S1 ASIDs have to be flushed when S2 parent changes */
 	if (nesting) {
-		*cur = (struct arm_smmu_inv){
-			.smmu = master->smmu,
-			.type = INV_TYPE_S2_VMID_S1_CLEAR,
-			.id = smmu_domain->s2_cfg.vmid,
-			.size_opcode = CMDQ_OP_TLBI_NH_ALL,
-			.nsize_opcode = CMDQ_OP_TLBI_NH_ALL,
-		};
-		cur++;
+		if (!arm_smmu_master_build_inv(
+			    master, INV_TYPE_S2_VMID_S1_CLEAR,
+			    smmu_domain->s2_cfg.vmid, IOMMU_NO_PASID, 0))
+			return NULL;
 	}
 
-	if (ats_enabled) {
-		size_t i;
-
-		for (i = 0; i < master->num_streams; i++) {
-			/*
-			 * If an S2 used as a nesting parent is changed we have
-			 * no option but to completely flush the ATC.
-			 */
-			*cur = (struct arm_smmu_inv){
-				.smmu = master->smmu,
-				.type = nesting ? INV_TYPE_ATS_FULL :
-						  INV_TYPE_ATS,
-				.id = master->streams[i].id,
-				.ssid = ssid,
-				.size_opcode = CMDQ_OP_ATC_INV,
-				.nsize_opcode = CMDQ_OP_ATC_INV,
-			};
-			cur++;
-		}
+	for (i = 0; ats_enabled && i < master->num_streams; i++) {
+		/*
+		 * If an S2 used as a nesting parent is changed we have no
+		 * option but to completely flush the ATC.
+		 */
+		if (!arm_smmu_master_build_inv(
+			    master, nesting ? INV_TYPE_ATS_FULL : INV_TYPE_ATS,
+			    master->streams[i].id, ssid, 0))
+			return NULL;
 	}
 
 	/* Note this build_invs must have been sorted */
 
-	build_invs->num_invs = cur - build_invs->inv;
-	return build_invs;
+	return master->build_invs;
 }
 
 static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,

Thanks
Nicolin
Re: [PATCH v5 5/7] iommu/arm-smmu-v3: Populate smmu_domain->invs when attaching masters
Posted by Nicolin Chen 3 weeks, 3 days ago
On Mon, Nov 24, 2025 at 07:13:41PM -0400, Jason Gunthorpe wrote:
> On Mon, Nov 24, 2025 at 09:43:18PM +0000, Will Deacon wrote:
> > > +	/*
> > > +	 * We are committed to updating the STE. Ensure the invalidation array
> > > +	 * is visable to concurrent map/unmap threads, and acquire any racying
> > > +	 * IOPTE updates.
> > > +	 */
> > > +	smp_mb();
> > 
> > Sorry, but the comment hasn't really helped me here. We're ordering the
> > publishing of the invalidation array above before ... what?
> 
> "concurrent map/unmap threads" means other threads calling
> arm_smmu_iotlb_sync().. I think the matching comment describing this
> and introducing the pair'd smp_mb() is in a later patch.
... 
> Nicolin maybe this barrier line should be moved to be added in the
> patch that add's the pair'd barrier and description?

Sure. I can move that to PATCH-6.

Thanks
Nicolin