[PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array

Posted by Nicolin Chen 1 month, 3 weeks ago
From: Jason Gunthorpe <jgg@nvidia.com>

Create a new data structure to hold an array of invalidations that need to
be performed for the domain, based on which masters are attached, replacing
the single smmu pointer and linked list of masters in the current design.

Each array entry holds one of the invalidation actions (S1_ASID, S2_VMID,
ATS, or a variant of them) along with the information needed to build the
invalidation commands for the HW. It is structured so that multiple SMMUs
can participate in the same array, removing one key limitation of the
current design.

To maximize performance, a sorted array is used as the data structure. It
allows invalidations to be batched under fewer SYNCs. For instance, all the
ATS entries are grouped after the ASID/VMID entry, so they can all be pushed
to the PCI devices in parallel and completed with one SYNC.
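
For example, a domain with one S1 ASID and two ATS-capable SIDs on the same
SMMU would end up with an array roughly like (illustrative only, the other
fields are omitted):

	inv[0] = { .type = INV_TYPE_S1_ASID, .id = asid };
	inv[1] = { .type = INV_TYPE_ATS,     .id = sid0 };
	inv[2] = { .type = INV_TYPE_ATS,     .id = sid1 };

One TLBI plus SYNC then covers the ASID entry, and both ATC invalidations
can be issued together and completed with a single trailing SYNC.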

To minimize the locking cost on the invalidation fast path (reader of the
invalidation array), the array is managed with RCU.
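
As a rough sketch, the intended reader side looks like this (the actual
invalidation loop is added by a later patch; the loop body here is only
illustrative):

	struct arm_smmu_invs *invs;
	size_t i;

	rcu_read_lock();
	invs = rcu_dereference(smmu_domain->invs);
	for (i = 0; i != invs->num_invs; i++) {
		struct arm_smmu_inv *inv = &invs->inv[i];

		/* entries with users == 0 are trash awaiting removal */
		if (!refcount_read(&inv->users))
			continue;
		/* build and queue the invalidation described by inv */
	}
	rcu_read_unlock();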

Provide a set of APIs to add/delete entries to/from an array, including a
special no-fail helper for cases that cannot fail, e.g. attaching to
arm_smmu_blocked_domain. Also, add kunit coverage for these APIs.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  79 ++++++
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c  |  85 ++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 266 ++++++++++++++++++
 3 files changed, 430 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 96a23ca633cb6..d7421b56e3598 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -649,6 +649,82 @@ struct arm_smmu_cmdq_batch {
 	int				num;
 };
 
+enum arm_smmu_inv_type {
+	INV_TYPE_S1_ASID,
+	INV_TYPE_S2_VMID,
+	INV_TYPE_S2_VMID_S1_CLEAR,
+	INV_TYPE_ATS,
+	INV_TYPE_ATS_FULL,
+};
+
+struct arm_smmu_inv {
+	/* invalidation items */
+	struct arm_smmu_device *smmu;
+	u8 type;
+	u8 size_opcode;
+	u8 nsize_opcode;
+	u32 id; /* ASID or VMID or SID */
+	union {
+		size_t pgsize; /* ARM_SMMU_FEAT_RANGE_INV */
+		u32 ssid; /* INV_TYPE_ATS */
+	};
+
+	/* infrastructure items */
+	refcount_t users; /* users==0 marks the entry as trash */
+	bool todel : 1; /* set for a pending deletion */
+};
+
+/**
+ * struct arm_smmu_invs - Per-domain invalidation array
+ * @num_invs: number of invalidations in the @inv array
+ * @rwlock: Optional rwlock to fence arm_smmu_invs_dec()
+ * @rcu: rcu head for kfree_rcu()
+ * @inv: flexible invalidation array
+ *
+ * struct arm_smmu_invs is an RCU-managed data structure. During a ->attach_dev
+ * callback, arm_smmu_invs_add() and arm_smmu_invs_del() are used to allocate a
+ * new copy of the old array with the requested entries added or deleted.
+ *
+ * arm_smmu_invs_dec() is a special function that mutates a given array in
+ * place, by internally decrementing the users counts of the given entries. It
+ * exists to support no-fail routines like attaching to an IOMMU_DOMAIN_BLOCKED
+ * domain. Use it carefully, as the extra rwlock can impact performance.
+ *
+ * Concurrent invalidation threads push all the invalidations described in the
+ * array into the command queue for each invalidation event. It is designed
+ * like this to optimize the invalidation fast path by avoiding locks.
+ *
+ * A domain can be shared across SMMU instances. When an instance is removed,
+ * all the entries that belong to that SMMU instance are deleted. Then, a
+ * synchronize_rcu() has to be called to sync the array, to prevent any
+ * concurrent invalidation thread still accessing the old array from issuing
+ * commands to the command queue of the removed SMMU instance.
+ */
+struct arm_smmu_invs {
+	size_t num_invs;
+	rwlock_t rwlock;
+	struct rcu_head rcu;
+	struct arm_smmu_inv inv[];
+};
+
+static inline struct arm_smmu_invs *arm_smmu_invs_alloc(size_t num_invs)
+{
+	struct arm_smmu_invs *new_invs;
+
+	new_invs = kzalloc(struct_size(new_invs, inv, num_invs), GFP_KERNEL);
+	if (!new_invs)
+		return ERR_PTR(-ENOMEM);
+	rwlock_init(&new_invs->rwlock);
+	return new_invs;
+}
+
+struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *old_invs,
+					struct arm_smmu_invs *add_invs);
+struct arm_smmu_invs *arm_smmu_invs_del(struct arm_smmu_invs *old_invs,
+					struct arm_smmu_invs *del_invs);
+size_t arm_smmu_invs_dec(struct arm_smmu_invs *invs,
+			 struct arm_smmu_invs *dec_invs);
+
 struct arm_smmu_evtq {
 	struct arm_smmu_queue		q;
 	struct iopf_queue		*iopf;
@@ -875,6 +951,8 @@ struct arm_smmu_domain {
 
 	struct iommu_domain		domain;
 
+	struct arm_smmu_invs		*invs;
+
 	/* List of struct arm_smmu_master_domain */
 	struct list_head		devices;
 	spinlock_t			devices_lock;
@@ -956,6 +1034,7 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void);
 
 static inline void arm_smmu_domain_free(struct arm_smmu_domain *smmu_domain)
 {
+	kfree_rcu(smmu_domain->invs, rcu);
 	kfree(smmu_domain);
 }
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
index d2671bfd37981..2008a4b55ef70 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
@@ -567,6 +567,90 @@ static void arm_smmu_v3_write_cd_test_sva_release(struct kunit *test)
 						      NUM_EXPECTED_SYNCS(2));
 }
 
+static void arm_smmu_v3_invs_test_verify(struct kunit *test,
+					 struct arm_smmu_invs *invs, int num,
+					 const int *ids, const int *users)
+{
+	KUNIT_EXPECT_EQ(test, invs->num_invs, num);
+	while (num--) {
+		KUNIT_EXPECT_EQ(test, invs->inv[num].id, ids[num]);
+		KUNIT_EXPECT_EQ(test, refcount_read(&invs->inv[num].users),
+				users[num]);
+	}
+}
+
+static struct arm_smmu_invs invs1 = {
+	.num_invs = 3,
+	.inv = { { .type = INV_TYPE_S2_VMID, .id = 1, },
+		 { .type = INV_TYPE_S2_VMID, .id = 2, },
+		 { .type = INV_TYPE_S2_VMID, .id = 3, }, },
+};
+
+static struct arm_smmu_invs invs2 = {
+	.num_invs = 3,
+	.inv = { { .type = INV_TYPE_S2_VMID, .id = 1, }, /* duplicated */
+		 { .type = INV_TYPE_ATS, .id = 5, },
+		 { .type = INV_TYPE_ATS, .id = 4, }, },
+};
+
+static struct arm_smmu_invs invs3 = {
+	.num_invs = 3,
+	.inv = { { .type = INV_TYPE_S2_VMID, .id = 1, }, /* duplicated */
+		 { .type = INV_TYPE_ATS, .id = 7, },
+		 { .type = INV_TYPE_ATS, .id = 6, }, },
+};
+
+static void arm_smmu_v3_invs_test(struct kunit *test)
+{
+	const int results1[2][3] = { { 1, 2, 3, }, { 1, 1, 1, }, };
+	const int results2[2][5] = { { 1, 2, 3, 4, 5, }, { 2, 1, 1, 1, 1, }, };
+	const int results3[2][5] = { { 1, 2, 3, 4, 5, }, { 1, 1, 1, 0, 0, }, };
+	const int results4[2][5] = { { 1, 2, 3, 6, 7, }, { 2, 1, 1, 1, 1, }, };
+	const int results5[2][5] = { { 1, 2, 3, 6, 7, }, { 1, 0, 0, 1, 1, }, };
+	struct arm_smmu_invs *test_a, *test_b;
+	size_t num_invs;
+
+	/* New array */
+	test_a = arm_smmu_invs_alloc(0);
+	KUNIT_EXPECT_EQ(test, test_a->num_invs, 0);
+
+	/* Test1: add invs1 (new array) */
+	test_b = arm_smmu_invs_add(test_a, &invs1);
+	arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results1[0]),
+				     results1[0], results1[1]);
+	kfree(test_a);
+
+	/* Test2: add invs2 (new array) */
+	test_a = arm_smmu_invs_add(test_b, &invs2);
+	arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results2[0]),
+				     results2[0], results2[1]);
+	kfree(test_b);
+
+	/* Test3: decrease invs2 (same array) */
+	num_invs = arm_smmu_invs_dec(test_a, &invs2);
+	arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results3[0]),
+				     results3[0], results3[1]);
+	KUNIT_EXPECT_EQ(test, num_invs, 3);
+
+	/* Test4: add invs3 (new array) */
+	test_b = arm_smmu_invs_add(test_a, &invs3);
+	arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results4[0]),
+				     results4[0], results4[1]);
+	kfree(test_a);
+
+	/* Test5: decrease invs1 (same array) */
+	num_invs = arm_smmu_invs_dec(test_b, &invs1);
+	arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results5[0]),
+				     results5[0], results5[1]);
+	KUNIT_EXPECT_EQ(test, num_invs, 3);
+
+	/* Test6: delete invs3 (new array) */
+	test_a = arm_smmu_invs_del(test_b, &invs3);
+	KUNIT_EXPECT_EQ(test, test_a->num_invs, 0);
+	kfree(test_b);
+	kfree(test_a);
+}
+
 static struct kunit_case arm_smmu_v3_test_cases[] = {
 	KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_abort),
 	KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_bypass),
@@ -590,6 +674,7 @@ static struct kunit_case arm_smmu_v3_test_cases[] = {
 	KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1_stall),
 	KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
 	KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
+	KUNIT_CASE(arm_smmu_v3_invs_test),
 	{},
 };
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 08af5f2d1235a..73f3b411ff7ef 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/pci-ats.h>
 #include <linux/platform_device.h>
+#include <linux/sort.h>
 #include <linux/string_choices.h>
 #include <kunit/visibility.h>
 #include <uapi/linux/iommufd.h>
@@ -1033,6 +1034,263 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
+static int arm_smmu_invs_cmp(const void *_l, const void *_r)
+{
+	const struct arm_smmu_inv *l = _l;
+	const struct arm_smmu_inv *r = _r;
+
+	if (l->smmu != r->smmu)
+		return cmp_int((uintptr_t)l->smmu, (uintptr_t)r->smmu);
+	if (l->type != r->type)
+		return cmp_int(l->type, r->type);
+	return cmp_int(l->id, r->id);
+}
+
+static inline bool same_op(const struct arm_smmu_inv *a,
+			   const struct arm_smmu_inv *b)
+{
+	return a->smmu == b->smmu && a->type == b->type && a->id == b->id;
+}
+
+/**
+ * arm_smmu_invs_add() - Combine @old_invs with @add_invs to a new array
+ * @old_invs: the old invalidation array
+ * @add_invs: an array of invalidations to add
+ *
+ * Return: a newly allocated and sorted invalidation array on success, or an
+ * ERR_PTR.
+ *
+ * This function must be locked and serialized with arm_smmu_invs_del/dec(),
+ * but it does not lockdep on any lock, so that the KUNIT test can call it.
+ *
+ * Caller is responsible for freeing both @old_invs and the returned array.
+ *
+ * Entries marked as trash can be reused if @add_invs wants to add them back.
+ * Otherwise, they will be completely removed from the returned array.
+ */
+VISIBLE_IF_KUNIT
+struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *old_invs,
+					struct arm_smmu_invs *add_invs)
+{
+	size_t need = old_invs->num_invs + add_invs->num_invs;
+	struct arm_smmu_invs *new_invs;
+	size_t deletes = 0, i, j;
+	u64 existed = 0;
+
+	/* Max of add_invs->num_invs is 64 */
+	if (WARN_ON(add_invs->num_invs > sizeof(existed) * 8))
+		return ERR_PTR(-EINVAL);
+
+	for (i = 0; i != old_invs->num_invs; i++) {
+		struct arm_smmu_inv *cur = &old_invs->inv[i];
+		/* Count the trash entries to delete */
+		if (cur->todel) {
+			WARN_ON_ONCE(refcount_read(&cur->users));
+			deletes++;
+		}
+		for (j = 0; j != add_invs->num_invs; j++) {
+			if (!same_op(cur, &add_invs->inv[j]))
+				continue;
+			/* Found duplicated entries in add_invs */
+			if (WARN_ON_ONCE(existed & BIT_ULL(j)))
+				continue;
+			/* Revert the todel marker for reuse */
+			if (cur->todel) {
+				cur->todel = false;
+				deletes--;
+			}
+			/* Store the new location of this existing op in id */
+			add_invs->inv[j].id = i - deletes;
+			existed |= BIT_ULL(j);
+			need--;
+			break;
+		}
+	}
+
+	need -= deletes;
+
+	new_invs = arm_smmu_invs_alloc(need);
+	if (IS_ERR(new_invs)) {
+		/* Don't forget to revert all the todel markers */
+		for (i = 0; i != old_invs->num_invs; i++) {
+			if (refcount_read(&old_invs->inv[i].users) == 0)
+				old_invs->inv[i].todel = true;
+		}
+		return new_invs;
+	}
+
+	/* Copy the entire array less all the todel entries */
+	for (i = 0; i != old_invs->num_invs; i++) {
+		if (old_invs->inv[i].todel)
+			continue;
+		new_invs->inv[new_invs->num_invs++] = old_invs->inv[i];
+	}
+
+	for (j = 0; j != add_invs->num_invs; j++) {
+		if (existed & BIT_ULL(j)) {
+			unsigned int idx = add_invs->inv[j].id;
+
+			refcount_inc(&new_invs->inv[idx].users);
+
+			/* Restore the id of the passed in add_invs->inv[j] */
+			add_invs->inv[j].id = new_invs->inv[idx].id;
+		} else {
+			unsigned int idx = new_invs->num_invs;
+
+			new_invs->inv[idx] = add_invs->inv[j];
+			refcount_set(&new_invs->inv[idx].users, 1);
+			new_invs->num_invs++;
+		}
+	}
+
+	WARN_ON(new_invs->num_invs != need);
+
+	/*
+	 * A sorted array allows batching invalidations together for fewer SYNCs.
+	 * Also, ATS must follow the ASID/VMID invalidation SYNC.
+	 */
+	sort_nonatomic(new_invs->inv, new_invs->num_invs,
+		       sizeof(add_invs->inv[0]), arm_smmu_invs_cmp, NULL);
+	return new_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_add);
+
+/**
+ * arm_smmu_invs_del() - Remove @del_invs from @old_invs
+ * @old_invs: the old invalidation array
+ * @del_invs: an array of invalidations to delete
+ *
+ * Return: a newly allocated and sorted invalidation array on success, or an
+ * ERR_PTR.
+ *
+ * This function must be locked and serialized with arm_smmu_invs_add/dec(),
+ * but it does not lockdep on any lock, so that the KUNIT test can call it.
+ *
+ * Caller is responsible for freeing both @old_invs and the returned array.
+ *
+ * Entries marked as trash will be completely removed from the returned array.
+ */
+VISIBLE_IF_KUNIT
+struct arm_smmu_invs *arm_smmu_invs_del(struct arm_smmu_invs *old_invs,
+					struct arm_smmu_invs *del_invs)
+{
+	size_t need = old_invs->num_invs;
+	struct arm_smmu_invs *new_invs;
+	size_t i, j;
+
+	if (WARN_ON(old_invs->num_invs < del_invs->num_invs))
+		return ERR_PTR(-EINVAL);
+
+	for (i = 0; i != old_invs->num_invs; i++) {
+		struct arm_smmu_inv *cur = &old_invs->inv[i];
+		/* Skip any trash entry */
+		if (cur->todel) {
+			WARN_ON_ONCE(refcount_read(&cur->users));
+			need--;
+			continue;
+		}
+		for (j = 0; j != del_invs->num_invs; j++) {
+			if (!same_op(cur, &del_invs->inv[j]))
+				continue;
+			/* Found duplicated entries in del_invs */
+			if (WARN_ON_ONCE(cur->todel))
+				continue;
+			/* Mark todel. The deletion part will take care of it */
+			cur->todel = true;
+			if (refcount_read(&cur->users) == 1)
+				need--;
+		}
+	}
+
+	new_invs = arm_smmu_invs_alloc(need);
+	if (IS_ERR(new_invs)) {
+		/* Don't forget to revert all the todel markers */
+		for (i = 0; i != old_invs->num_invs; i++) {
+			if (refcount_read(&old_invs->inv[i].users) != 0)
+				old_invs->inv[i].todel = false;
+		}
+		return new_invs;
+	}
+
+	for (i = 0; i != old_invs->num_invs; i++) {
+		struct arm_smmu_inv *cur = &old_invs->inv[i];
+		unsigned int idx = new_invs->num_invs;
+
+		/* Either a trash entry or a matched entry for a dec-and-test */
+		if (cur->todel) {
+			/* Can't do refcount_dec_and_test() on a trash entry */
+			if (refcount_read(&cur->users) <= 1)
+				continue;
+			refcount_dec(&cur->users);
+			cur->todel = false;
+		}
+		new_invs->inv[idx] = *cur;
+		new_invs->num_invs++;
+	}
+
+	WARN_ON(new_invs->num_invs != need);
+
+	/* Still sorted */
+	return new_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_del);
+
+/**
+ * arm_smmu_invs_dec() - Find in @invs all the entries given in @dec_invs and
+ *                       decrease their user counts, without any deletion
+ * @invs: a given invalidation array
+ * @dec_invs: an array of invalidations whose user counts will be decreased
+ *
+ * Return: the actual number of invs in the array, excluding all trash entries
+ *
+ * This function will not fail. Any entry whose users count drops to 0 will be
+ * marked as trash. All trash entries will remain in @invs until they are
+ * completely removed by the next arm_smmu_invs_add() or arm_smmu_invs_del()
+ * call.
+ *
+ * This function must be locked and serialized with arm_smmu_invs_add/del(),
+ * but it does not lockdep on any lock, so that the KUNIT test can call it.
+ *
+ * Note that @invs->num_invs will not be updated, even if the actual number of
+ * invalidations decreases. Readers should take the read lock, iterate entries
+ * up to @invs->num_invs, and check each entry's users counter.
+ */
+VISIBLE_IF_KUNIT
+size_t arm_smmu_invs_dec(struct arm_smmu_invs *invs,
+			 struct arm_smmu_invs *dec_invs)
+{
+	size_t num_invs = 0, i, j;
+	unsigned long flags;
+
+	/* Driver bug. Must be fixed, but do not fail here */
+	if (WARN_ON(invs->num_invs < dec_invs->num_invs)) {
+		for (i = 0; i != invs->num_invs; i++) {
+			if (!invs->inv[i].todel)
+				num_invs++;
+		}
+		return num_invs;
+	}
+
+	/* We have no choice but to lock the array while editing it in place */
+	write_lock_irqsave(&invs->rwlock, flags);
+
+	for (i = 0; i != invs->num_invs; i++) {
+		for (j = 0; j != dec_invs->num_invs; j++) {
+			if (same_op(&invs->inv[i], &dec_invs->inv[j]) &&
+			    refcount_dec_and_test(&invs->inv[i].users)) {
+				/* Set the todel marker for deletion */
+				invs->inv[i].todel = true;
+				break;
+			}
+		}
+		if (!invs->inv[i].todel)
+			num_invs++;
+	}
+
+	write_unlock_irqrestore(&invs->rwlock, flags);
+	return num_invs;
+}
+EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_dec);
+
 /*
  * Based on the value of ent report which bits of the STE the HW will access. It
  * would be nice if this was complete according to the spec, but minimally it
@@ -2468,13 +2726,21 @@ static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
 {
 	struct arm_smmu_domain *smmu_domain;
+	struct arm_smmu_invs *new_invs;
 
 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
 	if (!smmu_domain)
 		return ERR_PTR(-ENOMEM);
 
+	new_invs = arm_smmu_invs_alloc(0);
+	if (IS_ERR(new_invs)) {
+		kfree(smmu_domain);
+		return ERR_CAST(new_invs);
+	}
+
 	INIT_LIST_HEAD(&smmu_domain->devices);
 	spin_lock_init(&smmu_domain->devices_lock);
+	rcu_assign_pointer(smmu_domain->invs, new_invs);
 
 	return smmu_domain;
 }
-- 
2.43.0
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Jason Gunthorpe 1 month, 1 week ago
On Wed, Aug 13, 2025 at 06:25:35PM -0700, Nicolin Chen wrote:
> +struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *old_invs,
> +					struct arm_smmu_invs *add_invs)
> +{

It turns out it is fairly easy and cheap to sort add_invs by sorting
the ids during probe:

@@ -3983,6 +3989,14 @@ static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
        return 0;
 }
 
+static int arm_smmu_ids_cmp(const void *_l, const void *_r)
+{
+       const typeof_member(struct iommu_fwspec, ids[0]) *l = _l;
+       const typeof_member(struct iommu_fwspec, ids[0]) *r = _r;
+
+       return cmp_int(*l, *r);
+}
+
 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
                                  struct arm_smmu_master *master)
 {
@@ -4011,6 +4025,13 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
                return PTR_ERR(master->invs);
        }
 
+       /*
+        * Put the ids into order so that arm_smmu_build_invs() can trivially
+        * generate sorted lists.
+        */
+       sort_nonatomic(fwspec->ids, fwspec->num_ids, sizeof(fwspec->ids[0]),
+                      arm_smmu_ids_cmp, NULL);
+
        mutex_lock(&smmu->streams_mutex);
        for (i = 0; i < fwspec->num_ids; i++) {
                struct arm_smmu_stream *new_stream = &master->streams[i];

Then arm_smmu_build_invs() trivially makes sorted lists.

So if old_invs and add_invs are both sorted lists, we can use a variation
on a merge algorithm for sorted lists, which is both simpler to understand
and runs faster:

/*
 * Merge compare of two sorted list items. If one side is past the end of its
 * list, return the other side to let it run out the iteration.
 */
static inline int arm_smmu_invs_merge_cmp(const struct arm_smmu_invs *lhs,
					  size_t lhs_idx,
					  const struct arm_smmu_invs *rhs,
					  size_t rhs_idx)
{
	if (lhs_idx != lhs->num_invs && rhs_idx != rhs->num_invs)
		return arm_smmu_invs_cmp(&lhs->inv[lhs_idx],
					 &rhs->inv[rhs_idx]);
	if (lhs_idx != lhs->num_invs)
		return -1;
	return 1;
}

struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *invs,
					struct arm_smmu_invs *add_invs)
{
	struct arm_smmu_invs *new_invs;
	struct arm_smmu_inv *new;
	size_t to_add = 0;
	size_t to_del = 0;
	size_t i, j;

	for (i = 0, j = 0; i != invs->num_invs || j != add_invs->num_invs;) {
		int cmp = arm_smmu_invs_merge_cmp(invs, i, add_invs, j);

		if (cmp < 0) {
			/* not found in add_invs, leave alone */
			if (refcount_read(&invs->inv[i].users))
				i++;
			else
				to_del++;
		} else if (cmp == 0) {
			/* same item */
			i++;
			j++;
		} else {
			/* unique to add_invs */
			to_add++;
			j++;
		}
	}

	new_invs = arm_smmu_invs_alloc(invs->num_invs + to_add - to_del);
	if (IS_ERR(new_invs))
		return new_invs;

	new = new_invs->inv;
	for (i = 0, j = 0; i != invs->num_invs || j != add_invs->num_invs;) {
		int cmp = arm_smmu_invs_merge_cmp(invs, i, add_invs, j);

		if (cmp <= 0 && !refcount_read(&invs->inv[i].users)) {
			i++;
			continue;
		}

		if (cmp < 0) {
			*new = invs->inv[i];
			i++;
		} else if (cmp == 0) {
			*new = invs->inv[i];
			refcount_inc(&new->users);
			i++;
			j++;
		} else {
			*new = add_invs->inv[j];
			refcount_set(&new->users, 1);
			j++;
		}
		if (arm_smmu_inv_is_ats(new))
			new_invs->has_ats = true;
		new++;
	}

	WARN_ON(new != new_invs->inv + new_invs->num_invs);

	/*
	 * A sorted array allows batching invalidations together for fewer SYNCs.
	 * Also, ATS must follow the ASID/VMID invalidation SYNC.
	 */
	sort_nonatomic(new_invs->inv, new_invs->num_invs,
		       sizeof(add_invs->inv[0]), arm_smmu_invs_cmp, NULL);
	return new_invs;
}

size_t arm_smmu_invs_dec(struct arm_smmu_invs *invs,
			 struct arm_smmu_invs *dec_invs)
{
	size_t to_del = 0;
	size_t i, j;

	for (i = 0, j = 0; i != invs->num_invs || j != dec_invs->num_invs;) {
		int cmp = arm_smmu_invs_merge_cmp(invs, i, dec_invs, j);

		if (cmp < 0) {
			/* not found in dec_invs, leave alone */
			i++;
		} else if (cmp == 0) {
			/* same item */
			if (refcount_dec_and_test(&invs->inv[i].users)) {
				dec_invs->inv[j].todel = true;
				to_del++;
			}
			i++;
			j++;
		} else {
			/* item in dec_invs is not in invs? */
			WARN_ON(true);
			j++;
		}
	}
	return to_del;
}
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Nicolin Chen 4 weeks ago
On Wed, Aug 27, 2025 at 05:00:02PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 13, 2025 at 06:25:35PM -0700, Nicolin Chen wrote:
> > +struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *old_invs,
> > +					struct arm_smmu_invs *add_invs)
> > +{
> 
> It turns out it is fairly easy and cheap to sort add_invs by sorting
> the ids during probe:

I have integrated this and also renamed these three helpers:

+struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
+					  struct arm_smmu_invs *to_merge);
+size_t arm_smmu_invs_unref(struct arm_smmu_invs *invs,
+			   struct arm_smmu_invs *to_unref);
+struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs,
+					  size_t num_dels);

Thanks!
Nicolin


> @@ -3983,6 +3989,14 @@ static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
>         return 0;
>  }
>  
> +static int arm_smmu_ids_cmp(const void *_l, const void *_r)
> +{
> +       const typeof_member(struct iommu_fwspec, ids[0]) *l = _l;
> +       const typeof_member(struct iommu_fwspec, ids[0]) *r = _r;
> +
> +       return cmp_int(*l, *r);
> +}
> +
>  static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
>                                   struct arm_smmu_master *master)
>  {
> @@ -4011,6 +4025,13 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
>                 return PTR_ERR(master->invs);
>         }
>  
> +       /*
> +        * Put the ids into order so that arm_smmu_build_invs() can trivially
> +        * generate sorted lists.
> +        */
> +       sort_nonatomic(fwspec->ids, fwspec->num_ids, sizeof(fwspec->ids[0]),
> +                      arm_smmu_ids_cmp, NULL);
> +
>         mutex_lock(&smmu->streams_mutex);
>         for (i = 0; i < fwspec->num_ids; i++) {
>                 struct arm_smmu_stream *new_stream = &master->streams[i];
> 
> Then arm_smmu_build_invs() trivially makes sorted lists.
> 
> So if old_invs and add_invs are both sorted list we can use variations
> on a merge algorithm for sorted lists which is both simpler to
> understand and runs faster:
> 
> /*
>  * Compare used for merging two sorted lists. Merge compare of two sorted list
>  * items. If one side is past the end of the list then return the other side to
>  * let it run out the iteration.
>  */
> static inline int arm_smmu_invs_merge_cmp(const struct arm_smmu_invs *lhs,
> 					  size_t lhs_idx,
> 					  const struct arm_smmu_invs *rhs,
> 					  size_t rhs_idx)
> {
> 	if (lhs_idx != lhs->num_invs && rhs_idx != rhs->num_invs)
> 		return arm_smmu_invs_cmp(&lhs->inv[lhs_idx],
> 					 &rhs->inv[rhs_idx]);
> 	if (lhs_idx != lhs->num_invs)
> 		return -1;
> 	return 1;
> }
> 
> struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *invs,
> 					struct arm_smmu_invs *add_invs)
> {
> 	struct arm_smmu_invs *new_invs;
> 	struct arm_smmu_inv *new;
> 	size_t to_add = 0;
> 	size_t to_del = 0;
> 	size_t i, j;
> 
> 	for (i = 0, j = 0; i != invs->num_invs || j != add_invs->num_invs;) {
> 		int cmp = arm_smmu_invs_merge_cmp(invs, i, add_invs, j);
> 
> 		if (cmp < 0) {
> 			/* not found in add_invs, leave alone */
> 			if (refcount_read(&invs->inv[i].users))
> 				i++;
> 			else
> 				to_del++;
> 		} else if (cmp == 0) {
> 			/* same item */
> 			i++;
> 			j++;
> 		} else {
> 			/* unique to add_invs */
> 			to_add++;
> 			j++;
> 		}
> 	}
> 
> 	new_invs = arm_smmu_invs_alloc(invs->num_invs + to_add - to_del);
> 	if (IS_ERR(new_invs))
> 		return new_invs;
> 
> 	new = new_invs->inv;
> 	for (i = 0, j = 0; i != invs->num_invs || j != add_invs->num_invs;) {
> 		int cmp = arm_smmu_invs_merge_cmp(invs, i, add_invs, j);
> 
> 		if (cmp <= 0 && !refcount_read(&invs->inv[i].users)) {
> 			i++;
> 			continue;
> 		}
> 
> 		if (cmp < 0) {
> 			*new = invs->inv[i];
> 			i++;
> 		} else if (cmp == 0) {
> 			*new = invs->inv[i];
> 			refcount_inc(&new->users);
> 			i++;
> 			j++;
> 		} else {
> 			*new = add_invs->inv[j];
> 			refcount_set(&new->users, 1);
> 			j++;
> 		}
> 		if (arm_smmu_inv_is_ats(new))
> 			new_invs->has_ats = true;
> 		new++;
> 	}
> 
> 	WARN_ON(new != new_invs->inv + new_invs->num_invs);
> 
> 	/*
> 	 * A sorted array allows batching invalidations together for fewer SYNCs.
> 	 * Also, ATS must follow the ASID/VMID invalidation SYNC.
> 	 */
> 	sort_nonatomic(new_invs->inv, new_invs->num_invs,
> 		       sizeof(add_invs->inv[0]), arm_smmu_invs_cmp, NULL);
> 	return new_invs;
> }
> 
> size_t arm_smmu_invs_dec(struct arm_smmu_invs *invs,
> 			 struct arm_smmu_invs *dec_invs)
> {
> 	size_t to_del = 0;
> 	size_t i, j;
> 
> 	for (i = 0, j = 0; i != invs->num_invs || j != dec_invs->num_invs;) {
> 		int cmp = arm_smmu_invs_merge_cmp(invs, i, dec_invs, j);
> 
> 		if (cmp < 0) {
> 			/* not found in dec_invs, leave alone */
> 			i++;
> 		} else if (cmp == 0) {
> 			/* same item */
> 			if (refcount_dec_and_test(&invs->inv[i].users)) {
> 				dec_invs->inv[j].todel = true;
> 				to_del++;
> 			}
> 			i++;
> 			j++;
> 		} else {
> 			/* item in dec_invs is not in invs? */
> 			WARN_ON(true);
> 			j++;
> 		}
> 	}
> 	return to_del;
> }
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Jason Gunthorpe 3 weeks, 5 days ago
On Sat, Sep 06, 2025 at 01:16:45AM -0700, Nicolin Chen wrote:
> > 	/*
> > 	 * A sorted array allows batching invalidations together for fewer SYNCs.
> > 	 * Also, ATS must follow the ASID/VMID invalidation SYNC.
> > 	 */
> > 	sort_nonatomic(new_invs->inv, new_invs->num_invs,
> > 		       sizeof(add_invs->inv[0]), arm_smmu_invs_cmp, NULL);
> > 	return new_invs;

This should be deleted, merge sort always makes sorted lists.

Jason
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Nicolin Chen 3 weeks, 4 days ago
On Mon, Sep 08, 2025 at 12:51:49PM -0300, Jason Gunthorpe wrote:
> On Sat, Sep 06, 2025 at 01:16:45AM -0700, Nicolin Chen wrote:
> > > 	/*
> > > 	 * A sorted array allows batching invalidations together for fewer SYNCs.
> > > 	 * Also, ATS must follow the ASID/VMID invalidation SYNC.
> > > 	 */
> > > 	sort_nonatomic(new_invs->inv, new_invs->num_invs,
> > > 		       sizeof(add_invs->inv[0]), arm_smmu_invs_cmp, NULL);
> > > 	return new_invs;
> 
> This should be deleted, merge sort always makes sorted lists.

I see. Dropping it.

Thanks
Nicolin
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Jason Gunthorpe 1 month, 1 week ago
On Wed, Aug 13, 2025 at 06:25:35PM -0700, Nicolin Chen wrote:
> +/**
> + * arm_smmu_invs_del() - Remove @del_invs from @old_invs
> + * @old_invs: the old invalidation array
> + * @del_invs: an array of invlidations to delete
> + *
> + * Return: a newly allocated and sorted invalidation array on success, or an
> + * ERR_PTR.
> + *
> + * This function must be locked and serialized with arm_smmu_invs_add/dec(),
> + * but do not lockdep on any lock for KUNIT test.
> + *
> + * Caller is resposible for freeing the @old_invs and the returned one.
> + *
> + * Entries marked as trash will be completely removed in the returned array.
> + */
> +VISIBLE_IF_KUNIT
> +struct arm_smmu_invs *arm_smmu_invs_del(struct arm_smmu_invs *old_invs,
> +					struct arm_smmu_invs *del_invs)
> +{

Having looked at this more completely, I think we should drop this
function.

Just always do decr, then have a simple function to compact the list
after the decr:

struct arm_smmu_invs *arm_smmu_invs_cleanup(struct arm_smmu_invs *invs,
					    size_t to_del)
{
	struct arm_smmu_invs *new_invs;
	size_t i, j;

	if (WARN_ON(invs->num_invs < to_del))
		return NULL;

	new_invs = arm_smmu_invs_alloc(invs->num_invs - to_del);
	if (IS_ERR(new_invs))
		return NULL;

	for (i = 0, j = 0; i != invs->num_invs; i++) {
		if (!refcount_read(&invs->inv[i].users))
			continue;
		new_invs->inv[j] = invs->inv[i];
		j++;
	}
	return new_invs;
}

If this returns NULL then just leave the list alone; it is OK to sit
there with the users == 0 entries left behind.

No need for the complex _del function and the _dec function.

This also means the memory doesn't need to be preallocated and it
significantly simplifies a lot of the logic.
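
A hypothetical caller flow, assuming a _dec variant that returns the number
of entries whose users count dropped to zero, and with master_invs standing
in for the per-master list being dropped (names are only illustrative):

	struct arm_smmu_invs *old_invs = smmu_domain->invs;
	struct arm_smmu_invs *new_invs;
	size_t to_del;

	to_del = arm_smmu_invs_dec(old_invs, master_invs);
	new_invs = arm_smmu_invs_cleanup(old_invs, to_del);
	if (new_invs) {
		rcu_assign_pointer(smmu_domain->invs, new_invs);
		kfree_rcu(old_invs, rcu);
	}
	/* on NULL, keep the old array; readers skip users == 0 entries */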

Jason
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Nicolin Chen 1 month, 1 week ago
On Wed, Aug 27, 2025 at 01:48:04PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 13, 2025 at 06:25:35PM -0700, Nicolin Chen wrote:
> > +/**
> > + * arm_smmu_invs_del() - Remove @del_invs from @old_invs
> > + * @old_invs: the old invalidation array
> > + * @del_invs: an array of invlidations to delete
> > + *
> > + * Return: a newly allocated and sorted invalidation array on success, or an
> > + * ERR_PTR.
> > + *
> > + * This function must be locked and serialized with arm_smmu_invs_add/dec(),
> > + * but do not lockdep on any lock for KUNIT test.
> > + *
> > + * Caller is resposible for freeing the @old_invs and the returned one.
> > + *
> > + * Entries marked as trash will be completely removed in the returned array.
> > + */
> > +VISIBLE_IF_KUNIT
> > +struct arm_smmu_invs *arm_smmu_invs_del(struct arm_smmu_invs *old_invs,
> > +					struct arm_smmu_invs *del_invs)
> > +{
> 
> Having looked at this more completely, I think we should drop this
> function.
> 
> Just always do decr, then have a simple function to compact the list
> after the decr:

But the _dec function will always take the write lock, which seems
to lose the benefit of using an RCU array?

Given the complexity of the _dec function, it also wouldn't release the
lock very quickly. The current version only invokes it upon kmalloc
failure, which can be seen as a very rare case with a very minimal impact.

> struct arm_smmu_invs *arm_smmu_invs_cleanup(struct arm_smmu_invs *invs,
> 					    size_t to_del)
> {
> 	struct arm_smmu_invs *new_invs;
> 	size_t i, j;
> 
> 	if (WARN_ON(invs->num_invs < to_del))
> 		return NULL;
> 
> 	new_invs = arm_smmu_invs_alloc(invs->num_invs - to_del);
> 	if (IS_ERR(new_invs))
> 		return NULL;
> 
> 	for (i = 0, j = 0; i != invs->num_invs; i++) {
> 		if (!refcount_read(&invs->inv[i].users))
> 			continue;
> 		new_invs->inv[j] = invs->inv[i];
> 		j++;
> 	}
> 	return new_invs;
> }
> 
> If this returns NULL then just leave the list alone, it is OK to sit
> there with the 0 users left behind.

Yea, it's better than waiting for the next _add function to compact
the list.

> No need for the complex _del function and the _decr function..
> 
> This also means the memory doesn't need to be preallocated and it
> significantly simplifies alot of the logic.

By "preallocated" you mean "master->scratch_invs"? I think it will
still be needed for the "dec_invs" argument in:
  size_t arm_smmu_invs_dec(struct arm_smmu_invs *invs,
			   struct arm_smmu_invs *dec_invs);
?

Or you mean the allocation in arm_smmu_invs_del()? Yes, this drops
the kmalloc in the detach path.

Thanks
Nicolin
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Jason Gunthorpe 1 month, 1 week ago
On Wed, Aug 27, 2025 at 10:19:18AM -0700, Nicolin Chen wrote:
> > Just always do decr, then have a simple function to compact the list
> > after the decr:
> 
> But the _dec function will always take the write lock, which seems
> to lose the benefit of using an RCU array?

The lock isn't needed, all it does is refcount dec which is atomic.

And all the lists are locked by the asid lock on the write side
anyhow.

> > If this returns NULL then just leave the list alone, it is OK to sit
> > there with the 0 users left behind.
> 
> Yea, it's better than waiting for the next _add function to compact
> the list.

I was thinking you could call the add function with an empty list
instead of adding another function.
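
Roughly like this (just a sketch; the empty list is only there to reuse the
merge path and drop the users == 0 entries):

	struct arm_smmu_invs *empty, *new_invs;

	empty = arm_smmu_invs_alloc(0);
	if (!IS_ERR(empty)) {
		new_invs = arm_smmu_invs_add(smmu_domain->invs, empty);
		kfree(empty);
	}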

> > No need for the complex _del function and the _decr function..
> > 
> > This also means the memory doesn't need to be preallocated and it
> > significantly simplifies alot of the logic.
>
> By "preallocated" you mean "master->scratch_invs"? 

No, I mean the second list of invalidations, the old domain's new list.

Jason
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Jason Gunthorpe 1 month, 1 week ago
On Wed, Aug 13, 2025 at 06:25:35PM -0700, Nicolin Chen wrote:
> +/**
> + * arm_smmu_invs_add() - Combine @old_invs with @add_invs to a new array
> + * @old_invs: the old invalidation array
> + * @add_invs: an array of invlidations to add
> + *
> + * Return: a newly allocated and sorted invalidation array on success, or an
> + * ERR_PTR.
> + *
> + * This function must be locked and serialized with arm_smmu_invs_del/dec(),
> + * but do not lockdep on any lock for KUNIT test.
> + *
> + * Caller is resposible for freeing the @old_invs and the returned one.
> + *
> + * Entries marked as trash can be resued if @add_invs wants to add them back.
> + * Otherwise, they will be completely removed in the returned array.
> + */
> +VISIBLE_IF_KUNIT
> +struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *old_invs,
> +					struct arm_smmu_invs *add_invs)
> +{
> +	size_t need = old_invs->num_invs + add_invs->num_invs;
> +	struct arm_smmu_invs *new_invs;
> +	size_t deletes = 0, i, j;
> +	u64 existed = 0;
> +
> +	/* Max of add_invs->num_invs is 64 */
> +	if (WARN_ON(add_invs->num_invs > sizeof(existed) * 8))
> +		return ERR_PTR(-EINVAL);

Since this is driven off of num_streams, using a fixed bitmap doesn't
seem great, since I suppose the dt isn't limited to 64.

Given how this is working now I think you can just add a new member to
the struct:

struct arm_smmu_inv {
	/* invalidation items */
	struct arm_smmu_device *smmu;
	u8 type;
	u8 size_opcode;
	u8 nsize_opcode;
	/* Temporary bits for add/del functions */
	u8 reuse:1;
	u8 todel:1;

And use reuse as the temporary instead of the bitmap.

> +	for (i = 0; i != old_invs->num_invs; i++) {
> +		struct arm_smmu_inv *cur = &old_invs->inv[i];

missing newline

> +		/* Count the trash entries to deletes */
> +		if (cur->todel) {
> +			WARN_ON_ONCE(refcount_read(&cur->users));
> +			deletes++;
> +		}

Just do continue here.

todel should only be used as a temporary. Use refcount_read() ==
0. Then you don't need a WARN either.

> +		for (j = 0; j != add_invs->num_invs; j++) {
> +			if (!same_op(cur, &add_invs->inv[j]))
> +				continue;
> +			/* Found duplicated entries in add_invs */
> +			if (WARN_ON_ONCE(existed & BIT_ULL(j)))
> +				continue;

inv[j].reuse

> +			/* Revert the todel marker for reuse */
> +			if (cur->todel) {
> +				cur->todel = false;
> +				deletes--;

This will blow up the refcount_inc() below because users is 0.
There is no point in trying to optimize like this; just discard the
old entry and add a new one.

> +	new_invs = arm_smmu_invs_alloc(need);
> +	if (IS_ERR(new_invs)) {
> +		/* Don't forget to revert all the todel markers */
> +		for (i = 0; i != old_invs->num_invs; i++) {
> +			if (refcount_read(&old_invs->inv[i].users) == 0)
> +				old_invs->inv[i].todel = true;
> +		}

Drop as well

> +		return new_invs;
> +	}
> +
> +	/* Copy the entire array less all the todel entries */
> +	for (i = 0; i != old_invs->num_invs; i++) {
> +		if (old_invs->inv[i].todel)
> +			continue;

refcount_read

> +		new_invs->inv[new_invs->num_invs++] = old_invs->inv[i];
> +	}
> +
> +	for (j = 0; j != add_invs->num_invs; j++) {
> +		if (existed & BIT_ULL(j)) {

inv[j].reuse

> +			unsigned int idx = add_invs->inv[j].id;


Similar remarks for del, use users to set todel, don't expect it to be
valid coming into the function.

Jason
Re: [PATCH rfcv1 4/8] iommu/arm-smmu-v3: Introduce a per-domain arm_smmu_invs array
Posted by Nicolin Chen 1 month, 1 week ago
On Tue, Aug 26, 2025 at 04:50:03PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 13, 2025 at 06:25:35PM -0700, Nicolin Chen wrote:
> > +struct arm_smmu_invs *arm_smmu_invs_add(struct arm_smmu_invs *old_invs,
> > +					struct arm_smmu_invs *add_invs)
> > +{
> > +	size_t need = old_invs->num_invs + add_invs->num_invs;
> > +	struct arm_smmu_invs *new_invs;
> > +	size_t deletes = 0, i, j;
> > +	u64 existed = 0;
> > +
> > +	/* Max of add_invs->num_invs is 64 */
> > +	if (WARN_ON(add_invs->num_invs > sizeof(existed) * 8))
> > +		return ERR_PTR(-EINVAL);
> 
> Since this is driven off of num_streams using a fixed bitmap doesn't
> seem great since I suppose the dt isn't limited to 64.

In the other patch, you noted that it's likely very rare to have
an ATS-supported device with multiple SIDs. Also, this function is
called per device. So, 64 should be enough?

With that being said...

> Given how this is working now I think you can just add a new member to
> the struct:
> 
> struct arm_smmu_inv {
> 	/* invalidation items */
> 	struct arm_smmu_device *smmu;
> 	u8 type;
> 	u8 size_opcode;
> 	u8 nsize_opcode;
> 	/* Temporary bits for add/del functions */
> 	u8 reuse:1;
> 	u8 todel:1;
> 
> And use reuse as the temporary instead of the bitmap.
 
... I do like this reuse flag. I will give it a try.

> > +		/* Count the trash entries to deletes */
> > +		if (cur->todel) {
> > +			WARN_ON_ONCE(refcount_read(&cur->users));
> > +			deletes++;
> > +		}
> 
> Just do continue here.
> 
> todel should only be used as a temporary. Use refcount_read() ==
> 0. Then you don't need a WARN either.

I did so until my last local pre-v1 version, as I found it seemed
cleaner to mark it using todel. I'll try again and see how
it goes.
 
> > +			/* Revert the todel marker for reuse */
> > +			if (cur->todel) {
> > +				cur->todel = false;
> > +				deletes--;
> 
> This wil blow up the refcount_inc() below because users is 0..
> There is no point in trying to optimize like this just discard the
> old entry and add a new one.

Oh right. refcount == 0 can't increase...

> > +			unsigned int idx = add_invs->inv[j].id;
> 
> 
> Similar remarks for del, use users to set todel, don't expect it to be
> valid coming into the function.

OK.

Thanks
Nicolin