Enable batch processing for IOTLB/Dev-IOTLB invalidation commands of SVA
domains and default domains with ATS enabled.
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
drivers/iommu/intel/cache.c | 145 ++++++++++++++++++++++++++++++++++--
drivers/iommu/intel/iommu.c | 1 +
drivers/iommu/intel/iommu.h | 1 +
drivers/iommu/intel/svm.c | 5 +-
4 files changed, 145 insertions(+), 7 deletions(-)
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 3ae84ccfcfa1..fabb98138760 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -59,10 +59,19 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
tag->pasid = pasid;
tag->users = 1;
- if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
+ if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) {
tag->dev = dev;
- else
+
+ if (!domain->qi_batch && iommu->qi)
+ /*
+ * It doesn't matter if domain->qi_batch is NULL, as in
+ * this case the commands will be submitted individually.
+ */
+ domain->qi_batch = kzalloc(sizeof(struct qi_batch),
+ GFP_KERNEL);
+ } else {
tag->dev = iommu->iommu.dev;
+ }
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(temp, &domain->cache_tags, node) {
@@ -255,6 +264,84 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
+static inline void handle_batched_iotlb_descs(struct dmar_domain *domain,
+ struct cache_tag *tag,
+ unsigned long addr,
+ unsigned long pages,
+ unsigned long mask,
+ int ih)
+{
+ struct intel_iommu *iommu = tag->iommu;
+
+ if (domain->use_first_level) {
+ qi_batch_add_piotlb_desc(iommu, tag->domain_id,
+ tag->pasid, addr, pages,
+ ih, domain->qi_batch);
+ } else {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap) ||
+ pages == -1)
+ qi_batch_add_iotlb_desc(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH,
+ domain->qi_batch);
+ else
+ qi_batch_add_iotlb_desc(iommu, tag->domain_id,
+ addr | ih, mask,
+ DMA_TLB_PSI_FLUSH,
+ domain->qi_batch);
+ }
+
+}
+
+static inline void handle_batched_dev_tlb_descs(struct dmar_domain *domain,
+ struct cache_tag *tag,
+ unsigned long addr,
+ unsigned long mask)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ if (tag->pasid == IOMMU_NO_PASID)
+ qi_batch_add_dev_iotlb_desc(iommu, sid, info->pfsid,
+ info->ats_qdep, addr, mask,
+ domain->qi_batch);
+ else
+ qi_batch_add_dev_iotlb_pasid_desc(iommu, sid, info->pfsid,
+ tag->pasid, info->ats_qdep,
+ addr, mask, domain->qi_batch);
+
+ batch_quirk_extra_dev_tlb_flush(info, addr, mask,
+ tag->pasid,
+ info->ats_qdep,
+ domain->qi_batch);
+}
+
+static void handle_batched_dev_tlb_descs_all(struct dmar_domain *domain,
+ struct cache_tag *tag)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ qi_batch_add_dev_iotlb_desc(iommu, sid, info->pfsid, info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH, domain->qi_batch);
+ batch_quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
+ IOMMU_NO_PASID, info->ats_qdep,
+ domain->qi_batch);
+
+}
+
static inline void handle_iotlb_flush(struct dmar_domain *domain,
struct cache_tag *tag,
unsigned long addr,
@@ -264,6 +351,12 @@ static inline void handle_iotlb_flush(struct dmar_domain *domain,
{
struct intel_iommu *iommu = tag->iommu;
+ if (domain->qi_batch) {
+ handle_batched_iotlb_descs(domain, tag, addr,
+ pages, mask, ih);
+ return;
+ }
+
if (domain->use_first_level) {
qi_flush_piotlb(iommu, tag->domain_id,
tag->pasid, addr, pages, ih);
@@ -284,7 +377,8 @@ static inline void handle_iotlb_flush(struct dmar_domain *domain,
}
}
-static void handle_dev_tlb_flush(struct cache_tag *tag,
+static void handle_dev_tlb_flush(struct dmar_domain *domain,
+ struct cache_tag *tag,
unsigned long addr,
unsigned long mask)
{
@@ -292,6 +386,11 @@ static void handle_dev_tlb_flush(struct cache_tag *tag,
struct device_domain_info *info;
u16 sid;
+ if (domain->qi_batch) {
+ handle_batched_dev_tlb_descs(domain, tag, addr, mask);
+ return;
+ }
+
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
@@ -306,12 +405,18 @@ static void handle_dev_tlb_flush(struct cache_tag *tag,
quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
}
-static void handle_dev_tlb_flush_all(struct cache_tag *tag)
+static void handle_dev_tlb_flush_all(struct dmar_domain *domain,
+ struct cache_tag *tag)
{
struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;
+ if (domain->qi_batch) {
+ handle_batched_dev_tlb_descs_all(domain, tag);
+ return;
+ }
+
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
@@ -329,6 +434,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long end, int ih)
{
unsigned long pages, mask, addr;
+ struct intel_iommu *iommu = NULL;
struct cache_tag *tag;
unsigned long flags;
@@ -336,6 +442,17 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
+ if (domain->qi_batch && iommu != tag->iommu) {
+ /*
+ * If the domain supports batched commands, the batch buffer
+ * needs to be flushed before switching to another iommu.
+ */
+ if (iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+
+ iommu = tag->iommu;
+ }
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
@@ -353,12 +470,14 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
mask = MAX_AGAW_PFN_WIDTH;
fallthrough;
case CACHE_TAG_DEVTLB:
- handle_dev_tlb_flush(tag, addr, mask);
+ handle_dev_tlb_flush(domain, tag, addr, mask);
break;
}
trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
}
+ if (domain->qi_batch && domain->qi_batch->index)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
@@ -368,11 +487,23 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
*/
void cache_tag_flush_all(struct dmar_domain *domain)
{
+ struct intel_iommu *iommu = NULL;
struct cache_tag *tag;
unsigned long flags;
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
+ if (domain->qi_batch && iommu != tag->iommu) {
+ /*
+ * If the domain supports batched commands, the batch buffer
+ * needs to be flushed before switching to another iommu.
+ */
+ if (iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+
+ iommu = tag->iommu;
+ }
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
@@ -380,12 +511,14 @@ void cache_tag_flush_all(struct dmar_domain *domain)
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
- handle_dev_tlb_flush_all(tag);
+ handle_dev_tlb_flush_all(domain, tag);
break;
}
trace_cache_tag_flush_all(tag);
}
+ if (domain->qi_batch && domain->qi_batch->index)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 160d569015b4..5907470b9b35 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1632,6 +1632,7 @@ static void domain_exit(struct dmar_domain *domain)
if (WARN_ON(!list_empty(&domain->devices)))
return;
+ kfree(domain->qi_batch);
kfree(domain);
}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 04aa1f200124..f16ffda48095 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -625,6 +625,7 @@ struct dmar_domain {
spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
+ struct qi_batch *qi_batch; /* QI descriptors batch */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 0e3a9b38bef2..3421813995db 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static void intel_mm_free_notifier(struct mmu_notifier *mn)
{
- kfree(container_of(mn, struct dmar_domain, notifier));
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+
+ kfree(domain->qi_batch);
+ kfree(domain);
}
static const struct mmu_notifier_ops intel_mmuops = {
--
2.43.0
On 2024/8/9 10:54, Tina Zhang wrote:
> +static inline void handle_batched_iotlb_descs(struct dmar_domain *domain,
> + struct cache_tag *tag,
> + unsigned long addr,
> + unsigned long pages,
> + unsigned long mask,
> + int ih)
> +{
> + struct intel_iommu *iommu = tag->iommu;
> +
> + if (domain->use_first_level) {
> + qi_batch_add_piotlb_desc(iommu, tag->domain_id,
> + tag->pasid, addr, pages,
> + ih, domain->qi_batch);
> + } else {
> + /*
> + * Fallback to domain selective flush if no
> + * PSI support or the size is too big.
> + */
> + if (!cap_pgsel_inv(iommu->cap) ||
> + mask > cap_max_amask_val(iommu->cap) ||
> + pages == -1)
> + qi_batch_add_iotlb_desc(iommu, tag->domain_id,
> + 0, 0, DMA_TLB_DSI_FLUSH,
> + domain->qi_batch);
> + else
> + qi_batch_add_iotlb_desc(iommu, tag->domain_id,
> + addr | ih, mask,
> + DMA_TLB_PSI_FLUSH,
> + domain->qi_batch);
> + }
> +
> +}
What if the iommu driver is running on early or emulated hardware
where queued invalidation is not supported?
Thanks,
baolu
Hi Baolu,
> -----Original Message-----
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Friday, August 9, 2024 4:22 PM
> To: Zhang, Tina <tina.zhang@intel.com>; Tian, Kevin <kevin.tian@intel.com>
> Cc: baolu.lu@linux.intel.com; iommu@lists.linux.dev; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v2 5/5] vt-d/iommu: Enable batching of IOTLB/Dev-IOTLB
> invalidations
>
> On 2024/8/9 10:54, Tina Zhang wrote:
> > +static inline void handle_batched_iotlb_descs(struct dmar_domain *domain,
> > + struct cache_tag *tag,
> > + unsigned long addr,
> > + unsigned long pages,
> > + unsigned long mask,
> > + int ih)
> > +{
> > + struct intel_iommu *iommu = tag->iommu;
> > +
> > + if (domain->use_first_level) {
> > + qi_batch_add_piotlb_desc(iommu, tag->domain_id,
> > + tag->pasid, addr, pages,
> > + ih, domain->qi_batch);
> > + } else {
> > + /*
> > + * Fallback to domain selective flush if no
> > + * PSI support or the size is too big.
> > + */
> > + if (!cap_pgsel_inv(iommu->cap) ||
> > + mask > cap_max_amask_val(iommu->cap) ||
> > + pages == -1)
> > + qi_batch_add_iotlb_desc(iommu, tag->domain_id,
> > + 0, 0, DMA_TLB_DSI_FLUSH,
> > + domain->qi_batch);
> > + else
> > + qi_batch_add_iotlb_desc(iommu, tag->domain_id,
> > + addr | ih, mask,
> > + DMA_TLB_PSI_FLUSH,
> > + domain->qi_batch);
> > + }
> > +
> > +}
>
> What if the iommu driver is running on early or emulated hardware where
> queued invalidation is not supported?
Yes, this case has also been taken into consideration.
In this patch, domain->qi_batch will be NULL if the IOMMU doesn't support queued invalidation (i.e. iommu->qi is NULL), see:
- if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
+ if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) {
tag->dev = dev;
- else
+
+ if (!domain->qi_batch && iommu->qi)
+ /*
+ * It doesn't matter if domain->qi_batch is NULL, as in
+ * this case the commands will be submitted individually.
+ */
+ domain->qi_batch = kzalloc(sizeof(struct qi_batch),
+ GFP_KERNEL);
+ } else {
tag->dev = iommu->iommu.dev;
+ }
Then, before invoking the handle_batched_xxx() helpers, the logic introduced in this patch checks whether domain->qi_batch is valid and falls back to submitting the commands individually when it is NULL.
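For illustration, this is roughly how that guard reads once the hunks above are applied (a condensed sketch, not the verbatim patched code; the elided tail is the unchanged non-batched flush path):

static inline void handle_iotlb_flush(struct dmar_domain *domain,
				      struct cache_tag *tag, unsigned long addr,
				      unsigned long pages, unsigned long mask,
				      int ih)
{
	/*
	 * domain->qi_batch is only allocated when iommu->qi exists, so on
	 * register-based (non-QI) hardware, or if the kzalloc() failed, the
	 * batched path is skipped and each command is submitted individually.
	 */
	if (domain->qi_batch) {
		handle_batched_iotlb_descs(domain, tag, addr, pages, mask, ih);
		return;
	}

	/* ... unchanged per-command flush path (qi_flush_piotlb() etc.) ... */
}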
Regards,
-Tina
>
> Thanks,
> baolu