Add several helpers to invalidate the caches after mappings in the
affected domain are changed.
- cache_tag_flush_range() invalidates a range of caches after mappings
within this range are changed. It uses the page-selective cache
invalidation methods.
- cache_tag_flush_all() invalidates all caches tagged by a domain ID.
It uses the domain-selective cache invalidation methods.
- cache_tag_flush_range_np() invalidates a range of caches when new
mappings are created in the domain and the corresponding page table
entries change from non-present to present.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/iommu.h | 14 +++
drivers/iommu/intel/cache.c | 195 ++++++++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.c | 12 ---
3 files changed, 209 insertions(+), 12 deletions(-)
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 52471f5337d5..e17683ecef4b 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -35,6 +35,8 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
@@ -1041,6 +1043,13 @@ static inline void context_set_sm_pre(struct context_entry *context)
context->lo |= BIT_ULL(4);
}
+/* Returns a number of VTD pages, but aligned to MM page size */
+static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
+{
+ host_addr &= ~PAGE_MASK;
+ return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
+}
+
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)
@@ -1122,6 +1131,11 @@ int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih);
+void cache_tag_flush_all(struct dmar_domain *domain);
+void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
+ unsigned long end);
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 296f1645a739..0539275a9d20 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -12,6 +12,7 @@
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
+#include <linux/pci.h>
#include <linux/spinlock.h>
#include "iommu.h"
@@ -212,3 +213,197 @@ void cache_tag_unassign_domain(struct dmar_domain *domain,
if (domain->domain.type == IOMMU_DOMAIN_NESTED)
__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}
+
+static unsigned long calculate_psi_aligned_address(unsigned long start,
+ unsigned long end,
+ unsigned long *_pages,
+ unsigned long *_mask)
+{
+ unsigned long pages = aligned_nrpages(start, end - start + 1);
+ unsigned long aligned_pages = __roundup_pow_of_two(pages);
+ unsigned long bitmask = aligned_pages - 1;
+ unsigned long mask = ilog2(aligned_pages);
+ unsigned long pfn = IOVA_PFN(start);
+
+ /*
+ * PSI masks the low order bits of the base address. If the
+ * address isn't aligned to the mask, then compute a mask value
+ * needed to ensure the target range is flushed.
+ */
+ if (unlikely(bitmask & pfn)) {
+ unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+ /*
+ * Since end_pfn <= pfn + bitmask, the only way bits
+ * higher than bitmask can differ in pfn and end_pfn is
+ * by carrying. This means after masking out bitmask,
+ * high bits starting with the first set bit in
+ * shared_bits are all equal in both pfn and end_pfn.
+ */
+ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+ }
+
+ *_pages = aligned_pages;
+ *_mask = mask;
+
+ return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
+}
+
+/*
+ * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
+ * when the memory mappings in the target domain have been modified.
+ */
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih)
+{
+ unsigned long pages, mask, addr;
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ switch (tag->type) {
+ case CACHE_TAG_IOTLB:
+ case CACHE_TAG_NESTING_IOTLB:
+ if (domain->use_first_level) {
+ qi_flush_piotlb(iommu, tag->domain_id,
+ tag->pasid, addr, pages, ih);
+ } else {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ addr | ih, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
+ break;
+ case CACHE_TAG_NESTING_DEVTLB:
+ /*
+ * Address translation cache in device side caches the
+ * result of nested translation. There is no easy way
+ * to identify the exact set of nested translations
+ * affected by a change in S2. So just flush the entire
+ * device cache.
+ */
+ addr = 0;
+ mask = MAX_AGAW_PFN_WIDTH;
+ fallthrough;
+ case CACHE_TAG_DEVTLB:
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ if (tag->pasid == IOMMU_NO_PASID)
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid,
+ info->ats_qdep, addr, mask);
+ else
+ qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
+ tag->pasid, info->ats_qdep,
+ addr, mask);
+
+ quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Invalidates all ranges of IOVA when the memory mappings in the target
+ * domain have been modified.
+ */
+void cache_tag_flush_all(struct dmar_domain *domain)
+{
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ switch (tag->type) {
+ case CACHE_TAG_IOTLB:
+ case CACHE_TAG_NESTING_IOTLB:
+ if (domain->use_first_level)
+ qi_flush_piotlb(iommu, tag->domain_id,
+ tag->pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ break;
+ case CACHE_TAG_DEVTLB:
+ case CACHE_TAG_NESTING_DEVTLB:
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH);
+ quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
+ IOMMU_NO_PASID, info->ats_qdep);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Invalidate a range of IOVA when new mappings are created in the target
+ * domain.
+ *
+ * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
+ * Set, any software updates to remapping structures other than first-
+ * stage mapping requires explicit invalidation of the caches.
+ * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
+ * write buffer flushing, software must explicitly perform write-buffer
+ * flushing, if cache invalidation is not required.
+ */
+void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
+ unsigned long end)
+{
+ unsigned long pages, mask, addr;
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+
+ if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
+ iommu_flush_write_buffer(iommu);
+ continue;
+ }
+
+ if (tag->type == CACHE_TAG_IOTLB ||
+ tag->type == CACHE_TAG_NESTING_IOTLB) {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ addr, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 0c0b8e493fda..ac413097058c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -54,11 +54,6 @@
__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN (1)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
@@ -1991,13 +1986,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
domain_context_mapping_cb, domain);
}
-/* Returns a number of VTD pages, but aligned to MM page size */
-static unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
-{
- host_addr &= ~PAGE_MASK;
- return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
-}
-
/* Return largest possible superpage level for a given mapping */
static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phy_pfn, unsigned long pages)
--
2.34.1
On 4/16/24 4:06 PM, Lu Baolu wrote:
> Add several helpers to invalidate the caches after mappings in the
> affected domain are changed.
>
> - cache_tag_flush_range() invalidates a range of caches after mappings
> within this range are changed. It uses the page-selective cache
> invalidation methods.
>
> - cache_tag_flush_all() invalidates all caches tagged by a domain ID.
> It uses the domain-selective cache invalidation methods.
>
> - cache_tag_flush_range_np() invalidates a range of caches when new
> mappings are created in the domain and the corresponding page table
> entries change from non-present to present.
>
> Signed-off-by: Lu Baolu<baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel/iommu.h | 14 +++
> drivers/iommu/intel/cache.c | 195 ++++++++++++++++++++++++++++++++++++
> drivers/iommu/intel/iommu.c | 12 ---
> 3 files changed, 209 insertions(+), 12 deletions(-)
[...]
> +
> +/*
> + * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
> + * when the memory mappings in the target domain have been modified.
> + */
> +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
> + unsigned long end, int ih)
> +{
> + unsigned long pages, mask, addr;
> + struct cache_tag *tag;
> + unsigned long flags;
> +
> + addr = calculate_psi_aligned_address(start, end, &pages, &mask);
> +
> + spin_lock_irqsave(&domain->cache_lock, flags);
> + list_for_each_entry(tag, &domain->cache_tags, node) {
> + struct intel_iommu *iommu = tag->iommu;
> + struct device_domain_info *info;
> + u16 sid;
> +
> + switch (tag->type) {
> + case CACHE_TAG_IOTLB:
> + case CACHE_TAG_NESTING_IOTLB:
> + if (domain->use_first_level) {
> + qi_flush_piotlb(iommu, tag->domain_id,
> + tag->pasid, addr, pages, ih);
> + } else {
> + /*
> + * Fallback to domain selective flush if no
> + * PSI support or the size is too big.
> + */
> + if (!cap_pgsel_inv(iommu->cap) ||
> + mask > cap_max_amask_val(iommu->cap))
> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> + 0, 0, DMA_TLB_DSI_FLUSH);
> + else
> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> + addr | ih, mask,
> + DMA_TLB_PSI_FLUSH);
> + }
> + break;
> + case CACHE_TAG_NESTING_DEVTLB:
> + /*
> + * Address translation cache in device side caches the
> + * result of nested translation. There is no easy way
> + * to identify the exact set of nested translations
> + * affected by a change in S2. So just flush the entire
> + * device cache.
> + */
> + addr = 0;
> + mask = MAX_AGAW_PFN_WIDTH;
> + fallthrough;
I realized that the logic above is not right. Setting both @addr and
@mask to 0 doesn't mean flushing all caches on the device. I will change
it like below:
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index e8418cdd8331..18debb82272a 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -302,9 +302,14 @@ void cache_tag_flush_range(struct dmar_domain
*domain, unsigned long start,
* affected by a change in S2. So just flush
the entire
* device cache.
*/
- addr = 0;
- mask = MAX_AGAW_PFN_WIDTH;
- fallthrough;
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid,
info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH);
+ quirk_extra_dev_tlb_flush(info, 0,
MAX_AGAW_PFN_WIDTH,
+ IOMMU_NO_PASID,
info->ats_qdep);
+ break;
case CACHE_TAG_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
> + case CACHE_TAG_DEVTLB:
> + info = dev_iommu_priv_get(tag->dev);
> + sid = PCI_DEVID(info->bus, info->devfn);
> +
> + if (tag->pasid == IOMMU_NO_PASID)
> + qi_flush_dev_iotlb(iommu, sid, info->pfsid,
> + info->ats_qdep, addr, mask);
> + else
> + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
> + tag->pasid, info->ats_qdep,
> + addr, mask);
> +
> + quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
> + break;
> + }
> + }
> + spin_unlock_irqrestore(&domain->cache_lock, flags);
Best regards,
baolu
> From: Baolu Lu <baolu.lu@linux.intel.com> > Sent: Monday, April 22, 2024 1:30 PM > > On 4/16/24 4:06 PM, Lu Baolu wrote: > > + case CACHE_TAG_NESTING_DEVTLB: > > + /* > > + * Address translation cache in device side caches the > > + * result of nested translation. There is no easy way > > + * to identify the exact set of nested translations > > + * affected by a change in S2. So just flush the entire > > + * device cache. > > + */ > > + addr = 0; > > + mask = MAX_AGAW_PFN_WIDTH; > > + fallthrough; > > I realized that the logic above is not right. Setting both @addr and > @mask to 0 doesn't means flush all caches on the device. I will change > it like below: I didn't get. Above code doesn't set @mask to 0. > > diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c > index e8418cdd8331..18debb82272a 100644 > --- a/drivers/iommu/intel/cache.c > +++ b/drivers/iommu/intel/cache.c > @@ -302,9 +302,14 @@ void cache_tag_flush_range(struct dmar_domain > *domain, unsigned long start, > * affected by a change in S2. So just flush > the entire > * device cache. > */ > - addr = 0; > - mask = MAX_AGAW_PFN_WIDTH; > - fallthrough; > + info = dev_iommu_priv_get(tag->dev); > + sid = PCI_DEVID(info->bus, info->devfn); > + > + qi_flush_dev_iotlb(iommu, sid, info->pfsid, > info->ats_qdep, > + 0, MAX_AGAW_PFN_WIDTH); > + quirk_extra_dev_tlb_flush(info, 0, > MAX_AGAW_PFN_WIDTH, > + IOMMU_NO_PASID, > info->ats_qdep); > + break; and I didn't get this change. It goes backward by ignoring tag->pasid. what's the exact problem of the fallthrough logic in original code? 
> case CACHE_TAG_DEVTLB: > info = dev_iommu_priv_get(tag->dev); > sid = PCI_DEVID(info->bus, info->devfn); > > > + case CACHE_TAG_DEVTLB: > > + info = dev_iommu_priv_get(tag->dev); > > + sid = PCI_DEVID(info->bus, info->devfn); > > + > > + if (tag->pasid == IOMMU_NO_PASID) > > + qi_flush_dev_iotlb(iommu, sid, info->pfsid, > > + info->ats_qdep, addr, > mask); > > + else > > + qi_flush_dev_iotlb_pasid(iommu, sid, info- > >pfsid, > > + tag->pasid, info- > >ats_qdep, > > + addr, mask); > > + > > + quirk_extra_dev_tlb_flush(info, addr, mask, tag- > >pasid, info->ats_qdep); > > + break; > > + } > > + } > > + spin_unlock_irqrestore(&domain->cache_lock, flags); > > Best regards, > baolu
On 4/23/24 4:42 PM, Tian, Kevin wrote: >> From: Baolu Lu<baolu.lu@linux.intel.com> >> Sent: Monday, April 22, 2024 1:30 PM >> >> On 4/16/24 4:06 PM, Lu Baolu wrote: >>> + case CACHE_TAG_NESTING_DEVTLB: >>> + /* >>> + * Address translation cache in device side caches the >>> + * result of nested translation. There is no easy way >>> + * to identify the exact set of nested translations >>> + * affected by a change in S2. So just flush the entire >>> + * device cache. >>> + */ >>> + addr = 0; >>> + mask = MAX_AGAW_PFN_WIDTH; >>> + fallthrough; >> I realized that the logic above is not right. Setting both @addr and >> @mask to 0 doesn't means flush all caches on the device. I will change >> it like below: > I didn't get. Above code doesn't set @mask to 0. Oh!? I have no idea why I read that as "mask = 0" now. Perhaps my brain was on vacation earlier. :-) > >> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c >> index e8418cdd8331..18debb82272a 100644 >> --- a/drivers/iommu/intel/cache.c >> +++ b/drivers/iommu/intel/cache.c >> @@ -302,9 +302,14 @@ void cache_tag_flush_range(struct dmar_domain >> *domain, unsigned long start, >> * affected by a change in S2. So just flush >> the entire >> * device cache. >> */ >> - addr = 0; >> - mask = MAX_AGAW_PFN_WIDTH; >> - fallthrough; >> + info = dev_iommu_priv_get(tag->dev); >> + sid = PCI_DEVID(info->bus, info->devfn); >> + >> + qi_flush_dev_iotlb(iommu, sid, info->pfsid, >> info->ats_qdep, >> + 0, MAX_AGAW_PFN_WIDTH); >> + quirk_extra_dev_tlb_flush(info, 0, >> MAX_AGAW_PFN_WIDTH, >> + IOMMU_NO_PASID, >> info->ats_qdep); >> + break; > and I didn't get this change. It goes backward by ignoring tag->pasid. > > what's the exact problem of the fallthrough logic in original code? Sorry! Please ignore this. Best regards, baolu
© 2016 - 2026 Red Hat, Inc.