[RFC PATCH 03/13] iommu/intel: zap context table entries on kexec

James Gowans posted 13 patches 2 months, 2 weeks ago
[RFC PATCH 03/13] iommu/intel: zap context table entries on kexec
Posted by James Gowans 2 months, 2 weeks ago
Instead of fully shutting down the IOMMU on kexec, rather zap context
table entries for devices. This is the initial step to be able to
persist some domains. Once a struct iommu_domain can be marked
persistent then those persistent domains will be skipped when doing the
IOMMU shut down.
---
 drivers/iommu/intel/dmar.c  |  1 +
 drivers/iommu/intel/iommu.c | 34 ++++++++++++++++++++++++++++++----
 drivers/iommu/intel/iommu.h |  2 ++
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 1c8d3141cb55..f79aba382e77 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1099,6 +1099,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	spin_lock_init(&iommu->device_rbtree_lock);
 	mutex_init(&iommu->iopf_lock);
 	iommu->node = NUMA_NO_NODE;
+	INIT_LIST_HEAD(&iommu->domains);
 
 	ver = readl(iommu->reg + DMAR_VER_REG);
 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9ff8b83c19a3..2297cbb0253f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1575,6 +1575,7 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
 		goto err_clear;
 	}
 	domain_update_iommu_cap(domain);
+	list_add(&domain->domains, &iommu->domains);
 
 	spin_unlock(&iommu->lock);
 	return 0;
@@ -3185,6 +3186,33 @@ static void intel_disable_iommus(void)
 		iommu_disable_translation(iommu);
 }
 
+static void zap_context_table_entries(struct intel_iommu *iommu)
+{
+	struct context_entry *context;
+	struct dmar_domain *domain;
+	struct device_domain_info *device;
+	int bus, devfn;
+	u16 did_old;
+
+	list_for_each_entry(domain, &iommu->domains, domains) {
+		list_for_each_entry(device, &domain->devices, link) {
+			context = iommu_context_addr(iommu, device->bus, device->devfn, 0);
+			if (!context || !context_present(context))
+				continue;
+			context_domain_id(context);
+			context_clear_entry(context);
+			__iommu_flush_cache(iommu, context, sizeof(*context));
+			iommu->flush.flush_context(iommu,
+						   did_old,
+						   (((u16)bus) << 8) | devfn,
+						   DMA_CCMD_MASK_NOBIT,
+						   DMA_CCMD_DEVICE_INVL);
+			iommu->flush.flush_iotlb(iommu,	did_old, 0, 0,
+						 DMA_TLB_DSI_FLUSH);
+		}
+	}
+}
+
 void intel_iommu_shutdown(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -3197,10 +3225,8 @@ void intel_iommu_shutdown(void)
 
 	/* Disable PMRs explicitly here. */
 	for_each_iommu(iommu, drhd)
-		iommu_disable_protect_mem_regions(iommu);
-
-	/* Make sure the IOMMUs are switched off */
-	intel_disable_iommus();
+		zap_context_table_entries(iommu);
+	return
 
 	up_write(&dmar_global_lock);
 }
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index b67c14da1240..cfd006588824 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -606,6 +606,7 @@ struct dmar_domain {
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
 	struct list_head dev_pasids;	/* all attached pasids */
+	struct list_head domains;	/* all struct dmar_domains on this IOMMU */
 
 	spinlock_t cache_lock;		/* Protect the cache tag list */
 	struct list_head cache_tags;	/* Cache tag list */
@@ -749,6 +750,7 @@ struct intel_iommu {
 	void *perf_statistic;
 
 	struct iommu_pmu *pmu;
+	struct list_head domains;	/* all struct dmar_domains on this IOMMU */
 };
 
 /* PCI domain-device relationship */
-- 
2.34.1
Re: [RFC PATCH 03/13] iommu/intel: zap context table entries on kexec
Posted by Jason Gunthorpe 1 month, 3 weeks ago
On Mon, Sep 16, 2024 at 01:30:52PM +0200, James Gowans wrote:
> Instead of fully shutting down the IOMMU on kexec, rather zap context
> table entries for devices. This is the initial step to be able to
> persist some domains. Once a struct iommu_domain can be marked
> persistent then those persistent domains will be skipped when doing the
> IOMMU shut down.
> ---
>  drivers/iommu/intel/dmar.c  |  1 +
>  drivers/iommu/intel/iommu.c | 34 ++++++++++++++++++++++++++++++----
>  drivers/iommu/intel/iommu.h |  2 ++
>  3 files changed, 33 insertions(+), 4 deletions(-)

We should probably try to avoid doing this kind of stuff in
drivers. The core code can generically ask drivers to attach a
BLOCKING domain as part of the kexec sequence and the core code can
then decide which devices should be held over.

There is also some complexity here around RMRs, we can't always apply
a blocking domain... Not sure what you'd do in those cases.

IIRC we already do something like this with the bus master enable on
the PCI side?? At least, if the kernel is deciding to block DMA when
the IOMMU is on it should do it consistently and inhibit the PCI
device as well.

Jason