[RFC PATCH 11/13] iommu: Add callback to restore persisted iommu_domain

James Gowans posted 13 patches 2 months, 2 weeks ago
[RFC PATCH 11/13] iommu: Add callback to restore persisted iommu_domain
Posted by James Gowans 2 months, 2 weeks ago
The previous commits re-hydrated the struct iommu_domain and added them
to the persisted_domains xarray. Now provide a callback to get the
domain so that iommufd can restore a link to it.

Roughly where the restore would happen is called out in a comment, but
some more head scratching is needed to figure out how to actually do
this.
---
 drivers/iommu/intel/iommu.c       | 12 ++++++++++++
 drivers/iommu/iommufd/serialise.c |  9 ++++++++-
 include/linux/iommu.h             |  5 +++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 8e0ed033b03f..000ddfe5b6de 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4690,6 +4690,17 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
 	return 0;
 }
 
+static struct iommu_domain *intel_domain_restore(struct device *dev,
+		unsigned long persistent_id)
+{
+	struct iommu_domain *domain;
+
+	domain = xa_load(&persistent_domains, persistent_id);
+	if (!domain)
+		pr_warn("No such persisted domain id %lu\n", persistent_id);
+	return domain;
+}
+
 static const struct iommu_dirty_ops intel_dirty_ops = {
 	.set_dirty_tracking = intel_iommu_set_dirty_tracking,
 	.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
@@ -4703,6 +4714,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.domain_alloc		= intel_iommu_domain_alloc,
 	.domain_alloc_user	= intel_iommu_domain_alloc_user,
 	.domain_alloc_sva	= intel_svm_domain_alloc,
+	.domain_restore 	= intel_domain_restore,
 	.probe_device		= intel_iommu_probe_device,
 	.release_device		= intel_iommu_release_device,
 	.get_resv_regions	= intel_iommu_get_resv_regions,
diff --git a/drivers/iommu/iommufd/serialise.c b/drivers/iommu/iommufd/serialise.c
index 9519969bd201..baac7d6150cb 100644
--- a/drivers/iommu/iommufd/serialise.c
+++ b/drivers/iommu/iommufd/serialise.c
@@ -139,7 +139,14 @@ static int rehydrate_iommufd(char *iommufd_name)
 		    area->node.last = *iova_start + *iova_len - 1;
 		    interval_tree_insert(&area->node, &ioas->iopt.area_itree);
 	    }
-	    /* TODO: restore link from ioas to hwpt. */
+	    /*
+	     * Here we should do something to associate struct iommufd_device with the
+	     * ictx, then get the iommu_ops via dev_iommu_ops(), and call the new
+	     * .domain_restore callback to get the struct iommu_domain.
+	     * Something like:
+	     * hwpt->domain = ops->domain_restore(dev, persistent_id);
+	     * Hand wavy - the details allude me at the moment...
+	     */
 	}
 
 	return fd;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a616e8702a1c..0dc97d494fd9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -529,6 +529,8 @@ static inline int __iommu_copy_struct_from_user_array(
  * @domain_alloc_paging: Allocate an iommu_domain that can be used for
  *                       UNMANAGED, DMA, and DMA_FQ domain types.
  * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing.
+ * @domain_restore: After kexec, give the same persistent_id which was originally
+ *                  used to allocate the domain, and the domain will be restored.
  * @probe_device: Add device to iommu driver handling
  * @release_device: Remove device from iommu driver handling
  * @probe_finalize: Do final setup work after the device is added to an IOMMU
@@ -576,6 +578,9 @@ struct iommu_ops {
 	struct iommu_domain *(*domain_alloc_sva)(struct device *dev,
 						 struct mm_struct *mm);
 
+	struct iommu_domain *(*domain_restore)(struct device *dev,
+			unsigned long persistent_id);
+
 	struct iommu_device *(*probe_device)(struct device *dev);
 	void (*release_device)(struct device *dev);
 	void (*probe_finalize)(struct device *dev);
-- 
2.34.1
Re: [RFC PATCH 11/13] iommu: Add callback to restore persisted iommu_domain
Posted by Jason Gunthorpe 1 month, 3 weeks ago
On Mon, Sep 16, 2024 at 01:31:00PM +0200, James Gowans wrote:
> diff --git a/drivers/iommu/iommufd/serialise.c b/drivers/iommu/iommufd/serialise.c
> index 9519969bd201..baac7d6150cb 100644
> --- a/drivers/iommu/iommufd/serialise.c
> +++ b/drivers/iommu/iommufd/serialise.c
> @@ -139,7 +139,14 @@ static int rehydrate_iommufd(char *iommufd_name)
>  		    area->node.last = *iova_start + *iova_len - 1;
>  		    interval_tree_insert(&area->node, &ioas->iopt.area_itree);
>  	    }
> -	    /* TODO: restore link from ioas to hwpt. */
> +	    /*
> +	     * Here we should do something to associate struct iommufd_device with the
> +	     * ictx, then get the iommu_ops via dev_iommu_ops(), and call the new
> +	     * .domain_restore callback to get the struct iommu_domain.
> +	     * Something like:
> +	     * hwpt->domain = ops->domain_restore(dev, persistent_id);
> +	     * Hand wavy - the details allude me at the moment...
> +	     */
>  	}

The core code should request a iommu_domain handle for the
pre-existing translation very early on, it should not leave the device
in some weird NULL domain state. I have been trying hard to eliminate
that.

The special domain would need to remain attached and some protocol
would be needed to carefully convey that past vfio to iommufd,
including inhibiting attaching a blocked domain in VFIO
startup. Including blocking FLRs from VFIO and rejecting attaches to
other non-VFIO drivers.

This is a twisty complicated path, it needs some solid definition of
what the lifecycle of this special domain is, and some sensible exits
if userspace isn't expecting/co-operating with the hand over, or it
crashes while doing this..

> @@ -576,6 +578,9 @@ struct iommu_ops {
>  	struct iommu_domain *(*domain_alloc_sva)(struct device *dev,
>  						 struct mm_struct *mm);
>  
> +	struct iommu_domain *(*domain_restore)(struct device *dev,
> +			unsigned long persistent_id);
> +

Why do we need an ID? There is only one persistent domain per device,
right?

This may need PASID, at least Intel requires the hypervisor to handle
PASID domains, and they would need to persist as well.

Jason