[RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
Posted by Tina Zhang 2 years, 1 month ago
Add basic hook-up code to implement the generic IO page table framework.
The INTEL_IOMMU format's map/unmap/iova_to_phys callbacks simply forward
to the existing VT-d implementations, and the iommu_flush_ops callbacks
are empty stubs for now.
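
For reference, a minimal sketch of how a caller would pick up the new
format (the attach path is not part of this patch; the function name
below is illustrative):

	/*
	 * Illustrative only: the cfg passed in must be the one embedded
	 * in the domain, because alloc_pgtable() uses container_of() on
	 * it to recover the dmar_domain.
	 */
	static int dmar_domain_init_iop(struct dmar_domain *domain)
	{
		struct io_pgtable_ops *ops;

		ops = alloc_io_pgtable_ops(INTEL_IOMMU,
					   &domain->dmar_iop.pgtbl_cfg,
					   domain);
		if (!ops)
			return -ENOMEM;

		/* ops now points at domain->dmar_iop.iop.ops */
		return 0;
	}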

Signed-off-by: Tina Zhang <tina.zhang@intel.com>
---
 drivers/iommu/intel/Kconfig |  1 +
 drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/iommu.h |  7 +++
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 +
 5 files changed, 107 insertions(+)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..8334e7e50e69 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@ config INTEL_IOMMU
 	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
+	select IOMMU_IO_PGTABLE
 	select NEED_DMA_MAP_STATE
 	select DMAR_TABLE
 	select SWIOTLB
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dbcdf7b95b9f..80bd1993861c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,6 +23,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/io-pgtable.h>
 
 #include "iommu.h"
 #include "../dma-iommu.h"
@@ -67,6 +68,20 @@
 #define LEVEL_STRIDE		(9)
 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
 
+#define io_pgtable_cfg_to_dmar_pgtable(x) \
+	container_of((x), struct dmar_io_pgtable, pgtbl_cfg)
+
+#define io_pgtable_to_dmar_pgtable(x) \
+	container_of((x), struct dmar_io_pgtable, iop)
+
+#define io_pgtable_to_dmar_domain(x) \
+	container_of(io_pgtable_to_dmar_pgtable(x), \
+		struct dmar_domain, dmar_iop)
+
+#define io_pgtable_ops_to_dmar_domain(x) \
+	container_of(io_pgtable_to_dmar_pgtable(io_pgtable_ops_to_pgtable(x)), \
+		struct dmar_domain, dmar_iop)
+
 static inline int agaw_to_level(int agaw)
 {
 	return agaw + 2;
@@ -5171,3 +5186,82 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
 
 	return ret;
 }
+
+static void flush_all(void *cookie)
+{
+}
+
+static void flush_walk(unsigned long iova, size_t size,
+		       size_t granule, void *cookie)
+{
+}
+
+static void add_page(struct iommu_iotlb_gather *gather,
+		     unsigned long iova, size_t granule,
+		     void *cookie)
+{
+}
+
+static const struct iommu_flush_ops flush_ops = {
+	.tlb_flush_all  = flush_all,
+	.tlb_flush_walk = flush_walk,
+	.tlb_add_page   = add_page,
+};
+
+static void free_pgtable(struct io_pgtable *iop)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_to_dmar_domain(iop);
+
+	if (dmar_domain->pgd) {
+		LIST_HEAD(freelist);
+
+		domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), &freelist);
+		put_pages_list(&freelist);
+	}
+}
+
+static int pgtable_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+			     phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			     int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_map_pages(&dmar_domain->domain, iova, paddr, pgsize,
+				     pgcount, iommu_prot, gfp, mapped);
+}
+
+static size_t pgtable_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+				  size_t pgsize, size_t pgcount,
+				  struct iommu_iotlb_gather *gather)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_unmap_pages(&dmar_domain->domain, iova, pgsize,
+				       pgcount, gather);
+}
+
+static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
+					unsigned long iova)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_iova_to_phys(&dmar_domain->domain, iova);
+}
+
+static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+
+	pgtable->iop.ops.map_pages = pgtable_map_pages;
+	pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
+	pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
+
+	cfg->tlb = &flush_ops;
+
+	return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
+	.alloc = alloc_pgtable,
+	.free  = free_pgtable,
+};
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8d0aac71c135..5207fea6477a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-pgtable.h>
 #include <linux/dmar.h>
 #include <linux/bitfield.h>
 #include <linux/xarray.h>
@@ -579,6 +580,11 @@ struct iommu_domain_info {
 					 * to VT-d spec, section 9.3 */
 };
 
+struct dmar_io_pgtable {
+	struct io_pgtable_cfg   pgtbl_cfg;
+	struct io_pgtable       iop;
+};
+
 struct dmar_domain {
 	int	nid;			/* node id */
 	struct xarray iommu_array;	/* Attached IOMMU array */
@@ -633,6 +639,7 @@ struct dmar_domain {
 
 	struct iommu_domain domain;	/* generic domain data structure for
 					   iommu core */
+	struct dmar_io_pgtable dmar_iop;
 };
 
 /*
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 5755dee96a68..533b27557290 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -35,6 +35,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 #ifdef CONFIG_IOMMU_IO_PGTABLE_VIRT
 	[VIRT_IO_PGTABLE] = &io_pgtable_virt_init_fns,
 #endif
+#ifdef CONFIG_INTEL_IOMMU
+	[INTEL_IOMMU] = &io_pgtable_intel_iommu_init_fns,
+#endif
 };
 
 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index bdcade2c4844..b2857c18f963 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -20,6 +20,7 @@ enum io_pgtable_fmt {
 	APPLE_DART,
 	APPLE_DART2,
 	VIRT_IO_PGTABLE,
+	INTEL_IOMMU,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -281,5 +282,6 @@ extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_virt_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns;
 
 #endif /* __IO_PGTABLE_H */
-- 
2.39.3
Re: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
Posted by Jason Gunthorpe 2 years, 1 month ago
On Mon, Nov 06, 2023 at 02:12:23AM -0500, Tina Zhang wrote:
> Add basic hook-up code to implement the generic IO page table framework.
> 
> Signed-off-by: Tina Zhang <tina.zhang@intel.com>
> ---
>  drivers/iommu/intel/Kconfig |  1 +
>  drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
>  drivers/iommu/intel/iommu.h |  7 +++
>  drivers/iommu/io-pgtable.c  |  3 ++
>  include/linux/io-pgtable.h  |  2 +
>  5 files changed, 107 insertions(+)

If this is going to happen can we also convert vt-d to actually use
the io page table stuff directly and shuffle the code around so it is
structured like the rest of the io page table implementations?
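
For comparison, the existing formats keep the table walker inside the
io-pgtable backend and only take TLB callbacks from the driver,
roughly like this (a sketch modelled on io-pgtable-arm.c; all names
here are made up):

	#include <linux/io-pgtable.h>
	#include <linux/slab.h>

	struct xxx_io_pgtable {
		struct io_pgtable iop;
		u64 *pgd;		/* root table owned by the format */
	};

	static int xxx_map_pages(struct io_pgtable_ops *ops,
				 unsigned long iova, phys_addr_t paddr,
				 size_t pgsize, size_t pgcount,
				 int prot, gfp_t gfp, size_t *mapped)
	{
		/* walk and install PTEs here; nothing bounces back
		 * into the driver's domain ops */
		return -ENOSYS;
	}

	static struct io_pgtable *xxx_alloc(struct io_pgtable_cfg *cfg,
					    void *cookie)
	{
		struct xxx_io_pgtable *data;

		data = kzalloc(sizeof(*data), GFP_KERNEL);
		if (!data)
			return NULL;
		data->iop.ops.map_pages = xxx_map_pages;
		return &data->iop;
	}

	static void xxx_free(struct io_pgtable *iop)
	{
		kfree(container_of(iop, struct xxx_io_pgtable, iop));
	}

	struct io_pgtable_init_fns io_pgtable_xxx_init_fns = {
		.alloc = xxx_alloc,
		.free  = xxx_free,
	};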

Jason
RE: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
Posted by Zhang, Tina 2 years, 1 month ago
Hi Jason,

> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Tuesday, November 7, 2023 3:32 AM
> To: Zhang, Tina <tina.zhang@intel.com>
> Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>; Tian, Kevin
> <kevin.tian@intel.com>; Lu Baolu <baolu.lu@linux.intel.com>; joro@8bytes.org;
> will@kernel.org; Liu, Yi L <yi.l.liu@intel.com>; virtualization@lists.linux-
> foundation.org; iommu@lists.linux.dev; linux-kernel@vger.kernel.org
> Subject: Re: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
> 
> On Mon, Nov 06, 2023 at 02:12:23AM -0500, Tina Zhang wrote:
> > Add basic hook-up code to implement the generic IO page table framework.
> >
> > Signed-off-by: Tina Zhang <tina.zhang@intel.com>
> > ---
> >  drivers/iommu/intel/Kconfig |  1 +
> >  drivers/iommu/intel/iommu.c | 94
> > +++++++++++++++++++++++++++++++++++++
> >  drivers/iommu/intel/iommu.h |  7 +++
> >  drivers/iommu/io-pgtable.c  |  3 ++
> >  include/linux/io-pgtable.h  |  2 +
> >  5 files changed, 107 insertions(+)
> 
> If this is going to happen can we also convert vt-d to actually use the io page
> table stuff directly and shuffle the code around so it is structured like the rest of
> the io page table implementations?
Converting the VT-d driver to use the io-pgtable framework directly involves a lot of code churn. I made a local version of it, and it didn't show much benefit.

VT-d only supports one 1st-stage IO pgtable format and one 2nd-stage IO pgtable format. So, the current IO pgtable handling operations seem more efficient compared to adding another layer of callbacks on top of them.
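
To illustrate the extra hop, the map path with this patch looks like:

	io_pgtable_ops->map_pages()      (generic framework entry)
	  == pgtable_map_pages()         (the wrapper from this patch)
	    -> intel_iommu_map_pages()   (existing VT-d implementation)

so the framework only adds one more indirect call in front of the page-table code VT-d already has.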

We have a plan to add a new io_pgtable.c file under the VT-d driver directory and use that file to collect the IO pgtable related functions. But we want to hold off on converting VT-d to use the io-pgtable framework directly unless we can see some benefit.

Regards,
-Tina
> 
> Jason
Re: [RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
Posted by Jason Gunthorpe 2 years, 1 month ago
On Thu, Nov 09, 2023 at 12:10:59AM +0000, Zhang, Tina wrote:

> > If this is going to happen can we also convert vt-d to actually use the io page
> > table stuff directly and shuffle the code around so it is structured like the rest of
> > the io page table implementations?

> Converting the VT-d driver to use the io-pgtable framework directly
> involves a lot of code churn. I made a local version of it, and it
> didn't show much benefit.

Well, it structures the code in a similar way to all the other
drivers, though I admit I haven't looked closely at the io page
table stuff.
 
> VT-d only supports one 1st-stage IO pgtable format and one 2nd-stage
> IO pgtable format. So, the current IO pgtable handling operations
> seem more efficient compared to adding another layer of callbacks
> on top of them.

I would like to de-virtualize those callbacks; it is completely
pointless when we have per-format iommu_domain ops now.
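
i.e. something like this instead of routing through io_pgtable_ops (a
sketch; the intel_fs_*/intel_ss_* names are made up):

	/* One ops table per page-table format, each calling its own
	 * walker directly with no extra dispatch layer. */
	static const struct iommu_domain_ops intel_fs_domain_ops = {
		.attach_dev   = intel_iommu_attach_device,
		.map_pages    = intel_fs_map_pages,
		.unmap_pages  = intel_fs_unmap_pages,
		.iova_to_phys = intel_fs_iova_to_phys,
	};

	static const struct iommu_domain_ops intel_ss_domain_ops = {
		.attach_dev   = intel_iommu_attach_device,
		.map_pages    = intel_ss_map_pages,
		.unmap_pages  = intel_ss_unmap_pages,
		.iova_to_phys = intel_ss_iova_to_phys,
	};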

Jason