[PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu

Jacob Pan posted 11 patches 3 weeks, 4 days ago
[PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Jacob Pan 3 weeks, 4 days ago
From: Jason Gunthorpe <jgg@nvidia.com>

Create just a little part of a real iommu driver, enough to
slot in under the dev_iommu_ops() and allow iommufd to call
domain_alloc_paging_flags() and fail everything else.

This allows explicitly creating a HWPT under an IOAS.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
---
 drivers/iommu/iommufd/Makefile          |  1 +
 drivers/iommu/iommufd/hw_pagetable.c    | 11 ++-
 drivers/iommu/iommufd/hwpt_noiommu.c    | 91 +++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h |  2 +
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 drivers/iommu/iommufd/hwpt_noiommu.c

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 71d692c9a8f4..2b1a020b14a6 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -10,6 +10,7 @@ iommufd-y := \
 	vfio_compat.o \
 	viommu.o
 
+iommufd-$(CONFIG_VFIO_NOIOMMU) += hwpt_noiommu.o
 iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
 
 obj-$(CONFIG_IOMMUFD) += iommufd.o
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index fe789c2dc0c9..37316d77277d 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -8,6 +8,13 @@
 #include "../iommu-priv.h"
 #include "iommufd_private.h"
 
+static const struct iommu_ops *get_iommu_ops(struct iommufd_device *idev)
+{
+	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && !idev->igroup->group)
+		return &iommufd_noiommu_ops;
+	return dev_iommu_ops(idev->dev);
+}
+
 static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
 {
 	if (hwpt->domain)
@@ -114,7 +121,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 				IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
 				IOMMU_HWPT_FAULT_ID_VALID |
 				IOMMU_HWPT_ALLOC_PASID;
-	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
+	const struct iommu_ops *ops = get_iommu_ops(idev);
 	struct iommufd_hwpt_paging *hwpt_paging;
 	struct iommufd_hw_pagetable *hwpt;
 	int rc;
@@ -229,7 +236,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
 			  struct iommufd_device *idev, u32 flags,
 			  const struct iommu_user_data *user_data)
 {
-	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
+	const struct iommu_ops *ops = get_iommu_ops(idev);
 	struct iommufd_hwpt_nested *hwpt_nested;
 	struct iommufd_hw_pagetable *hwpt;
 	int rc;
diff --git a/drivers/iommu/iommufd/hwpt_noiommu.c b/drivers/iommu/iommufd/hwpt_noiommu.c
new file mode 100644
index 000000000000..0aa99f581ca3
--- /dev/null
+++ b/drivers/iommu/iommufd/hwpt_noiommu.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/iommu.h>
+#include <linux/generic_pt/iommu.h>
+#include "iommufd_private.h"
+
+static const struct iommu_domain_ops noiommu_amdv1_ops;
+
+struct noiommu_domain {
+	union {
+		struct iommu_domain domain;
+		struct pt_iommu_amdv1 amdv1;
+	};
+	spinlock_t lock;
+};
+PT_IOMMU_CHECK_DOMAIN(struct noiommu_domain, amdv1.iommu, domain);
+
+static void noiommu_change_top(struct pt_iommu *iommu_table,
+			       phys_addr_t top_paddr, unsigned int top_level)
+{
+}
+
+static spinlock_t *noiommu_get_top_lock(struct pt_iommu *iommupt)
+{
+	struct noiommu_domain *domain =
+		container_of(iommupt, struct noiommu_domain, amdv1.iommu);
+
+	return &domain->lock;
+}
+
+static const struct pt_iommu_driver_ops noiommu_driver_ops = {
+	.get_top_lock = noiommu_get_top_lock,
+	.change_top = noiommu_change_top,
+};
+
+static struct iommu_domain *
+noiommu_alloc_paging_flags(struct device *dev, u32 flags,
+			   const struct iommu_user_data *user_data)
+{
+	struct pt_iommu_amdv1_cfg cfg = {};
+	struct noiommu_domain *dom;
+	int rc;
+
+	if (flags || user_data)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	cfg.common.hw_max_vasz_lg2 = 64;
+	cfg.common.hw_max_oasz_lg2 = 52;
+	cfg.starting_level = 2;
+	cfg.common.features =
+		(BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
+		 BIT(PT_FEAT_AMDV1_FORCE_COHERENCE));
+
+	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+	if (!dom)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&dom->lock);
+	dom->amdv1.iommu.nid = NUMA_NO_NODE;
+	dom->amdv1.iommu.driver_ops = &noiommu_driver_ops;
+	dom->domain.ops = &noiommu_amdv1_ops;
+
+	/* Use mock page table which is based on AMDV1 */
+	rc = pt_iommu_amdv1_init(&dom->amdv1, &cfg, GFP_KERNEL);
+	if (rc) {
+		kfree(dom);
+		return ERR_PTR(rc);
+	}
+
+	return &dom->domain;
+}
+
+static void noiommu_domain_free(struct iommu_domain *iommu_domain)
+{
+	struct noiommu_domain *domain =
+		container_of(iommu_domain, struct noiommu_domain, domain);
+
+	pt_iommu_deinit(&domain->amdv1.iommu);
+	kfree(domain);
+}
+
+static const struct iommu_domain_ops noiommu_amdv1_ops = {
+	IOMMU_PT_DOMAIN_OPS(amdv1),
+	.free = noiommu_domain_free,
+};
+
+struct iommu_ops iommufd_noiommu_ops = {
+	.domain_alloc_paging_flags = noiommu_alloc_paging_flags,
+};
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 6ac1965199e9..9c18c5eb1899 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -464,6 +464,8 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
 	refcount_dec(&hwpt->obj.users);
 }
 
+extern struct iommu_ops iommufd_noiommu_ops;
+
 struct iommufd_attach;
 
 struct iommufd_group {
-- 
2.34.1
Re: [PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Mostafa Saleh 2 weeks, 2 days ago
On Thu, Mar 12, 2026 at 08:56:27AM -0700, Jacob Pan wrote:
> From: Jason Gunthorpe <jgg@nvidia.com>
> 
> Create just a little part of a real iommu driver, enough to
> slot in under the dev_iommu_ops() and allow iommufd to call
> domain_alloc_paging_flags() and fail everything else.
> 
> This allows explicitly creating a HWPT under an IOAS.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
> ---
>  drivers/iommu/iommufd/Makefile          |  1 +
>  drivers/iommu/iommufd/hw_pagetable.c    | 11 ++-
>  drivers/iommu/iommufd/hwpt_noiommu.c    | 91 +++++++++++++++++++++++++
>  drivers/iommu/iommufd/iommufd_private.h |  2 +
>  4 files changed, 103 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/iommu/iommufd/hwpt_noiommu.c
> 
> diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
> index 71d692c9a8f4..2b1a020b14a6 100644
> --- a/drivers/iommu/iommufd/Makefile
> +++ b/drivers/iommu/iommufd/Makefile
> @@ -10,6 +10,7 @@ iommufd-y := \
>  	vfio_compat.o \
>  	viommu.o
>  
> +iommufd-$(CONFIG_VFIO_NOIOMMU) += hwpt_noiommu.o
>  iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
>  
>  obj-$(CONFIG_IOMMUFD) += iommufd.o
> diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
> index fe789c2dc0c9..37316d77277d 100644
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -8,6 +8,13 @@
>  #include "../iommu-priv.h"
>  #include "iommufd_private.h"
>  
> +static const struct iommu_ops *get_iommu_ops(struct iommufd_device *idev)
> +{
> +	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && !idev->igroup->group)
> +		return &iommufd_noiommu_ops;
> +	return dev_iommu_ops(idev->dev);
> +}
> +
>  static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
>  {
>  	if (hwpt->domain)
> @@ -114,7 +121,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
>  				IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
>  				IOMMU_HWPT_FAULT_ID_VALID |
>  				IOMMU_HWPT_ALLOC_PASID;
> -	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> +	const struct iommu_ops *ops = get_iommu_ops(idev);
>  	struct iommufd_hwpt_paging *hwpt_paging;
>  	struct iommufd_hw_pagetable *hwpt;
>  	int rc;
> @@ -229,7 +236,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
>  			  struct iommufd_device *idev, u32 flags,
>  			  const struct iommu_user_data *user_data)
>  {
> -	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> +	const struct iommu_ops *ops = get_iommu_ops(idev);
>  	struct iommufd_hwpt_nested *hwpt_nested;
>  	struct iommufd_hw_pagetable *hwpt;
>  	int rc;
> diff --git a/drivers/iommu/iommufd/hwpt_noiommu.c b/drivers/iommu/iommufd/hwpt_noiommu.c
> new file mode 100644
> index 000000000000..0aa99f581ca3
> --- /dev/null
> +++ b/drivers/iommu/iommufd/hwpt_noiommu.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
> + */
> +#include <linux/iommu.h>
> +#include <linux/generic_pt/iommu.h>
> +#include "iommufd_private.h"
> +
> +static const struct iommu_domain_ops noiommu_amdv1_ops;
> +
> +struct noiommu_domain {
> +	union {
> +		struct iommu_domain domain;
> +		struct pt_iommu_amdv1 amdv1;
> +	};
> +	spinlock_t lock;
> +};
> +PT_IOMMU_CHECK_DOMAIN(struct noiommu_domain, amdv1.iommu, domain);
> +
> +static void noiommu_change_top(struct pt_iommu *iommu_table,
> +			       phys_addr_t top_paddr, unsigned int top_level)
> +{
> +}
> +
> +static spinlock_t *noiommu_get_top_lock(struct pt_iommu *iommupt)
> +{
> +	struct noiommu_domain *domain =
> +		container_of(iommupt, struct noiommu_domain, amdv1.iommu);
> +
> +	return &domain->lock;
> +}
> +
> +static const struct pt_iommu_driver_ops noiommu_driver_ops = {
> +	.get_top_lock = noiommu_get_top_lock,
> +	.change_top = noiommu_change_top,
> +};
> +
> +static struct iommu_domain *
> +noiommu_alloc_paging_flags(struct device *dev, u32 flags,
> +			   const struct iommu_user_data *user_data)
> +{
> +	struct pt_iommu_amdv1_cfg cfg = {};
> +	struct noiommu_domain *dom;
> +	int rc;
> +
> +	if (flags || user_data)
> +		return ERR_PTR(-EOPNOTSUPP);
> +
> +	cfg.common.hw_max_vasz_lg2 = 64;
> +	cfg.common.hw_max_oasz_lg2 = 52;
> +	cfg.starting_level = 2;
> +	cfg.common.features =
> +		(BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
> +		 BIT(PT_FEAT_AMDV1_FORCE_COHERENCE));
> +
> +	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
> +	if (!dom)
> +		return ERR_PTR(-ENOMEM);
> +
> +	spin_lock_init(&dom->lock);
> +	dom->amdv1.iommu.nid = NUMA_NO_NODE;
> +	dom->amdv1.iommu.driver_ops = &noiommu_driver_ops;
> +	dom->domain.ops = &noiommu_amdv1_ops;
> +
> +	/* Use mock page table which is based on AMDV1 */
> +	rc = pt_iommu_amdv1_init(&dom->amdv1, &cfg, GFP_KERNEL);
> +	if (rc) {
> +		kfree(dom);
> +		return ERR_PTR(rc);
> +	}
> +
> +	return &dom->domain;
> +}
> +
> +static void noiommu_domain_free(struct iommu_domain *iommu_domain)
> +{
> +	struct noiommu_domain *domain =
> +		container_of(iommu_domain, struct noiommu_domain, domain);
> +
> +	pt_iommu_deinit(&domain->amdv1.iommu);
> +	kfree(domain);
> +}
> +
> +static const struct iommu_domain_ops noiommu_amdv1_ops = {
> +	IOMMU_PT_DOMAIN_OPS(amdv1),

I see the appeal of re-using an existing page table implementation to
keep track of iovas which -as far as I understand- are used as tokens
for DMA pinned pages later, but maybe at least add some paragraph about
that, as it is not immediately clear and that's a different design
from the legacy noiommu VFIO code.

Thanks,
Mostafa

> +	.free = noiommu_domain_free,
> +};
> +
> +struct iommu_ops iommufd_noiommu_ops = {
> +	.domain_alloc_paging_flags = noiommu_alloc_paging_flags,
> +};
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 6ac1965199e9..9c18c5eb1899 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -464,6 +464,8 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
>  	refcount_dec(&hwpt->obj.users);
>  }
>  
> +extern struct iommu_ops iommufd_noiommu_ops;
> +
>  struct iommufd_attach;
>  
>  struct iommufd_group {
> -- 
> 2.34.1
>
Re: [PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Jacob Pan 2 weeks ago
Hi Mostafa,

On Sun, 22 Mar 2026 09:24:37 +0000
Mostafa Saleh <smostafa@google.com> wrote:

> On Thu, Mar 12, 2026 at 08:56:27AM -0700, Jacob Pan wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > 
> > Create just a little part of a real iommu driver, enough to
> > slot in under the dev_iommu_ops() and allow iommufd to call
> > domain_alloc_paging_flags() and fail everything else.
> > 
> > This allows explicitly creating a HWPT under an IOAS.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
> > ---
> >  drivers/iommu/iommufd/Makefile          |  1 +
> >  drivers/iommu/iommufd/hw_pagetable.c    | 11 ++-
> >  drivers/iommu/iommufd/hwpt_noiommu.c    | 91
> > +++++++++++++++++++++++++ drivers/iommu/iommufd/iommufd_private.h |
> >  2 + 4 files changed, 103 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/iommu/iommufd/hwpt_noiommu.c
> > 
> > diff --git a/drivers/iommu/iommufd/Makefile
> > b/drivers/iommu/iommufd/Makefile index 71d692c9a8f4..2b1a020b14a6
> > 100644 --- a/drivers/iommu/iommufd/Makefile
> > +++ b/drivers/iommu/iommufd/Makefile
> > @@ -10,6 +10,7 @@ iommufd-y := \
> >  	vfio_compat.o \
> >  	viommu.o
> >  
> > +iommufd-$(CONFIG_VFIO_NOIOMMU) += hwpt_noiommu.o
> >  iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
> >  
> >  obj-$(CONFIG_IOMMUFD) += iommufd.o
> > diff --git a/drivers/iommu/iommufd/hw_pagetable.c
> > b/drivers/iommu/iommufd/hw_pagetable.c index
> > fe789c2dc0c9..37316d77277d 100644 ---
> > a/drivers/iommu/iommufd/hw_pagetable.c +++
> > b/drivers/iommu/iommufd/hw_pagetable.c @@ -8,6 +8,13 @@
> >  #include "../iommu-priv.h"
> >  #include "iommufd_private.h"
> >  
> > +static const struct iommu_ops *get_iommu_ops(struct iommufd_device
> > *idev) +{
> > +	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) &&
> > !idev->igroup->group)
> > +		return &iommufd_noiommu_ops;
> > +	return dev_iommu_ops(idev->dev);
> > +}
> > +
> >  static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable
> > *hwpt) {
> >  	if (hwpt->domain)
> > @@ -114,7 +121,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx
> > *ictx, struct iommufd_ioas *ioas, IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
> >  				IOMMU_HWPT_FAULT_ID_VALID |
> >  				IOMMU_HWPT_ALLOC_PASID;
> > -	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > +	const struct iommu_ops *ops = get_iommu_ops(idev);
> >  	struct iommufd_hwpt_paging *hwpt_paging;
> >  	struct iommufd_hw_pagetable *hwpt;
> >  	int rc;
> > @@ -229,7 +236,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx
> > *ictx, struct iommufd_device *idev, u32 flags,
> >  			  const struct iommu_user_data *user_data)
> >  {
> > -	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
> > +	const struct iommu_ops *ops = get_iommu_ops(idev);
> >  	struct iommufd_hwpt_nested *hwpt_nested;
> >  	struct iommufd_hw_pagetable *hwpt;
> >  	int rc;
> > diff --git a/drivers/iommu/iommufd/hwpt_noiommu.c
> > b/drivers/iommu/iommufd/hwpt_noiommu.c new file mode 100644
> > index 000000000000..0aa99f581ca3
> > --- /dev/null
> > +++ b/drivers/iommu/iommufd/hwpt_noiommu.c
> > @@ -0,0 +1,91 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
> > + */
> > +#include <linux/iommu.h>
> > +#include <linux/generic_pt/iommu.h>
> > +#include "iommufd_private.h"
> > +
> > +static const struct iommu_domain_ops noiommu_amdv1_ops;
> > +
> > +struct noiommu_domain {
> > +	union {
> > +		struct iommu_domain domain;
> > +		struct pt_iommu_amdv1 amdv1;
> > +	};
> > +	spinlock_t lock;
> > +};
> > +PT_IOMMU_CHECK_DOMAIN(struct noiommu_domain, amdv1.iommu, domain);
> > +
> > +static void noiommu_change_top(struct pt_iommu *iommu_table,
> > +			       phys_addr_t top_paddr, unsigned int
> > top_level) +{
> > +}
> > +
> > +static spinlock_t *noiommu_get_top_lock(struct pt_iommu *iommupt)
> > +{
> > +	struct noiommu_domain *domain =
> > +		container_of(iommupt, struct noiommu_domain,
> > amdv1.iommu); +
> > +	return &domain->lock;
> > +}
> > +
> > +static const struct pt_iommu_driver_ops noiommu_driver_ops = {
> > +	.get_top_lock = noiommu_get_top_lock,
> > +	.change_top = noiommu_change_top,
> > +};
> > +
> > +static struct iommu_domain *
> > +noiommu_alloc_paging_flags(struct device *dev, u32 flags,
> > +			   const struct iommu_user_data *user_data)
> > +{
> > +	struct pt_iommu_amdv1_cfg cfg = {};
> > +	struct noiommu_domain *dom;
> > +	int rc;
> > +
> > +	if (flags || user_data)
> > +		return ERR_PTR(-EOPNOTSUPP);
> > +
> > +	cfg.common.hw_max_vasz_lg2 = 64;
> > +	cfg.common.hw_max_oasz_lg2 = 52;
> > +	cfg.starting_level = 2;
> > +	cfg.common.features =
> > +		(BIT(PT_FEAT_DYNAMIC_TOP) |
> > BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
> > +		 BIT(PT_FEAT_AMDV1_FORCE_COHERENCE));
> > +
> > +	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
> > +	if (!dom)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	spin_lock_init(&dom->lock);
> > +	dom->amdv1.iommu.nid = NUMA_NO_NODE;
> > +	dom->amdv1.iommu.driver_ops = &noiommu_driver_ops;
> > +	dom->domain.ops = &noiommu_amdv1_ops;
> > +
> > +	/* Use mock page table which is based on AMDV1 */
> > +	rc = pt_iommu_amdv1_init(&dom->amdv1, &cfg, GFP_KERNEL);
> > +	if (rc) {
> > +		kfree(dom);
> > +		return ERR_PTR(rc);
> > +	}
> > +
> > +	return &dom->domain;
> > +}
> > +
> > +static void noiommu_domain_free(struct iommu_domain *iommu_domain)
> > +{
> > +	struct noiommu_domain *domain =
> > +		container_of(iommu_domain, struct noiommu_domain,
> > domain); +
> > +	pt_iommu_deinit(&domain->amdv1.iommu);
> > +	kfree(domain);
> > +}
> > +
> > +static const struct iommu_domain_ops noiommu_amdv1_ops = {
> > +	IOMMU_PT_DOMAIN_OPS(amdv1),  
> 
> I see the appeal of re-using an existing page table implementation to
> keep track of iovas which -as far as I understand- are used as tokens
> for DMA pinned pages later, but maybe at least add some paragraph
> about that, as it is not immediately clear and that's a different
> design from the legacy noiommu VFIO code.
> 
Indeed it is a little confusing since we use the same VFIO noiommu
knobs but with an extended set of features. The legacy VFIO noiommu mode
does not support container/IOAS level APIs, thus there is no need for
domain ops.

I also tried to explain the new design in the doc patch[11/11] with
summaries of API limitations between legacy VFIO noiommu mode and this
new mode under iommufd.
+-------------------+---------------------+---------------------+
| Feature           | VFIO group          | VFIO device cdev    |
+===================+=====================+=====================+
| VFIO device UAPI  | Yes                 | Yes                 |
+-------------------+---------------------+---------------------+
| VFIO container    | No                  | No                  |
+-------------------+---------------------+---------------------+
| IOMMUFD IOAS      | No                  | Yes*                |
+-------------------+---------------------+---------------------+

How about adding the following comments:

@@ -81,6 +81,17 @@ static void noiommu_domain_free(struct iommu_domain *iommu_domain)
        kfree(domain);
 }
 
+/*
+ * AMDV1 is used as a dummy page table for no-IOMMU mode, similar to the
+ * iommufd selftest mock page table.
+ * Unlike legacy VFIO no-IOMMU mode, where no container level APIs are
+ * supported, this allows IOAS and hwpt objects to exist without hardware
+ * IOMMU support. IOVAs are used only for IOVA-to-PA lookups, not for
+ * hardware translation in DMA.
+ *
+ * This is only used with iommufd and cdev-based interfaces and does not
+ * apply to legacy VFIO group-container based noiommu mode.
+ */
 static const struct iommu_domain_ops noiommu_amdv1_ops = {
        IOMMU_PT_DOMAIN_OPS(amdv1),
Re: [PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Jason Gunthorpe 2 weeks ago
On Mon, Mar 23, 2026 at 02:11:32PM -0700, Jacob Pan wrote:

> +/*
> + * AMDV1 is used as a dummy page table for no-IOMMU mode, similar to the
> + * iommufd selftest mock page table.
> + * Unlike legacy VFIO no-IOMMU mode, where no container level APIs are
> + * supported, this allows IOAS and hwpt objects to exist without hardware
> + * IOMMU support. IOVAs are used only for IOVA-to-PA lookups, not for
> + * hardware translation in DMA.
> + *
> + * This is only used with iommufd and cdev-based interfaces and does not
> + * apply to legacy VFIO group-container based noiommu mode.
> + */
>  static const struct iommu_domain_ops noiommu_amdv1_ops = {

That seems clear

Jason
Re: [PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Samiullah Khawaja 2 weeks, 5 days ago
On Thu, Mar 12, 2026 at 08:56:27AM -0700, Jacob Pan wrote:
>From: Jason Gunthorpe <jgg@nvidia.com>
>
>Create just a little part of a real iommu driver, enough to
>slot in under the dev_iommu_ops() and allow iommufd to call
>domain_alloc_paging_flags() and fail everything else.
>
>This allows explicitly creating a HWPT under an IOAS.
>
>Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>Signed-off-by: Jacob Pan <jacob.pan@linux.microsoft.com>
>---
> drivers/iommu/iommufd/Makefile          |  1 +
> drivers/iommu/iommufd/hw_pagetable.c    | 11 ++-
> drivers/iommu/iommufd/hwpt_noiommu.c    | 91 +++++++++++++++++++++++++
> drivers/iommu/iommufd/iommufd_private.h |  2 +
> 4 files changed, 103 insertions(+), 2 deletions(-)
> create mode 100644 drivers/iommu/iommufd/hwpt_noiommu.c
>
>diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
>index 71d692c9a8f4..2b1a020b14a6 100644
>--- a/drivers/iommu/iommufd/Makefile
>+++ b/drivers/iommu/iommufd/Makefile
>@@ -10,6 +10,7 @@ iommufd-y := \
> 	vfio_compat.o \
> 	viommu.o
>
>+iommufd-$(CONFIG_VFIO_NOIOMMU) += hwpt_noiommu.o
> iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
>
> obj-$(CONFIG_IOMMUFD) += iommufd.o
>diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
>index fe789c2dc0c9..37316d77277d 100644
>--- a/drivers/iommu/iommufd/hw_pagetable.c
>+++ b/drivers/iommu/iommufd/hw_pagetable.c
>@@ -8,6 +8,13 @@
> #include "../iommu-priv.h"
> #include "iommufd_private.h"
>
>+static const struct iommu_ops *get_iommu_ops(struct iommufd_device *idev)
>+{
>+	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && !idev->igroup->group)
>+		return &iommufd_noiommu_ops;
>+	return dev_iommu_ops(idev->dev);
>+}
>+
> static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
> {
> 	if (hwpt->domain)
>@@ -114,7 +121,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
> 				IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
> 				IOMMU_HWPT_FAULT_ID_VALID |
> 				IOMMU_HWPT_ALLOC_PASID;
>-	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
>+	const struct iommu_ops *ops = get_iommu_ops(idev);
> 	struct iommufd_hwpt_paging *hwpt_paging;
> 	struct iommufd_hw_pagetable *hwpt;
> 	int rc;
>@@ -229,7 +236,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
> 			  struct iommufd_device *idev, u32 flags,
> 			  const struct iommu_user_data *user_data)
> {
>-	const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
>+	const struct iommu_ops *ops = get_iommu_ops(idev);
> 	struct iommufd_hwpt_nested *hwpt_nested;
> 	struct iommufd_hw_pagetable *hwpt;
> 	int rc;
>diff --git a/drivers/iommu/iommufd/hwpt_noiommu.c b/drivers/iommu/iommufd/hwpt_noiommu.c
>new file mode 100644
>index 000000000000..0aa99f581ca3
>--- /dev/null
>+++ b/drivers/iommu/iommufd/hwpt_noiommu.c
>@@ -0,0 +1,91 @@
>+// SPDX-License-Identifier: GPL-2.0-only
>+/*
>+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
>+ */
>+#include <linux/iommu.h>
>+#include <linux/generic_pt/iommu.h>
>+#include "iommufd_private.h"
>+
>+static const struct iommu_domain_ops noiommu_amdv1_ops;
>+
>+struct noiommu_domain {
>+	union {
>+		struct iommu_domain domain;
>+		struct pt_iommu_amdv1 amdv1;
>+	};
>+	spinlock_t lock;
>+};
>+PT_IOMMU_CHECK_DOMAIN(struct noiommu_domain, amdv1.iommu, domain);
>+
>+static void noiommu_change_top(struct pt_iommu *iommu_table,
>+			       phys_addr_t top_paddr, unsigned int top_level)
>+{
>+}
>+
>+static spinlock_t *noiommu_get_top_lock(struct pt_iommu *iommupt)
>+{
>+	struct noiommu_domain *domain =
>+		container_of(iommupt, struct noiommu_domain, amdv1.iommu);
>+
>+	return &domain->lock;
>+}
>+
>+static const struct pt_iommu_driver_ops noiommu_driver_ops = {
>+	.get_top_lock = noiommu_get_top_lock,
>+	.change_top = noiommu_change_top,
>+};
>+
>+static struct iommu_domain *
>+noiommu_alloc_paging_flags(struct device *dev, u32 flags,
>+			   const struct iommu_user_data *user_data)
>+{
>+	struct pt_iommu_amdv1_cfg cfg = {};
>+	struct noiommu_domain *dom;
>+	int rc;
>+
>+	if (flags || user_data)
>+		return ERR_PTR(-EOPNOTSUPP);
>+
>+	cfg.common.hw_max_vasz_lg2 = 64;
>+	cfg.common.hw_max_oasz_lg2 = 52;
>+	cfg.starting_level = 2;
>+	cfg.common.features =
>+		(BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_AMDV1_ENCRYPT_TABLES) |
>+		 BIT(PT_FEAT_AMDV1_FORCE_COHERENCE));
>+
>+	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
>+	if (!dom)
>+		return ERR_PTR(-ENOMEM);
>+
>+	spin_lock_init(&dom->lock);
>+	dom->amdv1.iommu.nid = NUMA_NO_NODE;
>+	dom->amdv1.iommu.driver_ops = &noiommu_driver_ops;
>+	dom->domain.ops = &noiommu_amdv1_ops;
>+
>+	/* Use mock page table which is based on AMDV1 */
>+	rc = pt_iommu_amdv1_init(&dom->amdv1, &cfg, GFP_KERNEL);
>+	if (rc) {
>+		kfree(dom);
>+		return ERR_PTR(rc);
>+	}
>+
>+	return &dom->domain;
>+}
>+
>+static void noiommu_domain_free(struct iommu_domain *iommu_domain)
>+{
>+	struct noiommu_domain *domain =
>+		container_of(iommu_domain, struct noiommu_domain, domain);
>+
>+	pt_iommu_deinit(&domain->amdv1.iommu);
>+	kfree(domain);
>+}
>+
>+static const struct iommu_domain_ops noiommu_amdv1_ops = {
>+	IOMMU_PT_DOMAIN_OPS(amdv1),

I understand that this fits in really well into the iommufd/hwpt
construction, but do we need page tables for this as all the
iova-to-phys information should be available in the IOPT in IOAS? As the
get_pa() function introduced in the later patch is only used for noiommu
use-cases, it can use the IOPT to get the physical addresses?
>+	.free = noiommu_domain_free,
>+};
>+
>+struct iommu_ops iommufd_noiommu_ops = {
>+	.domain_alloc_paging_flags = noiommu_alloc_paging_flags,
>+};
>diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
>index 6ac1965199e9..9c18c5eb1899 100644
>--- a/drivers/iommu/iommufd/iommufd_private.h
>+++ b/drivers/iommu/iommufd/iommufd_private.h
>@@ -464,6 +464,8 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
> 	refcount_dec(&hwpt->obj.users);
> }
>
>+extern struct iommu_ops iommufd_noiommu_ops;
>+
> struct iommufd_attach;
>
> struct iommufd_group {
>-- 
>2.34.1
>
Re: [PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Jason Gunthorpe 2 weeks, 1 day ago
On Wed, Mar 18, 2026 at 06:38:14PM +0000, Samiullah Khawaja wrote:

> > +static const struct iommu_domain_ops noiommu_amdv1_ops = {
> > +	IOMMU_PT_DOMAIN_OPS(amdv1),
> 
> I understand that this fits in really well into the iommufd/hwpt
> construction, but do we need page tables for this as all the
> iova-to-phys information should be available in the IOPT in IOAS?

Yes we do! That is the whole point.

In iommufd once you pin the memory the phys is stored in only two
possible ways:

1) Inside an xarray if an access is used
2) Inside at least one iommu_domain

That's it. So to fit noiommu into this scheme, and have it rely on the
existing pinning, we either have to make it use an access or make it
use an iommu_domain -> a real one that can store phys.

Maybe a comment is helpful, but using the domain like this to store
the pinned phys has been the vfio design from day 1..

> get_pa() function introduced in the later patch is only used for noiommu
> use-cases, it can use the IOPT to get the physical addresses?

No.

Jason
Re: [PATCH V2 01/11] iommufd: Support a HWPT without an iommu driver for noiommu
Posted by Samiullah Khawaja 1 week, 6 days ago
On Mon, Mar 23, 2026 at 10:17:14AM -0300, Jason Gunthorpe wrote:
>On Wed, Mar 18, 2026 at 06:38:14PM +0000, Samiullah Khawaja wrote:
>
>> > +static const struct iommu_domain_ops noiommu_amdv1_ops = {
>> > +	IOMMU_PT_DOMAIN_OPS(amdv1),
>>
>> I understand that this fits in really well into the iommufd/hwpt
>> construction, but do we need page tables for this as all the
>> iova-to-phys information should be available in the IOPT in IOAS?
>
>Yes we do! That is the whole point.
>
>In iommufd once you pin the memory the phys is stored in only two
>possible ways:
>
>1) Inside an xarray if an access is used
>2) Inside at least one iommu_domain
>
>That's it. So to fit noiommu into this scheme, and have it rely on the
>existing pinning, we either have to make it use an access or make it
>use an iommu_domain -> a real one that can store phys.

Thanks for the explanation.

I missed the part where once pinning is done, the pfns are only
available in those two places.
>
>Maybe a comment is helpful, but using the domain like this to store
>the pinned phys has been the vfio design from day 1..
>
>> get_pa() function introduced in the later patch is only used for noiommu
>> use-cases, it can use the IOPT to get the physical addresses?
>
>No.
>
>Jason