[PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct

Nicolin Chen posted 11 patches 1 month, 2 weeks ago
[PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 2 weeks ago
Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
a slice of physical IOMMU device passed to or shared with a user space VM.
This slice, now a vIOMMU object, is a group of virtualization resources of
a physical IOMMU's, such as:
 - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 - Access to a sharable nesting parent pagetable across physical IOMMUs
 - Virtualization of various platforms IDs, e.g. RIDs and others
 - Delivery of paravirtualized invalidation
 - Direct assigned invalidation queues
 - Direct assigned interrupts
 - Non-affiliated event reporting

Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own
vIOMMU structures. And this allocation also needs a free(), so add struct
iommufd_viommu_ops.

To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper.
It's suggested that a driver should embed a core-level viommu structure in
its driver-level viommu struct and call the iommufd_viommu_alloc() helper,
meanwhile the driver can also implement a viommu ops:
    struct my_driver_viommu {
        struct iommufd_viommu core;
        /* driver-owned properties/features */
        ....
    };

    static const struct iommufd_viommu_ops my_driver_viommu_ops = {
        .free = my_driver_viommu_free,
        /* future ops for virtualization features */
        ....
    };

    static struct iommufd_viommu my_driver_viommu_alloc(...)
    {
        struct my_driver_viommu *my_viommu =
                iommufd_viommu_alloc(ictx, my_driver_viommu, core,
                                     my_driver_viommu_ops);
        /* Init my_viommu and related HW feature */
        ....
        return &my_viommu->core;
    }

    static struct iommu_domain_ops my_driver_domain_ops = {
        ....
        .viommu_alloc = my_driver_viommu_alloc,
    };

To make the Kernel config work between a driver and the iommufd core, put
the for-driver allocation helpers into a new viommu_api file building with
CONFIG_IOMMUFD_DRIVER.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |  2 +-
 drivers/iommu/iommufd/iommufd_private.h |  1 +
 include/linux/iommu.h                   | 14 ++++++
 include/linux/iommufd.h                 | 43 +++++++++++++++++++
 drivers/iommu/iommufd/main.c            | 32 --------------
 drivers/iommu/iommufd/viommu_api.c      | 57 +++++++++++++++++++++++++
 6 files changed, 116 insertions(+), 33 deletions(-)
 create mode 100644 drivers/iommu/iommufd/viommu_api.c

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cf4605962bea..93daedd7e5c8 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -12,4 +12,4 @@ iommufd-y := \
 iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
 
 obj-$(CONFIG_IOMMUFD) += iommufd.o
-obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o
+obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o viommu_api.o
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f2f3a906eac9..6a364073f699 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -131,6 +131,7 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_ACCESS,
 	IOMMUFD_OBJ_FAULT,
+	IOMMUFD_OBJ_VIOMMU,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c8d18f5f644e..3a50f57b0861 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -42,6 +42,8 @@ struct notifier_block;
 struct iommu_sva;
 struct iommu_dma_cookie;
 struct iommu_fault_param;
+struct iommufd_ctx;
+struct iommufd_viommu;
 
 #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
 #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
@@ -542,6 +544,13 @@ static inline int __iommu_copy_struct_from_user_array(
  * @remove_dev_pasid: Remove any translation configurations of a specific
  *                    pasid, so that any DMA transactions with this pasid
  *                    will be blocked by the hardware.
+ * @viommu_alloc: Allocate an iommufd_viommu on an @iommu_dev as the group of
+ *                virtualization resources shared/passed to user space IOMMU
+ *                instance. Associate it with a nesting parent @domain. The
+ *                @viommu_type must be defined in include/uapi/linux/iommufd.h
+ *                It is suggested to call iommufd_viommu_alloc() helper for
+ *                a bundled allocation of the core and the driver structures,
+ *                using the given @ictx pointer.
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  * @owner: Driver module providing these ops
  * @identity_domain: An always available, always attachable identity
@@ -591,6 +600,11 @@ struct iommu_ops {
 	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
 				 struct iommu_domain *domain);
 
+	struct iommufd_viommu *(*viommu_alloc)(struct iommu_device *iommu_dev,
+					       struct iommu_domain *domain,
+					       struct iommufd_ctx *ictx,
+					       unsigned int viommu_type);
+
 	const struct iommu_domain_ops *default_domain_ops;
 	unsigned long pgsize_bitmap;
 	struct module *owner;
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 6b9d46981870..069a38999cdd 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -17,6 +17,7 @@ struct iommu_group;
 struct iommufd_access;
 struct iommufd_ctx;
 struct iommufd_device;
+struct iommufd_viommu_ops;
 struct page;
 
 /* Base struct for all objects with a userspace ID handle. */
@@ -63,6 +64,26 @@ void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
+struct iommufd_viommu {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommu_device *iommu_dev;
+	struct iommufd_hwpt_paging *hwpt;
+
+	const struct iommufd_viommu_ops *ops;
+
+	unsigned int type;
+};
+
+/**
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ * @free: Free all driver-specific parts of an iommufd_viommu. The memory of the
+ *        vIOMMU will be free-ed by iommufd core after calling this free op.
+ */
+struct iommufd_viommu_ops {
+	void (*free)(struct iommufd_viommu *viommu);
+};
+
 #if IS_ENABLED(CONFIG_IOMMUFD)
 struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
@@ -79,6 +100,9 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
 int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx);
 int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx);
+struct iommufd_viommu *
+__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
+		       const struct iommufd_viommu_ops *ops);
 #else /* !CONFIG_IOMMUFD */
 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 {
@@ -119,5 +143,24 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
 {
 	return -EOPNOTSUPP;
 }
+
+static inline struct iommufd_viommu *
+__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
+		       const struct iommufd_viommu_ops *ops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_IOMMUFD */
+
+/*
+ * Helpers for IOMMU driver to allocate driver structures that will be freed by
+ * the iommufd core. Yet, a driver is responsible for its own struct cleanup.
+ */
+#define iommufd_viommu_alloc(ictx, drv_struct, member, ops)                    \
+	container_of(__iommufd_viommu_alloc(ictx,                              \
+					    sizeof(struct drv_struct) +        \
+					    BUILD_BUG_ON_ZERO(offsetof(        \
+						struct drv_struct, member)),   \
+					    ops),                              \
+		     struct drv_struct, member)
 #endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 28e1ef5666e9..92bd075108e5 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -29,38 +29,6 @@ struct iommufd_object_ops {
 static const struct iommufd_object_ops iommufd_object_ops[];
 static struct miscdevice vfio_misc_dev;
 
-struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
-						size_t size,
-						enum iommufd_object_type type)
-{
-	struct iommufd_object *obj;
-	int rc;
-
-	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
-	if (!obj)
-		return ERR_PTR(-ENOMEM);
-	obj->type = type;
-	/* Starts out bias'd by 1 until it is removed from the xarray */
-	refcount_set(&obj->shortterm_users, 1);
-	refcount_set(&obj->users, 1);
-
-	/*
-	 * Reserve an ID in the xarray but do not publish the pointer yet since
-	 * the caller hasn't initialized it yet. Once the pointer is published
-	 * in the xarray and visible to other threads we can't reliably destroy
-	 * it anymore, so the caller must complete all errorable operations
-	 * before calling iommufd_object_finalize().
-	 */
-	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
-		      xa_limit_31b, GFP_KERNEL_ACCOUNT);
-	if (rc)
-		goto out_free;
-	return obj;
-out_free:
-	kfree(obj);
-	return ERR_PTR(rc);
-}
-
 /*
  * Allow concurrent access to the object.
  *
diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
new file mode 100644
index 000000000000..c1731f080d6b
--- /dev/null
+++ b/drivers/iommu/iommufd/viommu_api.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include "iommufd_private.h"
+
+struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
+						size_t size,
+						enum iommufd_object_type type)
+{
+	struct iommufd_object *obj;
+	int rc;
+
+	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+	obj->type = type;
+	/* Starts out bias'd by 1 until it is removed from the xarray */
+	refcount_set(&obj->shortterm_users, 1);
+	refcount_set(&obj->users, 1);
+
+	/*
+	 * Reserve an ID in the xarray but do not publish the pointer yet since
+	 * the caller hasn't initialized it yet. Once the pointer is published
+	 * in the xarray and visible to other threads we can't reliably destroy
+	 * it anymore, so the caller must complete all errorable operations
+	 * before calling iommufd_object_finalize().
+	 */
+	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
+		      xa_limit_31b, GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_free;
+	return obj;
+out_free:
+	kfree(obj);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_object_alloc_elm, IOMMUFD);
+
+struct iommufd_viommu *
+__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
+		       const struct iommufd_viommu_ops *ops)
+{
+	struct iommufd_viommu *viommu;
+	struct iommufd_object *obj;
+
+	if (WARN_ON(size < sizeof(*viommu)))
+		return ERR_PTR(-EINVAL);
+	obj = iommufd_object_alloc_elm(ictx, size, IOMMUFD_OBJ_VIOMMU);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+	viommu = container_of(obj, struct iommufd_viommu, obj);
+	if (ops)
+		viommu->ops = ops;
+	return viommu;
+}
+EXPORT_SYMBOL_NS_GPL(__iommufd_viommu_alloc, IOMMUFD);
-- 
2.43.0
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Jason Gunthorpe 1 month, 1 week ago
On Wed, Oct 09, 2024 at 09:38:03AM -0700, Nicolin Chen wrote:
> +	struct iommufd_viommu *(*viommu_alloc)(struct iommu_device *iommu_dev,
> +					       struct iommu_domain *domain,

Let's call this parent_domain ie nesting parent

> +/*
> + * Helpers for IOMMU driver to allocate driver structures that will be freed by
> + * the iommufd core. Yet, a driver is responsible for its own
> struct cleanup.

'own struct cleanup via the free callback'

> diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> new file mode 100644
> index 000000000000..c1731f080d6b
> --- /dev/null
> +++ b/drivers/iommu/iommufd/viommu_api.c

Let's call this file driver.c to match CONFIG_IOMMUFD_DRIVER

> +struct iommufd_viommu *
> +__iommufd_viommu_alloc(struct iommufd_ctx *ictx, size_t size,
> +		       const struct iommufd_viommu_ops *ops)
> +{
> +	struct iommufd_viommu *viommu;
> +	struct iommufd_object *obj;
> +
> +	if (WARN_ON(size < sizeof(*viommu)))
> +		return ERR_PTR(-EINVAL);

The macro does this already

> +	obj = iommufd_object_alloc_elm(ictx, size, IOMMUFD_OBJ_VIOMMU);
> +	if (IS_ERR(obj))
> +		return ERR_CAST(obj);
> +	viommu = container_of(obj, struct iommufd_viommu, obj);

And this too..

> +	if (ops)
> +		viommu->ops = ops;

No need for an if...

It could just be in the macro which is a bit appealing given we want
this linked into the drivers..

/*
 * Helpers for IOMMU driver to allocate driver structures that will be freed by
 * the iommufd core. The free op will be called prior to freeing the memory.
 */
#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops)            \
	({                                                                    \
		struct drv_struct *ret;                                       \
                                                                              \
		static_assert(                                                \
			__same_type(struct iommufd_viommu,                    \
				    ((struct drv_struct *)NULL)->member));    \
		static_assert(offsetof(struct drv_struct, member.obj) == 0);  \
		ret = (struct drv_struct *)iommufd_object_alloc_elm(          \
			ictx, sizeof(struct drv_struct), IOMMUFD_OBJ_VIOMMU); \
		ret->member.ops = viommu_ops;                                 \
		ret;                                                          \
	})

Jason
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 1 week ago
On Thu, Oct 17, 2024 at 01:33:59PM -0300, Jason Gunthorpe wrote:

> > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > new file mode 100644
> > index 000000000000..c1731f080d6b
> > --- /dev/null
> > +++ b/drivers/iommu/iommufd/viommu_api.c
> 
> Let's call this file driver.c to match CONFIG_IOMMUFD_DRIVER

Would this make its scope too large that it might feel odd to have
the iova_bitmap.c in a separate file?

> /*
>  * Helpers for IOMMU driver to allocate driver structures that will be freed by
>  * the iommufd core. The free op will be called prior to freeing the memory.
>  */
> #define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops)            \
> 	({                                                                    \
> 		struct drv_struct *ret;                                       \
>                                                                               \
> 		static_assert(                                                \
> 			__same_type(struct iommufd_viommu,                    \
> 				    ((struct drv_struct *)NULL)->member));    \
> 		static_assert(offsetof(struct drv_struct, member.obj) == 0);  \
> 		ret = (struct drv_struct *)iommufd_object_alloc_elm(          \
> 			ictx, sizeof(struct drv_struct), IOMMUFD_OBJ_VIOMMU); \
> 		ret->member.ops = viommu_ops;                                 \
> 		ret;                                                          \
> 	})

OK. Will try with this.

Thanks
Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Jason Gunthorpe 1 month, 1 week ago
On Thu, Oct 17, 2024 at 10:01:28AM -0700, Nicolin Chen wrote:
> On Thu, Oct 17, 2024 at 01:33:59PM -0300, Jason Gunthorpe wrote:
> 
> > > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > > new file mode 100644
> > > index 000000000000..c1731f080d6b
> > > --- /dev/null
> > > +++ b/drivers/iommu/iommufd/viommu_api.c
> > 
> > Let's call this file driver.c to match CONFIG_IOMMUFD_DRIVER
> 
> Would this make its scope too large that it might feel odd to have
> the iova_bitmap.c in a separate file?

Think of it as the catchall ?

Jason
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Zhangfei Gao 1 month, 2 weeks ago
On Thu, 10 Oct 2024 at 00:40, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
> a slice of physical IOMMU device passed to or shared with a user space VM.
> This slice, now a vIOMMU object, is a group of virtualization resources of
> a physical IOMMU's, such as:
>  - Security namespace for guest owned ID, e.g. guest-controlled cache tags
>  - Access to a sharable nesting parent pagetable across physical IOMMUs
>  - Virtualization of various platforms IDs, e.g. RIDs and others
>  - Delivery of paravirtualized invalidation
>  - Direct assigned invalidation queues
>  - Direct assigned interrupts
>  - Non-affiliated event reporting
>
> Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own
> vIOMMU structures. And this allocation also needs a free(), so add struct
> iommufd_viommu_ops.
>
> To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper.
> It's suggested that a driver should embed a core-level viommu structure in
> its driver-level viommu struct and call the iommufd_viommu_alloc() helper,
> meanwhile the driver can also implement a viommu ops:
>     struct my_driver_viommu {
>         struct iommufd_viommu core;
>         /* driver-owned properties/features */
>         ....
>     };
>
>     static const struct iommufd_viommu_ops my_driver_viommu_ops = {
>         .free = my_driver_viommu_free,
>         /* future ops for virtualization features */
>         ....
>     };
>
>     static struct iommufd_viommu my_driver_viommu_alloc(...)
>     {
>         struct my_driver_viommu *my_viommu =
>                 iommufd_viommu_alloc(ictx, my_driver_viommu, core,
>                                      my_driver_viommu_ops);
>         /* Init my_viommu and related HW feature */
>         ....
>         return &my_viommu->core;
>     }
>
>     static struct iommu_domain_ops my_driver_domain_ops = {
>         ....
>         .viommu_alloc = my_driver_viommu_alloc,
>     };
>
> To make the Kernel config work between a driver and the iommufd core, put
> the for-driver allocation helpers into a new viommu_api file building with
> CONFIG_IOMMUFD_DRIVER.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>

> diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> new file mode 100644
> index 000000000000..c1731f080d6b
> --- /dev/null
> +++ b/drivers/iommu/iommufd/viommu_api.c
> @@ -0,0 +1,57 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> + */
> +
> +#include "iommufd_private.h"
> +
> +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> +                                               size_t size,
> +                                               enum iommufd_object_type type)
> +{
> +       struct iommufd_object *obj;
> +       int rc;
> +
> +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> +       if (!obj)
> +               return ERR_PTR(-ENOMEM);
> +       obj->type = type;
> +       /* Starts out bias'd by 1 until it is removed from the xarray */
> +       refcount_set(&obj->shortterm_users, 1);
> +       refcount_set(&obj->users, 1);

here set refcont 1

iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
IOMMUFD_OBJ_DEVICE): refcont -> 1
refcount_inc(&idev->obj.users); refcount -> 2
will cause iommufd_device_unbind fail.

May remove refcount_inc(&idev->obj.users) in iommufd_device_bind

Thanks
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 2 weeks ago
On Sat, Oct 12, 2024 at 11:23:07AM +0800, Zhangfei Gao wrote:
 
> > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > new file mode 100644
> > index 000000000000..c1731f080d6b
> > --- /dev/null
> > +++ b/drivers/iommu/iommufd/viommu_api.c
> > @@ -0,0 +1,57 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> > + */
> > +
> > +#include "iommufd_private.h"
> > +
> > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > +                                               size_t size,
> > +                                               enum iommufd_object_type type)
> > +{
> > +       struct iommufd_object *obj;
> > +       int rc;
> > +
> > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > +       if (!obj)
> > +               return ERR_PTR(-ENOMEM);
> > +       obj->type = type;
> > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > +       refcount_set(&obj->shortterm_users, 1);
> > +       refcount_set(&obj->users, 1);
> 
> here set refcont 1
> 
> iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> IOMMUFD_OBJ_DEVICE): refcont -> 1
> refcount_inc(&idev->obj.users); refcount -> 2
> will cause iommufd_device_unbind fail.
> 
> May remove refcount_inc(&idev->obj.users) in iommufd_device_bind

Hmm, why would it fail? Or is it failing on your system?

This patch doesn't change the function but only moved it..

Thanks
Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Zhangfei Gao 1 month, 2 weeks ago
On Sat, 12 Oct 2024 at 12:49, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Sat, Oct 12, 2024 at 11:23:07AM +0800, Zhangfei Gao wrote:
>
> > > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > > new file mode 100644
> > > index 000000000000..c1731f080d6b
> > > --- /dev/null
> > > +++ b/drivers/iommu/iommufd/viommu_api.c
> > > @@ -0,0 +1,57 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> > > + */
> > > +
> > > +#include "iommufd_private.h"
> > > +
> > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > +                                               size_t size,
> > > +                                               enum iommufd_object_type type)
> > > +{
> > > +       struct iommufd_object *obj;
> > > +       int rc;
> > > +
> > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > +       if (!obj)
> > > +               return ERR_PTR(-ENOMEM);
> > > +       obj->type = type;
> > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > +       refcount_set(&obj->shortterm_users, 1);
> > > +       refcount_set(&obj->users, 1);
> >
> > here set refcont 1
> >
> > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > refcount_inc(&idev->obj.users); refcount -> 2
> > will cause iommufd_device_unbind fail.
> >
> > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
>
> Hmm, why would it fail? Or is it failing on your system?

Not sure, still in check, it may only be on my platform.

it hit
iommufd_object_remove:
if (WARN_ON(obj != to_destroy))

iommufd_device_bind refcount=2
iommufd_device_attach refcount=3
//still not sure which operation inc the count?
iommufd_device_detach refcount=4

Thanks



>
> This patch doesn't change the function but only moved it..
>
> Thanks
> Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Zhangfei Gao 1 month, 2 weeks ago
Hi, Nico

On Sat, 12 Oct 2024 at 18:18, Zhangfei Gao <zhangfei.gao@linaro.org> wrote:
>
> On Sat, 12 Oct 2024 at 12:49, Nicolin Chen <nicolinc@nvidia.com> wrote:
> >
> > On Sat, Oct 12, 2024 at 11:23:07AM +0800, Zhangfei Gao wrote:
> >
> > > > diff --git a/drivers/iommu/iommufd/viommu_api.c b/drivers/iommu/iommufd/viommu_api.c
> > > > new file mode 100644
> > > > index 000000000000..c1731f080d6b
> > > > --- /dev/null
> > > > +++ b/drivers/iommu/iommufd/viommu_api.c
> > > > @@ -0,0 +1,57 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> > > > + */
> > > > +
> > > > +#include "iommufd_private.h"
> > > > +
> > > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > > +                                               size_t size,
> > > > +                                               enum iommufd_object_type type)
> > > > +{
> > > > +       struct iommufd_object *obj;
> > > > +       int rc;
> > > > +
> > > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > > +       if (!obj)
> > > > +               return ERR_PTR(-ENOMEM);
> > > > +       obj->type = type;
> > > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > > +       refcount_set(&obj->shortterm_users, 1);
> > > > +       refcount_set(&obj->users, 1);
> > >
> > > here set refcont 1
> > >
> > > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > > refcount_inc(&idev->obj.users); refcount -> 2
> > > will cause iommufd_device_unbind fail.
> > >
> > > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
> >
> > Hmm, why would it fail? Or is it failing on your system?
>
> Not sure, still in check, it may only be on my platform.
>
> it hit
> iommufd_object_remove:
> if (WARN_ON(obj != to_destroy))
>
> iommufd_device_bind refcount=2
> iommufd_device_attach refcount=3
> //still not sure which operation inc the count?
> iommufd_device_detach refcount=4
>

Have a question,
when should iommufd_vdevice_destroy be called, before or after
iommufd_device_unbind.

Now iommufd_vdevice_destroy (ref--) is after unbind, hits the if
(!refcount_dec_if_one(&obj->users)) check.

iommufd_device_bind
iommufd_device_attach
iommufd_vdevice_alloc_ioctl

iommufd_device_detach
iommufd_device_unbind // refcount check fail
iommufd_vdevice_destroy ref--

Thanks
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 2 weeks ago
On Mon, Oct 14, 2024 at 03:58:55PM +0800, Zhangfei Gao wrote:

> > > > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > > > +                                               size_t size,
> > > > > +                                               enum iommufd_object_type type)
> > > > > +{
> > > > > +       struct iommufd_object *obj;
> > > > > +       int rc;
> > > > > +
> > > > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > > > +       if (!obj)
> > > > > +               return ERR_PTR(-ENOMEM);
> > > > > +       obj->type = type;
> > > > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > > > +       refcount_set(&obj->shortterm_users, 1);
> > > > > +       refcount_set(&obj->users, 1);
> > > >
> > > > here set refcont 1
> > > >
> > > > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > > > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > > > refcount_inc(&idev->obj.users); refcount -> 2
> > > > will cause iommufd_device_unbind fail.
> > > >
> > > > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
> > >
> > > Hmm, why would it fail? Or is it failing on your system?
> >
> > Not sure, still in check, it may only be on my platform.
> >
> > it hit
> > iommufd_object_remove:
> > if (WARN_ON(obj != to_destroy))
> >
> > iommufd_device_bind refcount=2
> > iommufd_device_attach refcount=3
> > //still not sure which operation inc the count?
> > iommufd_device_detach refcount=4
> >
> 
> Have a question,
> when should iommufd_vdevice_destroy be called, before or after
> iommufd_device_unbind.

Before.

> Now iommufd_vdevice_destroy (ref--) is after unbind, hits the if
> (!refcount_dec_if_one(&obj->users)) check.

Hmm, where do we have an iommufd_vdevice_destroy after unbind?

> iommufd_device_bind
> iommufd_device_attach
> iommufd_vdevice_alloc_ioctl
> 
> iommufd_device_detach
> iommufd_device_unbind // refcount check fail
> iommufd_vdevice_destroy ref--

Things should be symmetric. As you suspected, vdevice should be
destroyed before iommufd_device_detach.

A vdev is an object on top of a vIOMMU obj and an idev obj, so
it takes a refcount from each of them. That's why idev couldn't
unbind.

Thanks
Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Zhangfei Gao 1 month, 1 week ago
On Mon, 14 Oct 2024 at 23:46, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Mon, Oct 14, 2024 at 03:58:55PM +0800, Zhangfei Gao wrote:
>
> > > > > > +struct iommufd_object *iommufd_object_alloc_elm(struct iommufd_ctx *ictx,
> > > > > > +                                               size_t size,
> > > > > > +                                               enum iommufd_object_type type)
> > > > > > +{
> > > > > > +       struct iommufd_object *obj;
> > > > > > +       int rc;
> > > > > > +
> > > > > > +       obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
> > > > > > +       if (!obj)
> > > > > > +               return ERR_PTR(-ENOMEM);
> > > > > > +       obj->type = type;
> > > > > > +       /* Starts out bias'd by 1 until it is removed from the xarray */
> > > > > > +       refcount_set(&obj->shortterm_users, 1);
> > > > > > +       refcount_set(&obj->users, 1);
> > > > >
> > > > > here set refcont 1
> > > > >
> > > > > iommufd_device_bind -> iommufd_object_alloc(ictx, idev,
> > > > > IOMMUFD_OBJ_DEVICE): refcont -> 1
> > > > > refcount_inc(&idev->obj.users); refcount -> 2
> > > > > will cause iommufd_device_unbind fail.
> > > > >
> > > > > May remove refcount_inc(&idev->obj.users) in iommufd_device_bind
> > > >
> > > > Hmm, why would it fail? Or is it failing on your system?
> > >
> > > Not sure, still in check, it may only be on my platform.
> > >
> > > it hit
> > > iommufd_object_remove:
> > > if (WARN_ON(obj != to_destroy))
> > >
> > > iommufd_device_bind refcount=2
> > > iommufd_device_attach refcount=3
> > > //still not sure which operation inc the count?
> > > iommufd_device_detach refcount=4
> > >
> >
> > Have a question,
> > when should iommufd_vdevice_destroy be called, before or after
> > iommufd_device_unbind.
>
> Before.
>
> > Now iommufd_vdevice_destroy (ref--) is after unbind, hits the if
> > (!refcount_dec_if_one(&obj->users)) check.
>
> Hmm, where do we have an iommufd_vdevice_destroy after unbind?

Looks like it is called by close fd?
[ 2480.909319]  iommufd_vdevice_destroy+0xdc/0x168
[ 2480.909324]  iommufd_fops_release+0xa4/0x140
[ 2480.909328]  __fput+0xd0/0x2e0
[ 2480.909331]  ____fput+0x1c/0x30
[ 2480.909333]  task_work_run+0x78/0xe0
[ 2480.909337]  do_exit+0x2fc/0xa50
[ 2480.909340]  do_group_exit+0x3c/0xa0
[ 2480.909344]  get_signal+0x96c/0x978
[ 2480.909346]  do_signal+0x94/0x3a8
[ 2480.909348]  do_notify_resume+0x100/0x190

>
> > iommufd_device_bind
> > iommufd_device_attach
> > iommufd_vdevice_alloc_ioctl
> >
> > iommufd_device_detach
> > iommufd_device_unbind // refcount check fail
> > iommufd_vdevice_destroy ref--
>
> Things should be symmetric. As you suspected, vdevice should be
> destroyed before iommufd_device_detach.

I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
this issue?
In checking whether close fd before unbind?

>
> A vdev is an object on top of a vIOMMU obj and an idev obj, so
> it takes a refcount from each of them. That's why idev couldn't
> unbind.

Thanks

>
> Thanks
> Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 1 week ago
On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:

> > > iommufd_device_bind
> > > iommufd_device_attach
> > > iommufd_vdevice_alloc_ioctl
> > >
> > > iommufd_device_detach
> > > iommufd_device_unbind // refcount check fail
> > > iommufd_vdevice_destroy ref--
> >
> > Things should be symmetric. As you suspected, vdevice should be
> > destroyed before iommufd_device_detach.
> 
> I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> this issue?
> In checking whether close fd before unbind?

Oops, my bad. I will provide a fix.

Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 1 week ago
On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> 
> > > > iommufd_device_bind
> > > > iommufd_device_attach
> > > > iommufd_vdevice_alloc_ioctl
> > > >
> > > > iommufd_device_detach
> > > > iommufd_device_unbind // refcount check fail
> > > > iommufd_vdevice_destroy ref--
> > >
> > > Things should be symmetric. As you suspected, vdevice should be
> > > destroyed before iommufd_device_detach.
> > 
> > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > this issue?
> > In checking whether close fd before unbind?
> 
> Oops, my bad. I will provide a fix.

This should fix the problem:

---------------------------------------------------------------------
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 5fd3dd420290..13100cfea29d 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
  */
 void iommufd_device_unbind(struct iommufd_device *idev)
 {
+	mutex_lock(&idev->igroup->lock);
+	/* idev->vdev object should be destroyed prior, yet just in case.. */
+	if (idev->vdev)
+		iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
+	mutex_unlock(&idev->igroup->lock);
 	iommufd_object_destroy_user(idev->ictx, &idev->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
---------------------------------------------------------------------

Thanks
Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Zhangfei Gao 1 month, 1 week ago
On Wed, 16 Oct 2024 at 02:44, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> > On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> >
> > > > > iommufd_device_bind
> > > > > iommufd_device_attach
> > > > > iommufd_vdevice_alloc_ioctl
> > > > >
> > > > > iommufd_device_detach
> > > > > iommufd_device_unbind // refcount check fail
> > > > > iommufd_vdevice_destroy ref--
> > > >
> > > > Things should be symmetric. As you suspected, vdevice should be
> > > > destroyed before iommufd_device_detach.
> > >
> > > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > > this issue?
> > > In checking whether close fd before unbind?
> >
> > Oops, my bad. I will provide a fix.
>
> This should fix the problem:
>
> ---------------------------------------------------------------------
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 5fd3dd420290..13100cfea29d 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
>   */
>  void iommufd_device_unbind(struct iommufd_device *idev)
>  {
> +       mutex_lock(&idev->igroup->lock);
> +       /* idev->vdev object should be destroyed prior, yet just in case.. */
> +       if (idev->vdev)
> +               iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
> +       mutex_unlock(&idev->igroup->lock);
>         iommufd_object_destroy_user(idev->ictx, &idev->obj);
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
> ---------------------------------------------------------------------

Not yet
[  574.162112] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000004
[  574.261102] pc : iommufd_object_remove+0x7c/0x278
[  574.265795] lr : iommufd_device_unbind+0x44/0x98
in check

Thanks

>
> Thanks
> Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Nicolin Chen 1 month, 1 week ago
On Wed, Oct 16, 2024 at 09:56:51AM +0800, Zhangfei Gao wrote:
> On Wed, 16 Oct 2024 at 02:44, Nicolin Chen <nicolinc@nvidia.com> wrote:
> >
> > On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> > > On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> > >
> > > > > > iommufd_device_bind
> > > > > > iommufd_device_attach
> > > > > > iommufd_vdevice_alloc_ioctl
> > > > > >
> > > > > > iommufd_device_detach
> > > > > > iommufd_device_unbind // refcount check fail
> > > > > > iommufd_vdevice_destroy ref--
> > > > >
> > > > > Things should be symmetric. As you suspected, vdevice should be
> > > > > destroyed before iommufd_device_detach.
> > > >
> > > > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > > > this issue?
> > > > In checking whether close fd before unbind?
> > >
> > > Oops, my bad. I will provide a fix.
> >
> > This should fix the problem:
> >
> > ---------------------------------------------------------------------
> > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> > index 5fd3dd420290..13100cfea29d 100644
> > --- a/drivers/iommu/iommufd/device.c
> > +++ b/drivers/iommu/iommufd/device.c
> > @@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
> >   */
> >  void iommufd_device_unbind(struct iommufd_device *idev)
> >  {
> > +       mutex_lock(&idev->igroup->lock);
> > +       /* idev->vdev object should be destroyed prior, yet just in case.. */
> > +       if (idev->vdev)
> > +               iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
> > +       mutex_unlock(&idev->igroup->lock);
> >         iommufd_object_destroy_user(idev->ictx, &idev->obj);
> >  }
> >  EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
> > ---------------------------------------------------------------------
> 
> Not yet
> [  574.162112] Unable to handle kernel NULL pointer dereference at
> virtual address 0000000000000004
> [  574.261102] pc : iommufd_object_remove+0x7c/0x278
> [  574.265795] lr : iommufd_device_unbind+0x44/0x98
> in check

Hmm, it's kinda odd it crashes inside iommufd_object_remove().
Did you happen to change something there?

The added iommufd_object_remove() is equivalent to userspace
calling the destroy ioctl on the vDEVICE object.

Nicolin
Re: [PATCH v3 03/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct
Posted by Zhangfei Gao 1 month, 1 week ago
On Wed, 16 Oct 2024 at 14:52, Nicolin Chen <nicolinc@nvidia.com> wrote:
>
> On Wed, Oct 16, 2024 at 09:56:51AM +0800, Zhangfei Gao wrote:
> > On Wed, 16 Oct 2024 at 02:44, Nicolin Chen <nicolinc@nvidia.com> wrote:
> > >
> > > On Mon, Oct 14, 2024 at 07:01:40PM -0700, Nicolin Chen wrote:
> > > > On Tue, Oct 15, 2024 at 09:15:01AM +0800, Zhangfei Gao wrote:
> > > >
> > > > > > > iommufd_device_bind
> > > > > > > iommufd_device_attach
> > > > > > > iommufd_vdevice_alloc_ioctl
> > > > > > >
> > > > > > > iommufd_device_detach
> > > > > > > iommufd_device_unbind // refcount check fail
> > > > > > > iommufd_vdevice_destroy ref--
> > > > > >
> > > > > > Things should be symmetric. As you suspected, vdevice should be
> > > > > > destroyed before iommufd_device_detach.
> > > > >
> > > > > I am trying based on your for_iommufd_viommu_p2-v3 branch, do you have
> > > > > this issue?
> > > > > In checking whether close fd before unbind?
> > > >
> > > > Oops, my bad. I will provide a fix.
> > >
> > > This should fix the problem:
> > >
> > > ---------------------------------------------------------------------
> > > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> > > index 5fd3dd420290..13100cfea29d 100644
> > > --- a/drivers/iommu/iommufd/device.c
> > > +++ b/drivers/iommu/iommufd/device.c
> > > @@ -277,6 +277,11 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
> > >   */
> > >  void iommufd_device_unbind(struct iommufd_device *idev)
> > >  {
> > > +       mutex_lock(&idev->igroup->lock);
> > > +       /* idev->vdev object should be destroyed prior, yet just in case.. */
> > > +       if (idev->vdev)
> > > +               iommufd_object_remove(idev->ictx, NULL, idev->vdev->obj.id, 0);
> > > +       mutex_unlock(&idev->igroup->lock);
> > >         iommufd_object_destroy_user(idev->ictx, &idev->obj);
> > >  }
> > >  EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
> > > ---------------------------------------------------------------------
> >
> > Not yet
> > [  574.162112] Unable to handle kernel NULL pointer dereference at
> > virtual address 0000000000000004
> > [  574.261102] pc : iommufd_object_remove+0x7c/0x278
> > [  574.265795] lr : iommufd_device_unbind+0x44/0x98
> > in check
>
> Hmm, it's kinda odd it crashes inside iommufd_object_remove().
> Did you happen to change something there?
>
> The added iommufd_object_remove() is equivalent to userspace
> calling the destroy ioctl on the vDEVICE object.
>
Yes, double confirmed, it can solve the issue.
The guest can stop and run again

The Null pointer may be caused by the added debug.

Thanks Nico.

> Nicolin