NVIDIA VCMDQ driver will have a driver-defined vDEVICE structure and do
some HW configurations with that.
To allow IOMMU drivers to define their own vDEVICE structures, move the
struct iommufd_vdevice to the public header and provide a pair of viommu
ops, similar to get_viommu_size and viommu_init.
Doing this, however, creates a new window between the vDEVICE allocation
and its driver-level initialization, during which an abort could happen
but it can't invoke a driver destroy function from the struct viommu_ops
since the driver structure isn't initialized yet. vIOMMU object doesn't
have this problem, since its destroy op is set via the viommu_ops by the
driver viommu_init function. Thus, vDEVICE should do something similar:
add a destroy function pointer inside the struct iommufd_vdevice instead
of the struct iommufd_viommu_ops.
Note that there is unlikely a use case for a type dependent vDEVICE, so
a static vdevice_size is probably enough for the near term instead of a
get_vdevice_size function op.
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/iommufd/iommufd_private.h | 7 -------
include/linux/iommufd.h | 26 +++++++++++++++++++++++++
drivers/iommu/iommufd/viommu.c | 26 ++++++++++++++++++++++++-
3 files changed, 51 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 468717d5e5bc..e6b1eb2ab375 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -638,13 +638,6 @@ void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_vdevice_destroy(struct iommufd_object *obj);
-struct iommufd_vdevice {
- struct iommufd_object obj;
- struct iommufd_viommu *viommu;
- struct device *dev;
- u64 id; /* per-vIOMMU virtual ID */
-};
-
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 2d1bf2f97ee3..f3b5cfdb6d53 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -104,6 +104,16 @@ struct iommufd_viommu {
enum iommu_viommu_type type;
};
+struct iommufd_vdevice {
+ struct iommufd_object obj;
+ struct iommufd_viommu *viommu;
+ struct device *dev;
+ u64 id; /* per-vIOMMU virtual ID */
+
+ /* Clean up all driver-specific parts of an iommufd_vdevice */
+ void (*destroy)(struct iommufd_vdevice *vdev);
+};
+
/**
* struct iommufd_viommu_ops - vIOMMU specific operations
* @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory
@@ -120,6 +130,14 @@ struct iommufd_viommu {
* array->entry_num to report the number of handled requests.
* The data structure of the array entry must be defined in
* include/uapi/linux/iommufd.h
+ * @vdevice_size: Size of the driver-defined vDEVICE structure per this vIOMMU
+ * @vdevice_init: Initialize the driver-level structure of a vDEVICE object, or
+ * related HW procedure. @vdev is already initialized by iommufd
+ * core: vdev->dev and vdev->viommu pointers; vdev->id carries a
+ * per-vIOMMU virtual ID (refer to struct iommu_vdevice_alloc in
+ * include/uapi/linux/iommufd.h)
+ * If driver has a deinit function to revert what vdevice_init op
+ * does, it should set it to the @vdev->destroy function pointer
*/
struct iommufd_viommu_ops {
void (*destroy)(struct iommufd_viommu *viommu);
@@ -128,6 +146,8 @@ struct iommufd_viommu_ops {
const struct iommu_user_data *user_data);
int (*cache_invalidate)(struct iommufd_viommu *viommu,
struct iommu_user_data_array *array);
+ const size_t vdevice_size;
+ int (*vdevice_init)(struct iommufd_vdevice *vdev);
};
#if IS_ENABLED(CONFIG_IOMMUFD)
@@ -224,4 +244,10 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \
((drv_struct *)NULL)->member)))
+
+#define VDEVICE_STRUCT_SIZE(drv_struct, member) \
+ (sizeof(drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \
+ ((drv_struct *)NULL)->member)))
#endif
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index c5eea9900c54..28ea5d026222 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -116,6 +116,8 @@ void iommufd_vdevice_destroy(struct iommufd_object *obj)
container_of(obj, struct iommufd_vdevice, obj);
struct iommufd_viommu *viommu = vdev->viommu;
+ if (vdev->destroy)
+ vdev->destroy(vdev);
/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
refcount_dec(&viommu->obj.users);
@@ -126,6 +128,7 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
struct iommu_vdevice_alloc *cmd = ucmd->cmd;
struct iommufd_vdevice *vdev, *curr;
+ size_t vdev_size = sizeof(*vdev);
struct iommufd_viommu *viommu;
struct iommufd_device *idev;
u64 virt_id = cmd->virt_id;
@@ -150,7 +153,22 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_idev;
}
- vdev = iommufd_object_alloc_ucmd(ucmd, vdev, IOMMUFD_OBJ_VDEVICE);
+ if (viommu->ops && viommu->ops->vdevice_size) {
+ /*
+ * It is a driver bug for:
+ * - ops->vdevice_size smaller than the core structure size
+ * - not implementing a pairing ops->vdevice_init op
+ */
+ if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
+ !viommu->ops->vdevice_init)) {
+ rc = -EOPNOTSUPP;
+ goto out_put_idev;
+ }
+ vdev_size = viommu->ops->vdevice_size;
+ }
+
+ vdev = (struct iommufd_vdevice *)_iommufd_object_alloc_ucmd(
+ ucmd, vdev_size, IOMMUFD_OBJ_VDEVICE);
if (IS_ERR(vdev)) {
rc = PTR_ERR(vdev);
goto out_put_idev;
@@ -168,6 +186,12 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_idev;
}
+ if (viommu->ops && viommu->ops->vdevice_init) {
+ rc = viommu->ops->vdevice_init(vdev);
+ if (rc)
+ goto out_put_idev;
+ }
+
cmd->out_vdevice_id = vdev->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
--
2.43.0
On Sat, Jun 14, 2025 at 12:14:33AM -0700, Nicolin Chen wrote:
> NVIDIA VCMDQ driver will have a driver-defined vDEVICE structure and do
> some HW configurations with that.
>
> To allow IOMMU drivers to define their own vDEVICE structures, move the
> struct iommufd_vdevice to the public header and provide a pair of viommu
> ops, similar to get_viommu_size and viommu_init.
>
> Doing this, however, creates a new window between the vDEVICE allocation
> and its driver-level initialization, during which an abort could happen
> but it can't invoke a driver destroy function from the struct viommu_ops
> since the driver structure isn't initialized yet. vIOMMU object doesn't
> have this problem, since its destroy op is set via the viommu_ops by the
> driver viommu_init function. Thus, vDEVICE should do something similar:
> add a destroy function pointer inside the struct iommufd_vdevice instead
> of the struct iommufd_viommu_ops.
>
> Note that there is unlikely a use case for a type dependent vDEVICE, so
> a static vdevice_size is probably enough for the near term instead of a
> get_vdevice_size function op.
>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
> drivers/iommu/iommufd/iommufd_private.h | 7 -------
> include/linux/iommufd.h | 26 +++++++++++++++++++++++++
> drivers/iommu/iommufd/viommu.c | 26 ++++++++++++++++++++++++-
> 3 files changed, 51 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 468717d5e5bc..e6b1eb2ab375 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -638,13 +638,6 @@ void iommufd_viommu_destroy(struct iommufd_object *obj);
> int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
> void iommufd_vdevice_destroy(struct iommufd_object *obj);
>
> -struct iommufd_vdevice {
> - struct iommufd_object obj;
> - struct iommufd_viommu *viommu;
> - struct device *dev;
> - u64 id; /* per-vIOMMU virtual ID */
> -};
> -
> #ifdef CONFIG_IOMMUFD_TEST
> int iommufd_test(struct iommufd_ucmd *ucmd);
> void iommufd_selftest_destroy(struct iommufd_object *obj);
> diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
> index 2d1bf2f97ee3..f3b5cfdb6d53 100644
> --- a/include/linux/iommufd.h
> +++ b/include/linux/iommufd.h
> @@ -104,6 +104,16 @@ struct iommufd_viommu {
> enum iommu_viommu_type type;
> };
>
> +struct iommufd_vdevice {
> + struct iommufd_object obj;
> + struct iommufd_viommu *viommu;
> + struct device *dev;
> + u64 id; /* per-vIOMMU virtual ID */
Nit: Why not call this viommu_id?
> +
> + /* Clean up all driver-specific parts of an iommufd_vdevice */
> + void (*destroy)(struct iommufd_vdevice *vdev);
> +};
> +
> /**
> * struct iommufd_viommu_ops - vIOMMU specific operations
> * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory
> @@ -120,6 +130,14 @@ struct iommufd_viommu {
> * array->entry_num to report the number of handled requests.
> * The data structure of the array entry must be defined in
> * include/uapi/linux/iommufd.h
> + * @vdevice_size: Size of the driver-defined vDEVICE structure per this vIOMMU
> + * @vdevice_init: Initialize the driver-level structure of a vDEVICE object, or
> + * related HW procedure. @vdev is already initialized by iommufd
> + * core: vdev->dev and vdev->viommu pointers; vdev->id carries a
> + * per-vIOMMU virtual ID (refer to struct iommu_vdevice_alloc in
> + * include/uapi/linux/iommufd.h)
> + * If driver has a deinit function to revert what vdevice_init op
> + * does, it should set it to the @vdev->destroy function pointer
> */
> struct iommufd_viommu_ops {
> void (*destroy)(struct iommufd_viommu *viommu);
> @@ -128,6 +146,8 @@ struct iommufd_viommu_ops {
> const struct iommu_user_data *user_data);
> int (*cache_invalidate)(struct iommufd_viommu *viommu,
> struct iommu_user_data_array *array);
> + const size_t vdevice_size;
> + int (*vdevice_init)(struct iommufd_vdevice *vdev);
> };
>
> #if IS_ENABLED(CONFIG_IOMMUFD)
> @@ -224,4 +244,10 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
> BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
> BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \
> ((drv_struct *)NULL)->member)))
> +
> +#define VDEVICE_STRUCT_SIZE(drv_struct, member) \
> + (sizeof(drv_struct) + \
> + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \
> + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \
> + ((drv_struct *)NULL)->member)))
> #endif
> diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
> index c5eea9900c54..28ea5d026222 100644
> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -116,6 +116,8 @@ void iommufd_vdevice_destroy(struct iommufd_object *obj)
> container_of(obj, struct iommufd_vdevice, obj);
> struct iommufd_viommu *viommu = vdev->viommu;
>
> + if (vdev->destroy)
> + vdev->destroy(vdev);
> /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
> xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
> refcount_dec(&viommu->obj.users);
> @@ -126,6 +128,7 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
> {
> struct iommu_vdevice_alloc *cmd = ucmd->cmd;
> struct iommufd_vdevice *vdev, *curr;
> + size_t vdev_size = sizeof(*vdev);
> struct iommufd_viommu *viommu;
> struct iommufd_device *idev;
> u64 virt_id = cmd->virt_id;
> @@ -150,7 +153,22 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
> goto out_put_idev;
> }
>
> - vdev = iommufd_object_alloc_ucmd(ucmd, vdev, IOMMUFD_OBJ_VDEVICE);
> + if (viommu->ops && viommu->ops->vdevice_size) {
> + /*
> + * It is a driver bug for:
> + * - ops->vdevice_size smaller than the core structure size
> + * - not implementing a pairing ops->vdevice_init op
> + */
> + if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
> + !viommu->ops->vdevice_init)) {
> + rc = -EOPNOTSUPP;
> + goto out_put_idev;
> + }
> + vdev_size = viommu->ops->vdevice_size;
> + }
> +
> + vdev = (struct iommufd_vdevice *)_iommufd_object_alloc_ucmd(
> + ucmd, vdev_size, IOMMUFD_OBJ_VDEVICE);
> if (IS_ERR(vdev)) {
> rc = PTR_ERR(vdev);
> goto out_put_idev;
> @@ -168,6 +186,12 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
> goto out_put_idev;
> }
>
> + if (viommu->ops && viommu->ops->vdevice_init) {
> + rc = viommu->ops->vdevice_init(vdev);
> + if (rc)
> + goto out_put_idev;
> + }
> +
> cmd->out_vdevice_id = vdev->obj.id;
> rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
>
Reviewed-by: Pranjal Shrivastava <praan@google.com>
> --
> 2.43.0
>
On Thu, Jun 19, 2025 at 10:26:28AM +0000, Pranjal Shrivastava wrote:
> > +struct iommufd_vdevice {
> > + struct iommufd_object obj;
> > + struct iommufd_viommu *viommu;
> > + struct device *dev;
> > + u64 id; /* per-vIOMMU virtual ID */
>
> Nit: Why not call this viommu_id?
And lets have a comment here too, we have so many different ids, what
id is this id and what it is used for, by who.
Jason
On Thu, Jun 19, 2025 at 08:44:25AM -0300, Jason Gunthorpe wrote:
> On Thu, Jun 19, 2025 at 10:26:28AM +0000, Pranjal Shrivastava wrote:
> > > +struct iommufd_vdevice {
> > > + struct iommufd_object obj;
> > > + struct iommufd_viommu *viommu;
> > > + struct device *dev;
> > > + u64 id; /* per-vIOMMU virtual ID */
> >
> > Nit: Why not call this viommu_id?
>
> And lets have a comment here too, we have so many different ids, what
> id is this id and what it is used for, by who.
Sure. Will change to "virt_id" and add some description here.
Thanks
Nicolin
On 6/14/25 15:14, Nicolin Chen wrote: > NVIDIA VCMDQ driver will have a driver-defined vDEVICE structure and do > some HW configurations with that. > > To allow IOMMU drivers to define their own vDEVICE structures, move the > struct iommufd_vdevice to the public header and provide a pair of viommu > ops, similar to get_viommu_size and viommu_init. > > Doing this, however, creates a new window between the vDEVICE allocation > and its driver-level initialization, during which an abort could happen > but it can't invoke a driver destroy function from the struct viommu_ops > since the driver structure isn't initialized yet. vIOMMU object doesn't > have this problem, since its destroy op is set via the viommu_ops by the > driver viommu_init function. Thus, vDEVICE should do something similar: > add a destroy function pointer inside the struct iommufd_vdevice instead > of the struct iommufd_viommu_ops. > > Note that there is unlikely a use case for a type dependent vDEVICE, so > a static vdevice_size is probably enough for the near term instead of a > get_vdevice_size function op. > > Reviewed-by: Jason Gunthorpe<jgg@nvidia.com> > Reviewed-by: Kevin Tian<kevin.tian@intel.com> > Signed-off-by: Nicolin Chen<nicolinc@nvidia.com> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
© 2016 - 2026 Red Hat, Inc.