[PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct

Nicolin Chen posted 29 patches 7 months ago
There is a newer version of this series
[PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Nicolin Chen 7 months ago
Add IOMMUFD_OBJ_HW_QUEUE with an iommufd_hw_queue structure, representing
a HW-accelerated type of IOMMU physical queue that can be passed through
to a user space VM for direct hardware control, such as:
 - NVIDIA's Virtual Command Queue
 - AMD vIOMMU's Command Buffer, Event Log Buffer, and PPR Log Buffer

Introduce an allocator, iommufd_hw_queue_alloc(), and add a pair of viommu
ops for iommufd to forward user space ioctls to IOMMU drivers.

Given that the first user of this HW QUEUE (tegra241-cmdqv) needs the
queue memory to be physically contiguous, add a flags property to
iommufd_viommu_ops, along with IOMMUFD_VIOMMU_FLAG_HW_QUEUE_READS_PA, so
that a driver can set it to have the core validate the physical pages of
a given guest queue.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Pranjal Shrivastava <praan@google.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 include/linux/iommufd.h | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 266ac6805213..923c66ccc15a 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -38,6 +38,7 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_VIOMMU,
 	IOMMUFD_OBJ_VDEVICE,
 	IOMMUFD_OBJ_VEVENTQ,
+	IOMMUFD_OBJ_HW_QUEUE,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -133,6 +134,24 @@ struct iommufd_vdevice {
 	u64 id; /* per-vIOMMU virtual ID */
 };
 
+struct iommufd_hw_queue {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommufd_viommu *viommu;
+	u64 base_addr; /* in guest physical address space */
+	size_t length;
+};
+
+enum iommufd_viommu_flags {
+	/*
+	 * The HW does not go through an address translation table but reads the
+	 * physical address space directly: iommufd core should pin the physical
+	 * pages backing the queue memory that's allocated for the HW QUEUE, and
+	 * ensure those physical pages are contiguous in the physical space.
+	 */
+	IOMMUFD_VIOMMU_FLAG_HW_QUEUE_READS_PA = 1 << 0,
+};
+
 /**
  * struct iommufd_viommu_ops - vIOMMU specific operations
  * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory
@@ -158,8 +177,18 @@ struct iommufd_vdevice {
  * @vdevice_destroy: Clean up all driver-specific parts of an iommufd_vdevice.
  *                   The memory of the vDEVICE will be free-ed by iommufd core
  *                   after calling this op
+ * @hw_queue_alloc: Allocate a HW QUEUE object for a HW-accelerated queue given
+ *                  the @type (must be defined in include/uapi/linux/iommufd.h)
+ *                  for the @viommu. @index carries the logical HW QUEUE ID per
+ *                  @viommu in a guest VM, for a multi-queue model; @base_addr
+ *                  carries the guest physical base address of the queue memory;
+ *                  @length carries the size of the queue
+ * @hw_queue_destroy: Clean up all driver-specific parts of an iommufd_hw_queue.
+ *                    The memory of the HW QUEUE will be free-ed by iommufd core
+ *                    after calling this op
  */
 struct iommufd_viommu_ops {
+	u32 flags;
 	void (*destroy)(struct iommufd_viommu *viommu);
 	struct iommu_domain *(*alloc_domain_nested)(
 		struct iommufd_viommu *viommu, u32 flags,
@@ -171,6 +200,10 @@ struct iommufd_viommu_ops {
 						 struct device *dev,
 						 u64 virt_id);
 	void (*vdevice_destroy)(struct iommufd_vdevice *vdev);
+	struct iommufd_hw_queue *(*hw_queue_alloc)(
+		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
+		unsigned int type, u32 index, u64 base_addr, size_t length);
+	void (*hw_queue_destroy)(struct iommufd_hw_queue *hw_queue);
 };
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
@@ -312,4 +345,18 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
 		}                                                              \
 		ret;                                                           \
 	})
+
+#define iommufd_hw_queue_alloc(ucmd, viommu, drv_struct, member)               \
+	({                                                                     \
+		drv_struct *ret;                                               \
+									       \
+		static_assert(__same_type(struct iommufd_viommu, *viommu));    \
+		ret = (drv_struct *)__iommufd_object_alloc_ucmd(               \
+			ucmd, ret, IOMMUFD_OBJ_HW_QUEUE, member.obj);          \
+		if (!IS_ERR(ret)) {                                            \
+			ret->member.viommu = viommu;                           \
+			ret->member.ictx = viommu->ictx;                       \
+		}                                                              \
+		ret;                                                           \
+	})
 #endif
-- 
2.43.0
Re: [PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Jason Gunthorpe 6 months, 3 weeks ago
On Sat, May 17, 2025 at 08:21:30PM -0700, Nicolin Chen wrote:
>  struct iommufd_viommu_ops {
> +	u32 flags;
>  	void (*destroy)(struct iommufd_viommu *viommu);
>  	struct iommu_domain *(*alloc_domain_nested)(
>  		struct iommufd_viommu *viommu, u32 flags,
> @@ -171,6 +200,10 @@ struct iommufd_viommu_ops {
>  						 struct device *dev,
>  						 u64 virt_id);
>  	void (*vdevice_destroy)(struct iommufd_vdevice *vdev);
> +	struct iommufd_hw_queue *(*hw_queue_alloc)(
> +		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
> +		unsigned int type, u32 index, u64 base_addr, size_t length);

I think it would be better to have two function pointers here than the flags:

 +	struct iommufd_hw_queue *(*hw_queue_alloc)(
 +		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
 +		unsigned int type, u32 index, u64 s2_iova, size_t length);


 +	struct iommufd_hw_queue *(*hw_queue_alloc_phys)(
 +		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
 +		unsigned int type, u32 index, phys_addr_t phys, size_t length);

Jason
Re: [PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Nicolin Chen 6 months, 3 weeks ago
On Fri, May 30, 2025 at 01:07:53PM -0300, Jason Gunthorpe wrote:
> On Sat, May 17, 2025 at 08:21:30PM -0700, Nicolin Chen wrote:
> >  struct iommufd_viommu_ops {
> > +	u32 flags;
> >  	void (*destroy)(struct iommufd_viommu *viommu);
> >  	struct iommu_domain *(*alloc_domain_nested)(
> >  		struct iommufd_viommu *viommu, u32 flags,
> > @@ -171,6 +200,10 @@ struct iommufd_viommu_ops {
> >  						 struct device *dev,
> >  						 u64 virt_id);
> >  	void (*vdevice_destroy)(struct iommufd_vdevice *vdev);
> > +	struct iommufd_hw_queue *(*hw_queue_alloc)(
> > +		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
> > +		unsigned int type, u32 index, u64 base_addr, size_t length);
> 
> I think it would be better to have two function pointers here than the flags:
> 
>  +	struct iommufd_hw_queue *(*hw_queue_alloc)(
>  +		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
>  +		unsigned int type, u32 index, u64 s2_iova, size_t length);
> 
> 
>  +	struct iommufd_hw_queue *(*hw_queue_alloc_phys)(
>  +		struct iommufd_ucmd *ucmd, struct iommufd_viommu *viommu,
>  +		unsigned int type, u32 index, phys_addr_t phys, size_t length);

OK. I think these two should be exclusive then. Maybe it needs a
WARN_ON in iommufd_viommu_alloc.

Thanks
Nicolin
RE: [PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Tian, Kevin 7 months ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Sunday, May 18, 2025 11:22 AM
> 
> +
> +enum iommufd_viommu_flags {
> +	/*
> +	 * The HW does not go through an address translation table but
> reads the
> +	 * physical address space directly: iommufd core should pin the
> physical
> +	 * pages backing the queue memory that's allocated for the HW
> QUEUE, and
> +	 * ensure those physical pages are contiguous in the physical space.
> +	 */
> +	IOMMUFD_VIOMMU_FLAG_HW_QUEUE_READS_PA = 1 << 0,
> +};

The queue itself doesn't read an address.

What about 'QUEUE_BASE_PA'?

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Re: [PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Nicolin Chen 6 months, 4 weeks ago
On Fri, May 23, 2025 at 07:55:18AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Sunday, May 18, 2025 11:22 AM
> > 
> > +
> > +enum iommufd_viommu_flags {
> > +	/*
> > +	 * The HW does not go through an address translation table but
> > reads the
> > +	 * physical address space directly: iommufd core should pin the
> > physical
> > +	 * pages backing the queue memory that's allocated for the HW
> > QUEUE, and
> > +	 * ensure those physical pages are contiguous in the physical space.
> > +	 */
> > +	IOMMUFD_VIOMMU_FLAG_HW_QUEUE_READS_PA = 1 << 0,
> > +};
> 
> The queue itself doesn't read an address.
>
> What about 'QUEUE_BASE_PA'?

But the HW queue object represents the HW feature, not the guest
queue memory. So, it is accurate to say that it reads an address?

We have this in doc:
- IOMMUFD_OBJ_HW_QUEUE, representing a hardware accelerated queue, as a subset
  of IOMMU's virtualization features, for the IOMMU HW to directly read or write
  the virtual queue memory owned by a guest OS. This HW-acceleration feature can
  ...

Thanks
Nic
RE: [PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Tian, Kevin 6 months, 3 weeks ago
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Saturday, May 24, 2025 5:46 AM
> 
> On Fri, May 23, 2025 at 07:55:18AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Sunday, May 18, 2025 11:22 AM
> > >
> > > +
> > > +enum iommufd_viommu_flags {
> > > +	/*
> > > +	 * The HW does not go through an address translation table but
> > > reads the
> > > +	 * physical address space directly: iommufd core should pin the
> > > physical
> > > +	 * pages backing the queue memory that's allocated for the HW
> > > QUEUE, and
> > > +	 * ensure those physical pages are contiguous in the physical space.
> > > +	 */
> > > +	IOMMUFD_VIOMMU_FLAG_HW_QUEUE_READS_PA = 1 << 0,
> > > +};
> >
> > The queue itself doesn't read an address.
> >
> > What about 'QUEUE_BASE_PA'?
> 
> But the HW queue object represents the HW feature, not the guest
> queue memory. So, it is accurate to say that it reads an address?
> 
> We have this in doc:
> - IOMMUFD_OBJ_HW_QUEUE, representing a hardware accelerated queue,
> as a subset
>   of IOMMU's virtualization features, for the IOMMU HW to directly read or
> write
>   the virtual queue memory owned by a guest OS. This HW-acceleration
> feature can
>   ...
> 

Okay. Then ACCESS_PA means both read/write?
Re: [PATCH v5 13/29] iommufd/viommu: Introduce IOMMUFD_OBJ_HW_QUEUE and its related struct
Posted by Nicolin Chen 6 months, 3 weeks ago
On Wed, May 28, 2025 at 08:12:41AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Saturday, May 24, 2025 5:46 AM
> > 
> > On Fri, May 23, 2025 at 07:55:18AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > Sent: Sunday, May 18, 2025 11:22 AM
> > > >
> > > > +
> > > > +enum iommufd_viommu_flags {
> > > > +	/*
> > > > +	 * The HW does not go through an address translation table but
> > > > reads the
> > > > +	 * physical address space directly: iommufd core should pin the
> > > > physical
> > > > +	 * pages backing the queue memory that's allocated for the HW
> > > > QUEUE, and
> > > > +	 * ensure those physical pages are contiguous in the physical space.
> > > > +	 */
> > > > +	IOMMUFD_VIOMMU_FLAG_HW_QUEUE_READS_PA = 1 << 0,
> > > > +};
> > >
> > > The queue itself doesn't read an address.
> > >
> > > What about 'QUEUE_BASE_PA'?
> > 
> > But the HW queue object represents the HW feature, not the guest
> > queue memory. So, it is accurate to say that it reads an address?
> > 
> > We have this in doc:
> > - IOMMUFD_OBJ_HW_QUEUE, representing a hardware accelerated queue,
> > as a subset
> >   of IOMMU's virtualization features, for the IOMMU HW to directly read or
> > write
> >   the virtual queue memory owned by a guest OS. This HW-acceleration
> > feature can
> >   ...
> > 
> 
> Okay. Then ACCESS_PA means both read/write?

OK. IOMMUFD_VIOMMU_FLAG_HW_QUEUE_ACCESS_PA