[PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size

Nicolin Chen posted 2 patches 2 months, 2 weeks ago
There is a newer version of this series
[PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Nicolin Chen 2 months, 2 weeks ago
It's more flexible to have a get_viommu_size op. Replace static vsmmu_size
and vsmmu_type with that.

Suggested-by: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c    |  8 ++------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c        |  4 ++--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h        |  3 +--
 drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c     | 14 ++++++++++++--
 4 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index c034d6c5468f..8cd8929bbfdf 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -423,10 +423,9 @@ size_t arm_smmu_get_viommu_size(struct device *dev,
 	if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
 		return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);
 
-	if (!smmu->impl_ops || !smmu->impl_ops->vsmmu_size ||
-	    viommu_type != smmu->impl_ops->vsmmu_type)
+	if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
 		return 0;
-	return smmu->impl_ops->vsmmu_size;
+	return smmu->impl_ops->get_viommu_size(viommu_type);
 }
 
 int arm_vsmmu_init(struct iommufd_viommu *viommu,
@@ -451,9 +450,6 @@ int arm_vsmmu_init(struct iommufd_viommu *viommu,
 		return 0;
 	}
 
-	/* Unsupported type was rejected in arm_smmu_get_viommu_size() */
-	if (WARN_ON(viommu->type != smmu->impl_ops->vsmmu_type))
-		return -EOPNOTSUPP;
 	return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
 }
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9f4ad3705801..f56113107c8a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -4716,8 +4716,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
 
 	ops = new_smmu->impl_ops;
 	if (ops) {
-		/* vsmmu_size and vsmmu_init ops must be paired */
-		if (WARN_ON(!ops->vsmmu_size != !ops->vsmmu_init)) {
+		/* get_viommu_size and vsmmu_init ops must be paired */
+		if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
 			ret = -EINVAL;
 			goto err_remove;
 		}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 3fa02c51df9f..e332f5ba2f8a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -728,8 +728,7 @@ struct arm_smmu_impl_ops {
 	 */
 	void *(*hw_info)(struct arm_smmu_device *smmu, u32 *length,
 			 enum iommu_hw_info_type *type);
-	const size_t vsmmu_size;
-	const enum iommu_viommu_type vsmmu_type;
+	size_t (*get_viommu_size)(enum iommu_viommu_type viommu_type);
 	int (*vsmmu_init)(struct arm_vsmmu *vsmmu,
 			  const struct iommu_user_data *user_data);
 };
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index 4c86eacd36b1..46005ed52bc2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -832,6 +832,13 @@ static void *tegra241_cmdqv_hw_info(struct arm_smmu_device *smmu, u32 *length,
 	return info;
 }
 
+static size_t tegra241_cmdqv_get_vintf_size(enum iommu_viommu_type viommu_type)
+{
+	if (viommu_type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
+		return 0;
+	return VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core);
+}
+
 static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
 	/* For in-kernel use */
 	.get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
@@ -839,8 +846,7 @@ static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
 	.device_remove = tegra241_cmdqv_remove,
 	/* For user-space use */
 	.hw_info = tegra241_cmdqv_hw_info,
-	.vsmmu_size = VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core),
-	.vsmmu_type = IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV,
+	.get_viommu_size = tegra241_cmdqv_get_vintf_size,
 	.vsmmu_init = tegra241_cmdqv_init_vintf_user,
 };
 
@@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
 	phys_addr_t page0_base;
 	int ret;
 
+	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
+	if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV))
+		return -EOPNOTSUPP;
+
 	if (!user_data)
 		return -EINVAL;
 
-- 
2.43.0
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Pranjal Shrivastava 2 months, 2 weeks ago
On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> It's more flexible to have a get_viommu_size op. Replace static vsmmu_size
> and vsmmu_type with that.
> 
> Suggested-by: Will Deacon <will@kernel.org>
> Acked-by: Will Deacon <will@kernel.org>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c    |  8 ++------
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c        |  4 ++--
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h        |  3 +--
>  drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c     | 14 ++++++++++++--
>  4 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> index c034d6c5468f..8cd8929bbfdf 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> @@ -423,10 +423,9 @@ size_t arm_smmu_get_viommu_size(struct device *dev,
>  	if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
>  		return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);
>  
> -	if (!smmu->impl_ops || !smmu->impl_ops->vsmmu_size ||
> -	    viommu_type != smmu->impl_ops->vsmmu_type)
> +	if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
>  		return 0;
> -	return smmu->impl_ops->vsmmu_size;
> +	return smmu->impl_ops->get_viommu_size(viommu_type);
>  }
>  
>  int arm_vsmmu_init(struct iommufd_viommu *viommu,
> @@ -451,9 +450,6 @@ int arm_vsmmu_init(struct iommufd_viommu *viommu,
>  		return 0;
>  	}
>  
> -	/* Unsupported type was rejected in arm_smmu_get_viommu_size() */
> -	if (WARN_ON(viommu->type != smmu->impl_ops->vsmmu_type))
> -		return -EOPNOTSUPP;
>  	return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
>  }
>  
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 9f4ad3705801..f56113107c8a 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -4716,8 +4716,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
>  
>  	ops = new_smmu->impl_ops;
>  	if (ops) {
> -		/* vsmmu_size and vsmmu_init ops must be paired */
> -		if (WARN_ON(!ops->vsmmu_size != !ops->vsmmu_init)) {
> +		/* get_viommu_size and vsmmu_init ops must be paired */
> +		if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
>  			ret = -EINVAL;
>  			goto err_remove;
>  		}
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 3fa02c51df9f..e332f5ba2f8a 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -728,8 +728,7 @@ struct arm_smmu_impl_ops {
>  	 */
>  	void *(*hw_info)(struct arm_smmu_device *smmu, u32 *length,
>  			 enum iommu_hw_info_type *type);
> -	const size_t vsmmu_size;
> -	const enum iommu_viommu_type vsmmu_type;
> +	size_t (*get_viommu_size)(enum iommu_viommu_type viommu_type);
>  	int (*vsmmu_init)(struct arm_vsmmu *vsmmu,
>  			  const struct iommu_user_data *user_data);
>  };
> diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
> index 4c86eacd36b1..46005ed52bc2 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
> @@ -832,6 +832,13 @@ static void *tegra241_cmdqv_hw_info(struct arm_smmu_device *smmu, u32 *length,
>  	return info;
>  }
>  
> +static size_t tegra241_cmdqv_get_vintf_size(enum iommu_viommu_type viommu_type)
> +{
> +	if (viommu_type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
> +		return 0;
> +	return VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core);
> +}
> +
>  static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
>  	/* For in-kernel use */
>  	.get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
> @@ -839,8 +846,7 @@ static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
>  	.device_remove = tegra241_cmdqv_remove,
>  	/* For user-space use */
>  	.hw_info = tegra241_cmdqv_hw_info,
> -	.vsmmu_size = VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core),
> -	.vsmmu_type = IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV,
> +	.get_viommu_size = tegra241_cmdqv_get_vintf_size,
>  	.vsmmu_init = tegra241_cmdqv_init_vintf_user,
>  };
>  
> @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
>  	phys_addr_t page0_base;
>  	int ret;
>  
> +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
> +	if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV))
> +		return -EOPNOTSUPP;
> +

Nit: I don't think we'd expect a call to this if the vintf_size returned
0? I see that in iommufd_viommu_alloc_ioctl, we already have a check:


        viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
	        if (!viommu_size) {
			rc = -EOPNOTSUPP;
			goto out_put_idev;
		}

And call ops->viommu_init only when the above isn't met. Thus,
if we still end up calling ops->viommu_init, shouldn't we BUG_ON() it?
I'd rather have the core code handle such things (since the driver is
simply implementing the ops) and BUG_ON() something that's terribly
wrong..

I can't see any ops->viommu_init being called elsewhere atm, let me
know if there's a different path that I missed..

Apart from the above nit,

Reviewed-by: Pranjal Shrivastava <praan@google.com>

>  	if (!user_data)
>  		return -EINVAL;
>  
> -- 
> 2.43.0
> 
>
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Nicolin Chen 2 months, 2 weeks ago
On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
> >  	phys_addr_t page0_base;
> >  	int ret;
> >  
> > +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
> > +	if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV))
> > +		return -EOPNOTSUPP;
> > +
> 
> Nit: I don't think we'd expect a call to this if the vintf_size returned
> 0? I see that in iommufd_viommu_alloc_ioctl, we already have a check:

It's added in the previous patch where I explained that this is
to detect data corruption. When something like that happens, it
would be often illogical.

> And call ops->viommu_init only when the above isn't met. Thus,
> if we still end up calling ops->viommu_init, shouldn't we BUG_ON() it?
> I'd rather have the core code handle such things (since the driver is
> simply implementing the ops) and BUG_ON() something that's terribly
> wrong..

BUG_ON is discouraged following the coding style:
https://docs.kernel.org/process/coding-style.html#use-warn-rather-than-bug

> I can't see any ops->viommu_init being called elsewhere atm, let me
> know if there's a different path that I missed..

I see it as a precaution that should never get triggered. But in
case that it happens, I don't want it to proceed further wasting
precious HW resource given that this function allocates a VINTF.

Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Pranjal Shrivastava 2 months, 2 weeks ago
On Wed, Jul 23, 2025 at 11:05:26AM -0700, Nicolin Chen wrote:
> On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
> > >  	phys_addr_t page0_base;
> > >  	int ret;
> > >  
> > > +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
> > > +	if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV))
> > > +		return -EOPNOTSUPP;
> > > +
> > 
> > Nit: I don't think we'd expect a call to this if the vintf_size returned
> > 0? I see that in iommufd_viommu_alloc_ioctl, we already have a check:
> 
> It's added in the previous patch where I explained that this is
> to detect data corruption. When something like that happens, it
> would be often illogical.
> 

Right.. I got mis-led by the comment, my point is that if an
"unsupported type" was rejected in _get_vintf_size, we wouldn't be here
calling viommu_init since we error out based on the check in
iommufd_viommu_alloc_ioctl.. but yes, if there was some data corruption
that changed the viommu type between these calls, I guess it makes sense
to check and error out here.

> > And call ops->viommu_init only when the above isn't met. Thus,
> > if we still end up calling ops->viommu_init, shouldn't we BUG_ON() it?
> > I'd rather have the core code handle such things (since the driver is
> > simply implementing the ops) and BUG_ON() something that's terribly
> > wrong..
> 
> BUG_ON is discouraged following the coding style:
> https://docs.kernel.org/process/coding-style.html#use-warn-rather-than-bug
> 

Noted. Thanks.

> > I can't see any ops->viommu_init being called elsewhere atm, let me
> > know if there's a different path that I missed..
> 
> I see it as a precaution that should never get triggered. But in
> case that it happens, I don't want it to proceed further wasting
> precious HW resource given that this function allocates a VINTF.
> 

Agreed.

> Nicolin

Praan
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Pranjal Shrivastava 2 months, 1 week ago
On Wed, Jul 23, 2025 at 06:58:20PM +0000, Pranjal Shrivastava wrote:
> On Wed, Jul 23, 2025 at 11:05:26AM -0700, Nicolin Chen wrote:
> > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > > @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
> > > >  	phys_addr_t page0_base;
> > > >  	int ret;
> > > >  
> > > > +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */

Sorry, if this wasn't clear in the previous comment. I meant this
comment must be updated, the "unsupported type" wasn't rejected in
vintf_size, rather the type got corrupted which brought us here. Had the
vintf_size rejected it, we wouldn't be calling the init op.

Thanks,
Praan

> > > > +	if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV))
> > > > +		return -EOPNOTSUPP;
> > > > +
> > > 
> > > Nit: I don't think we'd expect a call to this if the vintf_size returned
> > > 0? I see that in iommufd_viommu_alloc_ioctl, we already have a check:
> > 
> > It's added in the previous patch where I explained that this is
> > to detect data corruption. When something like that happens, it
> > would be often illogical.
> > 
> 
> Right.. I got mis-led by the comment, my point is that if an
> "unsupported type" was rejected in _get_vintf_size, we wouldn't be here
> calling viommu_init since we error out based on the check in
> iommufd_viommu_alloc_ioctl.. but yes, if there was some data corruption
> that changed the viommu type between these calls, I guess it makes sense
> to check and error out here.
> 
> > > And call ops->viommu_init only when the above isn't met. Thus,
> > > if we still end up calling ops->viommu_init, shouldn't we BUG_ON() it?
> > > I'd rather have the core code handle such things (since the driver is
> > > simply implementing the ops) and BUG_ON() something that's terribly
> > > wrong..
> > 
> > BUG_ON is discouraged following the coding style:
> > https://docs.kernel.org/process/coding-style.html#use-warn-rather-than-bug
> > 
> 
> Noted. Thanks.
> 
> > > I can't see any ops->viommu_init being called elsewhere atm, let me
> > > know if there's a different path that I missed..
> > 
> > I see it as a precaution that should never get triggered. But in
> > case that it happens, I don't want it to proceed further wasting
> > precious HW resource given that this function allocates a VINTF.
> > 
> 
> Agreed.
> 
> > Nicolin
> 
> Praan
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Nicolin Chen 2 months, 1 week ago
On Thu, Jul 24, 2025 at 08:55:50PM +0000, Pranjal Shrivastava wrote:
> On Wed, Jul 23, 2025 at 06:58:20PM +0000, Pranjal Shrivastava wrote:
> > On Wed, Jul 23, 2025 at 11:05:26AM -0700, Nicolin Chen wrote:
> > > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > > > @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
> > > > >  	phys_addr_t page0_base;
> > > > >  	int ret;
> > > > >  
> > > > > +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
> 
> Sorry, if this wasn't clear in the previous comment. I meant this
> comment must be updated, the "unsupported type" wasn't rejected in
> vintf_size, rather the type got corrupted which brought us here.

Any unsupported type would be indeed rejected by the init op
callback. There is nothing wrong with that statement.

It indicates that we shouldn't see an unsupported type here,
unless some serious kernel bug like data corruption happens,
which is implied by the WARN_ON itself.

> Had the
> vintf_size rejected it, we wouldn't be calling the init op.

A data corruption could happen any time, not related to the
init op. A concurrent buggy thread can overwrite the vIOMMU
object when a write access to its adjacent memory overflows.

Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Mostafa Saleh 2 months, 1 week ago
Hi Nicolin,

On Thu, Jul 24, 2025 at 02:49:28PM -0700, Nicolin Chen wrote:
> On Thu, Jul 24, 2025 at 08:55:50PM +0000, Pranjal Shrivastava wrote:
> > On Wed, Jul 23, 2025 at 06:58:20PM +0000, Pranjal Shrivastava wrote:
> > > On Wed, Jul 23, 2025 at 11:05:26AM -0700, Nicolin Chen wrote:
> > > > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > > > > @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
> > > > > >  	phys_addr_t page0_base;
> > > > > >  	int ret;
> > > > > >  
> > > > > > +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
> > 
> > Sorry, if this wasn't clear in the previous comment. I meant this
> > comment must be updated, the "unsupported type" wasn't rejected in
> > vintf_size, rather the type got corrupted which brought us here.
> 
> Any unsupported type would be indeed rejected by the init op
> callback. There is nothing wrong with that statement.
> 
> It indicates that we shouldn't see an unsupported type here,
> unless some serious kernel bug like data corruption happens,
> which is implied by the WARN_ON itself.
> 
> > Had the
> > vintf_size rejected it, we wouldn't be calling the init op.
> 
> A data corruption could happen any time, not related to the
> init op. A concurrent buggy thread can overwrite the vIOMMU
> object when a write access to its adjacent memory overflows.

Can you please elaborate on that, as memory corruption can happen
any time event after the next check and there is no way to defend
against that?

Thanks,
Mostafa

> 
> Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Nicolin Chen 2 months, 1 week ago
On Fri, Jul 25, 2025 at 09:18:35AM +0000, Mostafa Saleh wrote:
> > > > > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > > > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > Had the
> > > vintf_size rejected it, we wouldn't be calling the init op.
> > 
> > A data corruption could happen any time, not related to the
> > init op. A concurrent buggy thread can overwrite the vIOMMU
> > object when a write access to its adjacent memory overflows.
> 
> Can you please elaborate on that, as memory corruption can happen
> any time event after the next check and there is no way to defend
> against that?

That narrative is under a condition (in the context) "when there
is a kernel bug corrupting data" :)

E.g. some new lines of code allocates a wrong size of memory and
writes above the size. If that memory is near this vIOMMU object
it might overwrite to this vIOMMU object that this function gets.

This certainly won't happen if everything is sane.

Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Mostafa Saleh 2 months, 1 week ago
On Fri, Jul 25, 2025 at 09:24:23AM -0700, Nicolin Chen wrote:
> On Fri, Jul 25, 2025 at 09:18:35AM +0000, Mostafa Saleh wrote:
> > > > > > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > > > > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > > Had the
> > > > vintf_size rejected it, we wouldn't be calling the init op.
> > > 
> > > A data corruption could happen any time, not related to the
> > > init op. A concurrent buggy thread can overwrite the vIOMMU
> > > object when a write access to its adjacent memory overflows.
> > 
> > Can you please elaborate on that, as memory corruption can happen
> > any time event after the next check and there is no way to defend
> > against that?
> 
> That narrative is under a condition (in the context) "when there
> is a kernel bug corrupting data" :)
> 
> E.g. some new lines of code allocates a wrong size of memory and
> writes above the size. If that memory is near this vIOMMU object
> it might overwrite to this vIOMMU object that this function gets.
> 
> This certainly won't happen if everything is sane.

I see, but I don't think we should do anything about that, there are
100s of structs in the kernel, we can't add checks everywhere, and I
don't see anything special about this path to add an assertion, this
kind of defensive programming isn't really helpful. We just need to
review any new code properly :)

Thanks,
Mostafa

> 
> Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Nicolin Chen 2 months, 1 week ago
On Fri, Jul 25, 2025 at 06:12:07PM +0000, Mostafa Saleh wrote:
> On Fri, Jul 25, 2025 at 09:24:23AM -0700, Nicolin Chen wrote:
> > On Fri, Jul 25, 2025 at 09:18:35AM +0000, Mostafa Saleh wrote:
> > > > > > > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > > > > > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > > > Had the
> > > > > vintf_size rejected it, we wouldn't be calling the init op.
> > > > 
> > > > A data corruption could happen any time, not related to the
> > > > init op. A concurrent buggy thread can overwrite the vIOMMU
> > > > object when a write access to its adjacent memory overflows.
> > > 
> > > Can you please elaborate on that, as memory corruption can happen
> > > any time event after the next check and there is no way to defend
> > > against that?
> > 
> > That narrative is under a condition (in the context) "when there
> > is a kernel bug corrupting data" :)
> > 
> > E.g. some new lines of code allocates a wrong size of memory and
> > writes above the size. If that memory is near this vIOMMU object
> > it might overwrite to this vIOMMU object that this function gets.
> > 
> > This certainly won't happen if everything is sane.
> 
> I see, but I don't think we should do anything about that, there are
> 100s of structs in the kernel, we can't add checks everywhere, and I
> don't see anything special about this path to add an assertion, this
> kind of defensive programming isn't really helpful. We just need to
> review any new code properly :)

It could help for debugging purpose when writing new lines of code.
Kernel has quite a lot of WARN_ONs fencing something that shouldn't
happen.

With that being said, I admit that this particular line is a bit of
overreacting. Removing it doesn't have too big impact, as something
else would likely crash when such a corruption does happen.

Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Pranjal Shrivastava 2 months, 1 week ago
On Thu, Jul 24, 2025 at 02:49:28PM -0700, Nicolin Chen wrote:
> On Thu, Jul 24, 2025 at 08:55:50PM +0000, Pranjal Shrivastava wrote:
> > On Wed, Jul 23, 2025 at 06:58:20PM +0000, Pranjal Shrivastava wrote:
> > > On Wed, Jul 23, 2025 at 11:05:26AM -0700, Nicolin Chen wrote:
> > > > On Wed, Jul 23, 2025 at 01:37:53PM +0000, Pranjal Shrivastava wrote:
> > > > > On Mon, Jul 21, 2025 at 01:04:44PM -0700, Nicolin Chen wrote:
> > > > > > @@ -1273,6 +1279,10 @@ tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
> > > > > >  	phys_addr_t page0_base;
> > > > > >  	int ret;
> > > > > >  
> > > > > > +	/* Unsupported type was rejected in tegra241_cmdqv_get_vintf_size() */
> > 
> > Sorry, if this wasn't clear in the previous comment. I meant this
> > comment must be updated, the "unsupported type" wasn't rejected in
> > vintf_size, rather the type got corrupted which brought us here.
> 
> Any unsupported type would be indeed rejected by the init op
> callback. There is nothing wrong with that statement.

The comment mentioned tegra241_cmdqv_get_vintf_size() which isn't the
init op. The statement: 

"unsupported type would be indeed rejected by the init op" 

isn't the same as:

"Unsupported type was rejected in tegra241_cmdqv_get_vintf_size()"

> 
> It indicates that we shouldn't see an unsupported type here,
> unless some serious kernel bug like data corruption happens,
> which is implied by the WARN_ON itself.
> 
> > Had the
> > vintf_size rejected it, we wouldn't be calling the init op.
> 
> A data corruption could happen any time, not related to the
> init op. A concurrent buggy thread can overwrite the vIOMMU
> object when a write access to its adjacent memory overflows.
> 

I'm agreeing with all of it, it's just that the comment says something 
was rejected in by the size op, which raises confusion as to why we're
in the init op. The init op rejecting something due to data corruption
is a different thing..

I totally get the point about data corruption, i.e.:

size op -> returned something valid
<data corruption>
init op -> rejecting corrupted type

Wheras I was just trying to understand a case where as per the comment:
"Unsupported type was rejected in tegra241_cmdqv_get_vintf_size()", 
i.e. ->size op returned 0, yet we ended up calling the init op

I guess I should've been more clear. Sorry for all the confusion.

> Nicolin

Praan
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Nicolin Chen 2 months, 1 week ago
On Fri, Jul 25, 2025 at 05:11:07AM +0000, Pranjal Shrivastava wrote:
> On Thu, Jul 24, 2025 at 02:49:28PM -0700, Nicolin Chen wrote:
> I'm agreeing with all of it, it's just that the comment says something 
> was rejected in by the size op, which raises confusion as to why we're
> in the init op. The init op rejecting something due to data corruption
> is a different thing..
> 
> I totally get the point about data corruption, i.e.:
> 
> size op -> returned something valid
> <data corruption>
> init op -> rejecting corrupted type
> 
> Wheras I was just trying to understand a case where as per the comment:
> "Unsupported type was rejected in tegra241_cmdqv_get_vintf_size()", 
> i.e. ->size op returned 0, yet we ended up calling the init op

Is the updated one in v4 fine to you?

/*
 * Unsupported type should be rejected by tegra241_cmdqv_get_vintf_size.
 * Seeing one here indicates a kernel bug or some data corruption.
 */

Nicolin
Re: [PATCH v3 2/2] iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
Posted by Pranjal Shrivastava 2 months, 1 week ago
On Fri, Jul 25, 2025 at 09:03:39AM -0700, Nicolin Chen wrote:
> On Fri, Jul 25, 2025 at 05:11:07AM +0000, Pranjal Shrivastava wrote:
> > On Thu, Jul 24, 2025 at 02:49:28PM -0700, Nicolin Chen wrote:
> > I'm agreeing with all of it, it's just that the comment says something 
> > was rejected in by the size op, which raises confusion as to why we're
> > in the init op. The init op rejecting something due to data corruption
> > is a different thing..
> > 
> > I totally get the point about data corruption, i.e.:
> > 
> > size op -> returned something valid
> > <data corruption>
> > init op -> rejecting corrupted type
> > 
> > Wheras I was just trying to understand a case where as per the comment:
> > "Unsupported type was rejected in tegra241_cmdqv_get_vintf_size()", 
> > i.e. ->size op returned 0, yet we ended up calling the init op
> 
> Is the updated one in v4 fine to you?
> 
> /*
>  * Unsupported type should be rejected by tegra241_cmdqv_get_vintf_size.
>  * Seeing one here indicates a kernel bug or some data corruption.
>  */

Yes, v4 looks fine.. but I was just trying to understand if the comment
was wrong, didn't necessarily need a re-spin just for that comment :)
Thanks for accommodating it though.

> 
> Nicolin