From: Boris Brezillon <boris.brezillon@collabora.com>
Move the lock/flush_mem operations around the gpuvm_sm_map() calls so
we can implement true atomic page updates, where any access in the
locked range done by the GPU has to wait for the page table updates
to land before proceeding.
This is needed for vkQueueBindSparse(), so we can replace the dummy
page mapped over the entire object with actual BO-backed pages in an
atomic way.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Caterina Shablia <caterina.shablia@collabora.com>
---
drivers/gpu/drm/panthor/panthor_mmu.c | 65 +++++++++++++++++++++++++--
1 file changed, 62 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index b39ea6acc6a9..9caaba03c5eb 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -387,6 +387,15 @@ struct panthor_vm {
* flagged as faulty as a result.
*/
bool unhandled_fault;
+
+ /** @locked_region: Information about the currently locked region. */
+ struct {
+ /** @locked_region.start: Start of the locked region. */
+ u64 start;
+
+ /** @locked_region.size: Size of the locked region. */
+ u64 size;
+ } locked_region;
};
/**
@@ -775,6 +784,10 @@ int panthor_vm_active(struct panthor_vm *vm)
}
ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
+ if (!ret && vm->locked_region.size) {
+ lock_region(ptdev, vm->as.id, vm->locked_region.start, vm->locked_region.size);
+ ret = wait_ready(ptdev, vm->as.id);
+ }
out_make_active:
if (!ret) {
@@ -902,6 +915,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
struct io_pgtable_ops *ops = vm->pgtbl_ops;
u64 offset = 0;
+ drm_WARN_ON(&ptdev->base,
+ (iova < vm->locked_region.start) ||
+ (iova + size > vm->locked_region.start + vm->locked_region.size));
drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
while (offset < size) {
@@ -915,13 +931,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
iova + offset + unmapped_sz,
iova + offset + pgsize * pgcount,
iova, iova + size);
- panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
return -EINVAL;
}
offset += unmapped_sz;
}
- return panthor_vm_flush_range(vm, iova, size);
+ return 0;
}
static int
@@ -938,6 +953,10 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
if (!size)
return 0;
+ drm_WARN_ON(&ptdev->base,
+ (iova < vm->locked_region.start) ||
+ (iova + size > vm->locked_region.start + vm->locked_region.size));
+
for_each_sgtable_dma_sg(sgt, sgl, count) {
dma_addr_t paddr = sg_dma_address(sgl);
size_t len = sg_dma_len(sgl);
@@ -985,7 +1004,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
offset = 0;
}
- return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
+ return 0;
}
static int flags_to_prot(u32 flags)
@@ -1654,6 +1673,38 @@ static const char *access_type_name(struct panthor_device *ptdev,
}
}
+static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
+{
+ struct panthor_device *ptdev = vm->ptdev;
+ int ret = 0;
+
+ mutex_lock(&ptdev->mmu->as.slots_lock);
+ drm_WARN_ON(&ptdev->base, vm->locked_region.start || vm->locked_region.size);
+ vm->locked_region.start = start;
+ vm->locked_region.size = size;
+ if (vm->as.id >= 0) {
+ lock_region(ptdev, vm->as.id, start, size);
+ ret = wait_ready(ptdev, vm->as.id);
+ }
+ mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+ return ret;
+}
+
+static void panthor_vm_unlock_region(struct panthor_vm *vm)
+{
+ struct panthor_device *ptdev = vm->ptdev;
+
+ mutex_lock(&ptdev->mmu->as.slots_lock);
+ if (vm->as.id >= 0) {
+ write_cmd(ptdev, vm->as.id, AS_COMMAND_FLUSH_MEM);
+ drm_WARN_ON(&ptdev->base, wait_ready(ptdev, vm->as.id));
+ }
+ vm->locked_region.start = 0;
+ vm->locked_region.size = 0;
+ mutex_unlock(&ptdev->mmu->as.slots_lock);
+}
+
static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
{
bool has_unhandled_faults = false;
@@ -2179,6 +2230,11 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
mutex_lock(&vm->op_lock);
vm->op_ctx = op;
+
+ ret = panthor_vm_lock_region(vm, op->va.addr, op->va.range);
+ if (ret)
+ goto out;
+
switch (op_type) {
case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
if (vm->unusable) {
@@ -2199,6 +2255,9 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
break;
}
+ panthor_vm_unlock_region(vm);
+
+out:
if (ret && flag_vm_unusable_on_failure)
vm->unusable = true;
--
2.47.2
On 07/07/2025 18:04, Caterina Shablia wrote:
> From: Boris Brezillon <boris.brezillon@collabora.com>
>
> Move the lock/flush_mem operations around the gpuvm_sm_map() calls so
> we can implement true atomic page updates, where any access in the
> locked range done by the GPU has to wait for the page table updates
> to land before proceeding.
[...]
> @@ -775,6 +784,10 @@ int panthor_vm_active(struct panthor_vm *vm)
>  	}
>
>  	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> +	if (!ret && vm->locked_region.size) {
> +		lock_region(ptdev, vm->as.id, vm->locked_region.start, vm->locked_region.size);
> +		ret = wait_ready(ptdev, vm->as.id);
> +	}

Do we need to do this? It seems odd to restore a MMU context and
immediately set a lock region. Is there a good reason we can't just
WARN_ON if there's a lock region set in panthor_vm_idle()?

I think we need to briefly take vm->op_lock to ensure synchronisation
but that doesn't seem a big issue. Or perhaps there's a good reason that
I'm missing?

[...]

> +static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
[...]
> +static void panthor_vm_unlock_region(struct panthor_vm *vm)
> +{
> +	struct panthor_device *ptdev = vm->ptdev;
> +
> +	mutex_lock(&ptdev->mmu->as.slots_lock);
> +	if (vm->as.id >= 0) {
> +		write_cmd(ptdev, vm->as.id, AS_COMMAND_FLUSH_MEM);
> +		drm_WARN_ON(&ptdev->base, wait_ready(ptdev, vm->as.id));
> +	}
> +	vm->locked_region.start = 0;
> +	vm->locked_region.size = 0;
> +	mutex_unlock(&ptdev->mmu->as.slots_lock);
> +}

Do we need to include a drm_dev_enter() somewhere here? I note that
panthor_vm_flush_range() has one and you've effectively replaced that
code with the above.

Thanks,
Steve
On Fri, 11 Jul 2025 14:30:21 +0100
Steven Price <steven.price@arm.com> wrote:

> On 07/07/2025 18:04, Caterina Shablia wrote:
[...]
> > @@ -775,6 +784,10 @@ int panthor_vm_active(struct panthor_vm *vm)
> >  	}
> >
> >  	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> > +	if (!ret && vm->locked_region.size) {
> > +		lock_region(ptdev, vm->as.id, vm->locked_region.start, vm->locked_region.size);
> > +		ret = wait_ready(ptdev, vm->as.id);
> > +	}
>
> Do we need to do this? It seems odd to restore a MMU context and
> immediately set a lock region. Is there a good reason we can't just
> WARN_ON if there's a lock region set in panthor_vm_idle()?
>
> I think we need to briefly take vm->op_lock to ensure synchronisation
> but that doesn't seem a big issue. Or perhaps there's a good reason that
> I'm missing?

Hm, I wish I had written a big fat comment along with this change, because,
indeed, it seems simpler to take the op_lock to ensure any in-flight PT
update is flushed before we make the AS active, and I definitely don't
remember why I didn't do that. Could be some lock-ordering inversion
involving the slots lock, or maybe I overthought this at the time. In any
case, it looks like it's worth a try.
On Friday, 11 July 2025 15:30:21 (CEST), Steven Price wrote:
> On 07/07/2025 18:04, Caterina Shablia wrote:
[...]
> > +	if (!ret && vm->locked_region.size) {
> > +		lock_region(ptdev, vm->as.id, vm->locked_region.start, vm->locked_region.size);
> > +		ret = wait_ready(ptdev, vm->as.id);
> > +	}
>
> Do we need to do this? It seems odd to restore a MMU context and
> immediately set a lock region. Is there a good reason we can't just
> WARN_ON if there's a lock region set in panthor_vm_idle()?

So IIUC, when things are otherwise idle and we do a vm_bind, the VM will be
inactive, in which case we're not going to poke the MMU to inform it of the
locked region, because it literally is not aware of this VM. Now if, in the
meanwhile, something submits a job and activates the VM, we need to inform
the MMU of the locked region, as the vm_bind job might still be going on. I
don't see why panthor_vm_idle() while a region is locked would be a problem;
that can arise e.g. when a job completes but there's a concurrent vm_bind
going on.

> I think we need to briefly take vm->op_lock to ensure synchronisation
> but that doesn't seem a big issue. Or perhaps there's a good reason that
> I'm missing?

I think you're right: all other accesses to locked_region are guarded by
op_lock, and GPU job submission pokes vm_active concurrently with vm_bind
jobs doing region {,un}locks.

[...]

> Do we need to include a drm_dev_enter() somewhere here? I note that
> panthor_vm_flush_range() has one and you've effectively replaced that
> code with the above.

Looks like we should.
On Tuesday, 15 July 2025 17:08:09 (CEST), Caterina Shablia wrote:
> On Friday, 11 July 2025 15:30:21 (CEST), Steven Price wrote:
[...]
> > Do we need to include a drm_dev_enter() somewhere here? I note that
> > panthor_vm_flush_range() has one and you've effectively replaced that
> > code with the above.
>
> Looks like we should.

Actually, not sure. I think I'm either misunderstanding what drm_dev_enter()
is, or there are other things that should be doing it. Notably
panthor_mmu_as_{en,dis}able or their callers aren't doing drm_dev_enter(),
yet they poke the hardware, so it seems to me that code also runs the risk
of poking the hardware while/after it was unplugged, but I'm not confident
in my understanding at all. I guess an extra drm_dev_enter() here or there
isn't going to harm anyone as it's re-entrant, so I'll put one around the
call to lock_region() in panthor_vm_lock_region() as well.
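For illustration, a minimal sketch of what that could look like, assuming
the lock_region()/wait_ready() helpers from this patch and the standard
drm_dev_enter()/drm_dev_exit() pairing; this only sketches the idea
discussed above, not the final change:

static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
{
	struct panthor_device *ptdev = vm->ptdev;
	int ret = 0, cookie;

	mutex_lock(&ptdev->mmu->as.slots_lock);
	drm_WARN_ON(&ptdev->base, vm->locked_region.start || vm->locked_region.size);
	vm->locked_region.start = start;
	vm->locked_region.size = size;

	/* Only poke the hardware if the VM owns an AS slot and the device
	 * hasn't been unplugged in the meantime.
	 */
	if (vm->as.id >= 0 && drm_dev_enter(&ptdev->base, &cookie)) {
		lock_region(ptdev, vm->as.id, start, size);
		ret = wait_ready(ptdev, vm->as.id);
		drm_dev_exit(cookie);
	}
	mutex_unlock(&ptdev->mmu->as.slots_lock);

	return ret;
}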
On 15/07/2025 17:09, Caterina Shablia wrote:
> On Tuesday, 15 July 2025 17:08:09 (CEST), Caterina Shablia wrote:
[...]
>> Looks like we should.
>
> Actually, not sure. I think I'm either misunderstanding what
> drm_dev_enter() is, or there are other things that should be doing it.
> Notably panthor_mmu_as_{en,dis}able or their callers aren't doing
> drm_dev_enter(), yet they poke the hardware, so it seems to me that code
> also runs the risk of poking the hardware while/after it was unplugged,
> but I'm not confident in my understanding at all. I guess an extra
> drm_dev_enter() here or there isn't going to harm anyone as it's
> re-entrant, so I'll put one around the call to lock_region() in
> panthor_vm_lock_region() as well.

In theory all paths that touch GPU registers should be wrapped in a
drm_dev_enter() somewhere so that we don't poke the hardware when it's
gone. Of course in practice we don't really have hotpluggable hardware so
this isn't exactly well tested. So there might be existing bugs.

As you say it is possible to nest drm_dev_enter() so that might be easiest
in this case.

We do have the 'automotive' GPUs (not currently supported by panthor[1])
which support virtualisation where it is potentially possible to "rip out"
the GPU from a guest. So I suspect in the future we will start to care more
about this.

Thanks,
Steve

[1] I think the only public version is Mali-G78AE, which is a job manager
GPU and would be panfrost anyway. I've no idea what the roadmap is for
future GPUs.
On Tuesday, 15 July 2025 17:08:09 (CEST), Caterina Shablia wrote:
> On Friday, 11 July 2025 15:30:21 (CEST), Steven Price wrote:
[...]
> > I think we need to briefly take vm->op_lock to ensure synchronisation
> > but that doesn't seem a big issue. Or perhaps there's a good reason that
> > I'm missing?
>
> I think you're right: all other accesses to locked_region are guarded by
> op_lock, and GPU job submission pokes vm_active concurrently with vm_bind
> jobs doing region {,un}locks.

Actually no, that's not necessary. Access to locked_region is protected by
slots_lock, which is held here. Trying to take vm->op_lock would also be
detrimental here, because these locks are often taken together and
slots_lock is taken after op_lock, so taking op_lock here would be extremely
deadlock-prone.
On 15/07/2025 16:33, Caterina Shablia wrote:
> On Tuesday, 15 July 2025 17:08:09 (CEST), Caterina Shablia wrote:
[...]
>> So IIUC, when things are otherwise idle and we do a vm_bind, the VM will
>> be inactive, in which case we're not going to poke the MMU to inform it
>> of the locked region, because it literally is not aware of this VM. Now
>> if, in the meanwhile, something submits a job and activates the VM, we
>> need to inform the MMU of the locked region, as the vm_bind job might
>> still be going on. I don't see why panthor_vm_idle() while a region is
>> locked would be a problem; that can arise e.g. when a job completes but
>> there's a concurrent vm_bind going on.

So it's absolutely fine (and normal) to perform a vm_bind while the VM is
idle. In this case there's no need to perform the lock because there's
nothing running on the GPU which could be affected by the page tables being
(temporarily) inconsistent.

What I'm questioning is the design where, if a job comes in in the middle
of a vm_bind operation, we set the lock region rather than just waiting for
the vm_bind operation to complete. AFAICT simply synchronising on
vm->op_lock should achieve this.

> Actually no, that's not necessary. Access to locked_region is protected
> by slots_lock, which is held here. Trying to take vm->op_lock would also
> be detrimental here, because these locks are often taken together and
> slots_lock is taken after op_lock, so taking op_lock here would be
> extremely deadlock-prone.

It would obviously be necessary to acquire vm->op_lock before as.slots_lock,
as you say, to avoid deadlocks. Note that as soon as as.slots_lock is held,
vm->op_lock can be dropped.

I just find the current approach a little odd, and unless there's a good
reason for it I would prefer that we don't enable a VM on a new address
space while there's an outstanding vm_bind still running. Obviously if
there's a good reason (e.g. we really do expect long-running vm_bind
operations) then that just needs documenting in the commit message. But I'm
not aware that's the case here.

Although in general I'm a bit wary of relying on the whole lock region
feature - previous GPUs have errata. But maybe I'm being over-cautious
there.

Thanks,
Steve
On Wed, 16 Jul 2025 16:43:24 +0100
Steven Price <steven.price@arm.com> wrote:

> It would obviously be necessary to acquire vm->op_lock before
> as.slots_lock, as you say, to avoid deadlocks. Note that as soon as
> as.slots_lock is held, vm->op_lock can be dropped.

Yeah, lock ordering is not an issue, because we take slots_lock in this
function, so we're in full control of the ordering. And I wouldn't even
consider releasing op_lock as soon as we acquire slots_lock because:

- that makes things harder to reason about
- the locked section is not blocking on any sort of external event
- the locked section is pretty straightforward (so no excessive delays
  expected here)

> I just find the current approach a little odd, and unless there's a good
> reason for it I would prefer that we don't enable a VM on a new address
> space while there's an outstanding vm_bind still running. Obviously if
> there's a good reason (e.g. we really do expect long-running vm_bind
> operations) then that just needs documenting in the commit message. But
> I'm not aware that's the case here.

I fully agree here. If there's no obvious reason not to serialize
vm_active() on VM bind ops, I'd opt for taking the VM op_lock and calling
it a day. And I honestly can't think of any:

- the VM op logic is all synchronous/non-blocking
- it's expected to be fast
- AS rotation is something I hope is not happening too often, otherwise
  we'll have other things to worry about (the whole CSG slot scheduling
  logic is quite involved, and I'd expect the BIND-while-making-AS-active
  case to be rare enough that it becomes noise in the overall overhead of
  kernel-side GPU scheduling happening in Panthor)

> Although in general I'm a bit wary of relying on the whole lock region
> feature - previous GPUs have errata. But maybe I'm being over-cautious
> there.

We're heavily relying on it already to allow updates of the VM while the
GPU is executing stuff. If that's problematic on v10+, I'd rather know
early :D.

Regards,

Boris
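For the record, a rough sketch of the serialization being proposed here,
assuming panthor_vm_active()'s existing structure (the elided comment
stands in for the current AS slot selection code); taking op_lock before
as.slots_lock keeps the lock order discussed above and would make the
lock-region restore on activation unnecessary. This is only an
illustration, not a tested change:

int panthor_vm_active(struct panthor_vm *vm)
{
	struct panthor_device *ptdev = vm->ptdev;
	int ret = 0;

	/* Wait for any in-flight VM_BIND to finish before binding the VM
	 * to an AS. op_lock must be taken before as.slots_lock to respect
	 * the existing lock ordering.
	 */
	mutex_lock(&vm->op_lock);
	mutex_lock(&ptdev->mmu->as.slots_lock);

	/* With op_lock held, no region can currently be locked. */
	drm_WARN_ON(&ptdev->base, vm->locked_region.size);

	/* ... existing AS slot selection and panthor_mmu_as_enable() ... */

	mutex_unlock(&ptdev->mmu->as.slots_lock);
	mutex_unlock(&vm->op_lock);

	return ret;
}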
On 21/08/2025 12:51, Boris Brezillon wrote:
> On Wed, 16 Jul 2025 16:43:24 +0100
> Steven Price <steven.price@arm.com> wrote:
[...]
>> Although in general I'm a bit wary of relying on the whole lock region
>> feature - previous GPUs have errata. But maybe I'm being over-cautious
>> there.
>
> We're heavily relying on it already to allow updates of the VM while
> the GPU is executing stuff. If that's problematic on v10+, I'd rather
> know early :D.

I think I'm just scarred by my experiences over a decade ago... ;)

I'm not aware of any issues with the modern[1] GPUs. The issue used to be
that the lock region could get accidentally unlocked by a cache flush from
another source - specifically the cache flush on job start flag.

It's also not a major issue if you keep the page tables consistent; the
lock region in theory allows a region to be in an inconsistent state - but
generally there's no need for that. AFAIK we mostly keep the tables
consistent anyway.

Thanks,
Steve

[1] Which in this context means well over a decade - it's somewhat scary
how long I've been doing this!
On Thu, 21 Aug 2025 16:02:09 +0100
Steven Price <steven.price@arm.com> wrote:

> I'm not aware of any issues with the modern[1] GPUs. The issue used to
> be that the lock region could get accidentally unlocked by a cache flush
> from another source - specifically the cache flush on job start flag.
>
> It's also not a major issue if you keep the page tables consistent; the
> lock region in theory allows a region to be in an inconsistent state -
> but generally there's no need for that. AFAIK we mostly keep the tables
> consistent anyway.

Right, it's not a problem until we introduce sparse binding support, at
which point atomicity becomes important. And given remapping is not a thing
the io-pgtable layer provides (a remap has to be an unmap+map), I need to
rely on region locking to make it work, or we'll have to eat the
fault-but-not-really-because-it's-being-remapped overhead/complexity.
Honestly, I'd rather rely on region locking if it's working, because it's
far simpler ;-).
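To make the remap case concrete, here is a hypothetical helper sketching
how a sparse replace could reuse the lock/unlock helpers from this patch.
The name panthor_vm_remap_locked() and the exact panthor_vm_map_pages()
argument list are assumptions made for illustration; the point is that a
remap boils down to unmap+map, made atomic from the GPU's perspective by
the region lock:

/* Hypothetical: atomically replace the mapping of [start, start + size).
 * io-pgtable has no native remap, so this is an unmap followed by a map;
 * the region lock keeps GPU accesses to that range stalled until FLUSH_MEM
 * (issued by panthor_vm_unlock_region()) publishes the new entries.
 */
static int panthor_vm_remap_locked(struct panthor_vm *vm, u64 start, u64 size,
				   struct sg_table *sgt, int prot)
{
	int ret;

	ret = panthor_vm_lock_region(vm, start, size);
	if (ret)
		return ret;

	ret = panthor_vm_unmap_pages(vm, start, size);
	if (!ret)
		ret = panthor_vm_map_pages(vm, start, prot, sgt, 0, size);

	panthor_vm_unlock_region(vm);
	return ret;
}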