[PATCH v3 14/28] drm/amdgpu: check entity lock is held in amdgpu_ttm_job_submit

drm_sched_job_arm and drm_sched_entity_push_job must be called
under the same lock to guarantee that jobs are pushed to the
entity in the order in which their scheduler fences were armed.

This commit adds a check in amdgpu_ttm_job_submit and fixes the
places where the lock was missing.
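
For reference, the issue the new assert guards against:
drm_sched_job_arm() assigns the job its scheduler-fence seqno and
drm_sched_entity_push_job() queues the job on the entity, so two
threads interleaving between the two calls can push jobs out of
seqno order. A minimal sketch of the caller-side pattern, assuming
an entity with an embedded lock as in struct amdgpu_ttm_buffer_entity
(build_job_ibs() is a hypothetical placeholder for the caller's IB
setup):

	mutex_lock(&entity->lock);

	/* Hypothetical helper standing in for the caller's IB setup. */
	r = build_job_ibs(adev, entity, &job);
	if (!r) {
		/*
		 * amdgpu_job_submit() internally arms the job
		 * (drm_sched_job_arm) and pushes it to the entity
		 * (drm_sched_entity_push_job); the entity lock must
		 * span both calls, hence the lockdep_assert_held().
		 */
		fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
	}
	mutex_unlock(&entity->lock);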

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a803af015d05..164b49d768d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -163,7 +163,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 }
 
 static struct dma_fence *
-amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw)
+amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entity *entity,
+		      struct amdgpu_job *job, u32 num_dw)
 {
 	struct amdgpu_ring *ring;
 
@@ -171,6 +172,8 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 nu
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
+	lockdep_assert_held(&entity->lock);
+
 	return amdgpu_job_submit(job);
 }
 
@@ -268,7 +271,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
 		amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
 	}
 
-	dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw));
+	dma_fence_put(amdgpu_ttm_job_submit(adev, entity, job, num_dw));
 	return 0;
 }
 
@@ -1512,7 +1515,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
 				PAGE_SIZE, 0);
 
-	fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+	fence = amdgpu_ttm_job_submit(adev, &adev->mman.default_entity, job, num_dw);
 	mutex_unlock(&adev->mman.default_entity.lock);
 
 	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
@@ -2336,7 +2339,7 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev,
 		byte_count -= cur_size_in_bytes;
 	}
 
-	*fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+	*fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
 
 	return 0;
 
@@ -2379,7 +2382,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev,
 		byte_count -= cur_size;
 	}
 
-	*fence = amdgpu_ttm_job_submit(adev, job, num_dw);
+	*fence = amdgpu_ttm_job_submit(adev, entity, job, num_dw);
 	return 0;
 }
 
-- 
2.43.0
Re: [PATCH v3 14/28] drm/amdgpu: check entity lock is held in amdgpu_ttm_job_submit
On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> drm_sched_job_arm and drm_sched_entity_push_job must be called
> under the same lock to guarantee that jobs are pushed to the
> entity in the order in which their scheduler fences were armed.
> 
> This commit adds a check in amdgpu_ttm_job_submit and fixes the
> places where the lock was missing.
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>
