[PATCH v3 20/28] drm/amdgpu: allocate multiple clear entities

Posted by Pierre-Eric Pelloux-Prayer 1 week, 3 days ago
No functional change for now, as we always use entity 0.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 51 ++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +-
 3 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2ee48f76483d..56663e82efef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1323,7 +1323,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 		goto out;
 
 	r = amdgpu_fill_buffer(adev,
-			       &adev->mman.clear_entity, abo, 0, &bo->base._resv,
+			       &adev->mman.clear_entities[0], abo, 0, &bo->base._resv,
 			       &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
 	if (WARN_ON(r))
 		goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9024dde0c5a7..d7f041e43eca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2224,10 +2224,12 @@ u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
 	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 	u32 used_windows, reserved_windows;
+	u32 num_clear_entities;
 	uint64_t size;
-	int r;
+	int r, i, j;
 
-	reserved_windows = 3;
+	num_clear_entities = adev->sdma.num_instances;
+	reserved_windows = 2 + num_clear_entities;
 
 	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
 	    adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
@@ -2250,21 +2252,11 @@ u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 					  1, NULL);
 		if (r) {
 			dev_err(adev->dev,
-				"Failed setting up TTM BO move entity (%d)\n",
+				"Failed setting up TTM BO eviction entity (%d)\n",
 				r);
 			return 0;
 		}
 
-		r = drm_sched_entity_init(&adev->mman.clear_entity.base,
-					  DRM_SCHED_PRIORITY_NORMAL, &sched,
-					  1, NULL);
-		if (r) {
-			dev_err(adev->dev,
-				"Failed setting up TTM BO clear entity (%d)\n",
-				r);
-			goto error_free_entity;
-		}
-
 		r = drm_sched_entity_init(&adev->mman.move_entity.base,
 					  DRM_SCHED_PRIORITY_NORMAL, &sched,
 					  1, NULL);
@@ -2272,26 +2264,48 @@ u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 			dev_err(adev->dev,
 				"Failed setting up TTM BO move entity (%d)\n",
 				r);
-			drm_sched_entity_destroy(&adev->mman.clear_entity.base);
 			goto error_free_entity;
 		}
 
+		adev->mman.num_clear_entities = num_clear_entities;
+		adev->mman.clear_entities = kcalloc(num_clear_entities,
+						    sizeof(struct amdgpu_ttm_buffer_entity),
+						    GFP_KERNEL);
+		if (!adev->mman.clear_entities)
+			goto error_free_entity;
+
+		for (i = 0; i < num_clear_entities; i++) {
+			r = drm_sched_entity_init(&adev->mman.clear_entities[i].base,
+						  DRM_SCHED_PRIORITY_NORMAL, &sched,
+						  1, NULL);
+			if (r) {
+				for (j = 0; j < i; j++)
+					drm_sched_entity_destroy(
+						&adev->mman.clear_entities[j].base);
+				kfree(adev->mman.clear_entities);
+				goto error_free_entity;
+			}
+		}
+
 		/* Statically assign GART windows to each entity. */
 		used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.default_entity,
 							     0, false, false);
 		used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.move_entity,
 							     used_windows, true, true);
-		used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.clear_entity,
-							     used_windows, false, true);
+		for (i = 0; i < num_clear_entities; i++)
+			used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.clear_entities[i],
+								     used_windows, false, true);
 		WARN_ON(used_windows != reserved_windows);
 	} else {
 		drm_sched_entity_destroy(&adev->mman.default_entity.base);
-		drm_sched_entity_destroy(&adev->mman.clear_entity.base);
 		drm_sched_entity_destroy(&adev->mman.move_entity.base);
+		for (i = 0; i < num_clear_entities; i++)
+			drm_sched_entity_destroy(&adev->mman.clear_entities[i].base);
 		/* Drop all the old fences since re-creating the scheduler entities
 		 * will allocate new contexts.
 		 */
 		ttm_resource_manager_cleanup(man);
+		kfree(adev->mman.clear_entities);
 	}
 
 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -2456,8 +2470,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_device *adev,
 
 	if (!fence)
 		return -EINVAL;
-
-	entity = &adev->mman.clear_entity;
+	entity = &adev->mman.clear_entities[0];
 	*fence = dma_fence_get_stub();
 
 	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 0b3b03f43bab..250ef54a5550 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -72,8 +72,9 @@ struct amdgpu_mman {
 	struct mutex				gtt_window_lock;
 
 	struct amdgpu_ttm_buffer_entity default_entity; /* has no gart windows */
-	struct amdgpu_ttm_buffer_entity clear_entity;
 	struct amdgpu_ttm_buffer_entity move_entity;
+	struct amdgpu_ttm_buffer_entity *clear_entities;
+	u32 num_clear_entities;
 
 	struct amdgpu_vram_mgr vram_mgr;
 	struct amdgpu_gtt_mgr gtt_mgr;
-- 
2.43.0
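
As a purely illustrative aside, not part of the series: with the clear_entities array in place, a follow-up change could spread clear jobs across the entities, for example with a simple round-robin pick. Everything in the sketch below except clear_entities and num_clear_entities is an assumption made for the example.

	/*
	 * Illustrative sketch only, not part of this series: spread clear
	 * work across the new entities with a trivial round-robin pick.
	 * The static counter is assumed for the example; callers are
	 * assumed to run only while buffer funcs are enabled, so
	 * num_clear_entities is non-zero.
	 */
	static struct amdgpu_ttm_buffer_entity *
	amdgpu_ttm_pick_clear_entity(struct amdgpu_device *adev)
	{
		static atomic_t next_clear_entity;
		u32 idx;

		idx = (u32)atomic_inc_return(&next_clear_entity) %
		      adev->mman.num_clear_entities;
		return &adev->mman.clear_entities[idx];
	}
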
Re: [PATCH v3 20/28] drm/amdgpu: allocate multiple clear entities
Posted by Christian König 1 week, 3 days ago

On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> No functional change for now, as we always use entity 0.
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> [...]
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 9024dde0c5a7..d7f041e43eca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2224,10 +2224,12 @@ u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>  {
>  	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
>  	u32 used_windows, reserved_windows;
> +	u32 num_clear_entities;
>  	uint64_t size;
> -	int r;
> +	int r, i, j;
>  
> -	reserved_windows = 3;
> +	num_clear_entities = adev->sdma.num_instances;

Actually, each SDMA instance has two ring buffers.

In addition to that, using adev->sdma.num_instances is not a good idea here. That should probably rather come from the copy buffer funcs.

Regards,
Christian.
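
As a purely illustrative follow-up to the comment above: one possible shape would be to let the buffer funcs report the count. The num_clear_entities member of amdgpu_buffer_funcs is assumed for this sketch and does not exist in the driver today.

	/*
	 * Hypothetical sketch following the review comment above: let the
	 * buffer funcs report how many clear entities are worth creating.
	 * The num_clear_entities member is assumed for the example only.
	 */
	static u32 amdgpu_ttm_get_num_clear_entities(struct amdgpu_device *adev)
	{
		const struct amdgpu_buffer_funcs *funcs = adev->mman.buffer_funcs;

		/* Fall back to a single entity if the IP does not report a count. */
		if (!funcs || !funcs->num_clear_entities)
			return 1;

		return funcs->num_clear_entities;
	}
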
