This way the caller can select the one it wants to use.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 75 +++++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 ++--
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
5 files changed, 60 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02c2479a8840..b59040a8771f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
stime = ktime_get();
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
+ r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
+ saddr, daddr, size, NULL, &fence,
false, 0);
if (r)
goto exit_do_move;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e08f58de4b17..c06c132a753c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
- AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
+ &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 42d448cd6a6d..c8d59ca2b3bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/**
* amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @entity: entity to run the window setup job
* @bo: buffer object to map
* @mem: memory object to map
* @mm_cur: range to map
@@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* Setup one of the GART windows to access a specific piece of memory or return
* the physical address for local memory.
*/
-static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
+ struct ttm_buffer_object *bo,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
unsigned int window, struct amdgpu_ring *ring,
@@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
+ r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job,
@@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
/**
* amdgpu_ttm_copy_mem_to_mem - Helper function for copy
* @adev: amdgpu device
+ * @entity: entity to run the jobs
* @src: buffer/address where to read from
* @dst: buffer/address where to write to
* @size: number of bytes to copy
@@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
*/
__attribute__((nonnull))
static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
@@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
/* Map src to window 0 and dst to window 1. */
- r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
+ r = amdgpu_ttm_map_buffer(entity,
+ src->bo, src->mem, &src_mm,
0, ring, tmz, &cur_size, &from);
if (r)
goto error;
- r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
+ r = amdgpu_ttm_map_buffer(entity,
+ dst->bo, dst->mem, &dst_mm,
1, ring, tmz, &cur_size, &to);
if (r)
goto error;
@@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
write_compress_disable));
}
- r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
&next, true, copy_flags);
if (r)
goto error;
@@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
src.offset = 0;
dst.offset = 0;
- r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+ r = amdgpu_ttm_copy_mem_to_mem(adev,
+ &adev->mman.move_entity.base,
+ &src, &dst,
new_mem->size,
amdgpu_bo_encrypted(abo),
bo->base.resv, &fence);
@@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
- false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+ r = amdgpu_fill_buffer(&adev->mman.move_entity,
+ abo, 0, NULL, &wipe_fence,
+ AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity,
unsigned int num_dw,
struct dma_resv *resv,
bool vm_needs_flush,
struct amdgpu_job **job,
- bool delayed, u64 k_job_id)
+ u64 k_job_id)
{
enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
int r;
- struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
- &adev->mman.move_entity.base;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job, k_job_id);
@@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
DMA_RESV_USAGE_BOOKKEEP);
}
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+ struct drm_sched_entity *entity,
+ uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
@@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, num_dw,
- resv, vm_needs_flush, &job, false,
+ r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
+ resv, vm_needs_flush, &job,
AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
@@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
return r;
}
-static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
+ struct drm_sched_entity *entity,
+ uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush, bool delayed,
+ bool vm_needs_flush,
u64 k_job_id)
{
struct amdgpu_device *adev = ring->adev;
@@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
- &job, delayed, k_job_id);
+ r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
+ vm_needs_flush, &job, k_job_id);
if (r)
return r;
@@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
/* Never clear more than 256MiB at once to avoid timeouts */
size = min(cursor.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
+ &bo->tbo, bo->tbo.resource, &cursor,
1, ring, false, &size, &addr);
if (r)
goto err;
- r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
- &next, true, true,
+ r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base, 0, addr, size, resv,
+ &next, true,
AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (r)
goto err;
@@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
return r;
}
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **f,
- bool delayed,
- u64 k_job_id)
+int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+ struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
/* Never fill more than 256MiB at once to avoid timeouts */
cur_size = min(dst.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+ r = amdgpu_ttm_map_buffer(&entity->base,
+ &bo->tbo, bo->tbo.resource, &dst,
1, ring, false, &cur_size, &to);
if (r)
goto error;
- r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true, delayed, k_job_id);
+ r = amdgpu_ttm_fill_mem(ring, &entity->base,
+ src_data, to, cur_size, resv,
+ &next, true, k_job_id);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d2295d6c2b67..e1655f86a016 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+ struct drm_sched_entity *entity,
+ uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
@@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_resv *resv,
struct dma_fence **fence);
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **fence,
- bool delayed,
- u64 k_job_id);
+int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+ struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d74ff6e90590..09756132fa1b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
goto out_unlock;
}
- r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+ r = amdgpu_copy_buffer(ring, &entity->base,
+ gart_s, gart_d, size * PAGE_SIZE,
NULL, &next, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
--
2.43.0
On 2025-11-13 11:05, Pierre-Eric Pelloux-Prayer wrote:
> This way the caller can select the one it wants to use.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
I agree with Christian's comment to eliminate the ring parameter where
it's implied by the entity. Other than that, the patch is
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
> [snip]
On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
> This way the caller can select the one it wants to use.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> [snip]
> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> + struct ttm_buffer_object *bo,
Probably better to split this patch into multiple patches.
One which changes amdgpu_ttm_map_buffer() and then another one or two for the higher level copy_buffer and fill_buffer functions.
> [snip]
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> + struct drm_sched_entity *entity,
If I'm not completely mistaken you should be able to drop the ring argument since that can be determined from the entity.
Apart from that looks rather good to me.
Regards,
Christian.
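
For illustration, a minimal sketch of this suggestion, assuming the entity is always created on a single SDMA scheduler so the ring can be recovered with amdgpu's to_amdgpu_ring() container_of() wrapper; the helper name below is hypothetical, and entity->rq locking is ignored here (it comes up later in the thread):

/* Sketch only: derive the ring from the entity so the separate ring
 * argument of amdgpu_copy_buffer() becomes redundant. Assumes the
 * entity was initialized on exactly one scheduler that is embedded
 * in an amdgpu_ring, and does not take entity->lock.
 */
static struct amdgpu_ring *
amdgpu_ttm_entity_to_ring(struct drm_sched_entity *entity)
{
	return to_amdgpu_ring(entity->rq->sched);
}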
On 14/11/2025 14:07, Christian König wrote:
> On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
>> This way the caller can select the one it wants to use.
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> [snip]
>> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
>> + struct ttm_buffer_object *bo,
>
>
> Probably better to split this patch into multiple patches.
>
> One which changes amdgpu_ttm_map_buffer() and then another one or two for the higher level copy_buffer and fill_buffer functions.
OK.
>
>> [snip]
>> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>> + struct drm_sched_entity *entity,
>
> If I'm not completely mistaken you should be able to drop the ring argument since that can be determined from the entity.
OK will do.
Pierre-Eric
On 14/11/2025 15:41, Pierre-Eric Pelloux-Prayer wrote:
>
>
> On 14/11/2025 14:07, Christian König wrote:
>> On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
>>> This way the caller can select the one it wants to use.
>>>
>>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>>> [snip]
>>> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>>> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
>>> + struct ttm_buffer_object *bo,
>>
>>
>> Probably better to split this patch into multiple patches.
>>
>> One which changes amdgpu_ttm_map_buffer() and then another one or two for the
>> higher level copy_buffer and fill_buffer functions.
>
> OK.
>
>>> [snip]
>>> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>>> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> + struct drm_sched_entity *entity,
>>
>> If I'm not completely mistaken you should be able to drop the ring argument
>> since that can be determined from the entity.
>
> OK will do.
>
AFAIU the only way to get the ring from the entity is to get it from the
drm_gpu_scheduler pointer. This would require adding a new function:
struct drm_gpu_scheduler *
drm_sched_entity_get_scheduler(struct drm_sched_entity *entity)
{
	/* NULL if the entity isn't attached to a run queue (anymore). */
	struct drm_gpu_scheduler *sched = NULL;

	spin_lock(&entity->lock);
	if (entity->rq)
		sched = entity->rq->sched;
	spin_unlock(&entity->lock);

	return sched;
}
Alternatively, I can access the ring from the buffer_funcs_ring /
buffer_funcs_sched stored in amdgpu_mman.
What do you think?
Thanks,
Pierre-Eric
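
For illustration, this is how the proposed accessor could be consumed on the amdgpu side; to_amdgpu_ring() is the existing container_of() wrapper from amdgpu_ring.h, while the wrapper below and its NULL fallback are hypothetical:

/* Sketch only: map the entity back to its ring via the proposed
 * drm_sched_entity_get_scheduler(). Returns NULL when the entity
 * has no run queue, letting callers fall back to
 * adev->mman.buffer_funcs_ring as mentioned above.
 */
static struct amdgpu_ring *
amdgpu_ttm_ring_from_entity(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched =
		drm_sched_entity_get_scheduler(entity);

	return sched ? to_amdgpu_ring(sched) : NULL;
}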