Until now ttm stored a single pipelined eviction fence, which meant
drivers had to use a single entity for these evictions.

To lift this requirement, this commit allows up to 8 entities to
be used.

Ideally a dma_resv object would have been used as a container for
the eviction fences, but the locking rules make this complex: all
dma_resv objects share the same ww_class, which means "Attempting
to lock more mutexes after ww_acquire_done." is an error.

One alternative considered was to introduce a 2nd ww_class for
specific resv objects holding a single "transient" lock (= the
resv lock would only be held for a short period, without taking
any other locks).

The other option is to statically reserve a fence array and extend
the existing code to deal with N fences instead of 1.

The driver is still responsible for reserving the correct number
of fence slots.
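
For illustration, a driver could spread its pipelined eviction copies
over several scheduler entities, for example round-robin. The sketch
below is hypothetical driver-side code and not part of this patch;
NUM_EVICT_ENTITIES, struct my_device and its fields are made up:

	#include <linux/atomic.h>
	#include <drm/gpu_scheduler.h>

	/* Must not exceed TTM_NUM_MOVE_FENCES. */
	#define NUM_EVICT_ENTITIES 4

	struct my_device {
		struct drm_sched_entity evict_entity[NUM_EVICT_ENTITIES];
		atomic_t evict_entity_idx;
	};

	static struct drm_sched_entity *pick_evict_entity(struct my_device *mdev)
	{
		/*
		 * Each entity has its own dma_fence context, so TTM can
		 * keep one eviction fence slot per entity.
		 */
		unsigned int idx = atomic_inc_return(&mdev->evict_entity_idx);

		return &mdev->evict_entity[idx % NUM_EVICT_ENTITIES];
	}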
---
v2:
- simplified code
- dropped n_fences
- name changes
v3: use ttm_resource_manager_cleanup
---
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
.../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 11 +++--
drivers/gpu/drm/ttm/tests/ttm_resource_test.c | 5 +-
drivers/gpu/drm/ttm/ttm_bo.c | 47 ++++++++++---------
drivers/gpu/drm/ttm/ttm_bo_util.c | 38 ++++++++++++---
drivers/gpu/drm/ttm/ttm_resource.c | 31 +++++++-----
include/drm/ttm/ttm_resource.h | 29 ++++++++----
6 files changed, 104 insertions(+), 57 deletions(-)
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 3148f5d3dbd6..8f71906c4238 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -651,7 +651,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
int err;
man = ttm_manager_type(priv->ttm_dev, mem_type);
- man->move = dma_fence_get_stub();
+ man->eviction_fences[0] = dma_fence_get_stub();
bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
bo->type = bo_type;
@@ -668,7 +668,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
ttm_bo_put(bo);
- dma_fence_put(man->move);
+ dma_fence_put(man->eviction_fences[0]);
}
static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
@@ -732,9 +732,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
spin_lock_init(&fence_lock);
man = ttm_manager_type(priv->ttm_dev, fst_mem);
- man->move = alloc_mock_fence(test);
+ man->eviction_fences[0] = alloc_mock_fence(test);
- task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
+ task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
if (IS_ERR(task))
KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
@@ -742,7 +742,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
err = ttm_bo_validate(bo, placement_val, &ctx_val);
dma_resv_unlock(bo->base.resv);
- dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
+ man->eviction_fences[0] = NULL;
KUNIT_EXPECT_EQ(test, err, 0);
KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index e6ea2bd01f07..c0e4e35e0442 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
struct ttm_resource_test_priv *priv = test->priv;
struct ttm_resource_manager *man;
size_t size = SZ_16K;
+ int i;
man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
KUNIT_ASSERT_NOT_NULL(test, man);
@@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
KUNIT_ASSERT_EQ(test, man->size, size);
KUNIT_ASSERT_EQ(test, man->usage, 0);
- KUNIT_ASSERT_NULL(test, man->move);
- KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+ KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4d9e68b21e7..0b3732ed6f6c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -658,34 +658,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
EXPORT_SYMBOL(ttm_bo_unpin);
/*
- * Add the last move fence to the BO as kernel dependency and reserve a new
- * fence slot.
+ * Add the pipelined eviction fences to the BO as kernel dependencies and reserve new
+ * fence slots.
*/
-static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
- struct ttm_resource_manager *man,
- bool no_wait_gpu)
+static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
+ struct ttm_resource_manager *man,
+ bool no_wait_gpu)
{
struct dma_fence *fence;
- int ret;
+ int i;
- spin_lock(&man->move_lock);
- fence = dma_fence_get(man->move);
- spin_unlock(&man->move_lock);
+ spin_lock(&man->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ fence = man->eviction_fences[i];
+ if (!fence)
+ continue;
- if (!fence)
- return 0;
-
- if (no_wait_gpu) {
- ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
- dma_fence_put(fence);
- return ret;
+ if (no_wait_gpu) {
+ if (!dma_fence_is_signaled(fence)) {
+ spin_unlock(&man->eviction_lock);
+ return -EBUSY;
+ }
+ } else {
+ dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
+ }
}
+ spin_unlock(&man->eviction_lock);
- dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
-
- ret = dma_resv_reserve_fences(bo->base.resv, 1);
- dma_fence_put(fence);
- return ret;
+ /* TODO: this call should be removed. */
+ return dma_resv_reserve_fences(bo->base.resv, 1);
}
/**
@@ -718,7 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
int i, ret;
ticket = dma_resv_locking_ctx(bo->base.resv);
- ret = dma_resv_reserve_fences(bo->base.resv, 1);
+ ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
if (unlikely(ret))
return ret;
@@ -757,7 +758,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
return ret;
}
- ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
+ ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
if (unlikely(ret)) {
ttm_resource_free(bo, res);
if (ret == -EBUSY)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index acbbca9d5c92..2ff35d55e462 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
ret = dma_resv_trylock(&fbo->base.base._resv);
WARN_ON(!ret);
- ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+ ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
if (ret) {
dma_resv_unlock(&fbo->base.base._resv);
kfree(fbo);
@@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
{
struct ttm_device *bdev = bo->bdev;
struct ttm_resource_manager *from;
+ struct dma_fence *tmp;
+ int i;
from = ttm_manager_type(bdev, bo->resource->mem_type);
/**
* BO doesn't have a TTM we need to bind/unbind. Just remember
- * this eviction and free up the allocation
+ * this eviction and free up the allocation.
+ * The fence will be saved in the first free slot or in the slot
+ * already used to store a fence from the same context. Since
+ * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
+ * evictions we should always find a slot to use.
*/
- spin_lock(&from->move_lock);
- if (!from->move || dma_fence_is_later(fence, from->move)) {
- dma_fence_put(from->move);
- from->move = dma_fence_get(fence);
+ spin_lock(&from->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ tmp = from->eviction_fences[i];
+ if (!tmp)
+ break;
+ if (fence->context != tmp->context)
+ continue;
+ if (dma_fence_is_later(fence, tmp)) {
+ dma_fence_put(tmp);
+ break;
+ }
+ goto unlock;
+ }
+ if (i < TTM_NUM_MOVE_FENCES) {
+ from->eviction_fences[i] = dma_fence_get(fence);
+ } else {
+ WARN(1, "not enough fence slots for all fence contexts");
+ spin_unlock(&from->eviction_lock);
+ dma_fence_wait(fence, false);
+ goto end;
}
- spin_unlock(&from->move_lock);
+unlock:
+ spin_unlock(&from->eviction_lock);
+end:
ttm_resource_free(bo, &bo->resource);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index e2c82ad07eb4..62c34cafa387 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -523,14 +523,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
{
unsigned i;
- spin_lock_init(&man->move_lock);
man->bdev = bdev;
man->size = size;
man->usage = 0;
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
INIT_LIST_HEAD(&man->lru[i]);
- man->move = NULL;
+ spin_lock_init(&man->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+ man->eviction_fences[i] = NULL;
}
EXPORT_SYMBOL(ttm_resource_manager_init);
@@ -551,7 +552,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
};
struct dma_fence *fence;
- int ret;
+ int ret, i;
do {
ret = ttm_bo_evict_first(bdev, man, &ctx);
@@ -561,18 +562,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
if (ret && ret != -ENOENT)
return ret;
- spin_lock(&man->move_lock);
- fence = dma_fence_get(man->move);
- spin_unlock(&man->move_lock);
+ ret = 0;
- if (fence) {
- ret = dma_fence_wait(fence, false);
- dma_fence_put(fence);
- if (ret)
- return ret;
+ spin_lock(&man->eviction_lock);
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ fence = man->eviction_fences[i];
+ if (fence && !dma_fence_is_signaled(fence)) {
+ dma_fence_get(fence);
+ spin_unlock(&man->eviction_lock);
+ ret = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ if (ret)
+ return ret;
+ spin_lock(&man->eviction_lock);
+ }
}
+ spin_unlock(&man->eviction_lock);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ttm_resource_manager_evict_all);
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index f49daa504c36..50e6added509 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -50,6 +50,15 @@ struct io_mapping;
struct sg_table;
struct scatterlist;
+/**
+ * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions
+ *
+ * Pipelined evictions can be spread on multiple entities. This
+ * is the max number of entities that can be used by the driver
+ * for that purpose.
+ */
+#define TTM_NUM_MOVE_FENCES 8
+
/**
* enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
*/
@@ -180,8 +189,8 @@ struct ttm_resource_manager_func {
* @size: Size of the managed region.
* @bdev: ttm device this manager belongs to
* @func: structure pointer implementing the range manager. See above
- * @move_lock: lock for move fence
- * @move: The fence of the last pipelined move operation.
+ * @eviction_lock: lock for eviction fences
+ * @eviction_fences: The fences of the last pipelined move operation.
* @lru: The lru list for this memory type.
*
* This structure is used to identify and manage memory types for a device.
@@ -195,12 +204,12 @@ struct ttm_resource_manager {
struct ttm_device *bdev;
uint64_t size;
const struct ttm_resource_manager_func *func;
- spinlock_t move_lock;
- /*
- * Protected by @move_lock.
+ /* This is very similar to a dma_resv object, but locking rules make
+ * it difficult to use one in this context.
*/
- struct dma_fence *move;
+ spinlock_t eviction_lock;
+ struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES];
/*
* Protected by the bdev->lru_lock.
@@ -421,8 +430,12 @@ static inline bool ttm_resource_manager_used(struct ttm_resource_manager *man)
static inline void
ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
{
- dma_fence_put(man->move);
- man->move = NULL;
+ int i;
+
+ for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+ dma_fence_put(man->eviction_fences[i]);
+ man->eviction_fences[i] = NULL;
+ }
}
void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
--
2.43.0
On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> Until now ttm stored a single pipelined eviction fence which means
> drivers had to use a single entity for these evictions.
>
> To lift this requirement, this commit allows up to 8 entities to
> be used.
>
> Ideally a dma_resv object would have been used as a container of
> the eviction fences, but the locking rules makes it complex.
> dma_resv all have the same ww_class, which means "Attempting to
> lock more mutexes after ww_acquire_done." is an error.
>
> One alternative considered was to introduced a 2nd ww_class for
> specific resv to hold a single "transient" lock (= the resv lock
> would only be held for a short period, without taking any other
> locks).
>
> The other option, is to statically reserve a fence array, and
> extend the existing code to deal with N fences, instead of 1.
>
> The driver is still responsible to reserve the correct number
> of fence slots.
>
> ---
> v2:
> - simplified code
> - dropped n_fences
> - name changes
> v3: use ttm_resource_manager_cleanup
> ---
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Going to push separately to drm-misc-next on Monday.
Regards,
Christian.
Hey,

On 2025-11-21 at 16:12, Christian König wrote:
> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>> Until now ttm stored a single pipelined eviction fence which means
>> drivers had to use a single entity for these evictions.
>> [...]
>
> Reviewed-by: Christian König <christian.koenig@amd.com>
>
> Going to push separately to drm-misc-next on Monday.
>
Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.

Kind regards,
~Maarten Lankhorst
On 11/26/25 16:34, Maarten Lankhorst wrote:
> Hey,
>
> On 2025-11-21 at 16:12, Christian König wrote:
>> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>>> [...]
>>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> Going to push separately to drm-misc-next on Monday.
>>
> Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.

Thanks for the note! But hui? We changed amdgpu to not touch the move fence.

Give me a second.

Thanks,
Christian.

> Kind regards,
> ~Maarten Lankhorst
Hey,
On 2025-11-26 at 16:36, Christian König wrote:
> On 11/26/25 16:34, Maarten Lankhorst wrote:
>> Hey,
>>
>> On 2025-11-21 at 16:12, Christian König wrote:
>>> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>>>> [...]
>>>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>
>>> Going to push separately to drm-misc-next on Monday.
>>>
>> Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.
>
> Thanks for the note! But hui? We changed amdgpu to not touch the move fence.
>
> Give me a second.

commit 13bec21f5f4cdabdf06725e5a8dee0b9b56ff671 (HEAD -> drm-tip, drm-tip/drm-tip, drm-tip/HEAD)
Author: Christian König <christian.koenig@amd.com>
Date: Wed Nov 26 13:13:03 2025 +0100
drm-tip: 2025y-11m-26d-12h-12m-41s UTC integration manifest
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2188:34: error: ‘struct ttm_resource_manager’ has no member named ‘move’
2188 | dma_fence_put(man->move);
| ^~
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2189:20: error: ‘struct ttm_resource_manager’ has no member named ‘move’
2189 | man->move = NULL;
| ^~
Is what I see.
Kind regards,
~Maarten Lankhorst
On 11/26/25 16:39, Maarten Lankhorst wrote:
> Hey,
>
> On 2025-11-26 at 16:36, Christian König wrote:
>> [...]
>> Give me a second.
>
> commit 13bec21f5f4cdabdf06725e5a8dee0b9b56ff671 (HEAD -> drm-tip, drm-tip/drm-tip, drm-tip/HEAD)
> Author: Christian König <christian.koenig@amd.com>
> Date: Wed Nov 26 13:13:03 2025 +0100
>
> drm-tip: 2025y-11m-26d-12h-12m-41s UTC integration manifest
>
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2188:34: error: ‘struct ttm_resource_manager’ has no member named ‘move’
> 2188 | dma_fence_put(man->move);
> | ^~
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2189:20: error: ‘struct ttm_resource_manager’ has no member named ‘move’
> 2189 | man->move = NULL;
> | ^~
>
> Is what I see.

Ah, crap, I know what's going on.

The patch to remove those lines is queued up to go upstream through amd-staging-drm-next instead of drm-misc-next.

I will push this patch to drm-misc-next and sync up with Alex that it shouldn't go upstream through amd-staging-drm-next.

Going to build test drm-tip the next time.

Thanks,
Christian.

> Kind regards,
> ~Maarten Lankhorst
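
The amdgpu change routed through amd-staging-drm-next presumably drops
the open-coded move fence cleanup in favour of the
ttm_resource_manager_cleanup() helper this patch extends. A rough,
hypothetical sketch of that kind of change, which may differ from the
patch actually queued:

	/* Before (fails to build against this patch): */
	dma_fence_put(man->move);
	man->move = NULL;

	/* After (sketch): drops and clears every pipelined eviction fence. */
	ttm_resource_manager_cleanup(man);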
Hey,

On 2025-11-26 at 16:48, Christian König wrote:
> On 11/26/25 16:39, Maarten Lankhorst wrote:
>> [...]
>
> Ah, crap, I know what's going on.
>
> The patch to remove those lines is queued up to go upstream through amd-staging-drm-next instead of drm-misc-next.
>
> I will push this patch to drm-misc-next and sync up with Alex that it shouldn't go upstream through amd-staging-drm-next.
>
> Going to build test drm-tip the next time.

Thank you, drm-tip now builds cleanly again!

> Thanks,
> Christian.

Kind regards,
~Maarten Lankhorst