[PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Pierre-Eric Pelloux-Prayer 1 week, 3 days ago
Until now TTM stored a single pipelined eviction fence, which meant
drivers had to use a single entity for these evictions.

To lift this requirement, this commit allows up to 8 entities to
be used.

Ideally a dma_resv object would have been used as a container for
the eviction fences, but the locking rules make this complex: all
dma_resv objects share the same ww_class, so locking one after
ww_acquire_done() has been called triggers the "Attempting to lock
more mutexes after ww_acquire_done" error.
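
For reference, the forbidden pattern looks roughly like this
(illustrative only, not code from this series; "man_resv" stands for a
hypothetical per-manager dma_resv):

  struct ww_acquire_ctx ticket;

  ww_acquire_init(&ticket, &reservation_ww_class);
  dma_resv_lock(bo->base.resv, &ticket);  /* lock the BOs of the job */
  ww_acquire_done(&ticket);               /* no more locks may join the ticket */
  ...
  dma_resv_lock(man_resv, &ticket);       /* a per-manager resv locked here
                                           * would trigger the warning above */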

One alternative considered was to introduce a 2nd ww_class for these
specific resvs, used to hold a single "transient" lock (i.e. the resv
lock would only be held for a short period, without taking any other
locks).

The other option, implemented here, is to statically reserve a fence
array and extend the existing code to deal with N fences instead of 1.

The driver is still responsible for reserving the correct number of
fence slots.
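
The resulting bookkeeping is small. Simplified from the
ttm_bo_move_pipeline_evict() hunk below (locking and the overflow
fallback omitted; store_eviction_fence() is only a name for this
sketch, the patch open-codes it): each fence context owns at most one
slot, and a newer fence from the same context replaces the older one.

  static void store_eviction_fence(struct ttm_resource_manager *man,
                                   struct dma_fence *fence)
  {
          int i;

          for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
                  struct dma_fence *tmp = man->eviction_fences[i];

                  if (!tmp)
                          break;            /* first free slot */
                  if (fence->context != tmp->context)
                          continue;         /* slot owned by another entity */
                  if (!dma_fence_is_later(fence, tmp))
                          return;           /* already have a newer fence */
                  dma_fence_put(tmp);       /* replace the older fence */
                  break;
          }
          if (i < TTM_NUM_MOVE_FENCES)
                  man->eviction_fences[i] = dma_fence_get(fence);
  }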

---
v2:
- simplified code
- dropped n_fences
- name changes
v3: use ttm_resource_manager_cleanup
---

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  | 11 +++--
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  5 +-
 drivers/gpu/drm/ttm/ttm_bo.c                  | 47 ++++++++++---------
 drivers/gpu/drm/ttm/ttm_bo_util.c             | 38 ++++++++++++---
 drivers/gpu/drm/ttm/ttm_resource.c            | 31 +++++++-----
 include/drm/ttm/ttm_resource.h                | 29 ++++++++----
 6 files changed, 104 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 3148f5d3dbd6..8f71906c4238 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -651,7 +651,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
 	int err;
 
 	man = ttm_manager_type(priv->ttm_dev, mem_type);
-	man->move = dma_fence_get_stub();
+	man->eviction_fences[0] = dma_fence_get_stub();
 
 	bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
 	bo->type = bo_type;
@@ -668,7 +668,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
 
 	ttm_bo_put(bo);
-	dma_fence_put(man->move);
+	dma_fence_put(man->eviction_fences[0]);
 }
 
 static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
@@ -732,9 +732,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 
 	spin_lock_init(&fence_lock);
 	man = ttm_manager_type(priv->ttm_dev, fst_mem);
-	man->move = alloc_mock_fence(test);
+	man->eviction_fences[0] = alloc_mock_fence(test);
 
-	task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
+	task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
 	if (IS_ERR(task))
 		KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
 
@@ -742,7 +742,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 	err = ttm_bo_validate(bo, placement_val, &ctx_val);
 	dma_resv_unlock(bo->base.resv);
 
-	dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
+	dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
+	man->eviction_fences[0] = NULL;
 
 	KUNIT_EXPECT_EQ(test, err, 0);
 	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index e6ea2bd01f07..c0e4e35e0442 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
 	struct ttm_resource_test_priv *priv = test->priv;
 	struct ttm_resource_manager *man;
 	size_t size = SZ_16K;
+	int i;
 
 	man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, man);
@@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
 	KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
 	KUNIT_ASSERT_EQ(test, man->size, size);
 	KUNIT_ASSERT_EQ(test, man->usage, 0);
-	KUNIT_ASSERT_NULL(test, man->move);
-	KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+		KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
 
 	for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4d9e68b21e7..0b3732ed6f6c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -658,34 +658,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
 EXPORT_SYMBOL(ttm_bo_unpin);
 
 /*
- * Add the last move fence to the BO as kernel dependency and reserve a new
- * fence slot.
+ * Add the pipelined eviction fences to the BO as kernel dependency and reserve new
+ * fence slots.
  */
-static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
-				 struct ttm_resource_manager *man,
-				 bool no_wait_gpu)
+static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
+						struct ttm_resource_manager *man,
+						bool no_wait_gpu)
 {
 	struct dma_fence *fence;
-	int ret;
+	int i;
 
-	spin_lock(&man->move_lock);
-	fence = dma_fence_get(man->move);
-	spin_unlock(&man->move_lock);
+	spin_lock(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		fence = man->eviction_fences[i];
+		if (!fence)
+			continue;
 
-	if (!fence)
-		return 0;
-
-	if (no_wait_gpu) {
-		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
-		dma_fence_put(fence);
-		return ret;
+		if (no_wait_gpu) {
+			if (!dma_fence_is_signaled(fence)) {
+				spin_unlock(&man->eviction_lock);
+				return -EBUSY;
+			}
+		} else {
+			dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
+		}
 	}
+	spin_unlock(&man->eviction_lock);
 
-	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
-
-	ret = dma_resv_reserve_fences(bo->base.resv, 1);
-	dma_fence_put(fence);
-	return ret;
+	/* TODO: this call should be removed. */
+	return dma_resv_reserve_fences(bo->base.resv, 1);
 }
 
 /**
@@ -718,7 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 	int i, ret;
 
 	ticket = dma_resv_locking_ctx(bo->base.resv);
-	ret = dma_resv_reserve_fences(bo->base.resv, 1);
+	ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
 	if (unlikely(ret))
 		return ret;
 
@@ -757,7 +758,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 				return ret;
 		}
 
-		ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
+		ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
 		if (unlikely(ret)) {
 			ttm_resource_free(bo, res);
 			if (ret == -EBUSY)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index acbbca9d5c92..2ff35d55e462 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	ret = dma_resv_trylock(&fbo->base.base._resv);
 	WARN_ON(!ret);
 
-	ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+	ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
 	if (ret) {
 		dma_resv_unlock(&fbo->base.base._resv);
 		kfree(fbo);
@@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
 {
 	struct ttm_device *bdev = bo->bdev;
 	struct ttm_resource_manager *from;
+	struct dma_fence *tmp;
+	int i;
 
 	from = ttm_manager_type(bdev, bo->resource->mem_type);
 
 	/**
 	 * BO doesn't have a TTM we need to bind/unbind. Just remember
-	 * this eviction and free up the allocation
+	 * this eviction and free up the allocation.
+	 * The fence will be saved in the first free slot or in the slot
+	 * already used to store a fence from the same context. Since
+	 * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
+	 * evictions we should always find a slot to use.
 	 */
-	spin_lock(&from->move_lock);
-	if (!from->move || dma_fence_is_later(fence, from->move)) {
-		dma_fence_put(from->move);
-		from->move = dma_fence_get(fence);
+	spin_lock(&from->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		tmp = from->eviction_fences[i];
+		if (!tmp)
+			break;
+		if (fence->context != tmp->context)
+			continue;
+		if (dma_fence_is_later(fence, tmp)) {
+			dma_fence_put(tmp);
+			break;
+		}
+		goto unlock;
+	}
+	if (i < TTM_NUM_MOVE_FENCES) {
+		from->eviction_fences[i] = dma_fence_get(fence);
+	} else {
+		WARN(1, "not enough fence slots for all fence contexts");
+		spin_unlock(&from->eviction_lock);
+		dma_fence_wait(fence, false);
+		goto end;
 	}
-	spin_unlock(&from->move_lock);
 
+unlock:
+	spin_unlock(&from->eviction_lock);
+end:
 	ttm_resource_free(bo, &bo->resource);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index e2c82ad07eb4..62c34cafa387 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -523,14 +523,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
 {
 	unsigned i;
 
-	spin_lock_init(&man->move_lock);
 	man->bdev = bdev;
 	man->size = size;
 	man->usage = 0;
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		INIT_LIST_HEAD(&man->lru[i]);
-	man->move = NULL;
+	spin_lock_init(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+		man->eviction_fences[i] = NULL;
 }
 EXPORT_SYMBOL(ttm_resource_manager_init);
 
@@ -551,7 +552,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 	};
 	struct dma_fence *fence;
-	int ret;
+	int ret, i;
 
 	do {
 		ret = ttm_bo_evict_first(bdev, man, &ctx);
@@ -561,18 +562,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	if (ret && ret != -ENOENT)
 		return ret;
 
-	spin_lock(&man->move_lock);
-	fence = dma_fence_get(man->move);
-	spin_unlock(&man->move_lock);
+	ret = 0;
 
-	if (fence) {
-		ret = dma_fence_wait(fence, false);
-		dma_fence_put(fence);
-		if (ret)
-			return ret;
+	spin_lock(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		fence = man->eviction_fences[i];
+		if (fence && !dma_fence_is_signaled(fence)) {
+			dma_fence_get(fence);
+			spin_unlock(&man->eviction_lock);
+			ret = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			if (ret)
+				return ret;
+			spin_lock(&man->eviction_lock);
+		}
 	}
+	spin_unlock(&man->eviction_lock);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(ttm_resource_manager_evict_all);
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index f49daa504c36..50e6added509 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -50,6 +50,15 @@ struct io_mapping;
 struct sg_table;
 struct scatterlist;
 
+/**
+ * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions
+ *
+ * Pipelined evictions can be spread on multiple entities. This
+ * is the max number of entities that can be used by the driver
+ * for that purpose.
+ */
+#define TTM_NUM_MOVE_FENCES 8
+
 /**
  * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
  */
@@ -180,8 +189,8 @@ struct ttm_resource_manager_func {
  * @size: Size of the managed region.
  * @bdev: ttm device this manager belongs to
  * @func: structure pointer implementing the range manager. See above
- * @move_lock: lock for move fence
- * @move: The fence of the last pipelined move operation.
+ * @eviction_lock: lock for eviction fences
+ * @eviction_fences: The fences of the last pipelined eviction operations.
  * @lru: The lru list for this memory type.
  *
  * This structure is used to identify and manage memory types for a device.
@@ -195,12 +204,12 @@ struct ttm_resource_manager {
 	struct ttm_device *bdev;
 	uint64_t size;
 	const struct ttm_resource_manager_func *func;
-	spinlock_t move_lock;
 
-	/*
-	 * Protected by @move_lock.
+	/* This is very similar to a dma_resv object, but locking rules make
+	 * it difficult to use one in this context.
 	 */
-	struct dma_fence *move;
+	spinlock_t eviction_lock;
+	struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES];
 
 	/*
 	 * Protected by the bdev->lru_lock.
@@ -421,8 +430,12 @@ static inline bool ttm_resource_manager_used(struct ttm_resource_manager *man)
 static inline void
 ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
 {
-	dma_fence_put(man->move);
-	man->move = NULL;
+	int i;
+
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		dma_fence_put(man->eviction_fences[i]);
+		man->eviction_fences[i] = NULL;
+	}
 }
 
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
-- 
2.43.0
Re: [PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Christian König 1 week, 3 days ago
On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> [...]
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

Going to push separately to drm-misc-next on Monday.

Regards,
Christian.

Re: [PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Maarten Lankhorst 5 days, 9 hours ago
Hey,

On 2025-11-21 16:12, Christian König wrote:
> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>> [...]
> 
> Reviewed-by: Christian König <christian.koenig@amd.com>
> 
> Going to push separately to drm-misc-next on Monday.
> 
Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.

Kind regards,
~Maarten Lankhorst
Re: [PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Christian König 5 days, 9 hours ago
On 11/26/25 16:34, Maarten Lankhorst wrote:
> Hey,
> 
> On 2025-11-21 16:12, Christian König wrote:
>> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>>> [...]
>>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> Going to push separately to drm-misc-next on Monday.
>>
> Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.

Thanks for the note! But hui? We changed amdgpu to not touch the move fence.

Give me a second.

Thanks,
Christian.

> 
> Kind regards,
> ~Maarten Lankhorst

Re: [PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Maarten Lankhorst 5 days, 9 hours ago
Hey,

On 2025-11-26 16:36, Christian König wrote:
> On 11/26/25 16:34, Maarten Lankhorst wrote:
>> Hey,
>>
>> On 2025-11-21 16:12, Christian König wrote:
>>> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>>>> [...]
>>>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>
>>> Going to push separately to drm-misc-next on Monday.
>>>
>> Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.
> 
> Thanks for the note! But hui? We changed amdgpu to not touch the move fence.
> 
> Give me a second.

commit 13bec21f5f4cdabdf06725e5a8dee0b9b56ff671 (HEAD -> drm-tip, drm-tip/drm-tip, drm-tip/HEAD)
Author: Christian König <christian.koenig@amd.com>
Date:   Wed Nov 26 13:13:03 2025 +0100

    drm-tip: 2025y-11m-26d-12h-12m-41s UTC integration manifest

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2188:34: error: ‘struct ttm_resource_manager’ has no member named ‘move’
 2188 |                 dma_fence_put(man->move);
      |                                  ^~
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2189:20: error: ‘struct ttm_resource_manager’ has no member named ‘move’
 2189 |                 man->move = NULL;
      |                    ^~

Is what I see.

Kind regards,
~Maarten Lankhorst
Re: [PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Christian König 5 days, 9 hours ago
On 11/26/25 16:39, Maarten Lankhorst wrote:
> Hey,
> 
> On 2025-11-26 16:36, Christian König wrote:
>> On 11/26/25 16:34, Maarten Lankhorst wrote:
>>> Hey,
>>>
>>> On 2025-11-21 16:12, Christian König wrote:
>>>> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>>>>> [...]
>>>>
>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>
>>>> Going to push separately to drm-misc-next on Monday.
>>>>
>>> Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.
>>
>> Thanks for the note! But hui? We changed amdgpu to not touch the move fence.
>>
>> Give me a second.
> 
> commit 13bec21f5f4cdabdf06725e5a8dee0b9b56ff671 (HEAD -> drm-tip, drm-tip/drm-tip, drm-tip/HEAD)
> Author: Christian König <christian.koenig@amd.com>
> Date:   Wed Nov 26 13:13:03 2025 +0100
> 
>     drm-tip: 2025y-11m-26d-12h-12m-41s UTC integration manifest
> 
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2188:34: error: ‘struct ttm_resource_manager’ has no member named ‘move’
>  2188 |                 dma_fence_put(man->move);
>       |                                  ^~
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2189:20: error: ‘struct ttm_resource_manager’ has no member named ‘move’
>  2189 |                 man->move = NULL;
>       |                    ^~
> 
> Is what I see.

Ah, crap, I know what's going on.

The patch to remove those lines is queued up to go upstream through amd-staging-drm-next instead of drm-misc-next.
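
For reference, the queued change just drops that leftover open-coded
teardown, roughly along these lines (a sketch, not the exact
amd-staging-drm-next commit):

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ ... @@
-		dma_fence_put(man->move);
-		man->move = NULL;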

I will push this patch to drm-misc-next and sync up with Alex that it shouldn't go upstream through amd-staging-drm-next.

Going to build test drm-tip the next time.

Thanks,
Christian.


> 
> Kind regards,
> ~Maarten Lankhorst

Re: [PATCH v3 19/28] drm/ttm: rework pipelined eviction fence handling
Posted by Maarten Lankhorst 5 days, 8 hours ago
Hey,

On 2025-11-26 16:48, Christian König wrote:
> On 11/26/25 16:39, Maarten Lankhorst wrote:
>> Hey,
>>
>> On 2025-11-26 16:36, Christian König wrote:
>>> On 11/26/25 16:34, Maarten Lankhorst wrote:
>>>> Hey,
>>>>
>>>> On 2025-11-21 16:12, Christian König wrote:
>>>>> On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
>>>>>> [...]
>>>>>
>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>
>>>>> Going to push separately to drm-misc-next on Monday.
>>>>>
>>>> Pushing this broke drm-tip, the amd driver fails to build, as it's not using the eviction_fences array.
>>>
>>> Thanks for the note! But hui? We changed amdgpu to not touch the move fence.
>>>
>>> Give me a second.
>> 
>> commit 13bec21f5f4cdabdf06725e5a8dee0b9b56ff671 (HEAD -> drm-tip, drm-tip/drm-tip, drm-tip/HEAD)
>> Author: Christian König <christian.koenig@amd.com>
>> Date:   Wed Nov 26 13:13:03 2025 +0100
>>
>>     drm-tip: 2025y-11m-26d-12h-12m-41s UTC integration manifest
>>
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2188:34: error: ‘struct ttm_resource_manager’ has no member named ‘move’
>>  2188 |                 dma_fence_put(man->move);
>>       |                                  ^~
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:2189:20: error: ‘struct ttm_resource_manager’ has no member named ‘move’
>>  2189 |                 man->move = NULL;
>>       |                    ^~
>>
>> Is what I see.
> 
> Ah, crap, I know what's going on.
> 
> The patch to remove those lines is queued up to go upstream through amd-staging-drm-next instead of drm-misc-next.
> 
> I will push this patch to drm-misc-next and sync up with Alex that it shouldn't go upstream through amd-staging-drm-next.
> 
> Going to build test drm-tip the next time.
Thank you, drm-tip now builds cleanly again!

> 
> Thanks,
> Christian.
> 
> 

Kind regards,
~Maarten Lankhorst