From nobody Sun Feb 8 15:59:21 2026
From: Philipp Stanner
To: Lyude Paul, Danilo Krummrich, David Airlie, Simona Vetter,
    Matthew Brost, Philipp Stanner, Christian König, Maarten Lankhorst,
    Maxime Ripard, Thomas Zimmermann, Sumit Semwal, Tvrtko Ursulin,
    Pierre-Eric Pelloux-Prayer
Cc: dri-devel@lists.freedesktop.org, nouveau@lists.freedesktop.org,
    linux-kernel@vger.kernel.org, linux-media@vger.kernel.org
Subject: [RFC PATCH 1/6] drm/sched: Avoid memory leaks with cancel_job() callback
Date: Tue, 3 Jun 2025 11:31:26 +0200
Message-ID: <20250603093130.100159-3-phasta@kernel.org>
In-Reply-To: <20250603093130.100159-2-phasta@kernel.org>
References: <20250603093130.100159-2-phasta@kernel.org>

Since its inception, the GPU scheduler can leak memory if the driver
calls drm_sched_fini() while there are still jobs in flight.

The simplest way to solve this in a backwards-compatible manner is by
adding a new callback, drm_sched_backend_ops.cancel_job(), which
instructs the driver to signal the hardware fence associated with the
job. Afterwards, the scheduler can safely use the established
free_job() callback for freeing the job.

Implement the new backend_ops callback cancel_job().
Suggested-by: Tvrtko Ursulin
Signed-off-by: Philipp Stanner
Acked-by: Tvrtko Ursulin
---
 drivers/gpu/drm/scheduler/sched_main.c | 34 ++++++++++++++++----------
 include/drm/gpu_scheduler.h            |  9 +++++++
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index d20726d7adf0..3f14f1e151fa 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1352,6 +1352,18 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_
 }
 EXPORT_SYMBOL(drm_sched_init);
 
+static void drm_sched_kill_remaining_jobs(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *job, *tmp;
+
+	/* All other accessors are stopped. No locking necessary. */
+	list_for_each_entry_safe_reverse(job, tmp, &sched->pending_list, list) {
+		sched->ops->cancel_job(job);
+		list_del(&job->list);
+		sched->ops->free_job(job);
+	}
+}
+
 /**
  * drm_sched_fini - Destroy a gpu scheduler
  *
@@ -1359,19 +1371,11 @@ EXPORT_SYMBOL(drm_sched_init);
  *
  * Tears down and cleans up the scheduler.
  *
- * This stops submission of new jobs to the hardware through
- * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job()
- * will not be called for all jobs still in drm_gpu_scheduler.pending_list.
- * There is no solution for this currently. Thus, it is up to the driver to make
- * sure that:
- *
- *  a) drm_sched_fini() is only called after for all submitted jobs
- *     drm_sched_backend_ops.free_job() has been called or that
- *  b) the jobs for which drm_sched_backend_ops.free_job() has not been called
- *     after drm_sched_fini() ran are freed manually.
- *
- * FIXME: Take care of the above problem and prevent this function from leaking
- * the jobs in drm_gpu_scheduler.pending_list under any circumstances.
+ * This stops submission of new jobs to the hardware through &struct
+ * drm_sched_backend_ops.run_job.
+ * If &struct drm_sched_backend_ops.cancel_job
+ * is implemented, all jobs will be canceled through it and afterwards cleaned
+ * up through &struct drm_sched_backend_ops.free_job. If cancel_job is not
+ * implemented, memory could leak.
  */
 void drm_sched_fini(struct drm_gpu_scheduler *sched)
 {
@@ -1401,6 +1405,10 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	/* Confirm no work left behind accessing device structures */
 	cancel_delayed_work_sync(&sched->work_tdr);
 
+	/* Avoid memory leaks if supported by the driver. */
+	if (sched->ops->cancel_job)
+		drm_sched_kill_remaining_jobs(sched);
+
 	if (sched->own_submit_wq)
 		destroy_workqueue(sched->submit_wq);
 	sched->ready = false;
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index e62a7214e052..81dcbfc8c223 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -512,6 +512,15 @@ struct drm_sched_backend_ops {
 	 * and it's time to clean it up.
 	 */
 	void (*free_job)(struct drm_sched_job *sched_job);
+
+	/**
+	 * @cancel_job: Used by the scheduler to guarantee remaining jobs' fences
+	 * get signaled in drm_sched_fini().
+	 *
+	 * Drivers need to signal the passed job's hardware fence with
+	 * -ECANCELED in this callback. They must not free the job.
+	 */
+	void (*cancel_job)(struct drm_sched_job *sched_job);
 };
 
 /**
-- 
2.49.0

From nobody Sun Feb 8 15:59:21 2026
From: Philipp Stanner
Subject: [RFC PATCH 2/6] drm/sched/tests: Implement cancel_job()
Date: Tue, 3 Jun 2025 11:31:27 +0200
Message-ID: <20250603093130.100159-4-phasta@kernel.org>
In-Reply-To: <20250603093130.100159-2-phasta@kernel.org>
References: <20250603093130.100159-2-phasta@kernel.org>

The GPU scheduler now provides a new callback to prevent memory leaks
on scheduler teardown. The callback is optional, but should be
implemented since it simplifies the cleanup code path. Moreover, the
unit tests serve as a resource for understanding the canonical usage
of the scheduler API and should therefore support the callback.

Provide the backend_ops callback cancel_job() in the unit tests.

This code is WIP and still buggy. Take it more as an RFC. It seems
that it interferes negatively with timeout handling, which is broken
in the sense of the timeout handler not signaling the hardware fence.
That should be repaired and cleaned up, but it's probably better to do
that in a separate series.
Signed-off-by: Philipp Stanner
---
 .../gpu/drm/scheduler/tests/mock_scheduler.c  | 71 +++++++------------
 drivers/gpu/drm/scheduler/tests/sched_tests.h |  4 +-
 2 files changed, 25 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
index 7f947ab9d322..33864b179704 100644
--- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
+++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
@@ -55,7 +55,7 @@ void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity)
 	drm_sched_entity_destroy(&entity->base);
 }
 
-static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job)
+static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job, int err)
 {
 	struct drm_mock_scheduler *sched =
 		drm_sched_to_mock_sched(job->base.sched);
@@ -63,8 +63,11 @@ static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job)
 	lockdep_assert_held(&sched->lock);
 
 	job->flags |= DRM_MOCK_SCHED_JOB_DONE;
-	list_move_tail(&job->link, &sched->done_list);
-	dma_fence_signal_locked(&job->hw_fence);
+	list_del(&job->link);
+	if (!dma_fence_is_signaled(&job->hw_fence)) {
+		dma_fence_set_error(&job->hw_fence, err);
+		dma_fence_signal(&job->hw_fence);
+	}
 	complete(&job->done);
 }
 
@@ -89,7 +92,7 @@ drm_mock_sched_job_signal_timer(struct hrtimer *hrtimer)
 			break;
 
 		sched->hw_timeline.cur_seqno = job->hw_fence.seqno;
-		drm_mock_sched_job_complete(job);
+		drm_mock_sched_job_complete(job, 0);
 	}
 	spin_unlock_irqrestore(&sched->lock, flags);
 
@@ -212,26 +215,33 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job)
 
 static void mock_sched_free_job(struct drm_sched_job *sched_job)
 {
-	struct drm_mock_scheduler *sched =
-		drm_sched_to_mock_sched(sched_job->sched);
 	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
-	unsigned long flags;
 
-	/* Remove from the scheduler done list. */
-	spin_lock_irqsave(&sched->lock, flags);
-	list_del(&job->link);
-	spin_unlock_irqrestore(&sched->lock, flags);
 	dma_fence_put(&job->hw_fence);
-
 	drm_sched_job_cleanup(sched_job);
 
 	/* Mock job itself is freed by the kunit framework. */
 }
 
+static void mock_sched_cancel_job(struct drm_sched_job *sched_job)
+{
+	struct drm_mock_scheduler *sched =
+		drm_sched_to_mock_sched(sched_job->sched);
+	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
+
+	hrtimer_cancel(&job->timer);
+
+	spin_lock_irq(&sched->lock);
+	if (!dma_fence_is_signaled(&job->hw_fence))
+		drm_mock_sched_job_complete(job, -ECANCELED);
+	spin_unlock_irq(&sched->lock);
+}
+
 static const struct drm_sched_backend_ops drm_mock_scheduler_ops = {
 	.run_job = mock_sched_run_job,
 	.timedout_job = mock_sched_timedout_job,
-	.free_job = mock_sched_free_job
+	.free_job = mock_sched_free_job,
+	.cancel_job = mock_sched_cancel_job,
 };
 
 /**
@@ -265,7 +275,6 @@ struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout)
 	sched->hw_timeline.context = dma_fence_context_alloc(1);
 	atomic_set(&sched->hw_timeline.next_seqno, 0);
 	INIT_LIST_HEAD(&sched->job_list);
-	INIT_LIST_HEAD(&sched->done_list);
 	spin_lock_init(&sched->lock);
 
 	return sched;
@@ -280,38 +289,6 @@ struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout)
  */
 void drm_mock_sched_fini(struct drm_mock_scheduler *sched)
 {
-	struct drm_mock_sched_job *job, *next;
-	unsigned long flags;
-	LIST_HEAD(list);
-
-	drm_sched_wqueue_stop(&sched->base);
-
-	/* Force complete all unfinished jobs. */
-	spin_lock_irqsave(&sched->lock, flags);
-	list_for_each_entry_safe(job, next, &sched->job_list, link)
-		list_move_tail(&job->link, &list);
-	spin_unlock_irqrestore(&sched->lock, flags);
-
-	list_for_each_entry(job, &list, link)
-		hrtimer_cancel(&job->timer);
-
-	spin_lock_irqsave(&sched->lock, flags);
-	list_for_each_entry_safe(job, next, &list, link)
-		drm_mock_sched_job_complete(job);
-	spin_unlock_irqrestore(&sched->lock, flags);
-
-	/*
-	 * Free completed jobs and jobs not yet processed by the DRM scheduler
-	 * free worker.
-	 */
-	spin_lock_irqsave(&sched->lock, flags);
-	list_for_each_entry_safe(job, next, &sched->done_list, link)
-		list_move_tail(&job->link, &list);
-	spin_unlock_irqrestore(&sched->lock, flags);
-
-	list_for_each_entry_safe(job, next, &list, link)
-		mock_sched_free_job(&job->base);
-
 	drm_sched_fini(&sched->base);
 }
 
@@ -346,7 +323,7 @@ unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched,
 		if (sched->hw_timeline.cur_seqno < job->hw_fence.seqno)
 			break;
 
-		drm_mock_sched_job_complete(job);
+		drm_mock_sched_job_complete(job, 0);
 		found++;
 	}
 unlock:
diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h
index fbba38137f0c..a905db835ccc 100644
--- a/drivers/gpu/drm/scheduler/tests/sched_tests.h
+++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
@@ -32,9 +32,8 @@
  *
  * @base: DRM scheduler base class
  * @test: Backpointer to owning the kunit test case
- * @lock: Lock to protect the simulated @hw_timeline, @job_list and @done_list
+ * @lock: Lock to protect the simulated @hw_timeline, @job_list
  * @job_list: List of jobs submitted to the mock GPU
- * @done_list: List of jobs completed by the mock GPU
 * @hw_timeline: Simulated hardware timeline has a @context, @next_seqno and
 *              @cur_seqno for implementing a struct dma_fence signaling the
 *              simulated job completion.
@@ -49,7 +48,6 @@ struct drm_mock_scheduler {
 
 	spinlock_t lock;
 	struct list_head job_list;
-	struct list_head done_list;
 
 	struct {
 		u64 context;
-- 
2.49.0

From nobody Sun Feb 8 15:59:21 2026
From: Philipp Stanner
Subject: [RFC PATCH 3/6] drm/sched: Warn if pending list is not empty
Date: Tue, 3 Jun 2025 11:31:28 +0200
Message-ID: <20250603093130.100159-5-phasta@kernel.org>
In-Reply-To: <20250603093130.100159-2-phasta@kernel.org>
References: <20250603093130.100159-2-phasta@kernel.org>

drm_sched_fini() can leak jobs under certain circumstances. Warn if
that happens.
Signed-off-by: Philipp Stanner
---
 drivers/gpu/drm/scheduler/sched_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 3f14f1e151fa..df12d5aaa1af 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1414,6 +1414,9 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	sched->ready = false;
 	kfree(sched->sched_rq);
 	sched->sched_rq = NULL;
+
+	if (!list_empty(&sched->pending_list))
+		dev_err(sched->dev, "Tearing down scheduler while jobs are pending!\n");
 }
 EXPORT_SYMBOL(drm_sched_fini);
 
-- 
2.49.0

From nobody Sun Feb 8 15:59:21 2026
From: Philipp Stanner
Subject: [RFC PATCH 4/6] drm/nouveau: Make fence container helper usable driver-wide
Date: Tue, 3 Jun 2025 11:31:29 +0200
Message-ID: <20250603093130.100159-6-phasta@kernel.org>
In-Reply-To: <20250603093130.100159-2-phasta@kernel.org>
References: <20250603093130.100159-2-phasta@kernel.org>

In order to implement a new DRM GPU scheduler callback in Nouveau, a
helper for obtaining a nouveau_fence from a dma_fence is necessary.
Such a helper exists already inside nouveau_fence.c, called
from_fence().

Make that helper available to other C files with a more precise name.
Signed-off-by: Philipp Stanner
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 20 +++++++-------------
 drivers/gpu/drm/nouveau/nouveau_fence.h |  6 ++++++
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index d5654e26d5bc..869d4335c0f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -38,12 +38,6 @@
 static const struct dma_fence_ops nouveau_fence_ops_uevent;
 static const struct dma_fence_ops nouveau_fence_ops_legacy;
 
-static inline struct nouveau_fence *
-from_fence(struct dma_fence *fence)
-{
-	return container_of(fence, struct nouveau_fence, base);
-}
-
 static inline struct nouveau_fence_chan *
 nouveau_fctx(struct nouveau_fence *fence)
 {
@@ -77,7 +71,7 @@ nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm)
 	    fence->ops != &nouveau_fence_ops_uevent)
 		return NULL;
 
-	return from_fence(fence);
+	return to_nouveau_fence(fence);
 }
 
 void
@@ -268,7 +262,7 @@ nouveau_fence_done(struct nouveau_fence *fence)
 static long
 nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait)
 {
-	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence *fence = to_nouveau_fence(f);
 	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
 	unsigned long t = jiffies, timeout = t + wait;
 
@@ -448,7 +442,7 @@ static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
 
 static const char *nouveau_fence_get_timeline_name(struct dma_fence *f)
 {
-	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence *fence = to_nouveau_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
 	return !fctx->dead ? fctx->name : "dead channel";
@@ -462,7 +456,7 @@ static const char *nouveau_fence_get_timeline_name(struct dma_fence *f)
  */
 static bool nouveau_fence_is_signaled(struct dma_fence *f)
 {
-	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence *fence = to_nouveau_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 	struct nouveau_channel *chan;
 	bool ret = false;
@@ -478,7 +472,7 @@ static bool nouveau_fence_is_signaled(struct dma_fence *f)
 
 static bool nouveau_fence_no_signaling(struct dma_fence *f)
 {
-	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence *fence = to_nouveau_fence(f);
 
 	/*
 	 * caller should have a reference on the fence,
@@ -503,7 +497,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f)
 
 static void nouveau_fence_release(struct dma_fence *f)
 {
-	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence *fence = to_nouveau_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
 	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
@@ -521,7 +515,7 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = {
 
 static bool nouveau_fence_enable_signaling(struct dma_fence *f)
 {
-	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence *fence = to_nouveau_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 	bool ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 8bc065acfe35..c3595c2197b5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -17,6 +17,12 @@ struct nouveau_fence {
 	unsigned long timeout;
 };
 
+static inline struct nouveau_fence *
+to_nouveau_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct nouveau_fence, base);
+}
+
 int  nouveau_fence_create(struct nouveau_fence **, struct nouveau_channel *);
 int  nouveau_fence_new(struct nouveau_fence **, struct nouveau_channel *);
 void nouveau_fence_unref(struct nouveau_fence **);
-- 
2.49.0

From nobody Sun Feb 8 15:59:21 2026
From: Philipp Stanner
Subject: [RFC PATCH 5/6] drm/nouveau: Add new callback for scheduler teardown
Date: Tue, 3 Jun 2025 11:31:30 +0200
Message-ID: <20250603093130.100159-7-phasta@kernel.org>
In-Reply-To: <20250603093130.100159-2-phasta@kernel.org>
References: <20250603093130.100159-2-phasta@kernel.org>

There is a new callback for always tearing the scheduler down in a
leak-free, deadlock-free manner.

Port Nouveau as its first user by providing the scheduler with a
callback that ensures the fence context gets killed in
drm_sched_fini().
Signed-off-by: Philipp Stanner
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 15 +++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
 drivers/gpu/drm/nouveau/nouveau_sched.c | 15 ++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 869d4335c0f4..1c30ce686c6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -240,6 +240,21 @@ nouveau_fence_emit(struct nouveau_fence *fence)
 	return ret;
 }
 
+void
+nouveau_fence_cancel(struct nouveau_fence *fence)
+{
+	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fctx->lock, flags);
+	if (!dma_fence_is_signaled(&fence->base)) {
+		dma_fence_set_error(&fence->base, -ECANCELED);
+		if (nouveau_fence_signal(fence))
+			nvif_event_block(&fctx->event);
+	}
+	spin_unlock_irqrestore(&fctx->lock, flags);
+}
+
 bool
 nouveau_fence_done(struct nouveau_fence *fence)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index c3595c2197b5..4d8f78cd6ebc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -29,6 +29,7 @@ void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *);
 bool nouveau_fence_done(struct nouveau_fence *);
+void nouveau_fence_cancel(struct nouveau_fence *fence);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
 int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 460a5fb02412..2ec62059c351 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -11,6 +11,7 @@
 #include "nouveau_exec.h"
 #include "nouveau_abi16.h"
 #include "nouveau_sched.h"
+#include "nouveau_chan.h"
"nouveau_chan.h" =20 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 =20 @@ -393,10 +394,23 @@ nouveau_sched_free_job(struct drm_sched_job *sched_jo= b) nouveau_job_fini(job); } =20 +static void +nouveau_sched_cancel_job(struct drm_sched_job *sched_job) +{ + struct nouveau_fence *fence; + struct nouveau_job *job; + + job =3D to_nouveau_job(sched_job); + fence =3D to_nouveau_fence(job->done_fence); + + nouveau_fence_cancel(fence); +} + static const struct drm_sched_backend_ops nouveau_sched_ops =3D { .run_job =3D nouveau_sched_run_job, .timedout_job =3D nouveau_sched_timedout_job, .free_job =3D nouveau_sched_free_job, + .cancel_job =3D nouveau_sched_cancel_job, }; =20 static int @@ -482,7 +496,6 @@ nouveau_sched_create(struct nouveau_sched **psched, str= uct nouveau_drm *drm, return 0; } =20 - static void nouveau_sched_fini(struct nouveau_sched *sched) { --=20 2.49.0 From nobody Sun Feb 8 15:59:21 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8254B280A4B; Tue, 3 Jun 2025 09:32:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748943142; cv=none; b=Q7vfkX9gsQvItg0cynFsnnlFQRIjD57r42OrdqEQBI3T87LQav6u2hjdZrT2WUDpQ5lpQ9nuR6JyMq19r10hW7J9ZjFnsUXeoLxcZckCagXo9FQRLdzOnJ++J40Tqgtu6k6kVdxH9SiwXzukDWnuHC4EnekEOQtZH5vWmRnxEIE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1748943142; c=relaxed/simple; bh=kFeQuslhr/3hnNLg6rE8C4RDktELrENqA7ibJjiDglQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Gq3Bg8HvpKufbcBlWZFicB6gHnyt6DV1Ro2IYpiKetiLDCP8EisYSqxx9h1JP06TAHbTI8nP5EdoGyWws9LClZdMlOnVY0kJmPVg/BPvCA7OxLjazLP7tylrLsvJfBkSt5O+hq/jeq30K2l4bqlhViKr/D4cQK5b6honh+qi7yA= 
From: Philipp Stanner
To: Lyude Paul, Danilo Krummrich, David Airlie, Simona Vetter,
    Matthew Brost, Philipp Stanner, Christian König,
    Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
    Sumit Semwal, Tvrtko Ursulin, Pierre-Eric Pelloux-Prayer
Cc: dri-devel@lists.freedesktop.org, nouveau@lists.freedesktop.org,
    linux-kernel@vger.kernel.org, linux-media@vger.kernel.org
Subject: [RFC PATCH 6/6] drm/nouveau: Remove waitque for sched teardown
Date: Tue, 3 Jun 2025 11:31:31 +0200
Message-ID: <20250603093130.100159-8-phasta@kernel.org>
In-Reply-To: <20250603093130.100159-2-phasta@kernel.org>
References: <20250603093130.100159-2-phasta@kernel.org>

struct nouveau_sched contains a waitqueue that prevents drm_sched_fini()
from being called while there are still jobs pending. Until now, calling
it early would have caused memory leaks.

With the new, leak-free mode of operation enabled in drm_sched_fini()
by providing the cancel_job() callback, the waitqueue is no longer
necessary.

Remove the waitqueue.

Signed-off-by: Philipp Stanner
---
 drivers/gpu/drm/nouveau/nouveau_sched.c | 20 +++++++-------------
 drivers/gpu/drm/nouveau/nouveau_sched.h |  9 +++------
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  |  8 ++++----
 3 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 2ec62059c351..7d9c3418e76b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -122,11 +122,9 @@ nouveau_job_done(struct nouveau_job *job)
 {
 	struct nouveau_sched *sched = job->sched;
 
-	spin_lock(&sched->job.list.lock);
+	spin_lock(&sched->job_list.lock);
 	list_del(&job->entry);
-	spin_unlock(&sched->job.list.lock);
-
-	wake_up(&sched->job.wq);
+	spin_unlock(&sched->job_list.lock);
 }
 
 void
@@ -307,9 +305,9 @@ nouveau_job_submit(struct nouveau_job *job)
 	}
 
 	/* Submit was successful; add the job to the schedulers job list. */
-	spin_lock(&sched->job.list.lock);
-	list_add(&job->entry, &sched->job.list.head);
-	spin_unlock(&sched->job.list.lock);
+	spin_lock(&sched->job_list.lock);
+	list_add(&job->entry, &sched->job_list.head);
+	spin_unlock(&sched->job_list.lock);
 
 	drm_sched_job_arm(&job->base);
 	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
@@ -460,9 +458,8 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
 		goto fail_sched;
 
 	mutex_init(&sched->mutex);
-	spin_lock_init(&sched->job.list.lock);
-	INIT_LIST_HEAD(&sched->job.list.head);
-	init_waitqueue_head(&sched->job.wq);
+	spin_lock_init(&sched->job_list.lock);
+	INIT_LIST_HEAD(&sched->job_list.head);
 
 	return 0;
 
@@ -502,9 +499,6 @@ nouveau_sched_fini(struct nouveau_sched *sched)
 	struct drm_gpu_scheduler *drm_sched = &sched->base;
 	struct drm_sched_entity *entity = &sched->entity;
 
-	rmb(); /* for list_empty to work without lock */
-	wait_event(sched->job.wq, list_empty(&sched->job.list.head));
-
 	drm_sched_entity_fini(entity);
 	drm_sched_fini(drm_sched);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index 20cd1da8db73..b98c3f0bef30 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -103,12 +103,9 @@ struct nouveau_sched {
 	struct mutex mutex;
 
 	struct {
-		struct {
-			struct list_head head;
-			spinlock_t lock;
-		} list;
-		struct wait_queue_head wq;
-	} job;
+		struct list_head head;
+		spinlock_t lock;
+	} job_list;
 };
 
 int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 48f105239f42..ddfc46bc1b3e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
 	u64 end = addr + range;
 
 again:
-	spin_lock(&sched->job.list.lock);
-	list_for_each_entry(__job, &sched->job.list.head, entry) {
+	spin_lock(&sched->job_list.lock);
+	list_for_each_entry(__job, &sched->job_list.head, entry) {
 		struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
 
 		list_for_each_op(op, &bind_job->ops) {
@@ -1030,7 +1030,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
 
 			if (!(end <= op_addr || addr >= op_end)) {
 				nouveau_uvmm_bind_job_get(bind_job);
-				spin_unlock(&sched->job.list.lock);
+				spin_unlock(&sched->job_list.lock);
 				wait_for_completion(&bind_job->complete);
 				nouveau_uvmm_bind_job_put(bind_job);
 				goto again;
@@ -1038,7 +1038,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
 			}
 		}
 	}
-	spin_unlock(&sched->job.list.lock);
+	spin_unlock(&sched->job_list.lock);
 }
 
 static int
-- 
2.49.0