[PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design

Philipp Stanner posted 6 patches 7 months, 3 weeks ago
[PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Philipp Stanner 7 months, 3 weeks ago
So far, the unit tests manually avoided the memory leaks that could occur
when calling drm_sched_fini().

The scheduler now avoids those leaks by itself if the driver provides the
drm_sched_backend_ops.kill_fence_context() callback.

Implement that callback for the unit tests. Remove the manual cleanup
code.

Signed-off-by: Philipp Stanner <phasta@kernel.org>
---
 .../gpu/drm/scheduler/tests/mock_scheduler.c  | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
index f999c8859cf7..a72d26ca8262 100644
--- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
+++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
@@ -228,10 +228,30 @@ static void mock_sched_free_job(struct drm_sched_job *sched_job)
 	/* Mock job itself is freed by the kunit framework. */
 }
 
+static void mock_sched_fence_context_kill(struct drm_gpu_scheduler *gpu_sched)
+{
+	struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(gpu_sched);
+	struct drm_mock_sched_job *job;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sched->lock, flags);
+	list_for_each_entry(job, &sched->job_list, link) {
+		spin_lock(&job->lock);
+		if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
+			dma_fence_set_error(&job->hw_fence, -ECANCELED);
+			dma_fence_signal_locked(&job->hw_fence);
+		}
+		complete(&job->done);
+		spin_unlock(&job->lock);
+	}
+	spin_unlock_irqrestore(&sched->lock, flags);
+}
+
 static const struct drm_sched_backend_ops drm_mock_scheduler_ops = {
 	.run_job = mock_sched_run_job,
 	.timedout_job = mock_sched_timedout_job,
-	.free_job = mock_sched_free_job
+	.free_job = mock_sched_free_job,
+	.kill_fence_context = mock_sched_fence_context_kill,
 };
 
 /**
@@ -300,18 +320,6 @@ void drm_mock_sched_fini(struct drm_mock_scheduler *sched)
 		drm_mock_sched_job_complete(job);
 	spin_unlock_irqrestore(&sched->lock, flags);
 
-	/*
-	 * Free completed jobs and jobs not yet processed by the DRM scheduler
-	 * free worker.
-	 */
-	spin_lock_irqsave(&sched->lock, flags);
-	list_for_each_entry_safe(job, next, &sched->done_list, link)
-		list_move_tail(&job->link, &list);
-	spin_unlock_irqrestore(&sched->lock, flags);
-
-	list_for_each_entry_safe(job, next, &list, link)
-		mock_sched_free_job(&job->base);
-
 	drm_sched_fini(&sched->base);
 }
 
-- 
2.48.1
Re: [PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Philipp Stanner 7 months, 1 week ago
On Thu, 2025-04-24 at 11:55 +0200, Philipp Stanner wrote:
> The unit tests so far took care manually of avoiding memory leaks
> that
> might have occurred when calling drm_sched_fini().
> 
> The scheduler now takes care by itself of avoiding memory leaks if
> the
> driver provides the callback
> drm_sched_backend_ops.kill_fence_context().
> 
> Implement that callback for the unit tests. Remove the manual cleanup
> code.

@Tvrtko: On a scale from 1-10, how much do you love this patch? :)

P.

Re: [PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Tvrtko Ursulin 7 months, 1 week ago
Hi Philipp,

On 08/05/2025 12:03, Philipp Stanner wrote:
> On Thu, 2025-04-24 at 11:55 +0200, Philipp Stanner wrote:
>> The unit tests so far took care manually of avoiding memory leaks
>> that
>> might have occurred when calling drm_sched_fini().
>>
>> The scheduler now takes care by itself of avoiding memory leaks if
>> the
>> driver provides the callback
>> drm_sched_backend_ops.kill_fence_context().
>>
>> Implement that callback for the unit tests. Remove the manual cleanup
>> code.
> 
> @Tvrtko: On a scale from 1-10, how much do you love this patch? :)

Specific patch aside, it is the series as a whole I would like to be 
sure there isn't a more elegant way to achieve the same end result.

Like that sketch of a counter proposal I sent, for the reasons listed 
with it - which were, AFAIR, to avoid needing to add more state machine, 
to avoid mandating drivers have to keep an internal list, and to align 
better with the existing prototypes in the sched ops table (where 
everything operates on jobs).

Regards,

Tvrtko


Re: [PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Philipp Stanner 7 months, 1 week ago
On Thu, 2025-05-08 at 13:51 +0100, Tvrtko Ursulin wrote:
> 
> Hi Philipp,
> 
> On 08/05/2025 12:03, Philipp Stanner wrote:
> > On Thu, 2025-04-24 at 11:55 +0200, Philipp Stanner wrote:
> > > The unit tests so far took care manually of avoiding memory leaks
> > > that
> > > might have occurred when calling drm_sched_fini().
> > > 
> > > The scheduler now takes care by itself of avoiding memory leaks
> > > if
> > > the
> > > driver provides the callback
> > > drm_sched_backend_ops.kill_fence_context().
> > > 
> > > Implement that callback for the unit tests. Remove the manual
> > > cleanup
> > > code.
> > 
> > @Tvrtko: On a scale from 1-10, how much do you love this patch? :)
> 
> Specific patch aside, it is the series as a whole I would like to be 
> sure there isn't a more elegant way to achieve the same end result.

I count this as a 9/10 \o/

But jokes aside:

> 
> Like that sketch of a counter proposal I sent for the reasons listed 
> with it. Which were, AFAIR, to avoid needing to add more state
> machine, 

Well, the state machine added is basically just the waitqueue. The
WRITE_ONCE booleans are currently just for correctness and clarity.
I've looked at them and want to remove them all in another patch,
because I think they're not needed (the workqueue handles that).

But yes, the added state is > 0

> to avoid mandating drivers have to keep an internal list,

That's not mandated by the scheduler, but by logic itself. All drivers
need to have a list of in-flight fences. Otherwise the drivers would
have no chance of signaling those fences once their GPU tells them to
do so.
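
To illustrate, a driver-side kill_fence_context() built on top of such a
list is tiny. Here is a rough sketch with made-up driver structures (all
the "my_" names are hypothetical), essentially mirroring what the mock
scheduler does in this patch; fence locking is simplified:

	static void my_driver_kill_fence_context(struct drm_gpu_scheduler *sched)
	{
		struct my_driver *drv = container_of(sched, struct my_driver, sched);
		struct my_job *job;
		unsigned long flags;

		/* Signal every in-flight hardware fence with -ECANCELED. */
		spin_lock_irqsave(&drv->job_lock, flags);
		list_for_each_entry(job, &drv->in_flight_jobs, link) {
			if (!dma_fence_is_signaled(&job->hw_fence)) {
				dma_fence_set_error(&job->hw_fence, -ECANCELED);
				dma_fence_signal(&job->hw_fence);
			}
		}
		spin_unlock_irqrestore(&drv->job_lock, flags);
	}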

I have now provided two users of the new API, nouveau and the unit
tests. Can you think of a party for which the suggested approach
wouldn't work?


Don't get me wrong, your approach does work and it definitely has its
charm. However, I think what I propose here is syntactically a bit
cleaner because the classical order of a fence first being signaled in
the driver and then the associated job being freed as usual by the
scheduler is guaranteed. IOW, we primarily rely on the signaling path.

Either way, neither your nor my approach would have worked out of the
box in Nouveau without that driver exploding.

>  and to align 
> better with the existing prototypes in the sched ops table (where 
> everything operates on jobs).

That's not a hard criterion IMO. Those are sched_backend_ops, not
sched_job_backend_ops, and prepare_job() already takes a parameter
other than a job.


Cheers,
P.

> 
> Regards,
> 
> Tvrtko
> 
Re: [PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Tvrtko Ursulin 7 months ago
On 12/05/2025 09:00, Philipp Stanner wrote:
> On Thu, 2025-05-08 at 13:51 +0100, Tvrtko Ursulin wrote:
>>
>> Hi Philipp,
>>
>> On 08/05/2025 12:03, Philipp Stanner wrote:
>>> On Thu, 2025-04-24 at 11:55 +0200, Philipp Stanner wrote:
>>>> The unit tests so far took care manually of avoiding memory leaks
>>>> that
>>>> might have occurred when calling drm_sched_fini().
>>>>
>>>> The scheduler now takes care by itself of avoiding memory leaks
>>>> if
>>>> the
>>>> driver provides the callback
>>>> drm_sched_backend_ops.kill_fence_context().
>>>>
>>>> Implement that callback for the unit tests. Remove the manual
>>>> cleanup
>>>> code.
>>>
>>> @Tvrtko: On a scale from 1-10, how much do you love this patch? :)
>>
>> Specific patch aside, it is the series as a whole I would like to be
>> sure there isn't a more elegant way to achieve the same end result.
> 
> I count this as a 9/10 \o/

:) Yes, sorry, it would be a bit lower than that, at least until someone 
can point out a fatal flaw in my alternative. :)

> But jokes aside:
> 
>>
>> Like that sketch of a counter proposal I sent for the reasons listed
>> with it. Which were, AFAIR, to avoid needing to add more state
>> machine,
> 
> Well the state machine added is basically just the waitqueue. The
> WRITE_ONCE booleans are currently just for correctness and clarity.
> I've looked at them and want to remove them all in an other patch,
> because I think they're not needed (workqueue handles that)
> 
> But yes, the added state is > 0
> 
>> to avoid mandating drivers have to keep an internal list,
> 
> That's not mandated by the scheduler, but by logic itself. All drivers
> need to have a list of on-flight fences. Otherwise the drivers would
> have no chance of signaling those fences once their GPU tells them to
> do so.

Probably it would be hard to signal without tracking of some sort, yes, 
although it wouldn't have to be indexed by fence context, or looked up 
by it, so maybe it could still be simpler.

More importantly, I think with this comment I was referring to the fact 
that with the ops->cancel_job() approach I was able to remove the _done_ 
list tracking from the mock scheduler.

> I have now provided two users of the new API, nouveau and the unit
> tests. Can you think of a party for which the suggested approach
> wouldn't work?

I did not think along those lines yet, so I don't know. I just thought it 
was too much code to implement a relatively simple thing, and also that a 
few things in the design bothered me.

If you look at the diffstat from my proposal and ignore kerneldoc and 
unit test stats, it literally adds 8 lines to drm_sched_fini() and a 
single line to gpu_scheduler.h:

+       void (*cancel_job)(struct drm_sched_job *sched_job);

And in the former after it stops the workers:

+       if (sched->ops->cancel_job) {
+               struct drm_sched_job *job;
+
+               list_for_each_entry_reverse(job, &sched->pending_list, list) {
+                       sched->ops->cancel_job(job);
+                       sched->ops->free_job(job);
+               }
+       }

To me this looks quite clean. Unless, I say this again, I am missing 
some fatal flaw why it doesn't work.
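
And the driver side would stay trivial as well. A rough sketch with
hypothetical driver structures (the "my_" names are made up) of what a
->cancel_job() implementation could look like:

	static void my_driver_cancel_job(struct drm_sched_job *sched_job)
	{
		struct my_job *job = container_of(sched_job, struct my_job, base);

		/* Cancel the job by signalling its hardware fence with -ECANCELED. */
		if (!dma_fence_is_signaled(&job->hw_fence)) {
			dma_fence_set_error(&job->hw_fence, -ECANCELED);
			dma_fence_signal(&job->hw_fence);
		}
	}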

> Don't get me wrong, your approach does work and it definitely has its
> charm. However, I think what I propose here is syntactically a bit
> cleaner because the classical order of a fence first being signaled in
> the driver and then the associated job being freed as usual by the
> scheduler is guaranteed. IOW, we primarily rely on the signaling path.
> 
> Either way, neither your nor my approach would have worked out of the
> box in Nouveau without that driver exploding.

What do you mean by this - the latest version of your series does or 
does not work for nouveau?

Regards,

Tvrtko

> 
>>   and to align
>> better with the existing prototypes in the sched ops table (where
>> everything operates on jobs).
> 
> That's not a hard criteria IMO. Those are sched_backend_ops, not
> sched_job_backend_ops, and prepare_job() already takes a parameter
> other than a job.
> 
> 
> Cheers,
> P.
> 
>>
>> Regards,
>>
>> Tvrtko
>>

Re: [PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Philipp Stanner 7 months ago
On Wed, 2025-05-14 at 09:30 +0100, Tvrtko Ursulin wrote:
> 
> On 12/05/2025 09:00, Philipp Stanner wrote:
> > On Thu, 2025-05-08 at 13:51 +0100, Tvrtko Ursulin wrote:
> > > 
> > > Hi Philipp,
> > > 
> > > On 08/05/2025 12:03, Philipp Stanner wrote:
> > > > On Thu, 2025-04-24 at 11:55 +0200, Philipp Stanner wrote:
> > > > > The unit tests so far took care manually of avoiding memory
> > > > > leaks
> > > > > that
> > > > > might have occurred when calling drm_sched_fini().
> > > > > 
> > > > > The scheduler now takes care by itself of avoiding memory
> > > > > leaks
> > > > > if
> > > > > the
> > > > > driver provides the callback
> > > > > drm_sched_backend_ops.kill_fence_context().
> > > > > 
> > > > > Implement that callback for the unit tests. Remove the manual
> > > > > cleanup
> > > > > code.
> > > > 
> > > > @Tvrtko: On a scale from 1-10, how much do you love this patch?
> > > > :)
> > > 
> > > Specific patch aside, it is the series as a whole I would like to
> > > be
> > > sure there isn't a more elegant way to achieve the same end
> > > result.
> > 
> > I count this as a 9/10 \o/
> 
> :) Yes, sorry, it would a bit lower than that, at least until someone
> can point out a fatal flaw in my alternative. :)

There's no fatal flaw in my approach either :)

> 
> > But jokes aside:
> > 
> > > 
> > > Like that sketch of a counter proposal I sent for the reasons
> > > listed
> > > with it. Which were, AFAIR, to avoid needing to add more state
> > > machine,
> > 
> > Well the state machine added is basically just the waitqueue. The
> > WRITE_ONCE booleans are currently just for correctness and clarity.
> > I've looked at them and want to remove them all in an other patch,
> > because I think they're not needed (workqueue handles that)
> > 
> > But yes, the added state is > 0
> > 
> > > to avoid mandating drivers have to keep an internal list,
> > 
> > That's not mandated by the scheduler, but by logic itself. All
> > drivers
> > need to have a list of on-flight fences. Otherwise the drivers
> > would
> > have no chance of signaling those fences once their GPU tells them
> > to
> > do so.
> 
> Probably it would be hard to signal without tracking of some sort
> yes, 
> although it wouldn't have to be indexed by fence context, or looked
> up 
> by it so maybe still simpler.

Well, the decisive point remains that all drivers must know all in-flight
fences, so they are all able to signal those fences.

> 
> More importantly I think with this comment I was thinking about the
> fact 
> that with ops->cancel_job() approach I was able to remove the _done_ 
> list tracking from the mock scheduler.

If the done_list is just about avoiding memory leaks, then I should
also be able to remove it completely, shouldn't I? In this patch here I
already remove the leak-related code in drm_mock_sched_fini(), but
hadn't looked too deeply into what else the done_list does. Is it just
about the leaks?

> 
> > I have now provided two users of the new API, nouveau and the unit
> > tests. Can you think of a party for which the suggested approach
> > wouldn't work?
> 
> I did not think along those lines yet so don't know. I just thought
> it 
> was too much code to implement a relatively simple thing and that
> also a 
> few things in the design bothered me.
> 
> If you look at the diffstat from my proposal and ignore kerneldoc and
> unit test stats, it literally adds 8 lines to drm_sched_fini() and a 
> single line to gpu_scheduler.h:
> 
> +       void (*cancel_job)(struct drm_sched_job *sched_job);
> 
> And in the former after it stops the workers:
> 
> +       if (sched->ops->cancel_job) {
> +               struct drm_sched_job *job;
> +
> +               list_for_each_entry_reverse(job, &sched->pending_list, list) {
> +                       sched->ops->cancel_job(job);
> +                       sched->ops->free_job(job);
> +               }
> +       }
> 
> To me this looks quite clean. Unless, I say this again, I am missing 
> some fatal flaw why it doesn't work.

It does work. I've stated that before. But mine works, too. And mine
doesn't have a hard blocker either, as far as I can see.

So let's focus on the main differences:

Your version adds fewer lines of code, that's correct.

I think cleaning up through just having the driver signal the fences
all at once is better because
   1. The very same code path both for "legacy-fini" and "new-fini" is
      responsible for cleaning up the jobs. Notably, the free_job()
      callback is only invoked by the same parties as before, primarily
      the work items. We don't add a new, only sometimes running, code
      path *that frees jobs*.
   2. The scheduler's already fractured design doesn't fracture
      further: the free_job work item remains responsible for calling
      free_job(). Having just one party being responsible for one thing
      is a desirable design goal.
   3. Considering that many drivers generously (ab)use API internals of
      the scheduler, and considering that there is already ambiguity of
      who's responsible for handling job lifetimes, I believe it is
      safer not to add an additional code path that can free jobs, but
      keep that one path.
   4. It reads more cleanly: "We're not canceling a single job here,
      we're killing all associated jobs now all at once", raising
      awareness for driver programmers that this is a significant
      event: we're tearing down while not all of your jobs have
      finished on your GPU.

> 
> > Don't get me wrong, your approach does work and it definitely has
> > its
> > charm. However, I think what I propose here is syntactically a bit
> > cleaner because the classical order of a fence first being signaled
> > in
> > the driver and then the associated job being freed as usual by the
> > scheduler is guaranteed. IOW, we primarily rely on the signaling
> > path.
> > 
> > Either way, neither your nor my approach would have worked out of
> > the
> > box in Nouveau without that driver exploding.
> 
> What do you mean by this - the latest version of your series does or 
> does not work for nouveau?

Mine works with Nouveau, but revealed a bug in Nouveau [1]. Yours would
have run into that bug, too.

My point is just that your job-by-job approach wouldn't have been
superior to my approach in practice, i.e., when implementing it in the
first "beta tester", Nouveau. Would have been the same problem in
different color.

So just because a cancel_job() callback would result in fewer lines of
code doesn't mean it's superior in practice. I expect the same to be
the case for other drivers, especially those that use scheduler
internals.

So, summarizing:
 * My approach works. Your approach works.
 * It works for all drivers, because they all have a list of fences.
 * It communicates more clearly to the driver what this is all about.
 * It keeps the scheduler's design more consistent regarding
   responsibility / code paths for job life times.

P.

[1] https://lore.kernel.org/dri-devel/20250415121900.55719-2-phasta@kernel.org/

> 
> Regards,
> 
> Tvrtko
> 
> > 
> > >   and to align
> > > better with the existing prototypes in the sched ops table (where
> > > everything operates on jobs).
> > 
> > That's not a hard criteria IMO. Those are sched_backend_ops, not
> > sched_job_backend_ops, and prepare_job() already takes a parameter
> > other than a job.
> > 
> > 
> > Cheers,
> > P.
> > 
> > > 
> > > Regards,
> > > 
> > > Tvrtko
> > > 
Re: [PATCH v2 6/6] drm/sched: Port unit tests to new cleanup design
Posted by Tvrtko Ursulin 7 months ago
On 14/05/2025 10:19, Philipp Stanner wrote:
> On Wed, 2025-05-14 at 09:30 +0100, Tvrtko Ursulin wrote:
>>
>> On 12/05/2025 09:00, Philipp Stanner wrote:
>>> On Thu, 2025-05-08 at 13:51 +0100, Tvrtko Ursulin wrote:
>>>>
>>>> Hi Philipp,
>>>>
>>>> On 08/05/2025 12:03, Philipp Stanner wrote:
>>>>> On Thu, 2025-04-24 at 11:55 +0200, Philipp Stanner wrote:
>>>>>> The unit tests so far took care manually of avoiding memory
>>>>>> leaks
>>>>>> that
>>>>>> might have occurred when calling drm_sched_fini().
>>>>>>
>>>>>> The scheduler now takes care by itself of avoiding memory
>>>>>> leaks
>>>>>> if
>>>>>> the
>>>>>> driver provides the callback
>>>>>> drm_sched_backend_ops.kill_fence_context().
>>>>>>
>>>>>> Implement that callback for the unit tests. Remove the manual
>>>>>> cleanup
>>>>>> code.
>>>>>
>>>>> @Tvrtko: On a scale from 1-10, how much do you love this patch?
>>>>> :)
>>>>
>>>> Specific patch aside, it is the series as a whole I would like to
>>>> be
>>>> sure there isn't a more elegant way to achieve the same end
>>>> result.
>>>
>>> I count this as a 9/10 \o/
>>
>> :) Yes, sorry, it would a bit lower than that, at least until someone
>> can point out a fatal flaw in my alternative. :)
> 
> There's no fatal flaw in my approach either :)
> 
>>
>>> But jokes aside:
>>>
>>>>
>>>> Like that sketch of a counter proposal I sent for the reasons
>>>> listed
>>>> with it. Which were, AFAIR, to avoid needing to add more state
>>>> machine,
>>>
>>> Well the state machine added is basically just the waitqueue. The
>>> WRITE_ONCE booleans are currently just for correctness and clarity.
>>> I've looked at them and want to remove them all in an other patch,
>>> because I think they're not needed (workqueue handles that)
>>>
>>> But yes, the added state is > 0
>>>
>>>> to avoid mandating drivers have to keep an internal list,
>>>
>>> That's not mandated by the scheduler, but by logic itself. All
>>> drivers
>>> need to have a list of on-flight fences. Otherwise the drivers
>>> would
>>> have no chance of signaling those fences once their GPU tells them
>>> to
>>> do so.
>>
>> Probably it would be hard to signal without tracking of some sort
>> yes,
>> although it wouldn't have to be indexed by fence context, or looked
>> up
>> by it so maybe still simpler.
> 
> Well, the decisive point remains that all drivers must know all on-air
> fences, so they all are able to signal those fences.
> 
>>
>> More importantly I think with this comment I was thinking about the
>> fact
>> that with ops->cancel_job() approach I was able to remove the _done_
>> list tracking from the mock scheduler.
> 
> If the done_list is just about avoiding memory leaks, then I should
> also be able to remove it completely, shouldn't I? In this patch here I
> already remove the leak-related code in drm_mock_sched_fini(), but
> hadn't looked too deeply into what else the done_list does. Is it just
> about the leaks?

Yes, I added it specifically to enable freeing completed jobs before 
calling drm_sched_fini() (so the mock scheduler does not have to look at 
the scheduler-internal sched->job_list). At a glance you should be able to 
remove it too, and it looks like it should just work. You can peek at my 
patch for how to remove it completely if it helps.

>>> I have now provided two users of the new API, nouveau and the unit
>>> tests. Can you think of a party for which the suggested approach
>>> wouldn't work?
>>
>> I did not think along those lines yet so don't know. I just thought
>> it
>> was too much code to implement a relatively simple thing and that
>> also a
>> few things in the design bothered me.
>>
>> If you look at the diffstat from my proposal and ignore kerneldoc and
>> unit test stats, it literally adds 8 lines to drm_sched_fini() and a
>> single line to gpu_scheduler.h:
>>
>> +       void (*cancel_job)(struct drm_sched_job *sched_job);
>>
>> And in the former after it stops the workers:
>>
>> +       if (sched->ops->cancel_job) {
>> +               struct drm_sched_job *job;
>> +
>> +               list_for_each_entry_reverse(job, &sched->pending_list, list) {
>> +                       sched->ops->cancel_job(job);
>> +                       sched->ops->free_job(job);
>> +               }
>> +       }
>>
>> To me this looks quite clean. Unless, I say this again, I am missing
>> some fatal flaw why it doesn't work.
> 
> It does work. I've stated that before. But mine works, too. And mine
> doesn't have a hard blocker either, as far as I can see.
> 
> So let's focus on the main differences:
> 
> Your version adds fewer lines of code, that's correct.
> 
> I think cleaning up through just having the driver signal the fences
> all at once is better because
>     1. The very same code path both for "legacy-fini" and "new-fini" is
>        responsible for cleaning up the jobs. Notably, the free_job()
>        callback is only invoked by the same parties as before, primarily
>        the work items. We don't add a new, only sometimes running, code
>        path *that free's jobs*.
>     2. The scheduler's already fractured design doesn't fracture
>        further: the free_job work item remains responsible for calling
>        free_job(). Having just one party being responsible for one thing
>        is a desirable design goal.
>     3. Considering that many drivers generously (ab)use API internals of
>        the scheduler, and considering that there is already ambiguity of
>        who's responsible for handling job lifetimes, I believe it is
>        safer not to add an additional code path that can free jobs, but
>        keep that one path.

All three look like the same root argument. I would not be too concerned 
since it is still "one party" - the scheduler internals.

>     4. It reads more cleanly: "We're not canceling a single job here,
>        we're killing all associated jobs now all at once", raising
>        awareness for driver programmers that this is a significant
>        event: we're tearing down while not all of your jobs have
>        finished on your GPU.

It looks like we may simply end up not agreeing.

I don't see most of the points above as significant, or even valid as a 
differentiator between the two approaches, while what I object to the 
most is adding more state machine stuff, because that is what I think 
makes the scheduler harder to maintain: more flags, more stopping some 
workers while letting others run, etc.

A new callback to cancel a single job is IMO completely clean. It is 
equivalent to how we already have ->timedout_job(), which, in the 
context of the recent discussions, really has the semantics of "cancel 
if it hasn't completed in the meantime". So I even wonder if there could 
be scope to share something with ->cancel_job() - the design or the 
implementation.

Regards,

Tvrtko

>>
>>> Don't get me wrong, your approach does work and it definitely has
>>> its
>>> charm. However, I think what I propose here is syntactically a bit
>>> cleaner because the classical order of a fence first being signaled
>>> in
>>> the driver and then the associated job being freed as usual by the
>>> scheduler is guaranteed. IOW, we primarily rely on the signaling
>>> path.
>>>
>>> Either way, neither your nor my approach would have worked out of
>>> the
>>> box in Nouveau without that driver exploding.
>>
>> What do you mean by this - the latest version of your series does or
>> does not work for nouveau?
> 
> Mine works with Nouveau, but revealed a bug in Nouveau [1]. Yours would
> have ran into that bug, too.
> 
> My point is just that your job-by-job approach wouldn't have been
> superior to my approach in practice, i.e., when implementing it in the
> first "beta tester", Nouveau. Would have been the same problem in
> different color.
> 
> So just because a cancel_job() callback would result in fewer lines of
> code wouldn't mean it's superior in practice. I expect the same to be
> the case for other drivers, especially those who use scheduler
> internals.
> 
> So, summarizing:
>   * My approach works. Your approach works.
>   * It works for all drivers, because they all have a list of fences.
>   * It communicates more clearly to the driver what this is all about.
>   * It keeps the scheduler's design more consistent regarding
>     responsibility / code paths for job life times.
> 
> P.
> 
> [1] https://lore.kernel.org/dri-devel/20250415121900.55719-2-phasta@kernel.org/
> 
>>
>> Regards,
>>
>> Tvrtko
>>
>>>
>>>>    and to align
>>>> better with the existing prototypes in the sched ops table (where
>>>> everything operates on jobs).
>>>
>>> That's not a hard criteria IMO. Those are sched_backend_ops, not
>>> sched_job_backend_ops, and prepare_job() already takes a parameter
>>> other than a job.
>>>
>>>
>>> Cheers,
>>> P.
>>>
>>>>
>>>> Regards,
>>>>
>>>> Tvrtko
>>>>