[PATCH] drm/sched: warn about drm_sched_job_init()'s partial init

Philipp Stanner posted 1 patch 1 month ago
drivers/gpu/drm/scheduler/sched_main.c | 4 ++++
include/drm/gpu_scheduler.h            | 8 ++++++++
2 files changed, 12 insertions(+)
[PATCH] drm/sched: warn about drm_sched_job_init()'s partial init
Posted by Philipp Stanner 1 month ago
drm_sched_job_init()'s name suggests that after the function has succeeded,
parameter "job" will be fully initialized. This is not the case; some
members are set only later, notably drm_sched_job.sched, which is set by
drm_sched_job_arm().

Document that drm_sched_job_init() does not set all struct members.

Document the lifetime of drm_sched_job.sched.

Signed-off-by: Philipp Stanner <pstanner@redhat.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 4 ++++
 include/drm/gpu_scheduler.h            | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index dab8cca79eb7..8c1c4739f36d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -771,6 +771,10 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
  * Drivers must make sure drm_sched_job_cleanup() if this function returns
  * successfully, even when @job is aborted before drm_sched_job_arm() is called.
  *
+ * Note that this function does not assign a valid value to every member of
+ * struct drm_sched_job. Take a look at that struct's documentation to see
+ * who sets each member and how long its value remains valid.
+ *
  * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
  * has died, which can mean that there's no valid runqueue for a @entity.
  * This function returns -ENOENT in this case (which probably should be -EIO as
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index ab161289d1bf..95e17504e46a 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -340,6 +340,14 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
 struct drm_sched_job {
 	struct spsc_node		queue_node;
 	struct list_head		list;
+
+	/**
+	 * @sched:
+	 *
+	 * The scheduler this job is or will be scheduled on. Gets set by
+	 * drm_sched_job_arm(). Valid until drm_sched_backend_ops.free_job()
+	 * has finished.
+	 */
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
 
-- 
2.47.0
Re: [PATCH] drm/sched: warn about drm_sched_job_init()'s partial init
Posted by Matthew Brost 1 month ago
On Wed, Oct 23, 2024 at 04:15:31PM +0200, Philipp Stanner wrote:
> drm_sched_job_init()'s name suggests that after the function succeeded,
> parameter "job" will be fully initialized. This is not the case; some
> members are only later set, notably drm_sched_job.sched by
> drm_sched_job_arm().
> 
> Document that drm_sched_job_init() does not set all struct members.
> 
> Document the lifetime of drm_sched_job.sched.
> 
> Signed-off-by: Philipp Stanner <pstanner@redhat.com>

Reviewed-by: Matthew Brost <matthew.brost@intel.com>

> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 4 ++++
>  include/drm/gpu_scheduler.h            | 8 ++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index dab8cca79eb7..8c1c4739f36d 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -771,6 +771,10 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>   * Drivers must make sure drm_sched_job_cleanup() if this function returns
>   * successfully, even when @job is aborted before drm_sched_job_arm() is called.
>   *
> + * Note that this function does not assign a valid value to each struct member
> + * of struct drm_sched_job. Take a look at that struct's documentation to see
> + * who sets which struct member with what lifetime.
> + *
>   * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
>   * has died, which can mean that there's no valid runqueue for a @entity.
>   * This function returns -ENOENT in this case (which probably should be -EIO as
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index ab161289d1bf..95e17504e46a 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -340,6 +340,14 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>  struct drm_sched_job {
>  	struct spsc_node		queue_node;
>  	struct list_head		list;
> +
> +	/**
> +	 * @sched:
> +	 *
> +	 * The scheduler this job is or will be scheduled on. Gets set by
> +	 * drm_sched_job_arm(). Valid until drm_sched_backend_ops.free_job()
> +	 * has finished.
> +	 */
>  	struct drm_gpu_scheduler	*sched;
>  	struct drm_sched_fence		*s_fence;
>  
> -- 
> 2.47.0
>
Re: [PATCH] drm/sched: warn about drm_sched_job_init()'s partial init
Posted by Philipp Stanner 1 month ago
On Fri, 2024-10-25 at 02:32 +0000, Matthew Brost wrote:
> On Wed, Oct 23, 2024 at 04:15:31PM +0200, Philipp Stanner wrote:
> > drm_sched_job_init()'s name suggests that after the function
> > succeeded,
> > parameter "job" will be fully initialized. This is not the case;
> > some
> > members are only later set, notably drm_sched_job.sched by
> > drm_sched_job_arm().
> > 
> > Document that drm_sched_job_init() does not set all struct members.
> > 
> > Document the lifetime of drm_sched_job.sched.
> > 
> > Signed-off-by: Philipp Stanner <pstanner@redhat.com>
> 
> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> 

Applied to drm-misc-next, thank you.

P.


> > ---
> >  drivers/gpu/drm/scheduler/sched_main.c | 4 ++++
> >  include/drm/gpu_scheduler.h            | 8 ++++++++
> >  2 files changed, 12 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> > b/drivers/gpu/drm/scheduler/sched_main.c
> > index dab8cca79eb7..8c1c4739f36d 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -771,6 +771,10 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
> >   * Drivers must make sure drm_sched_job_cleanup() if this function
> > returns
> >   * successfully, even when @job is aborted before
> > drm_sched_job_arm() is called.
> >   *
> > + * Note that this function does not assign a valid value to each
> > struct member
> > + * of struct drm_sched_job. Take a look at that struct's
> > documentation to see
> > + * who sets which struct member with what lifetime.
> > + *
> >   * WARNING: amdgpu abuses &drm_sched.ready to signal when the
> > hardware
> >   * has died, which can mean that there's no valid runqueue for a
> > @entity.
> >   * This function returns -ENOENT in this case (which probably
> > should be -EIO as
> > diff --git a/include/drm/gpu_scheduler.h
> > b/include/drm/gpu_scheduler.h
> > index ab161289d1bf..95e17504e46a 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -340,6 +340,14 @@ struct drm_sched_fence
> > *to_drm_sched_fence(struct dma_fence *f);
> >  struct drm_sched_job {
> >  	struct spsc_node		queue_node;
> >  	struct list_head		list;
> > +
> > +	/**
> > +	 * @sched:
> > +	 *
> > +	 * The scheduler this job is or will be scheduled on. Gets
> > set by
> > +	 * drm_sched_job_arm(). Valid until
> > drm_sched_backend_ops.free_job()
> > +	 * has finished.
> > +	 */
> >  	struct drm_gpu_scheduler	*sched;
> >  	struct drm_sched_fence		*s_fence;
> >  
> > -- 
> > 2.47.0
> > 
>