The timeout logic provided by drm_sched leads to races when we try
to suspend it while the drm_sched workqueue queues more jobs. Let's
overhaul the timeout handling in panthor to use our own delayed work
that's resumed/suspended when a group is resumed/suspended. When an
actual timeout occurs, we still call drm_sched_fault() to report it
through drm_sched. Otherwise, the drm_sched timeout is disabled (set
to MAX_SCHEDULE_TIMEOUT), which leaves us in control of how
modifications to the timer are protected.

One issue is that drm_sched_suspend_timeout() is called from both
queue_run_job() and tick_work(), which can race because
drm_sched_suspend_timeout() takes no lock. Another issue is that in
queue_run_job(), if the group is not scheduled, we suspend the
timeout again, which undoes what drm_sched_job_begin() did when
calling drm_sched_start_timeout(), so the timeout does not reset
when a job is finished.
Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Daniel Stone <daniels@collabora.com>
Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Signed-off-by: Ashley Smith <ashley.smith@collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 233 +++++++++++++++++-------
1 file changed, 167 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 4d31d1967716..5f02d2ec28f9 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -360,17 +360,20 @@ struct panthor_queue {
/** @entity: DRM scheduling entity used for this queue. */
struct drm_sched_entity entity;
- /**
- * @remaining_time: Time remaining before the job timeout expires.
- *
- * The job timeout is suspended when the queue is not scheduled by the
- * FW. Every time we suspend the timer, we need to save the remaining
- * time so we can restore it later on.
- */
- unsigned long remaining_time;
+ /** @timeout: Queue timeout related fields. */
+ struct {
+ /** @timeout.work: Work executed when a queue timeout occurs. */
+ struct delayed_work work;
- /** @timeout_suspended: True if the job timeout was suspended. */
- bool timeout_suspended;
+ /**
+ * @timeout.remaining: Time remaining before a queue timeout.
+ *
+ * When the timer is running, this value is set to MAX_SCHEDULE_TIMEOUT.
+ * When the timer is suspended, it's set to the time remaining when the
+ * timer was suspended.
+ */
+ unsigned long remaining;
+ } timeout;
/**
* @doorbell_id: Doorbell assigned to this queue.
@@ -1031,6 +1034,82 @@ group_unbind_locked(struct panthor_group *group)
return 0;
}
+static bool
+group_is_idle(struct panthor_group *group)
+{
+ struct panthor_device *ptdev = group->ptdev;
+ u32 inactive_queues;
+
+ if (group->csg_id >= 0)
+ return ptdev->scheduler->csg_slots[group->csg_id].idle;
+
+ inactive_queues = group->idle_queues | group->blocked_queues;
+ return hweight32(inactive_queues) == group->queue_count;
+}
+
+static void
+queue_suspend_timeout(struct panthor_queue *queue)
+{
+ unsigned long qtimeout, now;
+ struct panthor_group *group;
+ struct panthor_job *job;
+ bool timer_was_active;
+
+ spin_lock(&queue->fence_ctx.lock);
+
+ /* Already suspended, nothing to do. */
+ if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT)
+ goto out_unlock;
+
+ job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs,
+ struct panthor_job, node);
+ group = job ? job->group : NULL;
+
+ /* If the queue is blocked and the group is idle, we want the timer to
+ * keep running because the group can't be unblocked by other queues,
+ * so it has to come from an external source, and we want to timebox
+ * this external signalling.
+ */
+ if (group && (group->blocked_queues & BIT(job->queue_idx)) &&
+ group_is_idle(group))
+ goto out_unlock;
+
+ now = jiffies;
+ qtimeout = queue->timeout.work.timer.expires;
+
+ /* Cancel the timer. */
+ timer_was_active = cancel_delayed_work(&queue->timeout.work);
+ if (!timer_was_active || !job)
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+ else if (time_after(qtimeout, now))
+ queue->timeout.remaining = qtimeout - now;
+ else
+ queue->timeout.remaining = 0;
+
+ if (WARN_ON_ONCE(queue->timeout.remaining > msecs_to_jiffies(JOB_TIMEOUT_MS)))
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+
+out_unlock:
+ spin_unlock(&queue->fence_ctx.lock);
+}
+
+static void
+queue_resume_timeout(struct panthor_queue *queue)
+{
+ spin_lock(&queue->fence_ctx.lock);
+
+ /* When running, the remaining time is set to MAX_SCHEDULE_TIMEOUT. */
+ if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT) {
+ mod_delayed_work(queue->scheduler.timeout_wq,
+ &queue->timeout.work,
+ queue->timeout.remaining);
+
+ queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT;
+ }
+
+ spin_unlock(&queue->fence_ctx.lock);
+}
+
/**
* cs_slot_prog_locked() - Program a queue slot
* @ptdev: Device.
@@ -1069,10 +1148,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
CS_IDLE_EMPTY |
CS_STATE_MASK |
CS_EXTRACT_EVENT);
- if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
- drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
- queue->timeout_suspended = false;
- }
+ if (queue->iface.input->insert != queue->iface.input->extract)
+ queue_resume_timeout(queue);
}
/**
@@ -1099,14 +1176,7 @@ cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
CS_STATE_STOP,
CS_STATE_MASK);
- /* If the queue is blocked, we want to keep the timeout running, so
- * we can detect unbounded waits and kill the group when that happens.
- */
- if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
- queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
- queue->timeout_suspended = true;
- WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
- }
+ queue_suspend_timeout(queue);
return 0;
}
@@ -1888,19 +1958,6 @@ tick_ctx_is_full(const struct panthor_scheduler *sched,
return ctx->group_count == sched->csg_slot_count;
}
-static bool
-group_is_idle(struct panthor_group *group)
-{
- struct panthor_device *ptdev = group->ptdev;
- u32 inactive_queues;
-
- if (group->csg_id >= 0)
- return ptdev->scheduler->csg_slots[group->csg_id].idle;
-
- inactive_queues = group->idle_queues | group->blocked_queues;
- return hweight32(inactive_queues) == group->queue_count;
-}
-
static bool
group_can_run(struct panthor_group *group)
{
@@ -2888,35 +2945,50 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
xa_unlock(&gpool->xa);
}
-static void group_sync_upd_work(struct work_struct *work)
+static bool queue_check_job_completion(struct panthor_queue *queue)
{
- struct panthor_group *group =
- container_of(work, struct panthor_group, sync_upd_work);
+ struct panthor_syncobj_64b *syncobj = NULL;
struct panthor_job *job, *job_tmp;
+ bool cookie, progress = false;
LIST_HEAD(done_jobs);
- u32 queue_idx;
- bool cookie;
cookie = dma_fence_begin_signalling();
- for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
- struct panthor_queue *queue = group->queues[queue_idx];
- struct panthor_syncobj_64b *syncobj;
+ spin_lock(&queue->fence_ctx.lock);
+ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
+ if (!syncobj) {
+ struct panthor_group *group = job->group;
- if (!queue)
- continue;
+ syncobj = group->syncobjs->kmap +
+ (job->queue_idx * sizeof(*syncobj));
+ }
- syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
+ if (syncobj->seqno < job->done_fence->seqno)
+ break;
- spin_lock(&queue->fence_ctx.lock);
- list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
- if (syncobj->seqno < job->done_fence->seqno)
- break;
+ list_move_tail(&job->node, &done_jobs);
+ dma_fence_signal_locked(job->done_fence);
+ }
- list_move_tail(&job->node, &done_jobs);
- dma_fence_signal_locked(job->done_fence);
- }
- spin_unlock(&queue->fence_ctx.lock);
+ if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
+ /* If we have no job left, we cancel the timer, and reset remaining
+ * time to its default so it can be restarted next time
+ * queue_resume_timeout() is called.
+ */
+ cancel_delayed_work(&queue->timeout.work);
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+
+ /* If there's no job pending, we consider it progress to avoid a
+ * spurious timeout if the timeout handler and the sync update
+ * handler raced.
+ */
+ progress = true;
+ } else if (!list_empty(&done_jobs)) {
+ mod_delayed_work(queue->scheduler.timeout_wq,
+ &queue->timeout.work,
+ msecs_to_jiffies(JOB_TIMEOUT_MS));
+ progress = true;
}
+ spin_unlock(&queue->fence_ctx.lock);
dma_fence_end_signalling(cookie);
list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
@@ -2926,6 +2998,27 @@ static void group_sync_upd_work(struct work_struct *work)
panthor_job_put(&job->base);
}
+ return progress;
+}
+
+static void group_sync_upd_work(struct work_struct *work)
+{
+ struct panthor_group *group =
+ container_of(work, struct panthor_group, sync_upd_work);
+ u32 queue_idx;
+ bool cookie;
+
+ cookie = dma_fence_begin_signalling();
+ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
+ struct panthor_queue *queue = group->queues[queue_idx];
+
+ if (!queue)
+ continue;
+
+ queue_check_job_completion(queue);
+ }
+ dma_fence_end_signalling(cookie);
+
group_put(group);
}
@@ -3173,17 +3266,6 @@ queue_run_job(struct drm_sched_job *sched_job)
queue->iface.input->insert = job->ringbuf.end;
if (group->csg_id < 0) {
- /* If the queue is blocked, we want to keep the timeout running, so we
- * can detect unbounded waits and kill the group when that happens.
- * Otherwise, we suspend the timeout so the time we spend waiting for
- * a CSG slot is not counted.
- */
- if (!(group->blocked_queues & BIT(job->queue_idx)) &&
- !queue->timeout_suspended) {
- queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
- queue->timeout_suspended = true;
- }
-
group_schedule_locked(group, BIT(job->queue_idx));
} else {
gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
@@ -3192,6 +3274,7 @@ queue_run_job(struct drm_sched_job *sched_job)
pm_runtime_get(ptdev->base.dev);
sched->pm.has_ref = true;
}
+ queue_resume_timeout(queue);
panthor_devfreq_record_busy(sched->ptdev);
}
@@ -3241,6 +3324,11 @@ queue_timedout_job(struct drm_sched_job *sched_job)
queue_start(queue);
+ /* We already flagged the queue as faulty, make sure we don't get
+ * called again.
+ */
+ queue->scheduler.timeout = MAX_SCHEDULE_TIMEOUT;
+
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -3283,6 +3371,17 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64));
}
+static void queue_timeout_work(struct work_struct *work)
+{
+ struct panthor_queue *queue = container_of(work, struct panthor_queue,
+ timeout.work.work);
+ bool progress;
+
+ progress = queue_check_job_completion(queue);
+ if (!progress)
+ drm_sched_fault(&queue->scheduler);
+}
+
static struct panthor_queue *
group_create_queue(struct panthor_group *group,
const struct drm_panthor_queue_create *args)
@@ -3298,7 +3397,7 @@ group_create_queue(struct panthor_group *group,
* their profiling status.
*/
.credit_limit = args->ringbuf_size / sizeof(u64),
- .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
+ .timeout = MAX_SCHEDULE_TIMEOUT,
.timeout_wq = group->ptdev->reset.wq,
.name = "panthor-queue",
.dev = group->ptdev->base.dev,
@@ -3321,6 +3420,8 @@ group_create_queue(struct panthor_group *group,
if (!queue)
return ERR_PTR(-ENOMEM);
+ queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
+ INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work);
queue->fence_ctx.id = dma_fence_context_alloc(1);
spin_lock_init(&queue->fence_ctx.lock);
INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
base-commit: b72f66f22c0e39ae6684c43fead774c13db24e73
--
2.43.0
On 10.03.2025 13:30, Ashley Smith wrote:
> [...]
Reviewed-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> [...]
> + /** @timeout: Queue timeout related fields. */
> + struct {
> + /** @timeout.work: Work executed when a queue timeout occurs. */
> + struct delayed_work work;
Nit: Maybe for the sake of sticking to the convention of naming already
existing delayed_work structs in a way that reflects their goal, call
this one 'timeout_work'.
> [...]
> + if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
> + /* If we have no job left, we cancel the timer, and reset remaining
Nit: "If we have no jobs [...]"
> [...]
Cheers,
Adrian Larumbe
On Wed, 19 Mar 2025 19:51:47 +0000
Adrian Larumbe <adrian.larumbe@collabora.com> wrote:
> On 10.03.2025 13:30, Ashley Smith wrote:
> > [...]
> > + /** @timeout: Queue timeout related fields. */
> > + struct {
> > + /** @timeout.work: Work executed when a queue timeout occurs. */
> > + struct delayed_work work;
>
> Nit: Maybe for the sake of sticking to the convention of naming already
> existing delayed_work structs in a way that reflects their goal, call
> this one 'timeout_work'.
It's already under the timeout struct, and naming it timeout_work would
be redundant IMHO (timeout.timeout_work vs timeout.work).
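For illustration, a minimal sketch of the two layouts being discussed
(the example struct names and the 'timeout_work' field are hypothetical,
only there to show how the accesses would read; the patch itself uses
timeout.work):

	#include <linux/workqueue.h>

	/* Layout as in the patch: call sites read queue->timeout.work. */
	struct example_queue_as_posted {
		struct {
			/* Work executed when a queue timeout occurs. */
			struct delayed_work work;
			/* Time remaining before a queue timeout. */
			unsigned long remaining;
		} timeout;
	};

	/* Hypothetical alternative from the nit: call sites would read
	 * queue->timeout.timeout_work, repeating "timeout" twice.
	 */
	struct example_queue_alternative {
		struct {
			struct delayed_work timeout_work;
			unsigned long remaining;
		} timeout;
	};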
On Mon, Mar 10, 2025 at 01:30:09PM +0000, Ashley Smith wrote:
> [...]
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Best regards,
Liviu
--
====================
| I would like to |
| fix the world, |
| but they're not |
| giving me the |
\ source code! /
---------------
¯\_(ツ)_/¯
On 10/03/2025 13:30, Ashley Smith wrote:
> [...]
Reviewed-by: Steven Price <steven.price@arm.com>
Steve