drivers/gpu/drm/panthor/panthor_drv.c | 2 +- drivers/gpu/drm/panthor/panthor_sched.c | 32 +++++++++++++++++-------- drivers/gpu/drm/panthor/panthor_sched.h | 3 ++- 3 files changed, 25 insertions(+), 12 deletions(-)
Userspace relies on the ring field of gpu_scheduler tracepoints to
identify a drm_gpu_scheduler. The value of the ring field is taken from
sched->name.
Because we typically have multiple schedulers running in parallel in
each process, assign unique names to schedulers such that userspace can
distinguish them.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
v2:
- include drm_client_id in the name to be truly unique
- remove unnecessary NULL in drm_sched_init_args initialization
- reformat to column width 100
---
drivers/gpu/drm/panthor/panthor_drv.c | 2 +-
drivers/gpu/drm/panthor/panthor_sched.c | 32 +++++++++++++++++--------
drivers/gpu/drm/panthor/panthor_sched.h | 3 ++-
3 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 9256806eb6623..be962b1387f03 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1105,7 +1105,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
if (ret)
goto out;
- ret = panthor_group_create(pfile, args, queue_args);
+ ret = panthor_group_create(pfile, args, queue_args, file->client_id);
if (ret < 0)
goto out;
args->group_handle = ret;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index ba5dc3e443d9c..62f17476e5852 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -360,6 +360,9 @@ struct panthor_queue {
/** @entity: DRM scheduling entity used for this queue. */
struct drm_sched_entity entity;
+ /** @name: DRM scheduler name for this queue. */
+ char name[32];
+
/**
* @remaining_time: Time remaining before the job timeout expires.
*
@@ -3308,9 +3311,10 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
static struct panthor_queue *
group_create_queue(struct panthor_group *group,
- const struct drm_panthor_queue_create *args)
+ const struct drm_panthor_queue_create *args,
+ u64 drm_client_id, u32 gid, u32 qid)
{
- const struct drm_sched_init_args sched_args = {
+ struct drm_sched_init_args sched_args = {
.ops = &panthor_queue_sched_ops,
.submit_wq = group->ptdev->scheduler->wq,
.num_rqs = 1,
@@ -3323,7 +3327,6 @@ group_create_queue(struct panthor_group *group,
.credit_limit = args->ringbuf_size / sizeof(u64),
.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
.timeout_wq = group->ptdev->reset.wq,
- .name = "panthor-queue",
.dev = group->ptdev->base.dev,
};
struct drm_gpu_scheduler *drm_sched;
@@ -3398,6 +3401,11 @@ group_create_queue(struct panthor_group *group,
if (ret)
goto err_free_queue;
+ /* assign a unique name */
+ snprintf(queue->name, sizeof(queue->name), "panthor-queue-%llu-%u-%u",
+ drm_client_id, gid, qid);
+ sched_args.name = queue->name;
+
ret = drm_sched_init(&queue->scheduler, &sched_args);
if (ret)
goto err_free_queue;
@@ -3447,7 +3455,8 @@ static void add_group_kbo_sizes(struct panthor_device *ptdev,
int panthor_group_create(struct panthor_file *pfile,
const struct drm_panthor_group_create *group_args,
- const struct drm_panthor_queue_create *queue_args)
+ const struct drm_panthor_queue_create *queue_args,
+ u64 drm_client_id)
{
struct panthor_device *ptdev = pfile->ptdev;
struct panthor_group_pool *gpool = pfile->groups;
@@ -3540,12 +3549,16 @@ int panthor_group_create(struct panthor_file *pfile,
memset(group->syncobjs->kmap, 0,
group_args->queues.count * sizeof(struct panthor_syncobj_64b));
+ ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
+ if (ret)
+ goto err_put_group;
+
for (i = 0; i < group_args->queues.count; i++) {
- group->queues[i] = group_create_queue(group, &queue_args[i]);
+ group->queues[i] = group_create_queue(group, &queue_args[i], drm_client_id, gid, i);
if (IS_ERR(group->queues[i])) {
ret = PTR_ERR(group->queues[i]);
group->queues[i] = NULL;
- goto err_put_group;
+ goto err_erase_gid;
}
group->queue_count++;
@@ -3553,10 +3566,6 @@ int panthor_group_create(struct panthor_file *pfile,
group->idle_queues = GENMASK(group->queue_count - 1, 0);
- ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
- if (ret)
- goto err_put_group;
-
mutex_lock(&sched->reset.lock);
if (atomic_read(&sched->reset.in_progress)) {
panthor_group_stop(group);
@@ -3575,6 +3584,9 @@ int panthor_group_create(struct panthor_file *pfile,
return gid;
+err_erase_gid:
+ xa_erase(&gpool->xa, gid);
+
err_put_group:
group_put(group);
return ret;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 742b0b4ff3a3c..f4a475aa34c0a 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -21,7 +21,8 @@ struct panthor_job;
int panthor_group_create(struct panthor_file *pfile,
const struct drm_panthor_group_create *group_args,
- const struct drm_panthor_queue_create *queue_args);
+ const struct drm_panthor_queue_create *queue_args,
+ u64 drm_client_id);
int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
int panthor_group_get_state(struct panthor_file *pfile,
struct drm_panthor_group_get_state *get_state);
--
2.51.0.318.gd7df087d1a-goog
On Fri, 29 Aug 2025 16:02:50 -0700 Chia-I Wu <olvaffe@gmail.com> wrote:

> Userspace relies on the ring field of gpu_scheduler tracepoints to
> identify a drm_gpu_scheduler. The value of the ring field is taken from
> sched->name.
>
> Because we typically have multiple schedulers running in parallel in
> each process, assign unique names to schedulers such that userspace can
> distinguish them.
>
> Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
>
> ---
>
> v2:
> - include drm_client_id in the name to be truly unique
> - remove unnecessary NULL in drm_sched_init_args initialization
> - reformat to column width 100
> ---
> drivers/gpu/drm/panthor/panthor_drv.c | 2 +-
> drivers/gpu/drm/panthor/panthor_sched.c | 32 +++++++++++++++++--------
> drivers/gpu/drm/panthor/panthor_sched.h | 3 ++-
> 3 files changed, 25 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index 9256806eb6623..be962b1387f03 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -1105,7 +1105,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
> if (ret)
> goto out;
>
> - ret = panthor_group_create(pfile, args, queue_args);
> + ret = panthor_group_create(pfile, args, queue_args, file->client_id);

Hm, maybe it's time we start passing drm_file instead of panthor_file
to limit the number of arguments, but I guess this can be done in a
follow-up patch.

> if (ret < 0)
> goto out;
> args->group_handle = ret;
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index ba5dc3e443d9c..62f17476e5852 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -360,6 +360,9 @@ struct panthor_queue {
> /** @entity: DRM scheduling entity used for this queue. */
> struct drm_sched_entity entity;
>
> + /** @name: DRM scheduler name for this queue. */
> + char name[32];

The base string ("panthor-queue---") is already 16 characters. You then
have a group ID that's below 128 IIRC, and a queue ID that's no more
than 15, so that's 5 more chars. This leaves you 10 chars for the
client ID (theoretically a 64-bit integer). I know the logic is sane
because you truncate the string, but I'm wondering if we shouldn't make
this string bigger to cover the theoretical max client_id, or simply
dynamically allocate it (kasprintf()), so we don't have to think about
it if we end up adding more stuff to the string.

> +
> /**
> * @remaining_time: Time remaining before the job timeout expires.
> *
> @@ -3308,9 +3311,10 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
>
> static struct panthor_queue *
> group_create_queue(struct panthor_group *group,
> - const struct drm_panthor_queue_create *args)
> + const struct drm_panthor_queue_create *args,
> + u64 drm_client_id, u32 gid, u32 qid)
> {
> - const struct drm_sched_init_args sched_args = {
> + struct drm_sched_init_args sched_args = {
> .ops = &panthor_queue_sched_ops,
> .submit_wq = group->ptdev->scheduler->wq,
> .num_rqs = 1,
> @@ -3323,7 +3327,6 @@ group_create_queue(struct panthor_group *group,
> .credit_limit = args->ringbuf_size / sizeof(u64),
> .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
> .timeout_wq = group->ptdev->reset.wq,
> - .name = "panthor-queue",
> .dev = group->ptdev->base.dev,
> };
> struct drm_gpu_scheduler *drm_sched;
> @@ -3398,6 +3401,11 @@ group_create_queue(struct panthor_group *group,
> if (ret)
> goto err_free_queue;
>
> + /* assign a unique name */
> + snprintf(queue->name, sizeof(queue->name), "panthor-queue-%llu-%u-%u",
> + drm_client_id, gid, qid);
> + sched_args.name = queue->name;
> +
> ret = drm_sched_init(&queue->scheduler, &sched_args);
> if (ret)
> goto err_free_queue;
> @@ -3447,7 +3455,8 @@ static void add_group_kbo_sizes(struct panthor_device *ptdev,
>
> int panthor_group_create(struct panthor_file *pfile,
> const struct drm_panthor_group_create *group_args,
> - const struct drm_panthor_queue_create *queue_args)
> + const struct drm_panthor_queue_create *queue_args,
> + u64 drm_client_id)
> {
> struct panthor_device *ptdev = pfile->ptdev;
> struct panthor_group_pool *gpool = pfile->groups;
> @@ -3540,12 +3549,16 @@ int panthor_group_create(struct panthor_file *pfile,
> memset(group->syncobjs->kmap, 0,
> group_args->queues.count * sizeof(struct panthor_syncobj_64b));
>
> + ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
> + if (ret)
> + goto err_put_group;
> +
> for (i = 0; i < group_args->queues.count; i++) {
> - group->queues[i] = group_create_queue(group, &queue_args[i]);
> + group->queues[i] = group_create_queue(group, &queue_args[i], drm_client_id, gid, i);
> if (IS_ERR(group->queues[i])) {
> ret = PTR_ERR(group->queues[i]);
> group->queues[i] = NULL;
> - goto err_put_group;
> + goto err_erase_gid;
> }
>
> group->queue_count++;
> @@ -3553,10 +3566,6 @@ int panthor_group_create(struct panthor_file *pfile,
>
> group->idle_queues = GENMASK(group->queue_count - 1, 0);
>
> - ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
> - if (ret)
> - goto err_put_group;
> -
> mutex_lock(&sched->reset.lock);
> if (atomic_read(&sched->reset.in_progress)) {
> panthor_group_stop(group);
> @@ -3575,6 +3584,9 @@ int panthor_group_create(struct panthor_file *pfile,
>
> return gid;
>
> +err_erase_gid:
> + xa_erase(&gpool->xa, gid);
> +
> err_put_group:
> group_put(group);
> return ret;
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
> index 742b0b4ff3a3c..f4a475aa34c0a 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.h
> +++ b/drivers/gpu/drm/panthor/panthor_sched.h
> @@ -21,7 +21,8 @@ struct panthor_job;
>
> int panthor_group_create(struct panthor_file *pfile,
> const struct drm_panthor_group_create *group_args,
> - const struct drm_panthor_queue_create *queue_args);
> + const struct drm_panthor_queue_create *queue_args,
> + u64 drm_client_id);
> int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
> int panthor_group_get_state(struct panthor_file *pfile,
> struct drm_panthor_group_get_state *get_state);
On Mon, Sep 1, 2025 at 12:27 AM Boris Brezillon <boris.brezillon@collabora.com> wrote:
<snipped>
> > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> > index ba5dc3e443d9c..62f17476e5852 100644
> > --- a/drivers/gpu/drm/panthor/panthor_sched.c
> > +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> > @@ -360,6 +360,9 @@ struct panthor_queue {
> > /** @entity: DRM scheduling entity used for this queue. */
> > struct drm_sched_entity entity;
> >
> > + /** @name: DRM scheduler name for this queue. */
> > + char name[32];
>
> The base string ("panthor-queue---") is already 16 characters. You then
> have a group ID that's below 128 IIRC, and a queue ID that's no more
> than 15, so that's 5 more chars. This leaves you 10 chars for the
> client ID (theoretically a 64-bit integer). I know the logic is sane
> because you truncate the string, but I'm wondering if we shouldn't make
> this string bigger to cover the theoretical max client_id, or simply
> dynamically allocate it (kasprintf()), so we don't have to think about
> it if we end up adding more stuff to the string.

It seems we don't validate the queue count. I'm sending
https://lore.kernel.org/lkml/20250902192001.409738-1-olvaffe@gmail.com/
to address that.

On a user device that opens the render node once per second, 10 chars
are good for about 317 years. They would run out much sooner on a test
device, but the uptime of such a device is also much shorter (hopefully).

But kasprintf() should be harmless here. I can certainly switch to it.
© 2016 - 2025 Red Hat, Inc.