This adds a new debugfs file named "sched_groups" allowing a user to
query information about all userspace clients' scheduler groups.

As we use drm_device.filelist to achieve this, we also expose the
client_id along with the task information to differentiate one client
from another inside the same task.
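
For illustration, reading the new file is expected to produce output
roughly shaped like this (the debugfs path, client name and values
below are made up; only the layout follows the seq_printf() format
strings added by this patch):

  $ cat /sys/kernel/debug/dri/0/sched_groups
  client_id        3 pid     1742 command glmark2-es2:
  group 0: priority MEDIUM state ACTIVE csg_id 0 csg_priority 1 compute_core_mask 0x000000000000000f fragment_core_mask 0x000000000000000f tiler_core_mask 0x0000000000000001 max_compute_cores 4 max_fragment_cores 4 max_tiler_cores 1
  queue 0: priority 1 doorbell_id 0
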
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
---
drivers/gpu/drm/panthor/panthor_drv.c | 1 +
drivers/gpu/drm/panthor/panthor_sched.c | 127 ++++++++++++++++++++++++
drivers/gpu/drm/panthor/panthor_sched.h | 4 +
3 files changed, 132 insertions(+)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 0d825d63d712..aa390597d355 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1450,6 +1450,7 @@ static const struct file_operations panthor_drm_driver_fops = {
 static void panthor_debugfs_init(struct drm_minor *minor)
 {
 	panthor_mmu_debugfs_init(minor);
+	panthor_sched_debugfs_init(minor);
 }
 #endif
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index f15abeef4ece..0c1ddbfbe164 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 or MIT
 /* Copyright 2023 Collabora ltd. */
 
+#include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_exec.h>
 #include <drm/drm_gem_shmem_helper.h>
@@ -3581,3 +3582,129 @@ int panthor_sched_init(struct panthor_device *ptdev)
 	ptdev->scheduler = sched;
 	return 0;
 }
+
+#ifdef CONFIG_DEBUG_FS
+static const char *panthor_csg_priority_names[PANTHOR_CSG_PRIORITY_COUNT] = {
+ "LOW",
+ "MEDIUM",
+ "HIGH",
+ "REALTIME"
+};
+
+static const char *panthor_group_state_names[PANTHOR_CS_GROUP_UNKNOWN_STATE] = {
+ "CREATED",
+ "ACTIVE",
+ "SUSPENDED",
+ "TERMINATED"
+};
+
+static void show_panthor_queue(const struct panthor_queue *queue,
+ u32 queue_index,
+ struct seq_file *m)
+{
+ seq_printf(m, "queue %u:", queue_index);
+ seq_printf(m, " priority %u", queue->priority);
+ seq_printf(m, " doorbell_id %d", queue->doorbell_id);
+ seq_puts(m, "\n");
+}
+
+static void show_panthor_group(struct panthor_group *group,
+ u32 group_handle,
+ struct seq_file *m)
+{
+ u32 i;
+
+ group_get(group);
+
+ seq_printf(m, "group %u:", group_handle);
+ seq_printf(m, " priority %s", group->priority < PANTHOR_CSG_PRIORITY_COUNT ?
+ panthor_csg_priority_names[group->priority] : "UNKNOWN");
+ seq_printf(m, " state %s", group->state < PANTHOR_CS_GROUP_UNKNOWN_STATE ?
+ panthor_group_state_names[group->state] : "UNKNOWN");
+ seq_printf(m, " csg_id %d", group->csg_id);
+ seq_printf(m, " csg_priority %d", group->csg_priority);
+ seq_printf(m, " compute_core_mask 0x%016llx", group->compute_core_mask);
+ seq_printf(m, " fragment_core_mask 0x%016llx", group->fragment_core_mask);
+ seq_printf(m, " tiler_core_mask 0x%016llx", group->tiler_core_mask);
+ seq_printf(m, " max_compute_cores %d", group->max_compute_cores);
+ seq_printf(m, " max_fragment_cores %d", group->max_fragment_cores);
+ seq_printf(m, " max_tiler_cores %d", group->max_tiler_cores);
+ seq_puts(m, "\n");
+
+ for (i = 0; i < group->queue_count; i++)
+ show_panthor_queue(group->queues[i], i, m);
+
+ group_put(group);
+}
+
+static int show_file_group_pool(const struct panthor_file *pfile, struct seq_file *m)
+{
+ struct panthor_group_pool *gpool = pfile->groups;
+ struct panthor_group *group;
+ unsigned long i;
+
+ if (IS_ERR_OR_NULL(gpool))
+ return 0;
+
+ xa_for_each(&gpool->xa, i, group)
+ show_panthor_group(group, i, m);
+
+ return 0;
+}
+
+static int show_each_file(struct seq_file *m, void *arg)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *ddev = node->minor->dev;
+ int (*show)(const struct panthor_file *, struct seq_file *) = node->info_ent->data;
+ struct drm_file *file;
+ int ret;
+
+ ret = mutex_lock_interruptible(&ddev->filelist_mutex);
+ if (ret)
+ return ret;
+
+ list_for_each_entry(file, &ddev->filelist, lhead) {
+ struct task_struct *task;
+ struct panthor_file *pfile = file->driver_priv;
+ struct pid *pid;
+
+ /*
+ * Although we have a valid reference on file->pid, that does
+ * not guarantee that the task_struct who called get_pid() is
+ * still alive (e.g. get_pid(current) => fork() => exit()).
+ * Therefore, we need to protect this ->comm access using RCU.
+ */
+ rcu_read_lock();
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "client_id %8llu pid %8d command %s:\n", file->client_id,
+ pid_nr(pid), task ? task->comm : "<unknown>");
+ rcu_read_unlock();
+
+ ret = show(pfile, m);
+ if (ret < 0)
+ break;
+
+ seq_puts(m, "\n");
+ }
+
+ mutex_unlock(&ddev->filelist_mutex);
+ return ret;
+}
+
+static struct drm_info_list panthor_sched_debugfs_list[] = {
+ { "sched_groups", show_each_file, 0, show_file_group_pool },
+};
+
+/**
+ * panthor_sched_debugfs_init() - Initialize scheduler debugfs entries
+ * @minor: Minor.
+ */
+void panthor_sched_debugfs_init(struct drm_minor *minor)
+{
+ drm_debugfs_create_files(panthor_sched_debugfs_list,
+ ARRAY_SIZE(panthor_sched_debugfs_list),
+ minor->debugfs_root, minor);
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 3a30d2328b30..577239aa66bf 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -47,4 +47,8 @@ void panthor_sched_resume(struct panthor_device *ptdev);
 void panthor_sched_report_mmu_fault(struct panthor_device *ptdev);
 void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);
 
+#ifdef CONFIG_DEBUG_FS
+void panthor_sched_debugfs_init(struct drm_minor *minor);
+#endif
+
 #endif
--
2.46.0
On 18/09/2024 09:50, Mary Guillemard wrote:
> This adds a new debugfs file named "sched_groups" allowing a user to
> query information about all userspace clients' scheduler groups.
>
> As we use drm_device.filelist to achieve this, we also expose the
> client_id along with the task information to differentiate one client
> from another inside the same task.
>
> Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>

Can you explain a bit more why you want this? I'm not against the idea
but I'm wary of adding 'debugging features' because often they fall into
one of two camps:

 * Genuinely useful features that shouldn't be in debugfs because people
   want a stable API.

 * Niche features that take maintenance effort despite not being
   particularly useful.

[...]

> +static void show_panthor_group(struct panthor_group *group,
> +			       u32 group_handle,
> +			       struct seq_file *m)
> +{
> +	u32 i;
> +
> +	group_get(group);

This looks wrong - this function relies on group not becoming invalid
before the call, so we'd better be holding a sufficient lock/reference
on the group before show_panthor_group() is called, otherwise we've got
a race before the group_get() call.

Although I can see this bug is present elsewhere in panthor_sched.c now
I look...

[...]

> +	xa_for_each(&gpool->xa, i, group)
> +		show_panthor_group(group, i, m);

Here I see no protection against a racing call to PANTHOR_GROUP_DESTROY,
so group could become invalid between xa_for_each returning it and the
call to show_panthor_group().

Steve
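
To illustrate the race described above: one possible shape for the
iteration would be to take a reference on each group while holding the
pool's XArray lock, so that a concurrent PANTHOR_GROUP_DESTROY (assuming
it removes the entry with xa_erase(), i.e. under the same lock, before
dropping its own reference) cannot free the group in the middle of the
dump. The sketch below is untested and only reuses the helper names from
the patch (gpool, group_get()/group_put(), show_panthor_group()); the
locking assumption is not confirmed anywhere in this thread.

static int show_file_group_pool(const struct panthor_file *pfile, struct seq_file *m)
{
	struct panthor_group_pool *gpool = pfile->groups;
	struct panthor_group *group;
	unsigned long i;

	if (IS_ERR_OR_NULL(gpool))
		return 0;

	xa_lock(&gpool->xa);
	xa_for_each(&gpool->xa, i, group) {
		/*
		 * Pin the group while the XArray lock keeps a concurrent
		 * xa_erase() in the destroy path from running, then drop
		 * the lock so the seq_file printing happens outside the
		 * spinlock.
		 */
		group_get(group);
		xa_unlock(&gpool->xa);

		show_panthor_group(group, i, m);

		group_put(group);
		xa_lock(&gpool->xa);
	}
	xa_unlock(&gpool->xa);

	return 0;
}

Since xa_for_each() looks up the next entry from the current index on
each iteration, dropping and re-taking the lock inside the loop should
be safe, but whether this fits the rest of the driver's locking rules
would still need checking.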