[PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources

Adrián Larumbe posted 4 patches 4 weeks, 1 day ago
There is a newer version of this series
[PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Adrián Larumbe 4 weeks, 1 day ago
From: Boris Brezillon <boris.brezillon@collabora.com>

A JM context describes GPU HW resource allocation for the jobs bound to
it. At the time of writing this, it only holds the JS and queue
scheduler priorities.

When a context is created, all the scheduler entities created for job
slots will have the same priority.

Until context creation and destruction and attaching a context ID to
a job submission are exposed to UM, all jobs shall be bound to the
default Panfrost file context, which has medium priority.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_device.h |   4 +-
 drivers/gpu/drm/panfrost/panfrost_drv.c    |  19 +-
 drivers/gpu/drm/panfrost/panfrost_job.c    | 203 +++++++++++++++++----
 drivers/gpu/drm/panfrost/panfrost_job.h    |  25 ++-
 4 files changed, 210 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 077525a3ad68..5b164871eb95 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -15,6 +15,7 @@
 #include <drm/gpu_scheduler.h>
 
 #include "panfrost_devfreq.h"
+#include "panfrost_job.h"
 
 struct panfrost_device;
 struct panfrost_mmu;
@@ -22,7 +23,6 @@ struct panfrost_job_slot;
 struct panfrost_job;
 struct panfrost_perfcnt;
 
-#define NUM_JOB_SLOTS 3
 #define MAX_PM_DOMAINS 5
 
 enum panfrost_drv_comp_bits {
@@ -206,7 +206,7 @@ struct panfrost_engine_usage {
 struct panfrost_file_priv {
 	struct panfrost_device *pfdev;
 
-	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
+	struct xarray jm_ctxs;
 
 	struct panfrost_mmu *mmu;
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 1ea6c509a5d5..398c067457d9 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -279,6 +279,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 	struct panfrost_file_priv *file_priv = file->driver_priv;
 	struct drm_panfrost_submit *args = data;
 	struct drm_syncobj *sync_out = NULL;
+	struct panfrost_jm_ctx *jm_ctx;
 	struct panfrost_job *job;
 	int ret = 0, slot;
 
@@ -294,10 +295,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 			return -ENODEV;
 	}
 
+	/* TODO: Use the default JM context until ctx management IOCTLs are exposed */
+	jm_ctx = panfrost_jm_ctx_from_handle(file, 0);
+	if (!jm_ctx) {
+		ret = -EINVAL;
+		goto out_put_syncout;
+	}
+
 	job = kzalloc(sizeof(*job), GFP_KERNEL);
 	if (!job) {
 		ret = -ENOMEM;
-		goto out_put_syncout;
+		goto out_put_jm_ctx;
 	}
 
 	kref_init(&job->refcount);
@@ -307,12 +315,13 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 	job->requirements = args->requirements;
 	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
 	job->mmu = file_priv->mmu;
+	job->ctx = panfrost_jm_ctx_get(jm_ctx);
 	job->engine_usage = &file_priv->engine_usage;
 
 	slot = panfrost_job_get_slot(job);
 
 	ret = drm_sched_job_init(&job->base,
-				 &file_priv->sched_entity[slot],
+				 &jm_ctx->slots[slot].sched_entity,
 				 1, NULL, file->client_id);
 	if (ret)
 		goto out_put_job;
@@ -338,6 +347,8 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 		drm_sched_job_cleanup(&job->base);
 out_put_job:
 	panfrost_job_put(job);
+out_put_jm_ctx:
+	panfrost_jm_ctx_put(jm_ctx);
 out_put_syncout:
 	if (sync_out)
 		drm_syncobj_put(sync_out);
@@ -564,7 +575,7 @@ panfrost_open(struct drm_device *dev, struct drm_file *file)
 		goto err_free;
 	}
 
-	ret = panfrost_job_open(panfrost_priv);
+	ret = panfrost_job_open(file);
 	if (ret)
 		goto err_job;
 
@@ -583,7 +594,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
 	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
 
 	panfrost_perfcnt_close(file);
-	panfrost_job_close(panfrost_priv);
+	panfrost_job_close(file);
 
 	panfrost_mmu_ctx_put(panfrost_priv->mmu);
 	kfree(panfrost_priv);
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 82acabb21b27..b6853add307c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -8,6 +8,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/dma-resv.h>
+#include <drm/drm_auth.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/panfrost_drm.h>
 
@@ -22,6 +23,7 @@
 #include "panfrost_mmu.h"
 #include "panfrost_dump.h"
 
+#define MAX_JM_CTX_PER_FILE 128
 #define JOB_TIMEOUT_MS 500
 
 #define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
@@ -222,7 +224,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 
 	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
 	 * start */
-	cfg |= JS_CONFIG_THREAD_PRI(8) |
+	cfg |= job->ctx->config |
 		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
 		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
 		panfrost_get_job_chain_flag(job);
@@ -359,6 +361,7 @@ static void panfrost_job_cleanup(struct kref *ref)
 		kvfree(job->bos);
 	}
 
+	panfrost_jm_ctx_put(job->ctx);
 	kfree(job);
 }
 
@@ -917,39 +920,184 @@ void panfrost_job_fini(struct panfrost_device *pfdev)
 	destroy_workqueue(pfdev->reset.wq);
 }
 
-int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
+int panfrost_job_open(struct drm_file *file)
+{
+	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
+	int ret;
+
+	struct drm_panfrost_jm_ctx_create default_jm_ctx = {
+		.priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
+	};
+
+	xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);
+
+	ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
+	if (ret)
+		return ret;
+
+	/* We expect the default context to be assigned handle 0. */
+	if (WARN_ON(default_jm_ctx.handle))
+		return -EINVAL;
+
+	return 0;
+}
+
+void panfrost_job_close(struct drm_file *file)
+{
+	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
+	struct panfrost_jm_ctx *jm_ctx;
+	unsigned long i;
+
+	xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
+		panfrost_jm_ctx_destroy(file, i);
+
+	xa_destroy(&panfrost_priv->jm_ctxs);
+}
+
+int panfrost_job_is_idle(struct panfrost_device *pfdev)
 {
-	struct panfrost_device *pfdev = panfrost_priv->pfdev;
 	struct panfrost_job_slot *js = pfdev->js;
-	struct drm_gpu_scheduler *sched;
-	int ret, i;
+	int i;
 
 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
-		sched = &js->queue[i].sched;
-		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
-					    DRM_SCHED_PRIORITY_NORMAL, &sched,
-					    1, NULL);
-		if (WARN_ON(ret))
-			return ret;
+		/* If there are any jobs in the HW queue, we're not idle */
+		if (atomic_read(&js->queue[i].sched.credit_count))
+			return false;
 	}
+
+	return true;
+}
+
+static void panfrost_jm_ctx_release(struct kref *kref)
+{
+	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);
+
+	kfree(jm_ctx);
+}
+
+void
+panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
+{
+	if (jm_ctx)
+		kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
+}
+
+struct panfrost_jm_ctx *
+panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
+{
+	if (jm_ctx)
+		kref_get(&jm_ctx->refcnt);
+
+	return jm_ctx;
+}
+
+struct panfrost_jm_ctx *
+panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
+{
+	struct panfrost_file_priv *priv = file->driver_priv;
+	struct panfrost_jm_ctx *jm_ctx;
+
+	xa_lock(&priv->jm_ctxs);
+	jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
+	xa_unlock(&priv->jm_ctxs);
+
+	return jm_ctx;
+}
+
+static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
+					 enum drm_panfrost_jm_ctx_priority in,
+					 enum drm_sched_priority *out)
+{
+	switch (in) {
+	case PANFROST_JM_CTX_PRIORITY_LOW:
+		*out = DRM_SCHED_PRIORITY_LOW;
+		return 0;
+	case PANFROST_JM_CTX_PRIORITY_MEDIUM:
+		*out = DRM_SCHED_PRIORITY_NORMAL;
+		return 0;
+	case PANFROST_JM_CTX_PRIORITY_HIGH:
+		/* Higher priorities require CAP_SYS_NICE or DRM_MASTER */
+		if (!capable(CAP_SYS_NICE) && !drm_is_current_master(file))
+			return -EACCES;
+
+		*out = DRM_SCHED_PRIORITY_HIGH;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+int panfrost_jm_ctx_create(struct drm_file *file,
+			   struct drm_panfrost_jm_ctx_create *args)
+{
+	struct panfrost_file_priv *priv = file->driver_priv;
+	struct panfrost_device *pfdev = priv->pfdev;
+	enum drm_sched_priority sched_prio;
+	struct panfrost_jm_ctx *jm_ctx;
+
+	int ret;
+
+	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
+	if (!jm_ctx)
+		return -ENOMEM;
+
+	kref_init(&jm_ctx->refcnt);
+
+	/* Same priority for all JS within a single context */
+	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
+
+	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
+	if (ret)
+		goto err_put_jm_ctx;
+
+	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
+		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
+		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
+
+		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
+					    &sched, 1, NULL);
+		if (ret)
+			goto err_put_jm_ctx;
+
+		js_ctx->enabled = true;
+	}
+
+	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
+		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
+	if (ret)
+		goto err_put_jm_ctx;
+
 	return 0;
+
+err_put_jm_ctx:
+	panfrost_jm_ctx_put(jm_ctx);
+	return ret;
 }
 
-void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
+int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
 {
-	struct panfrost_device *pfdev = panfrost_priv->pfdev;
-	int i;
+	struct panfrost_file_priv *priv = file->driver_priv;
+	struct panfrost_device *pfdev = priv->pfdev;
+	struct panfrost_jm_ctx *jm_ctx;
 
-	for (i = 0; i < NUM_JOB_SLOTS; i++)
-		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
+	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
+	if (!jm_ctx)
+		return -EINVAL;
+
+	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
+		if (jm_ctx->slots[i].enabled)
+			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
+	}
 
 	/* Kill in-flight jobs */
 	spin_lock(&pfdev->js->job_lock);
-	for (i = 0; i < NUM_JOB_SLOTS; i++) {
-		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
-		int j;
+	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
+		struct drm_sched_entity *entity = &jm_ctx->slots[i].sched_entity;
+
+		if (!jm_ctx->slots[i].enabled)
+			continue;
 
-		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
+		for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
 			struct panfrost_job *job = pfdev->jobs[i][j];
 			u32 cmd;
 
@@ -980,18 +1128,7 @@ void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
 		}
 	}
 	spin_unlock(&pfdev->js->job_lock);
-}
 
-int panfrost_job_is_idle(struct panfrost_device *pfdev)
-{
-	struct panfrost_job_slot *js = pfdev->js;
-	int i;
-
-	for (i = 0; i < NUM_JOB_SLOTS; i++) {
-		/* If there are any jobs in the HW queue, we're not idle */
-		if (atomic_read(&js->queue[i].sched.credit_count))
-			return false;
-	}
-
-	return true;
+	panfrost_jm_ctx_put(jm_ctx);
+	return 0;
 }
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index ec581b97852b..f3e5010e6cf5 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -18,6 +18,7 @@ struct panfrost_job {
 
 	struct panfrost_device *pfdev;
 	struct panfrost_mmu *mmu;
+	struct panfrost_jm_ctx *ctx;
 
 	/* Fence to be signaled by IRQ handler when the job is complete. */
 	struct dma_fence *done_fence;
@@ -39,10 +40,30 @@ struct panfrost_job {
 	u64 start_cycles;
 };
 
+struct panfrost_js_ctx {
+	struct drm_sched_entity sched_entity;
+	bool enabled;
+};
+
+#define NUM_JOB_SLOTS 3
+
+struct panfrost_jm_ctx {
+	struct kref refcnt;
+	u32 config;
+	struct panfrost_js_ctx slots[NUM_JOB_SLOTS];
+};
+
+int panfrost_jm_ctx_create(struct drm_file *file,
+			   struct drm_panfrost_jm_ctx_create *args);
+int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle);
+void panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx);
+struct panfrost_jm_ctx *panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx);
+struct panfrost_jm_ctx *panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle);
+
 int panfrost_job_init(struct panfrost_device *pfdev);
 void panfrost_job_fini(struct panfrost_device *pfdev);
-int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
-void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
+int panfrost_job_open(struct drm_file *file);
+void panfrost_job_close(struct drm_file *file);
 int panfrost_job_get_slot(struct panfrost_job *job);
 int panfrost_job_push(struct panfrost_job *job);
 void panfrost_job_put(struct panfrost_job *job);
-- 
2.50.0

Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Steven Price 3 weeks, 1 day ago
On 04/09/2025 01:08, Adrián Larumbe wrote:
> From: Boris Brezillon <boris.brezillon@collabora.com>
> 
> A JM context describes GPU HW resource allocation for the jobs bound to
> it. At the time of writing this, it only holds the JS and queue
> scheduler priorities.
> 
> When a context is created, all the scheduler entities created for job
> slots will have the same priority.
> 
> Until context creation and destruction and attaching a context ID to
> a job submission are exposed to UM, all jobs shall be bound to the
> default Panfrost file context, which has medium priority.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_device.h |   4 +-
>  drivers/gpu/drm/panfrost/panfrost_drv.c    |  19 +-
>  drivers/gpu/drm/panfrost/panfrost_job.c    | 203 +++++++++++++++++----
>  drivers/gpu/drm/panfrost/panfrost_job.h    |  25 ++-
>  4 files changed, 210 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 077525a3ad68..5b164871eb95 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -15,6 +15,7 @@
>  #include <drm/gpu_scheduler.h>
>  
>  #include "panfrost_devfreq.h"
> +#include "panfrost_job.h"
>  
>  struct panfrost_device;
>  struct panfrost_mmu;
> @@ -22,7 +23,6 @@ struct panfrost_job_slot;
>  struct panfrost_job;
>  struct panfrost_perfcnt;
>  
> -#define NUM_JOB_SLOTS 3
>  #define MAX_PM_DOMAINS 5
>  
>  enum panfrost_drv_comp_bits {
> @@ -206,7 +206,7 @@ struct panfrost_engine_usage {
>  struct panfrost_file_priv {
>  	struct panfrost_device *pfdev;
>  
> -	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
> +	struct xarray jm_ctxs;
>  
>  	struct panfrost_mmu *mmu;
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 1ea6c509a5d5..398c067457d9 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -279,6 +279,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>  	struct panfrost_file_priv *file_priv = file->driver_priv;
>  	struct drm_panfrost_submit *args = data;
>  	struct drm_syncobj *sync_out = NULL;
> +	struct panfrost_jm_ctx *jm_ctx;
>  	struct panfrost_job *job;
>  	int ret = 0, slot;
>  
> @@ -294,10 +295,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>  			return -ENODEV;
>  	}
>  
> +	/* TODO: Use the default JM context until ctx management IOCTLs are exposed */
> +	jm_ctx = panfrost_jm_ctx_from_handle(file, 0);
> +	if (!jm_ctx) {
> +		ret = -EINVAL;
> +		goto out_put_syncout;
> +	}
> +
>  	job = kzalloc(sizeof(*job), GFP_KERNEL);
>  	if (!job) {
>  		ret = -ENOMEM;
> -		goto out_put_syncout;
> +		goto out_put_jm_ctx;
>  	}
>  
>  	kref_init(&job->refcount);
> @@ -307,12 +315,13 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>  	job->requirements = args->requirements;
>  	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
>  	job->mmu = file_priv->mmu;
> +	job->ctx = panfrost_jm_ctx_get(jm_ctx);
>  	job->engine_usage = &file_priv->engine_usage;
>  
>  	slot = panfrost_job_get_slot(job);
>  
>  	ret = drm_sched_job_init(&job->base,
> -				 &file_priv->sched_entity[slot],
> +				 &jm_ctx->slots[slot].sched_entity,
>  				 1, NULL, file->client_id);
>  	if (ret)
>  		goto out_put_job;
> @@ -338,6 +347,8 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>  		drm_sched_job_cleanup(&job->base);
>  out_put_job:
>  	panfrost_job_put(job);
> +out_put_jm_ctx:
> +	panfrost_jm_ctx_put(jm_ctx);
>  out_put_syncout:
>  	if (sync_out)
>  		drm_syncobj_put(sync_out);
> @@ -564,7 +575,7 @@ panfrost_open(struct drm_device *dev, struct drm_file *file)
>  		goto err_free;
>  	}
>  
> -	ret = panfrost_job_open(panfrost_priv);
> +	ret = panfrost_job_open(file);
>  	if (ret)
>  		goto err_job;
>  
> @@ -583,7 +594,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
>  	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
>  
>  	panfrost_perfcnt_close(file);
> -	panfrost_job_close(panfrost_priv);
> +	panfrost_job_close(file);
>  
>  	panfrost_mmu_ctx_put(panfrost_priv->mmu);
>  	kfree(panfrost_priv);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 82acabb21b27..b6853add307c 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -8,6 +8,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/dma-resv.h>
> +#include <drm/drm_auth.h>
>  #include <drm/gpu_scheduler.h>
>  #include <drm/panfrost_drm.h>
>  
> @@ -22,6 +23,7 @@
>  #include "panfrost_mmu.h"
>  #include "panfrost_dump.h"
>  
> +#define MAX_JM_CTX_PER_FILE 128

128 seems like a very large number, do we have a reason to set this so high?

>  #define JOB_TIMEOUT_MS 500
>  
>  #define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
> @@ -222,7 +224,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  
>  	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
>  	 * start */
> -	cfg |= JS_CONFIG_THREAD_PRI(8) |
> +	cfg |= job->ctx->config |

Note that the thread priority on Midgard/Bifrost is pretty pointless. I
don't believe kbase ever set this to anything other than 8. In theory it
should balance priority between different slots but "How the priority
values affect issue rates from different slots is implementation
dependent" (which I think is wording for 'not implemented'!).

My main concern is that since kbase never used it there's a possibility
for undiscovered hardware bugs.

>  		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
>  		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
>  		panfrost_get_job_chain_flag(job);
> @@ -359,6 +361,7 @@ static void panfrost_job_cleanup(struct kref *ref)
>  		kvfree(job->bos);
>  	}
>  
> +	panfrost_jm_ctx_put(job->ctx);
>  	kfree(job);
>  }
>  
> @@ -917,39 +920,184 @@ void panfrost_job_fini(struct panfrost_device *pfdev)
>  	destroy_workqueue(pfdev->reset.wq);
>  }
>  
> -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
> +int panfrost_job_open(struct drm_file *file)
> +{
> +	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> +	int ret;
> +
> +	struct drm_panfrost_jm_ctx_create default_jm_ctx = {
> +		.priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
> +	};
> +
> +	xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);
> +
> +	ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
> +	if (ret)
> +		return ret;
> +
> +	/* We expect the default context to be assigned handle 0. */
> +	if (WARN_ON(default_jm_ctx.handle))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +void panfrost_job_close(struct drm_file *file)
> +{
> +	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> +	struct panfrost_jm_ctx *jm_ctx;
> +	unsigned long i;
> +
> +	xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
> +		panfrost_jm_ctx_destroy(file, i);
> +
> +	xa_destroy(&panfrost_priv->jm_ctxs);
> +}
> +
> +int panfrost_job_is_idle(struct panfrost_device *pfdev)
>  {
> -	struct panfrost_device *pfdev = panfrost_priv->pfdev;
>  	struct panfrost_job_slot *js = pfdev->js;
> -	struct drm_gpu_scheduler *sched;
> -	int ret, i;
> +	int i;
>  
>  	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> -		sched = &js->queue[i].sched;
> -		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
> -					    DRM_SCHED_PRIORITY_NORMAL, &sched,
> -					    1, NULL);
> -		if (WARN_ON(ret))
> -			return ret;
> +		/* If there are any jobs in the HW queue, we're not idle */
> +		if (atomic_read(&js->queue[i].sched.credit_count))
> +			return false;
>  	}
> +
> +	return true;
> +}
> +
> +static void panfrost_jm_ctx_release(struct kref *kref)
> +{
> +	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);
> +
> +	kfree(jm_ctx);
> +}
> +
> +void
> +panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
> +{
> +	if (jm_ctx)
> +		kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
> +}
> +
> +struct panfrost_jm_ctx *
> +panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
> +{
> +	if (jm_ctx)
> +		kref_get(&jm_ctx->refcnt);
> +
> +	return jm_ctx;
> +}
> +
> +struct panfrost_jm_ctx *
> +panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
> +{
> +	struct panfrost_file_priv *priv = file->driver_priv;
> +	struct panfrost_jm_ctx *jm_ctx;
> +
> +	xa_lock(&priv->jm_ctxs);
> +	jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
> +	xa_unlock(&priv->jm_ctxs);
> +
> +	return jm_ctx;
> +}
> +
> +static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
> +					 enum drm_panfrost_jm_ctx_priority in,
> +					 enum drm_sched_priority *out)
> +{
> +	switch (in) {
> +	case PANFROST_JM_CTX_PRIORITY_LOW:
> +		*out = DRM_SCHED_PRIORITY_LOW;
> +		return 0;
> +	case PANFROST_JM_CTX_PRIORITY_MEDIUM:
> +		*out = DRM_SCHED_PRIORITY_NORMAL;
> +		return 0;
> +	case PANFROST_JM_CTX_PRIORITY_HIGH:
> +		/* Higher priorities require CAP_SYS_NICE or DRM_MASTER */
> +		if (!capable(CAP_SYS_NICE) && !drm_is_current_master(file))
> +			return -EACCES;
> +
> +		*out = DRM_SCHED_PRIORITY_HIGH;
> +		return 0;
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +int panfrost_jm_ctx_create(struct drm_file *file,
> +			   struct drm_panfrost_jm_ctx_create *args)
> +{
> +	struct panfrost_file_priv *priv = file->driver_priv;
> +	struct panfrost_device *pfdev = priv->pfdev;
> +	enum drm_sched_priority sched_prio;
> +	struct panfrost_jm_ctx *jm_ctx;
> +
> +	int ret;
> +
> +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> +	if (!jm_ctx)
> +		return -ENOMEM;
> +
> +	kref_init(&jm_ctx->refcnt);
> +
> +	/* Same priority for all JS within a single context */
> +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> +
> +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> +	if (ret)
> +		goto err_put_jm_ctx;
> +
> +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> +
> +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> +					    &sched, 1, NULL);
> +		if (ret)
> +			goto err_put_jm_ctx;
> +
> +		js_ctx->enabled = true;
> +	}
> +
> +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> +	if (ret)
> +		goto err_put_jm_ctx;

On error here we just jump down and call panfrost_jm_ctx_put() which
will free jm_ctx but won't destroy any of the drm_sched_entities. There
seems to be something a bit off with the lifetime management here.

Should panfrost_jm_ctx_release() be responsible for tearing down the
context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
reference?
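
Something along those lines, perhaps (just a rough sketch; the killing of
in-flight jobs is left aside):

static void panfrost_jm_ctx_release(struct kref *kref)
{
	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);

	/* Full teardown lives here, so every put() path (including the
	 * xa_alloc() error path) cleans up the entities.
	 */
	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
		if (jm_ctx->slots[i].enabled)
			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
	}

	kfree(jm_ctx);
}

int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
{
	struct panfrost_file_priv *priv = file->driver_priv;
	struct panfrost_jm_ctx *jm_ctx = xa_erase(&priv->jm_ctxs, handle);

	if (!jm_ctx)
		return -EINVAL;

	/* Nothing more than dropping the handle's reference. */
	panfrost_jm_ctx_put(jm_ctx);
	return 0;
}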

Steve

> +
>  	return 0;
> +
> +err_put_jm_ctx:
> +	panfrost_jm_ctx_put(jm_ctx);
> +	return ret;
>  }
>  
> -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
>  {
> -	struct panfrost_device *pfdev = panfrost_priv->pfdev;
> -	int i;
> +	struct panfrost_file_priv *priv = file->driver_priv;
> +	struct panfrost_device *pfdev = priv->pfdev;
> +	struct panfrost_jm_ctx *jm_ctx;
>  
> -	for (i = 0; i < NUM_JOB_SLOTS; i++)
> -		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
> +	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
> +	if (!jm_ctx)
> +		return -EINVAL;
> +
> +	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> +		if (jm_ctx->slots[i].enabled)
> +			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
> +	}
>  
>  	/* Kill in-flight jobs */
>  	spin_lock(&pfdev->js->job_lock);
> -	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> -		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
> -		int j;
> +	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> +		struct drm_sched_entity *entity = &jm_ctx->slots[i].sched_entity;
> +
> +		if (!jm_ctx->slots[i].enabled)
> +			continue;
>  
> -		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> +		for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
>  			struct panfrost_job *job = pfdev->jobs[i][j];
>  			u32 cmd;
>  
> @@ -980,18 +1128,7 @@ void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
>  		}
>  	}
>  	spin_unlock(&pfdev->js->job_lock);
> -}
>  
> -int panfrost_job_is_idle(struct panfrost_device *pfdev)
> -{
> -	struct panfrost_job_slot *js = pfdev->js;
> -	int i;
> -
> -	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> -		/* If there are any jobs in the HW queue, we're not idle */
> -		if (atomic_read(&js->queue[i].sched.credit_count))
> -			return false;
> -	}
> -
> -	return true;
> +	panfrost_jm_ctx_put(jm_ctx);
> +	return 0;
>  }
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index ec581b97852b..f3e5010e6cf5 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -18,6 +18,7 @@ struct panfrost_job {
>  
>  	struct panfrost_device *pfdev;
>  	struct panfrost_mmu *mmu;
> +	struct panfrost_jm_ctx *ctx;
>  
>  	/* Fence to be signaled by IRQ handler when the job is complete. */
>  	struct dma_fence *done_fence;
> @@ -39,10 +40,30 @@ struct panfrost_job {
>  	u64 start_cycles;
>  };
>  
> +struct panfrost_js_ctx {
> +	struct drm_sched_entity sched_entity;
> +	bool enabled;
> +};
> +
> +#define NUM_JOB_SLOTS 3
> +
> +struct panfrost_jm_ctx {
> +	struct kref refcnt;
> +	u32 config;
> +	struct panfrost_js_ctx slots[NUM_JOB_SLOTS];
> +};
> +
> +int panfrost_jm_ctx_create(struct drm_file *file,
> +			   struct drm_panfrost_jm_ctx_create *args);
> +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle);
> +void panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx);
> +struct panfrost_jm_ctx *panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx);
> +struct panfrost_jm_ctx *panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle);
> +
>  int panfrost_job_init(struct panfrost_device *pfdev);
>  void panfrost_job_fini(struct panfrost_device *pfdev);
> -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
> -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
> +int panfrost_job_open(struct drm_file *file);
> +void panfrost_job_close(struct drm_file *file);
>  int panfrost_job_get_slot(struct panfrost_job *job);
>  int panfrost_job_push(struct panfrost_job *job);
>  void panfrost_job_put(struct panfrost_job *job);

Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Adrián Larumbe 3 weeks ago
Hi Steven,

On 10.09.2025 16:42, Steven Price wrote:
> On 04/09/2025 01:08, Adrián Larumbe wrote:
> > From: Boris Brezillon <boris.brezillon@collabora.com>
> >
> > A JM context describes GPU HW resource allocation for the jobs bound to
> > it. At the time of writing this, it only holds the JS and queue
> > scheduler priorities.
> >
> > When a context is created, all the scheduler entities created for job
> > slots will have the same priority.
> >
> > Until context creation and destruction and attaching a context ID to
> > a job submission are exposed to UM, all jobs shall be bound to the
> > default Panfrost file context, which has medium priority.
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> > ---
> >  drivers/gpu/drm/panfrost/panfrost_device.h |   4 +-
> >  drivers/gpu/drm/panfrost/panfrost_drv.c    |  19 +-
> >  drivers/gpu/drm/panfrost/panfrost_job.c    | 203 +++++++++++++++++----
> >  drivers/gpu/drm/panfrost/panfrost_job.h    |  25 ++-
> >  4 files changed, 210 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> > index 077525a3ad68..5b164871eb95 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> > @@ -15,6 +15,7 @@
> >  #include <drm/gpu_scheduler.h>
> >
> >  #include "panfrost_devfreq.h"
> > +#include "panfrost_job.h"
> >
> >  struct panfrost_device;
> >  struct panfrost_mmu;
> > @@ -22,7 +23,6 @@ struct panfrost_job_slot;
> >  struct panfrost_job;
> >  struct panfrost_perfcnt;
> >
> > -#define NUM_JOB_SLOTS 3
> >  #define MAX_PM_DOMAINS 5
> >
> >  enum panfrost_drv_comp_bits {
> > @@ -206,7 +206,7 @@ struct panfrost_engine_usage {
> >  struct panfrost_file_priv {
> >  	struct panfrost_device *pfdev;
> >
> > -	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
> > +	struct xarray jm_ctxs;
> >
> >  	struct panfrost_mmu *mmu;
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index 1ea6c509a5d5..398c067457d9 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -279,6 +279,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  	struct panfrost_file_priv *file_priv = file->driver_priv;
> >  	struct drm_panfrost_submit *args = data;
> >  	struct drm_syncobj *sync_out = NULL;
> > +	struct panfrost_jm_ctx *jm_ctx;
> >  	struct panfrost_job *job;
> >  	int ret = 0, slot;
> >
> > @@ -294,10 +295,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  			return -ENODEV;
> >  	}
> >
> > +	/* TODO: Use the default JM context until ctx management IOCTLs are exposed */
> > +	jm_ctx = panfrost_jm_ctx_from_handle(file, 0);
> > +	if (!jm_ctx) {
> > +		ret = -EINVAL;
> > +		goto out_put_syncout;
> > +	}
> > +
> >  	job = kzalloc(sizeof(*job), GFP_KERNEL);
> >  	if (!job) {
> >  		ret = -ENOMEM;
> > -		goto out_put_syncout;
> > +		goto out_put_jm_ctx;
> >  	}
> >
> >  	kref_init(&job->refcount);
> > @@ -307,12 +315,13 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  	job->requirements = args->requirements;
> >  	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
> >  	job->mmu = file_priv->mmu;
> > +	job->ctx = panfrost_jm_ctx_get(jm_ctx);
> >  	job->engine_usage = &file_priv->engine_usage;
> >
> >  	slot = panfrost_job_get_slot(job);
> >
> >  	ret = drm_sched_job_init(&job->base,
> > -				 &file_priv->sched_entity[slot],
> > +				 &jm_ctx->slots[slot].sched_entity,
> >  				 1, NULL, file->client_id);
> >  	if (ret)
> >  		goto out_put_job;
> > @@ -338,6 +347,8 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  		drm_sched_job_cleanup(&job->base);
> >  out_put_job:
> >  	panfrost_job_put(job);
> > +out_put_jm_ctx:
> > +	panfrost_jm_ctx_put(jm_ctx);
> >  out_put_syncout:
> >  	if (sync_out)
> >  		drm_syncobj_put(sync_out);
> > @@ -564,7 +575,7 @@ panfrost_open(struct drm_device *dev, struct drm_file *file)
> >  		goto err_free;
> >  	}
> >
> > -	ret = panfrost_job_open(panfrost_priv);
> > +	ret = panfrost_job_open(file);
> >  	if (ret)
> >  		goto err_job;
> >
> > @@ -583,7 +594,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
> >  	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> >
> >  	panfrost_perfcnt_close(file);
> > -	panfrost_job_close(panfrost_priv);
> > +	panfrost_job_close(file);
> >
> >  	panfrost_mmu_ctx_put(panfrost_priv->mmu);
> >  	kfree(panfrost_priv);
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 82acabb21b27..b6853add307c 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/platform_device.h>
> >  #include <linux/pm_runtime.h>
> >  #include <linux/dma-resv.h>
> > +#include <drm/drm_auth.h>
> >  #include <drm/gpu_scheduler.h>
> >  #include <drm/panfrost_drm.h>
> >
> > @@ -22,6 +23,7 @@
> >  #include "panfrost_mmu.h"
> >  #include "panfrost_dump.h"
> >
> > +#define MAX_JM_CTX_PER_FILE 128
>
> 128 seems like a very large number, do we have a reason to set this so high?

Maybe I can drop it down to 64, to make sure it remains a power of two.

>
> >  #define JOB_TIMEOUT_MS 500
> >
> >  #define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
> > @@ -222,7 +224,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >
> >  	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
> >  	 * start */
> > -	cfg |= JS_CONFIG_THREAD_PRI(8) |
> > +	cfg |= job->ctx->config |
>
> Note that the thread priority on Midgard/Bifrost is pretty pointless. I
> don't believe kbase ever set this to anything other than 8. In theory it
> should balance priority between different slots but "How the priority
> values affect issue rates from different slots is implementation
> dependent" (which I think is wording for 'not implemented'!).
>
> My main concern is that since kbase never used it there's a possibility
> for undiscovered hardware bugs.

In that case, to avoid unexpected behaviour, I'll remove this part in the next revision.

> >  		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
> >  		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
> >  		panfrost_get_job_chain_flag(job);
> > @@ -359,6 +361,7 @@ static void panfrost_job_cleanup(struct kref *ref)
> >  		kvfree(job->bos);
> >  	}
> >
> > +	panfrost_jm_ctx_put(job->ctx);
> >  	kfree(job);
> >  }
> >
> > @@ -917,39 +920,184 @@ void panfrost_job_fini(struct panfrost_device *pfdev)
> >  	destroy_workqueue(pfdev->reset.wq);
> >  }
> >
> > -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
> > +int panfrost_job_open(struct drm_file *file)
> > +{
> > +	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> > +	int ret;
> > +
> > +	struct drm_panfrost_jm_ctx_create default_jm_ctx = {
> > +		.priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
> > +	};
> > +
> > +	xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);
> > +
> > +	ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* We expect the default context to be assigned handle 0. */
> > +	if (WARN_ON(default_jm_ctx.handle))
> > +		return -EINVAL;
> > +
> > +	return 0;
> > +}
> > +
> > +void panfrost_job_close(struct drm_file *file)
> > +{
> > +	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +	unsigned long i;
> > +
> > +	xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
> > +		panfrost_jm_ctx_destroy(file, i);
> > +
> > +	xa_destroy(&panfrost_priv->jm_ctxs);
> > +}
> > +
> > +int panfrost_job_is_idle(struct panfrost_device *pfdev)
> >  {
> > -	struct panfrost_device *pfdev = panfrost_priv->pfdev;
> >  	struct panfrost_job_slot *js = pfdev->js;
> > -	struct drm_gpu_scheduler *sched;
> > -	int ret, i;
> > +	int i;
> >
> >  	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > -		sched = &js->queue[i].sched;
> > -		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
> > -					    DRM_SCHED_PRIORITY_NORMAL, &sched,
> > -					    1, NULL);
> > -		if (WARN_ON(ret))
> > -			return ret;
> > +		/* If there are any jobs in the HW queue, we're not idle */
> > +		if (atomic_read(&js->queue[i].sched.credit_count))
> > +			return false;
> >  	}
> > +
> > +	return true;
> > +}
> > +
> > +static void panfrost_jm_ctx_release(struct kref *kref)
> > +{
> > +	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);
> > +
> > +	kfree(jm_ctx);
> > +}
> > +
> > +void
> > +panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
> > +{
> > +	if (jm_ctx)
> > +		kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
> > +}
> > +
> > +struct panfrost_jm_ctx *
> > +panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
> > +{
> > +	if (jm_ctx)
> > +		kref_get(&jm_ctx->refcnt);
> > +
> > +	return jm_ctx;
> > +}
> > +
> > +struct panfrost_jm_ctx *
> > +panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
> > +{
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +
> > +	xa_lock(&priv->jm_ctxs);
> > +	jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
> > +	xa_unlock(&priv->jm_ctxs);
> > +
> > +	return jm_ctx;
> > +}
> > +
> > +static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
> > +					 enum drm_panfrost_jm_ctx_priority in,
> > +					 enum drm_sched_priority *out)
> > +{
> > +	switch (in) {
> > +	case PANFROST_JM_CTX_PRIORITY_LOW:
> > +		*out = DRM_SCHED_PRIORITY_LOW;
> > +		return 0;
> > +	case PANFROST_JM_CTX_PRIORITY_MEDIUM:
> > +		*out = DRM_SCHED_PRIORITY_NORMAL;
> > +		return 0;
> > +	case PANFROST_JM_CTX_PRIORITY_HIGH:
> > +		/* Higher priorities require CAP_SYS_NICE or DRM_MASTER */
> > +		if (!capable(CAP_SYS_NICE) && !drm_is_current_master(file))
> > +			return -EACCES;
> > +
> > +		*out = DRM_SCHED_PRIORITY_HIGH;
> > +		return 0;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +}
> > +
> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > +			   struct drm_panfrost_jm_ctx_create *args)
> > +{
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_device *pfdev = priv->pfdev;
> > +	enum drm_sched_priority sched_prio;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +
> > +	int ret;
> > +
> > +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> > +	if (!jm_ctx)
> > +		return -ENOMEM;
> > +
> > +	kref_init(&jm_ctx->refcnt);
> > +
> > +	/* Same priority for all JS within a single context */
> > +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> > +
> > +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> > +	if (ret)
> > +		goto err_put_jm_ctx;
> > +
> > +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> > +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> > +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> > +
> > +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> > +					    &sched, 1, NULL);
> > +		if (ret)
> > +			goto err_put_jm_ctx;
> > +
> > +		js_ctx->enabled = true;
> > +	}
> > +
> > +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> > +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> > +	if (ret)
> > +		goto err_put_jm_ctx;
>
> On error here we just jump down and call panfrost_jm_ctx_put() which
> will free jm_ctx but won't destroy any of the drm_sched_entities. There
> seems to be something a bit off with the lifetime management here.
>
> Should panfrost_jm_ctx_release() be responsible for tearing down the
> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
> reference?
>
> Steve
>
> > +
> >  	return 0;
> > +
> > +err_put_jm_ctx:
> > +	panfrost_jm_ctx_put(jm_ctx);
> > +	return ret;
> >  }
> >
> > -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> > +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
> >  {
> > -	struct panfrost_device *pfdev = panfrost_priv->pfdev;
> > -	int i;
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_device *pfdev = priv->pfdev;
> > +	struct panfrost_jm_ctx *jm_ctx;
> >
> > -	for (i = 0; i < NUM_JOB_SLOTS; i++)
> > -		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
> > +	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
> > +	if (!jm_ctx)
> > +		return -EINVAL;
> > +
> > +	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> > +		if (jm_ctx->slots[i].enabled)
> > +			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
> > +	}
> >
> >  	/* Kill in-flight jobs */
> >  	spin_lock(&pfdev->js->job_lock);
> > -	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > -		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
> > -		int j;
> > +	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> > +		struct drm_sched_entity *entity = &jm_ctx->slots[i].sched_entity;
> > +
> > +		if (!jm_ctx->slots[i].enabled)
> > +			continue;
> >
> > -		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> > +		for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> >  			struct panfrost_job *job = pfdev->jobs[i][j];
> >  			u32 cmd;
> >
> > @@ -980,18 +1128,7 @@ void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> >  		}
> >  	}
> >  	spin_unlock(&pfdev->js->job_lock);
> > -}
> >
> > -int panfrost_job_is_idle(struct panfrost_device *pfdev)
> > -{
> > -	struct panfrost_job_slot *js = pfdev->js;
> > -	int i;
> > -
> > -	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > -		/* If there are any jobs in the HW queue, we're not idle */
> > -		if (atomic_read(&js->queue[i].sched.credit_count))
> > -			return false;
> > -	}
> > -
> > -	return true;
> > +	panfrost_jm_ctx_put(jm_ctx);
> > +	return 0;
> >  }
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> > index ec581b97852b..f3e5010e6cf5 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> > @@ -18,6 +18,7 @@ struct panfrost_job {
> >
> >  	struct panfrost_device *pfdev;
> >  	struct panfrost_mmu *mmu;
> > +	struct panfrost_jm_ctx *ctx;
> >
> >  	/* Fence to be signaled by IRQ handler when the job is complete. */
> >  	struct dma_fence *done_fence;
> > @@ -39,10 +40,30 @@ struct panfrost_job {
> >  	u64 start_cycles;
> >  };
> >
> > +struct panfrost_js_ctx {
> > +	struct drm_sched_entity sched_entity;
> > +	bool enabled;
> > +};
> > +
> > +#define NUM_JOB_SLOTS 3
> > +
> > +struct panfrost_jm_ctx {
> > +	struct kref refcnt;
> > +	u32 config;
> > +	struct panfrost_js_ctx slots[NUM_JOB_SLOTS];
> > +};
> > +
> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > +			   struct drm_panfrost_jm_ctx_create *args);
> > +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle);
> > +void panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx);
> > +struct panfrost_jm_ctx *panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx);
> > +struct panfrost_jm_ctx *panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle);
> > +
> >  int panfrost_job_init(struct panfrost_device *pfdev);
> >  void panfrost_job_fini(struct panfrost_device *pfdev);
> > -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
> > -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
> > +int panfrost_job_open(struct drm_file *file);
> > +void panfrost_job_close(struct drm_file *file);
> >  int panfrost_job_get_slot(struct panfrost_job *job);
> >  int panfrost_job_push(struct panfrost_job *job);
> >  void panfrost_job_put(struct panfrost_job *job);


Adrian Larumbe
Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Boris Brezillon 3 weeks, 1 day ago
On Wed, 10 Sep 2025 16:42:32 +0100
Steven Price <steven.price@arm.com> wrote:

> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > +			   struct drm_panfrost_jm_ctx_create *args)
> > +{
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_device *pfdev = priv->pfdev;
> > +	enum drm_sched_priority sched_prio;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +
> > +	int ret;
> > +
> > +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> > +	if (!jm_ctx)
> > +		return -ENOMEM;
> > +
> > +	kref_init(&jm_ctx->refcnt);
> > +
> > +	/* Same priority for all JS within a single context */
> > +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> > +
> > +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> > +	if (ret)
> > +		goto err_put_jm_ctx;
> > +
> > +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> > +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> > +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> > +
> > +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> > +					    &sched, 1, NULL);
> > +		if (ret)
> > +			goto err_put_jm_ctx;
> > +
> > +		js_ctx->enabled = true;
> > +	}
> > +
> > +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> > +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> > +	if (ret)
> > +		goto err_put_jm_ctx;  
> 
> On error here we just jump down and call panfrost_jm_ctx_put() which
> will free jm_ctx but won't destroy any of the drm_sched_entities. There
> seems to be something a bit off with the lifetime management here.
> 
> Should panfrost_jm_ctx_release() be responsible for tearing down the
> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
> reference?

The idea was to kill/cancel any pending jobs as soon as userspace
releases the context, like we were doing previously when the FD was
closed. If we defer this ctx teardown to the release() function, we're
basically waiting for all jobs to complete, which:

1. doesn't encourage userspace to have proper control over the contexts
   lifetime
2. might use GPU/mem resources to execute jobs no one cares about
   anymore
Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Steven Price 3 weeks, 1 day ago
On 10/09/2025 16:52, Boris Brezillon wrote:
> On Wed, 10 Sep 2025 16:42:32 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
>>> +int panfrost_jm_ctx_create(struct drm_file *file,
>>> +			   struct drm_panfrost_jm_ctx_create *args)
>>> +{
>>> +	struct panfrost_file_priv *priv = file->driver_priv;
>>> +	struct panfrost_device *pfdev = priv->pfdev;
>>> +	enum drm_sched_priority sched_prio;
>>> +	struct panfrost_jm_ctx *jm_ctx;
>>> +
>>> +	int ret;
>>> +
>>> +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
>>> +	if (!jm_ctx)
>>> +		return -ENOMEM;
>>> +
>>> +	kref_init(&jm_ctx->refcnt);
>>> +
>>> +	/* Same priority for all JS within a single context */
>>> +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
>>> +
>>> +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
>>> +	if (ret)
>>> +		goto err_put_jm_ctx;
>>> +
>>> +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
>>> +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
>>> +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
>>> +
>>> +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
>>> +					    &sched, 1, NULL);
>>> +		if (ret)
>>> +			goto err_put_jm_ctx;
>>> +
>>> +		js_ctx->enabled = true;
>>> +	}
>>> +
>>> +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
>>> +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
>>> +	if (ret)
>>> +		goto err_put_jm_ctx;  
>>
>> On error here we just jump down and call panfrost_jm_ctx_put() which
>> will free jm_ctx but won't destroy any of the drm_sched_entities. There
>> seems to be something a bit off with the lifetime management here.
>>
>> Should panfrost_jm_ctx_release() be responsible for tearing down the
>> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
>> reference?
> 
> The idea was to kill/cancel any pending jobs as soon as userspace
> releases the context, like we were doing previously when the FD was
> closed. If we defer this ctx teardown to the release() function, we're
> basically waiting for all jobs to complete, which:
> 
> 1. doesn't encourage userspace to have proper control over the contexts
>    lifetime
> 2. might use GPU/mem resources to execute jobs no one cares about
>    anymore

Ah, good point - yes killing the jobs in panfrost_jm_ctx_destroy() makes
sense. But we still need to ensure the clean-up happens in the other
paths ;)

So panfrost_jm_ctx_destroy() should keep the killing jobs part, but the
drm scheduler entity cleanup should be moved.

Thanks,
Steve
Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Boris Brezillon 3 weeks, 1 day ago
On Wed, 10 Sep 2025 16:56:43 +0100
Steven Price <steven.price@arm.com> wrote:

> On 10/09/2025 16:52, Boris Brezillon wrote:
> > On Wed, 10 Sep 2025 16:42:32 +0100
> > Steven Price <steven.price@arm.com> wrote:
> >   
> >>> +int panfrost_jm_ctx_create(struct drm_file *file,
> >>> +			   struct drm_panfrost_jm_ctx_create *args)
> >>> +{
> >>> +	struct panfrost_file_priv *priv = file->driver_priv;
> >>> +	struct panfrost_device *pfdev = priv->pfdev;
> >>> +	enum drm_sched_priority sched_prio;
> >>> +	struct panfrost_jm_ctx *jm_ctx;
> >>> +
> >>> +	int ret;
> >>> +
> >>> +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> >>> +	if (!jm_ctx)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	kref_init(&jm_ctx->refcnt);
> >>> +
> >>> +	/* Same priority for all JS within a single context */
> >>> +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> >>> +
> >>> +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> >>> +	if (ret)
> >>> +		goto err_put_jm_ctx;
> >>> +
> >>> +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> >>> +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> >>> +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> >>> +
> >>> +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> >>> +					    &sched, 1, NULL);
> >>> +		if (ret)
> >>> +			goto err_put_jm_ctx;
> >>> +
> >>> +		js_ctx->enabled = true;
> >>> +	}
> >>> +
> >>> +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> >>> +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> >>> +	if (ret)
> >>> +		goto err_put_jm_ctx;    
> >>
> >> On error here we just jump down and call panfrost_jm_ctx_put() which
> >> will free jm_ctx but won't destroy any of the drm_sched_entities. There
> >> seems to be something a bit off with the lifetime management here.
> >>
> >> Should panfrost_jm_ctx_release() be responsible for tearing down the
> >> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
> >> reference?  
> > 
> > The idea was to kill/cancel any pending jobs as soon as userspace
> > releases the context, like we were doing previously when the FD was
> > closed. If we defer this ctx teardown to the release() function, we're
> > basically waiting for all jobs to complete, which:
> > 
> > 1. doesn't encourage userspace to have proper control over the contexts
> >    lifetime
> > 2. might use GPU/mem resources to execute jobs no one cares about
> >    anymore  
> 
> Ah, good point - yes killing the jobs in panfrost_jm_ctx_destroy() makes
> sense. But we still need to ensure the clean-up happens in the other
> paths ;)
> 
> So panfrost_jm_ctx_destroy() should keep the killing jobs part, but the
> drm scheduler entity cleanup should be moved.

The thing is, we need to call drm_sched_entity_fini() if we want all
pending jobs that were not queued to the HW yet to be cancelled
(_fini() calls _flush() + _kill()). This has to happen before we cancel
the jobs at the JM level, otherwise drm_sched might pass us new jobs
while we're trying to get rid of the currently running ones. Once we've
done that, there's basically nothing left to defer, except the kfree().
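
In code terms, that's the ordering panfrost_jm_ctx_destroy() follows in this
patch (condensed sketch, the actual job cancellation elided):

	/* Stop the entities first, so drm_sched can't hand us new jobs. */
	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
		if (jm_ctx->slots[i].enabled)
			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
	}

	/* Then cancel whatever already made it to the HW queues. */
	spin_lock(&pfdev->js->job_lock);
	/* ... kill in-flight jobs belonging to these entities ... */
	spin_unlock(&pfdev->js->job_lock);

	/* At this point only the kfree() is left, deferred via the refcount. */
	panfrost_jm_ctx_put(jm_ctx);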
Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Steven Price 3 weeks ago
On 10/09/2025 17:50, Boris Brezillon wrote:
> On Wed, 10 Sep 2025 16:56:43 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
>> On 10/09/2025 16:52, Boris Brezillon wrote:
>>> On Wed, 10 Sep 2025 16:42:32 +0100
>>> Steven Price <steven.price@arm.com> wrote:
>>>   
>>>>> +int panfrost_jm_ctx_create(struct drm_file *file,
>>>>> +			   struct drm_panfrost_jm_ctx_create *args)
>>>>> +{
>>>>> +	struct panfrost_file_priv *priv = file->driver_priv;
>>>>> +	struct panfrost_device *pfdev = priv->pfdev;
>>>>> +	enum drm_sched_priority sched_prio;
>>>>> +	struct panfrost_jm_ctx *jm_ctx;
>>>>> +
>>>>> +	int ret;
>>>>> +
>>>>> +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
>>>>> +	if (!jm_ctx)
>>>>> +		return -ENOMEM;
>>>>> +
>>>>> +	kref_init(&jm_ctx->refcnt);
>>>>> +
>>>>> +	/* Same priority for all JS within a single context */
>>>>> +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
>>>>> +
>>>>> +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
>>>>> +	if (ret)
>>>>> +		goto err_put_jm_ctx;
>>>>> +
>>>>> +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
>>>>> +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
>>>>> +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
>>>>> +
>>>>> +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
>>>>> +					    &sched, 1, NULL);
>>>>> +		if (ret)
>>>>> +			goto err_put_jm_ctx;
>>>>> +
>>>>> +		js_ctx->enabled = true;
>>>>> +	}
>>>>> +
>>>>> +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
>>>>> +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
>>>>> +	if (ret)
>>>>> +		goto err_put_jm_ctx;    
>>>>
>>>> On error here we just jump down and call panfrost_jm_ctx_put() which
>>>> will free jm_ctx but won't destroy any of the drm_sched_entities. There
>>>> seems to be something a bit off with the lifetime management here.
>>>>
>>>> Should panfrost_jm_ctx_release() be responsible for tearing down the
>>>> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
>>>> reference?  
>>>
>>> The idea was to kill/cancel any pending jobs as soon as userspace
>>> releases the context, like we were doing previously when the FD was
>>> closed. If we defer this ctx teardown to the release() function, we're
>>> basically waiting for all jobs to complete, which:
>>>
>>> 1. doesn't encourage userspace to have proper control over the contexts
>>>    lifetime
>>> 2. might use GPU/mem resources to execute jobs no one cares about
>>>    anymore  
>>
>> Ah, good point - yes killing the jobs in panfrost_jm_ctx_destroy() makes
>> sense. But we still need to ensure the clean-up happens in the other
>> paths ;)
>>
>> So panfrost_jm_ctx_destroy() should keep the killing jobs part, but the
>> drm scheduler entity cleanup should be moved.
> 
> The thing is, we need to call drm_sched_entity_fini() if we want all
> pending jobs that were not queued to the HW yet to be cancelled
> (_fini() calls _flush() + _kill()). This has to happen before we cancel
> the jobs at the JM level, otherwise drm_sched might pass us new jobs
> while we're trying to get rid of the currently running ones. Once we've
> done that, there's basically nothing left to defer, except the kfree().

Ok, I guess that makes sense.

In which case panfrost_jm_ctx_create() just needs fixing to fully tear
down the context in the event the xa_alloc() fails. Although that makes
me wonder whether the reference counting on the JM context really
achieves anything. Are we ever expecting the context to live past the
panfrost_jm_ctx_destroy() call?

Indeed is it even possible to have any in-flight jobs after
drm_sched_entity_destroy() has returned?

Once all the sched entities have been destroyed there doesn't really
seem to be anything left in struct panfrost_jm_ctx.
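
For the xa_alloc() failure above, the minimal fix would presumably be something
along these lines (a sketch, assuming the entity teardown otherwise stays in
panfrost_jm_ctx_destroy()):

err_put_jm_ctx:
	/* Tear down whatever entities were successfully initialised. */
	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
		if (jm_ctx->slots[i].enabled)
			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
	}
	panfrost_jm_ctx_put(jm_ctx);
	return ret;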

Thanks,
Steve
Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for managing job resources
Posted by Adrián Larumbe 2 weeks, 6 days ago
On 11.09.2025 10:30, Steven Price wrote:
> On 10/09/2025 17:50, Boris Brezillon wrote:
> > On Wed, 10 Sep 2025 16:56:43 +0100
> > Steven Price <steven.price@arm.com> wrote:
> >
> >> On 10/09/2025 16:52, Boris Brezillon wrote:
> >>> On Wed, 10 Sep 2025 16:42:32 +0100
> >>> Steven Price <steven.price@arm.com> wrote:
> >>>
> >>>>> +int panfrost_jm_ctx_create(struct drm_file *file,
> >>>>> +			   struct drm_panfrost_jm_ctx_create *args)
> >>>>> +{
> >>>>> +	struct panfrost_file_priv *priv = file->driver_priv;
> >>>>> +	struct panfrost_device *pfdev = priv->pfdev;
> >>>>> +	enum drm_sched_priority sched_prio;
> >>>>> +	struct panfrost_jm_ctx *jm_ctx;
> >>>>> +
> >>>>> +	int ret;
> >>>>> +
> >>>>> +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> >>>>> +	if (!jm_ctx)
> >>>>> +		return -ENOMEM;
> >>>>> +
> >>>>> +	kref_init(&jm_ctx->refcnt);
> >>>>> +
> >>>>> +	/* Same priority for all JS within a single context */
> >>>>> +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> >>>>> +
> >>>>> +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> >>>>> +	if (ret)
> >>>>> +		goto err_put_jm_ctx;
> >>>>> +
> >>>>> +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> >>>>> +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> >>>>> +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> >>>>> +
> >>>>> +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> >>>>> +					    &sched, 1, NULL);
> >>>>> +		if (ret)
> >>>>> +			goto err_put_jm_ctx;
> >>>>> +
> >>>>> +		js_ctx->enabled = true;
> >>>>> +	}
> >>>>> +
> >>>>> +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> >>>>> +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> >>>>> +	if (ret)
> >>>>> +		goto err_put_jm_ctx;
> >>>>
> >>>> On error here we just jump down and call panfrost_jm_ctx_put() which
> >>>> will free jm_ctx but won't destroy any of the drm_sched_entities. There
> >>>> seems to be something a bit off with the lifetime management here.
> >>>>
> >>>> Should panfrost_jm_ctx_release() be responsible for tearing down the
> >>>> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
> >>>> reference?
> >>>
> >>> The idea was to kill/cancel any pending jobs as soon as userspace
> >>> releases the context, like we were doing previously when the FD was
> >>> closed. If we defer this ctx teardown to the release() function, we're
> >>> basically waiting for all jobs to complete, which:
> >>>
> >>> 1. doesn't encourage userspace to have proper control over the contexts
> >>>    lifetime
> >>> 2. might use GPU/mem resources to execute jobs no one cares about
> >>>    anymore
> >>
> >> Ah, good point - yes killing the jobs in panfrost_jm_ctx_destroy() makes
> >> sense. But we still need to ensure the clean-up happens in the other
> >> paths ;)
> >>
> >> So panfrost_jm_ctx_destroy() should keep the killing jobs part, but the
> >> drm scheduler entity cleanup should be moved.
> >
> > The thing is, we need to call drm_sched_entity_fini() if we want all
> > pending jobs that were not queued to the HW yet to be cancelled
> > (_fini() calls _flush() + _kill()). This has to happen before we cancel
> > the jobs at the JM level, otherwise drm_sched might pass us new jobs
> > while we're trying to get rid of the currently running ones. Once we've
> > done that, there's basically nothing left to defer, except the kfree().
>
> Ok, I guess that makes sense.
>
> In which case panfrost_jm_ctx_create() just needs fixing to fully tear
> down the context in the event the xa_alloc() fails. Although that makes
> me wonder whether the reference counting on the JM context really
> achieves anything. Are we ever expecting the context to live past the
> panfrost_jm_ctx_destroy() call?

We still need reference counting; otherwise there would be a racy window between
the submission and context destruction ioctls, in which a context that has just
been released is still owned by a newly created job, leading to a UAF.
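
With the refcount, the submit path pins the context for the job's lifetime, so
a concurrent destroy can only drop the handle's reference (a sketch of the
pattern the patch already uses):

	/* submit ioctl */
	jm_ctx = panfrost_jm_ctx_from_handle(file, 0);	/* xa_load() + kref_get() under xa_lock */
	...
	job->ctx = panfrost_jm_ctx_get(jm_ctx);		/* the job keeps the ctx alive */

	/* concurrent destroy ioctl */
	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
	panfrost_jm_ctx_put(jm_ctx);	/* final put (and kfree) only once the job drops job->ctx */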

> Indeed is it even possible to have any in-flight jobs after
> drm_sched_entity_destroy() has returned?

My understanding of drm_sched_entity_destroy() is that, after it returns,
no jobs can be in-flight any more and the entity is rendered unusable by
any new jobs. This can lead to the unpleasant situation in which a thread
tries to submit a new job and gets a context reference right before another
thread takes precedence and destroys it, causing the scheduler entities to
be unusable.

Then drm_sched_job_init() would contain a reference to an invalid entity,
which further down the line would cause drm_sched_entity_push_job() to report
a DRM_ERROR warning that the entity is stopped, which should never happen,
because drm_sched_entity_push_job() must always succeed.

> Once all the sched entities have been destroyed there doesn't really
> seem to be anything left in struct panfrost_jm_ctx.

We've thought of a new approach whereby a context would be flagged as destroyed
inside panfrost_jm_ctx_destroy(), destruction of the scheduler entities would be
deferred to context release time, and the .run_job scheduler callback would then
cancel any new jobs that were queued after context destruction if it notices the
context is so flagged.
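
Roughly (a sketch only; the "destroyed" flag is a new field, and how exactly
the scheduler callback cancels such jobs is still an assumption):

int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
{
	...
	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
	if (!jm_ctx)
		return -EINVAL;

	WRITE_ONCE(jm_ctx->destroyed, true);	/* hypothetical new field */
	/* kill in-flight jobs as before; entity teardown moves to release() */
	panfrost_jm_ctx_put(jm_ctx);
	return 0;
}

static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);

	if (READ_ONCE(job->ctx->destroyed))
		return NULL;	/* skip jobs queued after the context was destroyed */
	...
}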

> Thanks,
> Steve

Cheers,
Adrian Larumbe