[PATCH v1 4/5] drm/panthor: Propagate VM-level faults to groups

Lukas Zapolskas posted 5 patches 11 hours ago
[PATCH v1 4/5] drm/panthor: Propagate VM-level faults to groups
Posted by Lukas Zapolskas 11 hours ago
Receiving an MMU fault currently disables the AS, so each group that
takes a fatal event while its VM has unhandled faults is marked with a
copy of the VM's fault information. Since no further submissions can
occur on a group after a fatal fault, the fault information does not
have to be cleared until the group is terminated.

Signed-off-by: Lukas Zapolskas <lukas.zapolskas@arm.com>
---
 drivers/gpu/drm/panthor/panthor_mmu.c   |  8 ++++++++
 drivers/gpu/drm/panthor/panthor_mmu.h   |  2 ++
 drivers/gpu/drm/panthor/panthor_sched.c | 13 +++++++++++++
 3 files changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 10a7418eecda..9e78b0509f1a 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -2895,3 +2895,11 @@ void panthor_mmu_pt_cache_fini(void)
 {
 	kmem_cache_destroy(pt_cache);
 }
+
+struct panthor_vm_fault *panthor_vm_get_fault(struct panthor_vm *vm)
+{
+	if (!vm)
+		return NULL;
+
+	return &vm->fault;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index 023fdc79c231..d69b4000a39e 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -123,4 +123,6 @@ void panthor_mmu_pt_cache_fini(void);
 void panthor_mmu_debugfs_init(struct drm_minor *minor);
 #endif
 
+struct panthor_vm_fault *panthor_vm_get_fault(struct panthor_vm *vm);
+
 #endif
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index a77399e95620..9ea0d2b27114 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -722,6 +722,11 @@ struct panthor_group {
 	 * panthor_group::groups::waiting list.
 	 */
 	struct list_head wait_node;
+
+	/**
+	 * @fatal: VM-level fault that caused a fatal error on the group.
+	 */
+	struct panthor_vm_fault fatal;
 };
 
 struct panthor_job_profiling_data {
@@ -1575,6 +1580,14 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
 			 group->task_info.pid, group->task_info.comm);
 
 		group->fatal_queues |= BIT(cs_id);
+
+		if (panthor_vm_has_unhandled_faults(group->vm)) {
+			struct panthor_vm_fault *fault;
+
+			fault = panthor_vm_get_fault(group->vm);
+			if (fault)
+				group->fatal = *fault;
+		}
 	}
 
 	if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
-- 
2.33.0.dirty
Re: [PATCH v1 4/5] drm/panthor: Propagate VM-level faults to groups
Posted by Boris Brezillon 10 hours ago
On Mon, 15 Dec 2025 11:54:56 +0000
Lukas Zapolskas <lukas.zapolskas@arm.com> wrote:

>  			 group->task_info.pid, group->task_info.comm);
>  
>  		group->fatal_queues |= BIT(cs_id);
> +
> +		if (panthor_vm_has_unhandled_faults(group->vm)) {
> +			struct panthor_vm_fault *fault;
> +
> +			fault = panthor_vm_get_fault(group->vm);
> +			if (fault)
> +				group->fatal = *fault;

group->vm can't be NULL, meaning fault can't be NULL either.

> +		}
>  	}
Re: [PATCH v1 4/5] drm/panthor: Propagate VM-level faults to groups
Posted by Boris Brezillon 10 hours ago
On Mon, 15 Dec 2025 11:54:56 +0000
Lukas Zapolskas <lukas.zapolskas@arm.com> wrote:

> Receiving an MMU fault currently disables the AS, so each of the groups
> is marked with the appropriate faults. Since no further submissions
> can occur on a fatal fault for that group, the fault information
> does not have to be cleared until the group is terminated.
> 
> Signed-off-by: Lukas Zapolskas <lukas.zapolskas@arm.com>
> ---
>  drivers/gpu/drm/panthor/panthor_mmu.c   |  8 ++++++++
>  drivers/gpu/drm/panthor/panthor_mmu.h   |  2 ++
>  drivers/gpu/drm/panthor/panthor_sched.c | 13 +++++++++++++
>  3 files changed, 23 insertions(+)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 10a7418eecda..9e78b0509f1a 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -2895,3 +2895,11 @@ void panthor_mmu_pt_cache_fini(void)
>  {
>  	kmem_cache_destroy(pt_cache);
>  }
> +
> +struct panthor_vm_fault *panthor_vm_get_fault(struct panthor_vm *vm)

const struct panthor_vm_fault *
panthor_vm_get_fault(struct panthor_vm *vm)

or

struct panthor_vm_fault
panthor_vm_get_fault(struct panthor_vm *vm)

or

void
panthor_vm_get_fault(struct panthor_vm *vm,
		     struct panthor_vm_fault *fault)

but you shouldn't leave the caller with a writable pointer to vm->fault.

> +{
> +	if (!vm)
> +		return NULL;

I don't see a valid case where panthor_vm_get_fault() would be called
with a NULL pointer.

> +
> +	return &vm->fault;
> +}
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
> index 023fdc79c231..d69b4000a39e 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.h
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.h
> @@ -123,4 +123,6 @@ void panthor_mmu_pt_cache_fini(void);
>  void panthor_mmu_debugfs_init(struct drm_minor *minor);
>  #endif
>  
> +struct panthor_vm_fault *panthor_vm_get_fault(struct panthor_vm *vm);
> +
>  #endif
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index a77399e95620..9ea0d2b27114 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -722,6 +722,11 @@ struct panthor_group {
>  	 * panthor_group::groups::waiting list.
>  	 */
>  	struct list_head wait_node;
> +
> +	/**
> +	 * @fatal: VM-level fault that caused a fatal error on the group.
> +	 */
> +	struct panthor_vm_fault fatal;
>  };
>  
>  struct panthor_job_profiling_data {
> @@ -1575,6 +1580,14 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
>  			 group->task_info.pid, group->task_info.comm);
>  
>  		group->fatal_queues |= BIT(cs_id);
> +
> +		if (panthor_vm_has_unhandled_faults(group->vm)) {
> +			struct panthor_vm_fault *fault;
> +
> +			fault = panthor_vm_get_fault(group->vm);
> +			if (fault)
> +				group->fatal = *fault;
> +		}
>  	}
>  
>  	if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {