[v8] Add a few tracepoints to panthor

[PATCH v8 2/4] drm/panthor: Extend IRQ helpers for mask modification/restoration

Posted by Nicolas Frattaroli 3 weeks, 6 days ago

The current IRQ helpers do not guarantee mutual exclusion that covers
the entire transaction from accessing the mask member and modifying the
mask register.

This makes it hard, if not impossible, to implement mask modification
helpers that may change one of these outside the normal
suspend/resume/isr code paths.

Add a spinlock to struct panthor_irq that protects both the mask member
and register. Acquire it in all code paths that access these, but drop
it before processing the threaded handler function. Then, add the
aforementioned new helpers: enable_events, and disable_events. They work
by ORing and NANDing the mask bits.

resume is changed to no longer have a mask passed, as pirq->mask is
supposed to be the user-requested mask now, rather than a mirror of the
INT_MASK register contents. Users of the resume helper are adjusted
accordingly, including a rather painful refactor in panthor_mmu.c.

Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_device.h |  72 +++++++--
 drivers/gpu/drm/panthor/panthor_fw.c     |   3 +-
 drivers/gpu/drm/panthor/panthor_gpu.c    |   2 +-
 drivers/gpu/drm/panthor/panthor_mmu.c    | 247 ++++++++++++++++---------------
 drivers/gpu/drm/panthor/panthor_pwr.c    |   2 +-
 5 files changed, 187 insertions(+), 139 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 424f6cd1a814..0a29234ac58c 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -84,11 +84,14 @@ struct panthor_irq {
 	/** @irq: IRQ number. */
 	int irq;
 
-	/** @mask: Current mask being applied to xxx_INT_MASK. */
+	/** @mask: Values to write to xxx_INT_MASK if active. */
 	u32 mask;
 
 	/** @state: one of &enum panthor_irq_state reflecting the current state. */
 	atomic_t state;
+
+	/** @mask_lock: protects modifications to _INT_MASK and @mask */
+	spinlock_t mask_lock;
 };
 
 /**
@@ -422,6 +425,8 @@ static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)
 	struct panthor_device *ptdev = pirq->ptdev;						\
 	enum panthor_irq_state state;								\
 												\
+	guard(spinlock_irqsave)(&pirq->mask_lock);						\
+												\
 	state = atomic_read(&pirq->state);							\
 	if (state == PANTHOR_IRQ_STATE_SUSPENDED || state == PANTHOR_IRQ_STATE_SUSPENDING)	\
 		return IRQ_NONE;								\
@@ -438,11 +443,16 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
 	struct panthor_device *ptdev = pirq->ptdev;						\
 	enum panthor_irq_state state;								\
 	irqreturn_t ret = IRQ_NONE;								\
+	u32 mask;										\
 												\
-	atomic_cmpxchg(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE, PANTHOR_IRQ_STATE_PROCESSING);	\
+	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
+		mask = pirq->mask;								\
+		atomic_cmpxchg(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE,				\
+			       PANTHOR_IRQ_STATE_PROCESSING);					\
+	}											\
 												\
 	while (true) {										\
-		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
+		u32 status = (gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & mask);		\
 												\
 		if (!status)									\
 			break;									\
@@ -451,12 +461,16 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
 		ret = IRQ_HANDLED;								\
 	}											\
 												\
-	state = atomic_read(&pirq->state);							\
-	if (state != PANTHOR_IRQ_STATE_SUSPENDED && state != PANTHOR_IRQ_STATE_SUSPENDING) {	\
-		/* Only restore the bits that were used and are still enabled */		\
-		gpu_write(ptdev, __reg_prefix ## _INT_MASK,					\
-			  gpu_read(ptdev, __reg_prefix ## _INT_MASK) | (mask & pirq->mask));	\
-		atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);				\
+	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
+		state = atomic_read(&pirq->state);						\
+		if (state != PANTHOR_IRQ_STATE_SUSPENDED &&					\
+		    state != PANTHOR_IRQ_STATE_SUSPENDING) {					\
+			/* Only restore the bits that were used and are still enabled */	\
+			gpu_write(ptdev, __reg_prefix ## _INT_MASK,				\
+				  gpu_read(ptdev, __reg_prefix ## _INT_MASK) |			\
+				  (mask & pirq->mask));						\
+			atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);			\
+		}										\
 	}											\
 												\
 	return ret;										\
@@ -464,16 +478,18 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
 												\
 static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
 {												\
-	pirq->mask = 0;										\
-	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);					\
-	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING);					\
+	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
+		atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING);				\
+	}											\
 	synchronize_irq(pirq->irq);								\
 	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDED);					\
 }												\
 												\
-static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
+static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq)			\
 {												\
-	pirq->mask = mask;									\
+	guard(spinlock_irqsave)(&pirq->mask_lock);						\
+												\
 	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);					\
 	gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, pirq->mask);				\
 	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, pirq->mask);				\
@@ -485,13 +501,39 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
 {												\
 	pirq->ptdev = ptdev;									\
 	pirq->irq = irq;									\
-	panthor_ ## __name ## _irq_resume(pirq, mask);						\
+	pirq->mask = mask;									\
+	spin_lock_init(&pirq->mask_lock);							\
+	panthor_ ## __name ## _irq_resume(pirq);						\
 												\
 	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
 					 panthor_ ## __name ## _irq_raw_handler,		\
 					 panthor_ ## __name ## _irq_threaded_handler,		\
 					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
 					 pirq);							\
+}												\
+												\
+static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask)	\
+{												\
+	enum panthor_irq_state state;								\
+												\
+	guard(spinlock_irqsave)(&pirq->mask_lock);						\
+												\
+	state = atomic_read(&pirq->state);							\
+	pirq->mask |= mask;									\
+	if (state != PANTHOR_IRQ_STATE_SUSPENDED || state != PANTHOR_IRQ_STATE_SUSPENDING)	\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
+}												\
+												\
+static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\
+{												\
+	enum panthor_irq_state state;								\
+												\
+	guard(spinlock_irqsave)(&pirq->mask_lock);						\
+												\
+	state = atomic_read(&pirq->state);							\
+	pirq->mask &= ~mask;									\
+	if (state != PANTHOR_IRQ_STATE_SUSPENDED || state != PANTHOR_IRQ_STATE_SUSPENDING)	\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
 }
 
 extern struct workqueue_struct *panthor_cleanup_wq;
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index a64ec8756bed..0e46625f7621 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -1080,7 +1080,8 @@ static int panthor_fw_start(struct panthor_device *ptdev)
 	bool timedout = false;
 
 	ptdev->fw->booted = false;
-	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
+	panthor_job_irq_enable_events(&ptdev->fw->irq, ~0);
+	panthor_job_irq_resume(&ptdev->fw->irq);
 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
 
 	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index 057e167468d0..9304469a711a 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -395,7 +395,7 @@ void panthor_gpu_suspend(struct panthor_device *ptdev)
  */
 void panthor_gpu_resume(struct panthor_device *ptdev)
 {
-	panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
+	panthor_gpu_irq_resume(&ptdev->gpu->irq);
 	panthor_hw_l2_power_on(ptdev);
 }
 
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 198d59f42578..71b8318eab31 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -655,125 +655,6 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
 	vm->as.id = -1;
 }
 
-/**
- * panthor_vm_active() - Flag a VM as active
- * @vm: VM to flag as active.
- *
- * Assigns an address space to a VM so it can be used by the GPU/MCU.
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-int panthor_vm_active(struct panthor_vm *vm)
-{
-	struct panthor_device *ptdev = vm->ptdev;
-	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
-	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
-	int ret = 0, as, cookie;
-	u64 transtab, transcfg;
-
-	if (!drm_dev_enter(&ptdev->base, &cookie))
-		return -ENODEV;
-
-	if (refcount_inc_not_zero(&vm->as.active_cnt))
-		goto out_dev_exit;
-
-	/* Make sure we don't race with lock/unlock_region() calls
-	 * happening around VM bind operations.
-	 */
-	mutex_lock(&vm->op_lock);
-	mutex_lock(&ptdev->mmu->as.slots_lock);
-
-	if (refcount_inc_not_zero(&vm->as.active_cnt))
-		goto out_unlock;
-
-	as = vm->as.id;
-	if (as >= 0) {
-		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
-		 * re-enable the MMU after clearing+unmasking the AS interrupts.
-		 */
-		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
-			goto out_enable_as;
-
-		goto out_make_active;
-	}
-
-	/* Check for a free AS */
-	if (vm->for_mcu) {
-		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
-		as = 0;
-	} else {
-		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
-	}
-
-	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
-		struct panthor_vm *lru_vm;
-
-		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
-						  struct panthor_vm,
-						  as.lru_node);
-		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-
-		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
-		as = lru_vm->as.id;
-
-		ret = panthor_mmu_as_disable(ptdev, as, true);
-		if (ret)
-			goto out_unlock;
-
-		panthor_vm_release_as_locked(lru_vm);
-	}
-
-	/* Assign the free or reclaimed AS to the FD */
-	vm->as.id = as;
-	set_bit(as, &ptdev->mmu->as.alloc_mask);
-	ptdev->mmu->as.slots[as].vm = vm;
-
-out_enable_as:
-	transtab = cfg->arm_lpae_s1_cfg.ttbr;
-	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
-		   AS_TRANSCFG_PTW_RA |
-		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
-		   AS_TRANSCFG_INA_BITS(55 - va_bits);
-	if (ptdev->coherent)
-		transcfg |= AS_TRANSCFG_PTW_SH_OS;
-
-	/* If the VM is re-activated, we clear the fault. */
-	vm->unhandled_fault = false;
-
-	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
-	 * before enabling the AS.
-	 */
-	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
-		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
-		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
-		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
-		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
-	}
-
-	/* The VM update is guarded by ::op_lock, which we take at the beginning
-	 * of this function, so we don't expect any locked region here.
-	 */
-	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
-	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
-
-out_make_active:
-	if (!ret) {
-		refcount_set(&vm->as.active_cnt, 1);
-		list_del_init(&vm->as.lru_node);
-	}
-
-out_unlock:
-	mutex_unlock(&ptdev->mmu->as.slots_lock);
-	mutex_unlock(&vm->op_lock);
-
-out_dev_exit:
-	drm_dev_exit(cookie);
-	return ret;
-}
-
 /**
  * panthor_vm_idle() - Flag a VM idle
  * @vm: VM to flag as idle.
@@ -1762,6 +1643,128 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 }
 PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler);
 
+/**
+ * panthor_vm_active() - Flag a VM as active
+ * @vm: VM to flag as active.
+ *
+ * Assigns an address space to a VM so it can be used by the GPU/MCU.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_active(struct panthor_vm *vm)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
+	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
+	int ret = 0, as, cookie;
+	u64 transtab, transcfg;
+	u32 fault_mask;
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return -ENODEV;
+
+	if (refcount_inc_not_zero(&vm->as.active_cnt))
+		goto out_dev_exit;
+
+	/* Make sure we don't race with lock/unlock_region() calls
+	 * happening around VM bind operations.
+	 */
+	mutex_lock(&vm->op_lock);
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+
+	if (refcount_inc_not_zero(&vm->as.active_cnt))
+		goto out_unlock;
+
+	as = vm->as.id;
+	if (as >= 0) {
+		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
+		 * re-enable the MMU after clearing+unmasking the AS interrupts.
+		 */
+		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
+			goto out_enable_as;
+
+		goto out_make_active;
+	}
+
+	/* Check for a free AS */
+	if (vm->for_mcu) {
+		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
+		as = 0;
+	} else {
+		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
+	}
+
+	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
+		struct panthor_vm *lru_vm;
+
+		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
+						  struct panthor_vm,
+						  as.lru_node);
+		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+
+		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
+		as = lru_vm->as.id;
+
+		ret = panthor_mmu_as_disable(ptdev, as, true);
+		if (ret)
+			goto out_unlock;
+
+		panthor_vm_release_as_locked(lru_vm);
+	}
+
+	/* Assign the free or reclaimed AS to the FD */
+	vm->as.id = as;
+	set_bit(as, &ptdev->mmu->as.alloc_mask);
+	ptdev->mmu->as.slots[as].vm = vm;
+
+out_enable_as:
+	transtab = cfg->arm_lpae_s1_cfg.ttbr;
+	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
+		   AS_TRANSCFG_PTW_RA |
+		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
+		   AS_TRANSCFG_INA_BITS(55 - va_bits);
+	if (ptdev->coherent)
+		transcfg |= AS_TRANSCFG_PTW_SH_OS;
+
+	/* If the VM is re-activated, we clear the fault. */
+	vm->unhandled_fault = false;
+
+	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
+	 * before enabling the AS.
+	 */
+	fault_mask = panthor_mmu_as_fault_mask(ptdev, as);
+	if (ptdev->mmu->as.faulty_mask & fault_mask) {
+		gpu_write(ptdev, MMU_INT_CLEAR, fault_mask);
+		ptdev->mmu->as.faulty_mask &= ~fault_mask;
+		panthor_mmu_irq_enable_events(&ptdev->mmu->irq, fault_mask);
+		panthor_mmu_irq_disable_events(&ptdev->mmu->irq, ptdev->mmu->as.faulty_mask);
+	}
+
+	/* The VM update is guarded by ::op_lock, which we take at the beginning
+	 * of this function, so we don't expect any locked region here.
+	 */
+	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
+	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
+
+out_make_active:
+	if (!ret) {
+		refcount_set(&vm->as.active_cnt, 1);
+		list_del_init(&vm->as.lru_node);
+	}
+
+out_unlock:
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+	mutex_unlock(&vm->op_lock);
+
+out_dev_exit:
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+
 /**
  * panthor_mmu_suspend() - Suspend the MMU logic
  * @ptdev: Device.
@@ -1805,7 +1808,8 @@ void panthor_mmu_resume(struct panthor_device *ptdev)
 	ptdev->mmu->as.faulty_mask = 0;
 	mutex_unlock(&ptdev->mmu->as.slots_lock);
 
-	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
+	panthor_mmu_irq_enable_events(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
+	panthor_mmu_irq_resume(&ptdev->mmu->irq);
 }
 
 /**
@@ -1859,7 +1863,8 @@ void panthor_mmu_post_reset(struct panthor_device *ptdev)
 
 	mutex_unlock(&ptdev->mmu->as.slots_lock);
 
-	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
+	panthor_mmu_irq_enable_events(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
+	panthor_mmu_irq_resume(&ptdev->mmu->irq);
 
 	/* Restart the VM_BIND queues. */
 	mutex_lock(&ptdev->mmu->vm.lock);
diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c
index 57cfc7ce715b..ed3b2b4479ca 100644
--- a/drivers/gpu/drm/panthor/panthor_pwr.c
+++ b/drivers/gpu/drm/panthor/panthor_pwr.c
@@ -545,5 +545,5 @@ void panthor_pwr_resume(struct panthor_device *ptdev)
 	if (!ptdev->pwr)
 		return;
 
-	panthor_pwr_irq_resume(&ptdev->pwr->irq, PWR_INTERRUPTS_MASK);
+	panthor_pwr_irq_resume(&ptdev->pwr->irq);
 }

-- 
2.52.0

Re: [PATCH v8 2/4] drm/panthor: Extend IRQ helpers for mask modification/restoration

Posted by Boris Brezillon 3 weeks, 5 days ago

On Mon, 12 Jan 2026 15:37:50 +0100
Nicolas Frattaroli <nicolas.frattaroli@collabora.com> wrote:

> The current IRQ helpers do not guarantee mutual exclusion that covers
> the entire transaction from accessing the mask member and modifying the
> mask register.
> 
> This makes it hard, if not impossible, to implement mask modification
> helpers that may change one of these outside the normal
> suspend/resume/isr code paths.
> 
> Add a spinlock to struct panthor_irq that protects both the mask member
> and register. Acquire it in all code paths that access these, but drop
> it before processing the threaded handler function. Then, add the
> aforementioned new helpers: enable_events, and disable_events. They work
> by ORing and NANDing the mask bits.
> 
> resume is changed to no longer have a mask passed, as pirq->mask is
> supposed to be the user-requested mask now, rather than a mirror of the
> INT_MASK register contents. Users of the resume helper are adjusted
> accordingly, including a rather painful refactor in panthor_mmu.c.
> 
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
>  drivers/gpu/drm/panthor/panthor_device.h |  72 +++++++--
>  drivers/gpu/drm/panthor/panthor_fw.c     |   3 +-
>  drivers/gpu/drm/panthor/panthor_gpu.c    |   2 +-
>  drivers/gpu/drm/panthor/panthor_mmu.c    | 247 ++++++++++++++++---------------
>  drivers/gpu/drm/panthor/panthor_pwr.c    |   2 +-
>  5 files changed, 187 insertions(+), 139 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index 424f6cd1a814..0a29234ac58c 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -84,11 +84,14 @@ struct panthor_irq {
>  	/** @irq: IRQ number. */
>  	int irq;
>  
> -	/** @mask: Current mask being applied to xxx_INT_MASK. */
> +	/** @mask: Values to write to xxx_INT_MASK if active. */
>  	u32 mask;
>  
>  	/** @state: one of &enum panthor_irq_state reflecting the current state. */
>  	atomic_t state;
> +
> +	/** @mask_lock: protects modifications to _INT_MASK and @mask */
> +	spinlock_t mask_lock;

nit: Can we move this mask_lock right after the mask field?

Re: [PATCH v8 2/4] drm/panthor: Extend IRQ helpers for mask modification/restoration

Posted by Boris Brezillon 3 weeks, 6 days ago

On Mon, 12 Jan 2026 15:37:50 +0100
Nicolas Frattaroli <nicolas.frattaroli@collabora.com> wrote:

> The current IRQ helpers do not guarantee mutual exclusion that covers
> the entire transaction from accessing the mask member and modifying the
> mask register.
> 
> This makes it hard, if not impossible, to implement mask modification
> helpers that may change one of these outside the normal
> suspend/resume/isr code paths.
> 
> Add a spinlock to struct panthor_irq that protects both the mask member
> and register. Acquire it in all code paths that access these, but drop
> it before processing the threaded handler function. Then, add the
> aforementioned new helpers: enable_events, and disable_events. They work
> by ORing and NANDing the mask bits.
> 
> resume is changed to no longer have a mask passed, as pirq->mask is
> supposed to be the user-requested mask now, rather than a mirror of the
> INT_MASK register contents. Users of the resume helper are adjusted
> accordingly, including a rather painful refactor in panthor_mmu.c.
> 
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
>  drivers/gpu/drm/panthor/panthor_device.h |  72 +++++++--
>  drivers/gpu/drm/panthor/panthor_fw.c     |   3 +-
>  drivers/gpu/drm/panthor/panthor_gpu.c    |   2 +-
>  drivers/gpu/drm/panthor/panthor_mmu.c    | 247 ++++++++++++++++---------------
>  drivers/gpu/drm/panthor/panthor_pwr.c    |   2 +-
>  5 files changed, 187 insertions(+), 139 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index 424f6cd1a814..0a29234ac58c 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -84,11 +84,14 @@ struct panthor_irq {
>  	/** @irq: IRQ number. */
>  	int irq;
>  
> -	/** @mask: Current mask being applied to xxx_INT_MASK. */
> +	/** @mask: Values to write to xxx_INT_MASK if active. */
>  	u32 mask;
>  
>  	/** @state: one of &enum panthor_irq_state reflecting the current state. */
>  	atomic_t state;
> +
> +	/** @mask_lock: protects modifications to _INT_MASK and @mask */
> +	spinlock_t mask_lock;
>  };
>  
>  /**
> @@ -422,6 +425,8 @@ static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)
>  	struct panthor_device *ptdev = pirq->ptdev;						\
>  	enum panthor_irq_state state;								\
>  												\
> +	guard(spinlock_irqsave)(&pirq->mask_lock);						\
> +												\
>  	state = atomic_read(&pirq->state);							\
>  	if (state == PANTHOR_IRQ_STATE_SUSPENDED || state == PANTHOR_IRQ_STATE_SUSPENDING)	\
>  		return IRQ_NONE;								\
> @@ -438,11 +443,16 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
>  	struct panthor_device *ptdev = pirq->ptdev;						\
>  	enum panthor_irq_state state;								\
>  	irqreturn_t ret = IRQ_NONE;								\
> +	u32 mask;										\
>  												\
> -	atomic_cmpxchg(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE, PANTHOR_IRQ_STATE_PROCESSING);	\
> +	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
> +		mask = pirq->mask;								\
> +		atomic_cmpxchg(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE,				\
> +			       PANTHOR_IRQ_STATE_PROCESSING);					\
> +	}											\
>  												\
>  	while (true) {										\
> -		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
> +		u32 status = (gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & mask);		\
>  												\
>  		if (!status)									\
>  			break;									\
> @@ -451,12 +461,16 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
>  		ret = IRQ_HANDLED;								\
>  	}											\
>  												\
> -	state = atomic_read(&pirq->state);							\
> -	if (state != PANTHOR_IRQ_STATE_SUSPENDED && state != PANTHOR_IRQ_STATE_SUSPENDING) {	\
> -		/* Only restore the bits that were used and are still enabled */		\
> -		gpu_write(ptdev, __reg_prefix ## _INT_MASK,					\
> -			  gpu_read(ptdev, __reg_prefix ## _INT_MASK) | (mask & pirq->mask));	\
> -		atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);				\
> +	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
> +		state = atomic_read(&pirq->state);						\
> +		if (state != PANTHOR_IRQ_STATE_SUSPENDED &&					\
> +		    state != PANTHOR_IRQ_STATE_SUSPENDING) {					\
> +			/* Only restore the bits that were used and are still enabled */	\
> +			gpu_write(ptdev, __reg_prefix ## _INT_MASK,				\
> +				  gpu_read(ptdev, __reg_prefix ## _INT_MASK) |			\
> +				  (mask & pirq->mask));						\
> +			atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);			\
> +		}										\
>  	}											\
>  												\
>  	return ret;										\
> @@ -464,16 +478,18 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
>  												\
>  static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
>  {												\
> -	pirq->mask = 0;										\
> -	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);					\
> -	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING);					\
> +	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
> +		atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING);				\
> +	}											\
>  	synchronize_irq(pirq->irq);								\
>  	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDED);					\
>  }												\
>  												\
> -static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
> +static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq)			\
>  {												\
> -	pirq->mask = mask;									\
> +	guard(spinlock_irqsave)(&pirq->mask_lock);						\
> +												\
>  	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);					\
>  	gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, pirq->mask);				\
>  	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, pirq->mask);				\
> @@ -485,13 +501,39 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
>  {												\
>  	pirq->ptdev = ptdev;									\
>  	pirq->irq = irq;									\
> -	panthor_ ## __name ## _irq_resume(pirq, mask);						\
> +	pirq->mask = mask;									\
> +	spin_lock_init(&pirq->mask_lock);							\
> +	panthor_ ## __name ## _irq_resume(pirq);						\
>  												\
>  	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
>  					 panthor_ ## __name ## _irq_raw_handler,		\
>  					 panthor_ ## __name ## _irq_threaded_handler,		\
>  					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
>  					 pirq);							\
> +}												\
> +												\
> +static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask)	\
> +{												\
> +	enum panthor_irq_state state;								\
> +												\
> +	guard(spinlock_irqsave)(&pirq->mask_lock);						\
> +												\
> +	state = atomic_read(&pirq->state);							\
> +	pirq->mask |= mask;									\
> +	if (state != PANTHOR_IRQ_STATE_SUSPENDED || state != PANTHOR_IRQ_STATE_SUSPENDING)	\
> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
> +}												\
> +												\
> +static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\
> +{												\
> +	enum panthor_irq_state state;								\
> +												\
> +	guard(spinlock_irqsave)(&pirq->mask_lock);						\
> +												\
> +	state = atomic_read(&pirq->state);							\
> +	pirq->mask &= ~mask;									\
> +	if (state != PANTHOR_IRQ_STATE_SUSPENDED || state != PANTHOR_IRQ_STATE_SUSPENDING)	\
> +		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
>  }
>  
>  extern struct workqueue_struct *panthor_cleanup_wq;
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
> index a64ec8756bed..0e46625f7621 100644
> --- a/drivers/gpu/drm/panthor/panthor_fw.c
> +++ b/drivers/gpu/drm/panthor/panthor_fw.c
> @@ -1080,7 +1080,8 @@ static int panthor_fw_start(struct panthor_device *ptdev)
>  	bool timedout = false;
>  
>  	ptdev->fw->booted = false;
> -	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
> +	panthor_job_irq_enable_events(&ptdev->fw->irq, ~0);
> +	panthor_job_irq_resume(&ptdev->fw->irq);
>  	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
>  
>  	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 057e167468d0..9304469a711a 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -395,7 +395,7 @@ void panthor_gpu_suspend(struct panthor_device *ptdev)
>   */
>  void panthor_gpu_resume(struct panthor_device *ptdev)
>  {
> -	panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
> +	panthor_gpu_irq_resume(&ptdev->gpu->irq);
>  	panthor_hw_l2_power_on(ptdev);
>  }
>  
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 198d59f42578..71b8318eab31 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -655,125 +655,6 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
>  	vm->as.id = -1;
>  }
>  
> -/**
> - * panthor_vm_active() - Flag a VM as active
> - * @vm: VM to flag as active.
> - *
> - * Assigns an address space to a VM so it can be used by the GPU/MCU.
> - *
> - * Return: 0 on success, a negative error code otherwise.
> - */
> -int panthor_vm_active(struct panthor_vm *vm)
> -{
> -	struct panthor_device *ptdev = vm->ptdev;
> -	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
> -	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
> -	int ret = 0, as, cookie;
> -	u64 transtab, transcfg;
> -
> -	if (!drm_dev_enter(&ptdev->base, &cookie))
> -		return -ENODEV;
> -
> -	if (refcount_inc_not_zero(&vm->as.active_cnt))
> -		goto out_dev_exit;
> -
> -	/* Make sure we don't race with lock/unlock_region() calls
> -	 * happening around VM bind operations.
> -	 */
> -	mutex_lock(&vm->op_lock);
> -	mutex_lock(&ptdev->mmu->as.slots_lock);
> -
> -	if (refcount_inc_not_zero(&vm->as.active_cnt))
> -		goto out_unlock;
> -
> -	as = vm->as.id;
> -	if (as >= 0) {
> -		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
> -		 * re-enable the MMU after clearing+unmasking the AS interrupts.
> -		 */
> -		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
> -			goto out_enable_as;
> -
> -		goto out_make_active;
> -	}
> -
> -	/* Check for a free AS */
> -	if (vm->for_mcu) {
> -		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
> -		as = 0;
> -	} else {
> -		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
> -	}
> -
> -	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
> -		struct panthor_vm *lru_vm;
> -
> -		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
> -						  struct panthor_vm,
> -						  as.lru_node);
> -		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
> -			ret = -EBUSY;
> -			goto out_unlock;
> -		}
> -
> -		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
> -		as = lru_vm->as.id;
> -
> -		ret = panthor_mmu_as_disable(ptdev, as, true);
> -		if (ret)
> -			goto out_unlock;
> -
> -		panthor_vm_release_as_locked(lru_vm);
> -	}
> -
> -	/* Assign the free or reclaimed AS to the FD */
> -	vm->as.id = as;
> -	set_bit(as, &ptdev->mmu->as.alloc_mask);
> -	ptdev->mmu->as.slots[as].vm = vm;
> -
> -out_enable_as:
> -	transtab = cfg->arm_lpae_s1_cfg.ttbr;
> -	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
> -		   AS_TRANSCFG_PTW_RA |
> -		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
> -		   AS_TRANSCFG_INA_BITS(55 - va_bits);
> -	if (ptdev->coherent)
> -		transcfg |= AS_TRANSCFG_PTW_SH_OS;
> -
> -	/* If the VM is re-activated, we clear the fault. */
> -	vm->unhandled_fault = false;
> -
> -	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> -	 * before enabling the AS.
> -	 */
> -	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
> -		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
> -		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
> -		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
> -		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
> -	}
> -
> -	/* The VM update is guarded by ::op_lock, which we take at the beginning
> -	 * of this function, so we don't expect any locked region here.
> -	 */
> -	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
> -	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> -
> -out_make_active:
> -	if (!ret) {
> -		refcount_set(&vm->as.active_cnt, 1);
> -		list_del_init(&vm->as.lru_node);
> -	}
> -
> -out_unlock:
> -	mutex_unlock(&ptdev->mmu->as.slots_lock);
> -	mutex_unlock(&vm->op_lock);
> -
> -out_dev_exit:
> -	drm_dev_exit(cookie);
> -	return ret;
> -}
> -
>  /**
>   * panthor_vm_idle() - Flag a VM idle
>   * @vm: VM to flag as idle.
> @@ -1762,6 +1643,128 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
>  }
>  PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler);
>  
> +/**
> + * panthor_vm_active() - Flag a VM as active
> + * @vm: VM to flag as active.
> + *
> + * Assigns an address space to a VM so it can be used by the GPU/MCU.
> + *
> + * Return: 0 on success, a negative error code otherwise.
> + */
> +int panthor_vm_active(struct panthor_vm *vm)
> +{
> +	struct panthor_device *ptdev = vm->ptdev;
> +	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
> +	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
> +	int ret = 0, as, cookie;
> +	u64 transtab, transcfg;
> +	u32 fault_mask;
> +
> +	if (!drm_dev_enter(&ptdev->base, &cookie))
> +		return -ENODEV;
> +
> +	if (refcount_inc_not_zero(&vm->as.active_cnt))
> +		goto out_dev_exit;
> +
> +	/* Make sure we don't race with lock/unlock_region() calls
> +	 * happening around VM bind operations.
> +	 */
> +	mutex_lock(&vm->op_lock);
> +	mutex_lock(&ptdev->mmu->as.slots_lock);
> +
> +	if (refcount_inc_not_zero(&vm->as.active_cnt))
> +		goto out_unlock;
> +
> +	as = vm->as.id;
> +	if (as >= 0) {
> +		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
> +		 * re-enable the MMU after clearing+unmasking the AS interrupts.
> +		 */
> +		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
> +			goto out_enable_as;
> +
> +		goto out_make_active;
> +	}
> +
> +	/* Check for a free AS */
> +	if (vm->for_mcu) {
> +		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
> +		as = 0;
> +	} else {
> +		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
> +	}
> +
> +	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
> +		struct panthor_vm *lru_vm;
> +
> +		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
> +						  struct panthor_vm,
> +						  as.lru_node);
> +		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
> +			ret = -EBUSY;
> +			goto out_unlock;
> +		}
> +
> +		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
> +		as = lru_vm->as.id;
> +
> +		ret = panthor_mmu_as_disable(ptdev, as, true);
> +		if (ret)
> +			goto out_unlock;
> +
> +		panthor_vm_release_as_locked(lru_vm);
> +	}
> +
> +	/* Assign the free or reclaimed AS to the FD */
> +	vm->as.id = as;
> +	set_bit(as, &ptdev->mmu->as.alloc_mask);
> +	ptdev->mmu->as.slots[as].vm = vm;
> +
> +out_enable_as:
> +	transtab = cfg->arm_lpae_s1_cfg.ttbr;
> +	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
> +		   AS_TRANSCFG_PTW_RA |
> +		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
> +		   AS_TRANSCFG_INA_BITS(55 - va_bits);
> +	if (ptdev->coherent)
> +		transcfg |= AS_TRANSCFG_PTW_SH_OS;
> +
> +	/* If the VM is re-activated, we clear the fault. */
> +	vm->unhandled_fault = false;
> +
> +	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> +	 * before enabling the AS.
> +	 */
> +	fault_mask = panthor_mmu_as_fault_mask(ptdev, as);
> +	if (ptdev->mmu->as.faulty_mask & fault_mask) {
> +		gpu_write(ptdev, MMU_INT_CLEAR, fault_mask);
> +		ptdev->mmu->as.faulty_mask &= ~fault_mask;
> +		panthor_mmu_irq_enable_events(&ptdev->mmu->irq, fault_mask);
> +		panthor_mmu_irq_disable_events(&ptdev->mmu->irq, ptdev->mmu->as.faulty_mask);

Why do we need a _disable_events() here?

> +	}
> +
> +	/* The VM update is guarded by ::op_lock, which we take at the beginning
> +	 * of this function, so we don't expect any locked region here.
> +	 */
> +	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
> +	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> +
> +out_make_active:
> +	if (!ret) {
> +		refcount_set(&vm->as.active_cnt, 1);
> +		list_del_init(&vm->as.lru_node);
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&ptdev->mmu->as.slots_lock);
> +	mutex_unlock(&vm->op_lock);
> +
> +out_dev_exit:
> +	drm_dev_exit(cookie);
> +	return ret;
> +}
> +
> +

nit: one too many empty lines.

>  /**
>   * panthor_mmu_suspend() - Suspend the MMU logic
>   * @ptdev: Device.
> @@ -1805,7 +1808,8 @@ void panthor_mmu_resume(struct panthor_device *ptdev)
>  	ptdev->mmu->as.faulty_mask = 0;
>  	mutex_unlock(&ptdev->mmu->as.slots_lock);
>  
> -	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
> +	panthor_mmu_irq_enable_events(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));

I don't think we should touch the events mask in the suspend/resume
path. The way I see it, events should be:

- enabled when an AS is enabled (as_enable())
- disabled when an AS is disabled (as_disable())
- disabled when a VM has an unhandled faults

Because making a VM active might imply evicting another VM, we might
end up with disable+enable_events() pairs that we could have been
optimized into a NOP, but the overhead should be negligible, and if we
have to rotate VMs on AS slots we've already lost anyway (in term of
perfs).

> +	panthor_mmu_irq_resume(&ptdev->mmu->irq);
>  }
>  
>  /**
> @@ -1859,7 +1863,8 @@ void panthor_mmu_post_reset(struct panthor_device *ptdev)
>  
>  	mutex_unlock(&ptdev->mmu->as.slots_lock);
>  
> -	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
> +	panthor_mmu_irq_enable_events(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));

Same here, I don't think we need to change the event mask.

> +	panthor_mmu_irq_resume(&ptdev->mmu->irq);
>  
>  	/* Restart the VM_BIND queues. */
>  	mutex_lock(&ptdev->mmu->vm.lock);
> diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c
> index 57cfc7ce715b..ed3b2b4479ca 100644
> --- a/drivers/gpu/drm/panthor/panthor_pwr.c
> +++ b/drivers/gpu/drm/panthor/panthor_pwr.c
> @@ -545,5 +545,5 @@ void panthor_pwr_resume(struct panthor_device *ptdev)
>  	if (!ptdev->pwr)
>  		return;
>  
> -	panthor_pwr_irq_resume(&ptdev->pwr->irq, PWR_INTERRUPTS_MASK);
> +	panthor_pwr_irq_resume(&ptdev->pwr->irq);
>  }
>

Re: [PATCH v8 2/4] drm/panthor: Extend IRQ helpers for mask modification/restoration

Posted by Nicolas Frattaroli 3 weeks, 3 days ago

On Monday, 12 January 2026 16:12:52 Central European Standard Time Boris Brezillon wrote:
> On Mon, 12 Jan 2026 15:37:50 +0100
> Nicolas Frattaroli <nicolas.frattaroli@collabora.com> wrote:
> 
> > The current IRQ helpers do not guarantee mutual exclusion that covers
> > the entire transaction from accessing the mask member and modifying the
> > mask register.
> > 
> > This makes it hard, if not impossible, to implement mask modification
> > helpers that may change one of these outside the normal
> > suspend/resume/isr code paths.
> > 
> > Add a spinlock to struct panthor_irq that protects both the mask member
> > and register. Acquire it in all code paths that access these, but drop
> > it before processing the threaded handler function. Then, add the
> > aforementioned new helpers: enable_events, and disable_events. They work
> > by ORing and NANDing the mask bits.
> > 
> > resume is changed to no longer have a mask passed, as pirq->mask is
> > supposed to be the user-requested mask now, rather than a mirror of the
> > INT_MASK register contents. Users of the resume helper are adjusted
> > accordingly, including a rather painful refactor in panthor_mmu.c.
> > 
> > Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> > ---
> >  drivers/gpu/drm/panthor/panthor_device.h |  72 +++++++--
> >  drivers/gpu/drm/panthor/panthor_fw.c     |   3 +-
> >  drivers/gpu/drm/panthor/panthor_gpu.c    |   2 +-
> >  drivers/gpu/drm/panthor/panthor_mmu.c    | 247 ++++++++++++++++---------------
> >  drivers/gpu/drm/panthor/panthor_pwr.c    |   2 +-
> >  5 files changed, 187 insertions(+), 139 deletions(-)
> > 
> [... snip ...]
> > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> > index 198d59f42578..71b8318eab31 100644
> > --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> > @@ -655,125 +655,6 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
> >  	vm->as.id = -1;
> >  }
> >  
> > -/**
> > - * panthor_vm_active() - Flag a VM as active
> > - * @vm: VM to flag as active.
> > - *
> > - * Assigns an address space to a VM so it can be used by the GPU/MCU.
> > - *
> > - * Return: 0 on success, a negative error code otherwise.
> > - */
> > -int panthor_vm_active(struct panthor_vm *vm)
> > -{
> > -	struct panthor_device *ptdev = vm->ptdev;
> > -	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
> > -	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
> > -	int ret = 0, as, cookie;
> > -	u64 transtab, transcfg;
> > -
> > -	if (!drm_dev_enter(&ptdev->base, &cookie))
> > -		return -ENODEV;
> > -
> > -	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > -		goto out_dev_exit;
> > -
> > -	/* Make sure we don't race with lock/unlock_region() calls
> > -	 * happening around VM bind operations.
> > -	 */
> > -	mutex_lock(&vm->op_lock);
> > -	mutex_lock(&ptdev->mmu->as.slots_lock);
> > -
> > -	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > -		goto out_unlock;
> > -
> > -	as = vm->as.id;
> > -	if (as >= 0) {
> > -		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
> > -		 * re-enable the MMU after clearing+unmasking the AS interrupts.
> > -		 */
> > -		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
> > -			goto out_enable_as;
> > -
> > -		goto out_make_active;
> > -	}
> > -
> > -	/* Check for a free AS */
> > -	if (vm->for_mcu) {
> > -		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
> > -		as = 0;
> > -	} else {
> > -		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
> > -	}
> > -
> > -	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
> > -		struct panthor_vm *lru_vm;
> > -
> > -		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
> > -						  struct panthor_vm,
> > -						  as.lru_node);
> > -		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
> > -			ret = -EBUSY;
> > -			goto out_unlock;
> > -		}
> > -
> > -		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
> > -		as = lru_vm->as.id;
> > -
> > -		ret = panthor_mmu_as_disable(ptdev, as, true);
> > -		if (ret)
> > -			goto out_unlock;
> > -
> > -		panthor_vm_release_as_locked(lru_vm);
> > -	}
> > -
> > -	/* Assign the free or reclaimed AS to the FD */
> > -	vm->as.id = as;
> > -	set_bit(as, &ptdev->mmu->as.alloc_mask);
> > -	ptdev->mmu->as.slots[as].vm = vm;
> > -
> > -out_enable_as:
> > -	transtab = cfg->arm_lpae_s1_cfg.ttbr;
> > -	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
> > -		   AS_TRANSCFG_PTW_RA |
> > -		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
> > -		   AS_TRANSCFG_INA_BITS(55 - va_bits);
> > -	if (ptdev->coherent)
> > -		transcfg |= AS_TRANSCFG_PTW_SH_OS;
> > -
> > -	/* If the VM is re-activated, we clear the fault. */
> > -	vm->unhandled_fault = false;
> > -
> > -	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> > -	 * before enabling the AS.
> > -	 */
> > -	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
> > -		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
> > -		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
> > -		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
> > -		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
> > -	}
> > -
> > -	/* The VM update is guarded by ::op_lock, which we take at the beginning
> > -	 * of this function, so we don't expect any locked region here.
> > -	 */
> > -	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
> > -	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> > -
> > -out_make_active:
> > -	if (!ret) {
> > -		refcount_set(&vm->as.active_cnt, 1);
> > -		list_del_init(&vm->as.lru_node);
> > -	}
> > -
> > -out_unlock:
> > -	mutex_unlock(&ptdev->mmu->as.slots_lock);
> > -	mutex_unlock(&vm->op_lock);
> > -
> > -out_dev_exit:
> > -	drm_dev_exit(cookie);
> > -	return ret;
> > -}
> > -
> >  /**
> >   * panthor_vm_idle() - Flag a VM idle
> >   * @vm: VM to flag as idle.
> > @@ -1762,6 +1643,128 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> >  }
> >  PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler);
> >  
> > +/**
> > + * panthor_vm_active() - Flag a VM as active
> > + * @vm: VM to flag as active.
> > + *
> > + * Assigns an address space to a VM so it can be used by the GPU/MCU.
> > + *
> > + * Return: 0 on success, a negative error code otherwise.
> > + */
> > +int panthor_vm_active(struct panthor_vm *vm)
> > +{
> > +	struct panthor_device *ptdev = vm->ptdev;
> > +	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
> > +	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
> > +	int ret = 0, as, cookie;
> > +	u64 transtab, transcfg;
> > +	u32 fault_mask;
> > +
> > +	if (!drm_dev_enter(&ptdev->base, &cookie))
> > +		return -ENODEV;
> > +
> > +	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > +		goto out_dev_exit;
> > +
> > +	/* Make sure we don't race with lock/unlock_region() calls
> > +	 * happening around VM bind operations.
> > +	 */
> > +	mutex_lock(&vm->op_lock);
> > +	mutex_lock(&ptdev->mmu->as.slots_lock);
> > +
> > +	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > +		goto out_unlock;
> > +
> > +	as = vm->as.id;
> > +	if (as >= 0) {
> > +		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
> > +		 * re-enable the MMU after clearing+unmasking the AS interrupts.
> > +		 */
> > +		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
> > +			goto out_enable_as;
> > +
> > +		goto out_make_active;
> > +	}
> > +
> > +	/* Check for a free AS */
> > +	if (vm->for_mcu) {
> > +		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
> > +		as = 0;
> > +	} else {
> > +		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
> > +	}
> > +
> > +	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
> > +		struct panthor_vm *lru_vm;
> > +
> > +		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
> > +						  struct panthor_vm,
> > +						  as.lru_node);
> > +		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
> > +			ret = -EBUSY;
> > +			goto out_unlock;
> > +		}
> > +
> > +		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
> > +		as = lru_vm->as.id;
> > +
> > +		ret = panthor_mmu_as_disable(ptdev, as, true);
> > +		if (ret)
> > +			goto out_unlock;
> > +
> > +		panthor_vm_release_as_locked(lru_vm);
> > +	}
> > +
> > +	/* Assign the free or reclaimed AS to the FD */
> > +	vm->as.id = as;
> > +	set_bit(as, &ptdev->mmu->as.alloc_mask);
> > +	ptdev->mmu->as.slots[as].vm = vm;
> > +
> > +out_enable_as:
> > +	transtab = cfg->arm_lpae_s1_cfg.ttbr;
> > +	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
> > +		   AS_TRANSCFG_PTW_RA |
> > +		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
> > +		   AS_TRANSCFG_INA_BITS(55 - va_bits);
> > +	if (ptdev->coherent)
> > +		transcfg |= AS_TRANSCFG_PTW_SH_OS;
> > +
> > +	/* If the VM is re-activated, we clear the fault. */
> > +	vm->unhandled_fault = false;
> > +
> > +	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> > +	 * before enabling the AS.
> > +	 */
> > +	fault_mask = panthor_mmu_as_fault_mask(ptdev, as);
> > +	if (ptdev->mmu->as.faulty_mask & fault_mask) {
> > +		gpu_write(ptdev, MMU_INT_CLEAR, fault_mask);
> > +		ptdev->mmu->as.faulty_mask &= ~fault_mask;
> > +		panthor_mmu_irq_enable_events(&ptdev->mmu->irq, fault_mask);
> > +		panthor_mmu_irq_disable_events(&ptdev->mmu->irq, ptdev->mmu->as.faulty_mask);
> 
> Why do we need a _disable_events() here?

It's what the code originally did as far as I can tell. Not super obvious
because I had to move the function, but it did:

	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
	 * before enabling the AS.
	 */
	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
	}

We write `~(ptdev->mmu->as.faulty_mask & ~panthor_mmu_as_fault_mask(ptdev, as))` to
the mask register. Though now looking at it again, I don't think my new version
expands to the same thing at all, since
`ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);` is trying
to clear the fault mask of the one bit this translates to from what I can tell,
and then the negation in the write re-enables it but clears all other bits? That
can't be right. If anything if it wanted to re-enable interrupts it should OR
the register contents, not overwrite them.

I feel a little better about the me from a few days ago when I can look at the
code with a fresh set of eyes and still not get what it's actually trying to do,
other than trusting the comment.

Also, genuinely what is the point of `panthor_mmu_as_fault_mask`? Half of its
parameters are unused and its entire implementation is shorter than the function
name.

So yeah I think I'll remove the disable_events here and double-check what this
code is actually supposed to do, because the version I'm replacing seems very
non-obvious, as I can't see how what it does corresponds to what the comment
says it does (clear fault and re-enable its interrupt). This also plays into your
remark below.

> 
> > +	}
> > +
> > +	/* The VM update is guarded by ::op_lock, which we take at the beginning
> > +	 * of this function, so we don't expect any locked region here.
> > +	 */
> > +	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
> > +	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> > +
> > +out_make_active:
> > +	if (!ret) {
> > +		refcount_set(&vm->as.active_cnt, 1);
> > +		list_del_init(&vm->as.lru_node);
> > +	}
> > +
> > +out_unlock:
> > +	mutex_unlock(&ptdev->mmu->as.slots_lock);
> > +	mutex_unlock(&vm->op_lock);
> > +
> > +out_dev_exit:
> > +	drm_dev_exit(cookie);
> > +	return ret;
> > +}
> > +
> > +
> 
> nit: one too many empty lines.
> 
> >  /**
> >   * panthor_mmu_suspend() - Suspend the MMU logic
> >   * @ptdev: Device.
> > @@ -1805,7 +1808,8 @@ void panthor_mmu_resume(struct panthor_device *ptdev)
> >  	ptdev->mmu->as.faulty_mask = 0;
> >  	mutex_unlock(&ptdev->mmu->as.slots_lock);
> >  
> > -	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
> > +	panthor_mmu_irq_enable_events(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
> 
> I don't think we should touch the events mask in the suspend/resume
> path. The way I see it, events should be:
> 
> - enabled when an AS is enabled (as_enable())
> - disabled when an AS is disabled (as_disable())
> - disabled when a VM has an unhandled faults
> 
> Because making a VM active might imply evicting another VM, we might
> end up with disable+enable_events() pairs that we could have been
> optimized into a NOP, but the overhead should be negligible, and if we
> have to rotate VMs on AS slots we've already lost anyway (in term of
> perfs).

Yep, I'll do that. I think I was naively trying to translate the code,
but since we now have pirq->mask preserved for us, this explicit juggling
of state can be removed.

> 
> > +	panthor_mmu_irq_resume(&ptdev->mmu->irq);
> >  }
> >  
> >  /**
> > @@ -1859,7 +1863,8 @@ void panthor_mmu_post_reset(struct panthor_device *ptdev)
> >  
> >  	mutex_unlock(&ptdev->mmu->as.slots_lock);
> >  
> > -	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
> > +	panthor_mmu_irq_enable_events(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
> 
> Same here, I don't think we need to change the event mask.
> 
> > +	panthor_mmu_irq_resume(&ptdev->mmu->irq);
> >  
> >  	/* Restart the VM_BIND queues. */
> >  	mutex_lock(&ptdev->mmu->vm.lock);
> > diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c
> > index 57cfc7ce715b..ed3b2b4479ca 100644
> > --- a/drivers/gpu/drm/panthor/panthor_pwr.c
> > +++ b/drivers/gpu/drm/panthor/panthor_pwr.c
> > @@ -545,5 +545,5 @@ void panthor_pwr_resume(struct panthor_device *ptdev)
> >  	if (!ptdev->pwr)
> >  		return;
> >  
> > -	panthor_pwr_irq_resume(&ptdev->pwr->irq, PWR_INTERRUPTS_MASK);
> > +	panthor_pwr_irq_resume(&ptdev->pwr->irq);
> >  }
> > 
> 
> 

Thanks for the review.

Kind regards,
Nicolas Frattaroli

Re: [PATCH v8 2/4] drm/panthor: Extend IRQ helpers for mask modification/restoration

Posted by Boris Brezillon 3 weeks, 3 days ago

On Thu, 15 Jan 2026 12:15:22 +0100
Nicolas Frattaroli <nicolas.frattaroli@collabora.com> wrote:

> On Monday, 12 January 2026 16:12:52 Central European Standard Time Boris Brezillon wrote:
> > On Mon, 12 Jan 2026 15:37:50 +0100
> > Nicolas Frattaroli <nicolas.frattaroli@collabora.com> wrote:
> >   
> > > The current IRQ helpers do not guarantee mutual exclusion that covers
> > > the entire transaction from accessing the mask member and modifying the
> > > mask register.
> > > 
> > > This makes it hard, if not impossible, to implement mask modification
> > > helpers that may change one of these outside the normal
> > > suspend/resume/isr code paths.
> > > 
> > > Add a spinlock to struct panthor_irq that protects both the mask member
> > > and register. Acquire it in all code paths that access these, but drop
> > > it before processing the threaded handler function. Then, add the
> > > aforementioned new helpers: enable_events, and disable_events. They work
> > > by ORing and NANDing the mask bits.
> > > 
> > > resume is changed to no longer have a mask passed, as pirq->mask is
> > > supposed to be the user-requested mask now, rather than a mirror of the
> > > INT_MASK register contents. Users of the resume helper are adjusted
> > > accordingly, including a rather painful refactor in panthor_mmu.c.
> > > 
> > > Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> > > ---
> > >  drivers/gpu/drm/panthor/panthor_device.h |  72 +++++++--
> > >  drivers/gpu/drm/panthor/panthor_fw.c     |   3 +-
> > >  drivers/gpu/drm/panthor/panthor_gpu.c    |   2 +-
> > >  drivers/gpu/drm/panthor/panthor_mmu.c    | 247 ++++++++++++++++---------------
> > >  drivers/gpu/drm/panthor/panthor_pwr.c    |   2 +-
> > >  5 files changed, 187 insertions(+), 139 deletions(-)
> > >   
> > [... snip ...]  
> > > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> > > index 198d59f42578..71b8318eab31 100644
> > > --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> > > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> > > @@ -655,125 +655,6 @@ static void panthor_vm_release_as_locked(struct panthor_vm *vm)
> > >  	vm->as.id = -1;
> > >  }
> > >  
> > > -/**
> > > - * panthor_vm_active() - Flag a VM as active
> > > - * @vm: VM to flag as active.
> > > - *
> > > - * Assigns an address space to a VM so it can be used by the GPU/MCU.
> > > - *
> > > - * Return: 0 on success, a negative error code otherwise.
> > > - */
> > > -int panthor_vm_active(struct panthor_vm *vm)
> > > -{
> > > -	struct panthor_device *ptdev = vm->ptdev;
> > > -	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
> > > -	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
> > > -	int ret = 0, as, cookie;
> > > -	u64 transtab, transcfg;
> > > -
> > > -	if (!drm_dev_enter(&ptdev->base, &cookie))
> > > -		return -ENODEV;
> > > -
> > > -	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > > -		goto out_dev_exit;
> > > -
> > > -	/* Make sure we don't race with lock/unlock_region() calls
> > > -	 * happening around VM bind operations.
> > > -	 */
> > > -	mutex_lock(&vm->op_lock);
> > > -	mutex_lock(&ptdev->mmu->as.slots_lock);
> > > -
> > > -	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > > -		goto out_unlock;
> > > -
> > > -	as = vm->as.id;
> > > -	if (as >= 0) {
> > > -		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
> > > -		 * re-enable the MMU after clearing+unmasking the AS interrupts.
> > > -		 */
> > > -		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
> > > -			goto out_enable_as;
> > > -
> > > -		goto out_make_active;
> > > -	}
> > > -
> > > -	/* Check for a free AS */
> > > -	if (vm->for_mcu) {
> > > -		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
> > > -		as = 0;
> > > -	} else {
> > > -		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
> > > -	}
> > > -
> > > -	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
> > > -		struct panthor_vm *lru_vm;
> > > -
> > > -		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
> > > -						  struct panthor_vm,
> > > -						  as.lru_node);
> > > -		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
> > > -			ret = -EBUSY;
> > > -			goto out_unlock;
> > > -		}
> > > -
> > > -		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
> > > -		as = lru_vm->as.id;
> > > -
> > > -		ret = panthor_mmu_as_disable(ptdev, as, true);
> > > -		if (ret)
> > > -			goto out_unlock;
> > > -
> > > -		panthor_vm_release_as_locked(lru_vm);
> > > -	}
> > > -
> > > -	/* Assign the free or reclaimed AS to the FD */
> > > -	vm->as.id = as;
> > > -	set_bit(as, &ptdev->mmu->as.alloc_mask);
> > > -	ptdev->mmu->as.slots[as].vm = vm;
> > > -
> > > -out_enable_as:
> > > -	transtab = cfg->arm_lpae_s1_cfg.ttbr;
> > > -	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
> > > -		   AS_TRANSCFG_PTW_RA |
> > > -		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
> > > -		   AS_TRANSCFG_INA_BITS(55 - va_bits);
> > > -	if (ptdev->coherent)
> > > -		transcfg |= AS_TRANSCFG_PTW_SH_OS;
> > > -
> > > -	/* If the VM is re-activated, we clear the fault. */
> > > -	vm->unhandled_fault = false;
> > > -
> > > -	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> > > -	 * before enabling the AS.
> > > -	 */
> > > -	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
> > > -		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
> > > -		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
> > > -		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
> > > -		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
> > > -	}
> > > -
> > > -	/* The VM update is guarded by ::op_lock, which we take at the beginning
> > > -	 * of this function, so we don't expect any locked region here.
> > > -	 */
> > > -	drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0);
> > > -	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> > > -
> > > -out_make_active:
> > > -	if (!ret) {
> > > -		refcount_set(&vm->as.active_cnt, 1);
> > > -		list_del_init(&vm->as.lru_node);
> > > -	}
> > > -
> > > -out_unlock:
> > > -	mutex_unlock(&ptdev->mmu->as.slots_lock);
> > > -	mutex_unlock(&vm->op_lock);
> > > -
> > > -out_dev_exit:
> > > -	drm_dev_exit(cookie);
> > > -	return ret;
> > > -}
> > > -
> > >  /**
> > >   * panthor_vm_idle() - Flag a VM idle
> > >   * @vm: VM to flag as idle.
> > > @@ -1762,6 +1643,128 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> > >  }
> > >  PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler);
> > >  
> > > +/**
> > > + * panthor_vm_active() - Flag a VM as active
> > > + * @vm: VM to flag as active.
> > > + *
> > > + * Assigns an address space to a VM so it can be used by the GPU/MCU.
> > > + *
> > > + * Return: 0 on success, a negative error code otherwise.
> > > + */
> > > +int panthor_vm_active(struct panthor_vm *vm)
> > > +{
> > > +	struct panthor_device *ptdev = vm->ptdev;
> > > +	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
> > > +	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
> > > +	int ret = 0, as, cookie;
> > > +	u64 transtab, transcfg;
> > > +	u32 fault_mask;
> > > +
> > > +	if (!drm_dev_enter(&ptdev->base, &cookie))
> > > +		return -ENODEV;
> > > +
> > > +	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > > +		goto out_dev_exit;
> > > +
> > > +	/* Make sure we don't race with lock/unlock_region() calls
> > > +	 * happening around VM bind operations.
> > > +	 */
> > > +	mutex_lock(&vm->op_lock);
> > > +	mutex_lock(&ptdev->mmu->as.slots_lock);
> > > +
> > > +	if (refcount_inc_not_zero(&vm->as.active_cnt))
> > > +		goto out_unlock;
> > > +
> > > +	as = vm->as.id;
> > > +	if (as >= 0) {
> > > +		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
> > > +		 * re-enable the MMU after clearing+unmasking the AS interrupts.
> > > +		 */
> > > +		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
> > > +			goto out_enable_as;
> > > +
> > > +		goto out_make_active;
> > > +	}
> > > +
> > > +	/* Check for a free AS */
> > > +	if (vm->for_mcu) {
> > > +		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
> > > +		as = 0;
> > > +	} else {
> > > +		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
> > > +	}
> > > +
> > > +	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
> > > +		struct panthor_vm *lru_vm;
> > > +
> > > +		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
> > > +						  struct panthor_vm,
> > > +						  as.lru_node);
> > > +		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
> > > +			ret = -EBUSY;
> > > +			goto out_unlock;
> > > +		}
> > > +
> > > +		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
> > > +		as = lru_vm->as.id;
> > > +
> > > +		ret = panthor_mmu_as_disable(ptdev, as, true);
> > > +		if (ret)
> > > +			goto out_unlock;
> > > +
> > > +		panthor_vm_release_as_locked(lru_vm);
> > > +	}
> > > +
> > > +	/* Assign the free or reclaimed AS to the FD */
> > > +	vm->as.id = as;
> > > +	set_bit(as, &ptdev->mmu->as.alloc_mask);
> > > +	ptdev->mmu->as.slots[as].vm = vm;
> > > +
> > > +out_enable_as:
> > > +	transtab = cfg->arm_lpae_s1_cfg.ttbr;
> > > +	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
> > > +		   AS_TRANSCFG_PTW_RA |
> > > +		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
> > > +		   AS_TRANSCFG_INA_BITS(55 - va_bits);
> > > +	if (ptdev->coherent)
> > > +		transcfg |= AS_TRANSCFG_PTW_SH_OS;
> > > +
> > > +	/* If the VM is re-activated, we clear the fault. */
> > > +	vm->unhandled_fault = false;
> > > +
> > > +	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> > > +	 * before enabling the AS.
> > > +	 */
> > > +	fault_mask = panthor_mmu_as_fault_mask(ptdev, as);
> > > +	if (ptdev->mmu->as.faulty_mask & fault_mask) {
> > > +		gpu_write(ptdev, MMU_INT_CLEAR, fault_mask);
> > > +		ptdev->mmu->as.faulty_mask &= ~fault_mask;
> > > +		panthor_mmu_irq_enable_events(&ptdev->mmu->irq, fault_mask);
> > > +		panthor_mmu_irq_disable_events(&ptdev->mmu->irq, ptdev->mmu->as.faulty_mask);  
> > 
> > Why do we need a _disable_events() here?  
> 
> It's what the code originally did as far as I can tell. Not super obvious
> because I had to move the function, but it did:
> 
> 	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
> 	 * before enabling the AS.
> 	 */
> 	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
> 		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
> 		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
> 		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
> 		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
> 	}
> 
> We write `~(ptdev->mmu->as.faulty_mask & ~panthor_mmu_as_fault_mask(ptdev, as))` to
> the mask register. Though now looking at it again, I don't think my new version
> expands to the same thing at all, since
> `ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);` is trying
> to clear the fault mask of the one bit this translates to from what I can tell,
> and then the negation in the write re-enables it but clears all other bits? That
> can't be right. If anything if it wanted to re-enable interrupts it should OR
> the register contents, not overwrite them.

It's more an "enable anything that's not faulty" than a "re-enable
events for this specific AS". So all we were doing is keep track of the
faulty+not-acknowledged mask, and then clearing bits in this mask as AS
slots get acknowledged or recycled.

> 
> I feel a little better about the me from a few days ago when I can look at the
> code with a fresh set of eyes and still not get what it's actually trying to do,
> other than trusting the comment.
> 
> Also, genuinely what is the point of `panthor_mmu_as_fault_mask`? Half of its
> parameters are unused and its entire implementation is shorter than the function
> name.

panthor_mmu_as_fault_mask() is only here because at some point I
intended to port JM HW support to panthor, the fault mask for an AS
there is `BIT(as) | BIT(as + 16)` instead of just `BIT(as)` on new HW.
So the plan was for panthor_mmu_as_fault_mask() to abstract that for us.

[PATCH v8 1/4] drm/panthor: Rework panthor_irq::suspended into panthor_irq::state
[PATCH v8 2/4] drm/panthor: Extend IRQ helpers for mask modification/restoration
[PATCH v8 3/4] drm/panthor: Add tracepoint for hardware utilisation changes
[PATCH v8 4/4] drm/panthor: Add gpu_job_irq tracepoint