[PATCH v2 07/16] drm/msm/adreno: Add fenced regwrite support

Akhil P Oommen posted 16 patches 3 weeks, 3 days ago
[PATCH v2 07/16] drm/msm/adreno: Add fenced regwrite support
Posted by Akhil P Oommen 3 weeks, 3 days ago
There are some special registers which are accessible even when GX power
domain is collapsed during an IFPC sleep. Accessing these registers
wakes up GPU from power collapse and allow programming these registers
without additional handshake with GMU. This patch adds support for this
special register write sequence.

Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 80 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
 drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 ++++----
 3 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 45dd5fd1c2bfcb0a01b71a326c7d95b0f9496d99..a63dad80ef461da45d5c41a042ed4f19d8282ef5 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -16,6 +16,84 @@
 
 #define GPU_PAS_ID 13
 
+static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
+{
+	/* Success if !writedropped0/1 */
+	if (!(status & mask))
+		return true;
+
+	udelay(10);
+
+	/* Try to update fenced register again */
+	gpu_write(gpu, offset, value);
+
+	/* We can't do a posted write here because the power domain could be
+	 * in collapse state. So use the heaviest barrier instead
+	 */
+	mb();
+	return false;
+}
+
+static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
+{
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+	struct msm_gpu *gpu = &adreno_gpu->base;
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	u32 status;
+
+	gpu_write(gpu, offset, value);
+
+	/* Nothing else to be done in the case of no-GMU */
+	if (adreno_has_gmu_wrapper(adreno_gpu))
+		return 0;
+
+	/* We can't do a posted write here because the power domain could be
+	 * in collapse state. So use the heaviest barrier instead
+	 */
+	mb();
+
+	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+			fence_status_check(gpu, offset, value, status, mask), 0, 1000))
+		return 0;
+
+	/* Try again for another 1ms before failing */
+	gpu_write(gpu, offset, value);
+	mb();
+
+	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+			fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
+		/*
+		 * The 'delay' warning is here because the pause to print this
+		 * warning will allow gpu to move to power collapse which
+		 * defeats the purpose of continuous polling for 2 ms
+		 */
+		dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
+				offset);
+		return 0;
+	}
+
+	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
+			offset);
+
+	return -ETIMEDOUT;
+}
+
+int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
+{
+	int ret;
+
+	ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
+	if (ret)
+		return ret;
+
+	if (!is_64b)
+		return 0;
+
+	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
+
+	return ret;
+}
+
 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -86,7 +164,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	/* Update HW if this is the current ring and we are not in preempt*/
 	if (!a6xx_in_preempt(a6xx_gpu)) {
 		if (a6xx_gpu->cur_ring == ring)
-			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
 		else
 			ring->restore_wptr = true;
 	} else {
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 6e71f617fc3d0d564e51650dfed63a18f31042ac..e736c59d566b3fcf8c62a212494e3b110c09caa9 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -295,5 +295,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
 
 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
+int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
 
 #endif /* __A6XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
index 6a12a35dabff1e64aae8440c2a8c88f5feb4803e..10625ffbc4cfc26edc36efcf11dbb4efd55ab3e0 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
@@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu,
 }
 
 /* Write the most recent wptr for the given ring into the hardware */
-static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
 {
 	unsigned long flags;
 	uint32_t wptr;
@@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	if (ring->restore_wptr) {
 		wptr = get_wptr(ring);
 
-		gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+		a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
 
 		ring->restore_wptr = false;
 	}
@@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu)
 
 	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
 
-	update_wptr(gpu, a6xx_gpu->cur_ring);
+	update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
 
 	set_preempt_state(a6xx_gpu, PREEMPT_NONE);
 
@@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
 	 */
 	if (!ring || (a6xx_gpu->cur_ring == ring)) {
 		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
-		update_wptr(gpu, a6xx_gpu->cur_ring);
+		update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
 		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
 		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
 		return;
@@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
 
 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
 
-	gpu_write64(gpu,
-		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
-		a6xx_gpu->preempt_smmu_iova[ring->id]);
+	a6xx_fenced_write(a6xx_gpu,
+		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
+		BIT(1), true);
 
-	gpu_write64(gpu,
+	a6xx_fenced_write(a6xx_gpu,
 		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
-		a6xx_gpu->preempt_iova[ring->id]);
+		a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
 
 	a6xx_gpu->next_ring = ring;
 
@@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
 	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
 
 	/* Trigger the preemption */
-	gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
+	a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
 }
 
 static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,

-- 
2.50.1
Re: [PATCH v2 07/16] drm/msm/adreno: Add fenced regwrite support
Posted by Connor Abbott 3 weeks, 3 days ago
On Mon, Sep 8, 2025 at 4:27 AM Akhil P Oommen <akhilpo@oss.qualcomm.com> wrote:
>
> There are some special registers which are accessible even when GX power
> domain is collapsed during an IFPC sleep. Accessing these registers
> wakes up GPU from power collapse and allow programming these registers
> without additional handshake with GMU. This patch adds support for this
> special register write sequence.
>
> Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 80 ++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 ++++----
>  3 files changed, 90 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 45dd5fd1c2bfcb0a01b71a326c7d95b0f9496d99..a63dad80ef461da45d5c41a042ed4f19d8282ef5 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -16,6 +16,84 @@
>
>  #define GPU_PAS_ID 13
>
> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
> +{
> +       /* Success if !writedropped0/1 */
> +       if (!(status & mask))
> +               return true;
> +
> +       udelay(10);
> +
> +       /* Try to update fenced register again */
> +       gpu_write(gpu, offset, value);
> +
> +       /* We can't do a posted write here because the power domain could be
> +        * in collapse state. So use the heaviest barrier instead
> +        */
> +       mb();
> +       return false;
> +}
> +
> +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
> +{
> +       struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
> +       struct msm_gpu *gpu = &adreno_gpu->base;
> +       struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> +       u32 status;
> +
> +       gpu_write(gpu, offset, value);
> +
> +       /* Nothing else to be done in the case of no-GMU */
> +       if (adreno_has_gmu_wrapper(adreno_gpu))
> +               return 0;
> +
> +       /* We can't do a posted write here because the power domain could be
> +        * in collapse state. So use the heaviest barrier instead
> +        */
> +       mb();
> +
> +       if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
> +                       fence_status_check(gpu, offset, value, status, mask), 0, 1000))
> +               return 0;
> +
> +       /* Try again for another 1ms before failing */
> +       gpu_write(gpu, offset, value);
> +       mb();
> +
> +       if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
> +                       fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
> +               /*
> +                * The 'delay' warning is here because the pause to print this
> +                * warning will allow gpu to move to power collapse which
> +                * defeats the purpose of continuous polling for 2 ms
> +                */
> +               dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
> +                               offset);
> +               return 0;
> +       }
> +
> +       dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
> +                       offset);
> +
> +       return -ETIMEDOUT;
> +}
> +
> +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
> +{
> +       int ret;
> +
> +       ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
> +       if (ret)
> +               return ret;
> +
> +       if (!is_64b)
> +               return 0;
> +
> +       ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
> +
> +       return ret;
> +}
> +
>  static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>  {
>         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -86,7 +164,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>         /* Update HW if this is the current ring and we are not in preempt*/
>         if (!a6xx_in_preempt(a6xx_gpu)) {
>                 if (a6xx_gpu->cur_ring == ring)
> -                       gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
> +                       a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
>                 else
>                         ring->restore_wptr = true;
>         } else {
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 6e71f617fc3d0d564e51650dfed63a18f31042ac..e736c59d566b3fcf8c62a212494e3b110c09caa9 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -295,5 +295,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
>
>  void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
>  void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
> +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);

"mask" makes it sound like it's the mask for a masked write, which it
isn't. At least in the public API I'd name it something more explicit
like "fence_status_mask". Also it would be nice to add defines like
GMU_FENCE_STATUS_WPTR/CONTEXT_SWITCH to make the parameter values in
callsites less magical. Finally, this might be personal preference,
but it's not immediately obvious what the "true"/"false" in callsites
mean, so it would make users clearer to add a separate
"a6xx_fenced_write64" and make 64-bit reg writes use that instead of
is_64b.

Connor

>
>  #endif /* __A6XX_GPU_H__ */
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> index 6a12a35dabff1e64aae8440c2a8c88f5feb4803e..10625ffbc4cfc26edc36efcf11dbb4efd55ab3e0 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu,
>  }
>
>  /* Write the most recent wptr for the given ring into the hardware */
> -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
>  {
>         unsigned long flags;
>         uint32_t wptr;
> @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>         if (ring->restore_wptr) {
>                 wptr = get_wptr(ring);
>
> -               gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
> +               a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
>
>                 ring->restore_wptr = false;
>         }
> @@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu)
>
>         set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
>
> -       update_wptr(gpu, a6xx_gpu->cur_ring);
> +       update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
>
>         set_preempt_state(a6xx_gpu, PREEMPT_NONE);
>
> @@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>          */
>         if (!ring || (a6xx_gpu->cur_ring == ring)) {
>                 set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
> -               update_wptr(gpu, a6xx_gpu->cur_ring);
> +               update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
>                 set_preempt_state(a6xx_gpu, PREEMPT_NONE);
>                 spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
>                 return;
> @@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>
>         spin_unlock_irqrestore(&ring->preempt_lock, flags);
>
> -       gpu_write64(gpu,
> -               REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
> -               a6xx_gpu->preempt_smmu_iova[ring->id]);
> +       a6xx_fenced_write(a6xx_gpu,
> +               REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
> +               BIT(1), true);
>
> -       gpu_write64(gpu,
> +       a6xx_fenced_write(a6xx_gpu,
>                 REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
> -               a6xx_gpu->preempt_iova[ring->id]);
> +               a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
>
>         a6xx_gpu->next_ring = ring;
>
> @@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>         set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
>
>         /* Trigger the preemption */
> -       gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
> +       a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
>  }
>
>  static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
>
> --
> 2.50.1
>
Re: [PATCH v2 07/16] drm/msm/adreno: Add fenced regwrite support
Posted by Akhil P Oommen 3 weeks, 3 days ago
On 9/8/2025 9:24 PM, Connor Abbott wrote:
> On Mon, Sep 8, 2025 at 4:27 AM Akhil P Oommen <akhilpo@oss.qualcomm.com> wrote:
>>
>> There are some special registers which are accessible even when GX power
>> domain is collapsed during an IFPC sleep. Accessing these registers
>> wakes up GPU from power collapse and allow programming these registers
>> without additional handshake with GMU. This patch adds support for this
>> special register write sequence.
>>
>> Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
>> ---
>>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 80 ++++++++++++++++++++++++++++++-
>>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
>>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 ++++----
>>  3 files changed, 90 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 45dd5fd1c2bfcb0a01b71a326c7d95b0f9496d99..a63dad80ef461da45d5c41a042ed4f19d8282ef5 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -16,6 +16,84 @@
>>
>>  #define GPU_PAS_ID 13
>>
>> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
>> +{
>> +       /* Success if !writedropped0/1 */
>> +       if (!(status & mask))
>> +               return true;
>> +
>> +       udelay(10);
>> +
>> +       /* Try to update fenced register again */
>> +       gpu_write(gpu, offset, value);
>> +
>> +       /* We can't do a posted write here because the power domain could be
>> +        * in collapse state. So use the heaviest barrier instead
>> +        */
>> +       mb();
>> +       return false;
>> +}
>> +
>> +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
>> +{
>> +       struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> +       struct msm_gpu *gpu = &adreno_gpu->base;
>> +       struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> +       u32 status;
>> +
>> +       gpu_write(gpu, offset, value);
>> +
>> +       /* Nothing else to be done in the case of no-GMU */
>> +       if (adreno_has_gmu_wrapper(adreno_gpu))
>> +               return 0;
>> +
>> +       /* We can't do a posted write here because the power domain could be
>> +        * in collapse state. So use the heaviest barrier instead
>> +        */
>> +       mb();
>> +
>> +       if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
>> +                       fence_status_check(gpu, offset, value, status, mask), 0, 1000))
>> +               return 0;
>> +
>> +       /* Try again for another 1ms before failing */
>> +       gpu_write(gpu, offset, value);
>> +       mb();
>> +
>> +       if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
>> +                       fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
>> +               /*
>> +                * The 'delay' warning is here because the pause to print this
>> +                * warning will allow gpu to move to power collapse which
>> +                * defeats the purpose of continuous polling for 2 ms
>> +                */
>> +               dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
>> +                               offset);
>> +               return 0;
>> +       }
>> +
>> +       dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
>> +                       offset);
>> +
>> +       return -ETIMEDOUT;
>> +}
>> +
>> +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
>> +{
>> +       int ret;
>> +
>> +       ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
>> +       if (ret)
>> +               return ret;
>> +
>> +       if (!is_64b)
>> +               return 0;
>> +
>> +       ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
>> +
>> +       return ret;
>> +}
>> +
>>  static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>>  {
>>         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> @@ -86,7 +164,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>>         /* Update HW if this is the current ring and we are not in preempt*/
>>         if (!a6xx_in_preempt(a6xx_gpu)) {
>>                 if (a6xx_gpu->cur_ring == ring)
>> -                       gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
>> +                       a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
>>                 else
>>                         ring->restore_wptr = true;
>>         } else {
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> index 6e71f617fc3d0d564e51650dfed63a18f31042ac..e736c59d566b3fcf8c62a212494e3b110c09caa9 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> @@ -295,5 +295,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
>>
>>  void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
>>  void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
>> +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
> 
> "mask" makes it sound like it's the mask for a masked write, which it
> isn't. At least in the public API I'd name it something more explicit
> like "fence_status_mask". Also it would be nice to add defines like
> GMU_FENCE_STATUS_WPTR/CONTEXT_SWITCH to make the parameter values in
> callsites less magical. Finally, this might be personal preference,
> but it's not immediately obvious what the "true"/"false" in callsites
> mean, so it would make users clearer to add a separate
> "a6xx_fenced_write64" and make 64-bit reg writes use that instead of
> is_64b.

I agree about the BIT definition. Will update if I send another
revision. Same for the 'mask'. I can see the confusion due to write and
mask in the same line.

64B fenced write is used only at a single place (in the preempt trigger
call). So I feel it is an overkill to create another function for that.
I did weigh that option earlier though.

-Akhil

> 
> Connor
> 
>>
Re: [PATCH v2 07/16] drm/msm/adreno: Add fenced regwrite support
Posted by Konrad Dybcio 3 weeks, 3 days ago
On 9/8/25 10:27 AM, Akhil P Oommen wrote:
> There are some special registers which are accessible even when GX power
> domain is collapsed during an IFPC sleep. Accessing these registers
> wakes up GPU from power collapse and allow programming these registers
> without additional handshake with GMU. This patch adds support for this
> special register write sequence.
> 
> Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 80 ++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 ++++----
>  3 files changed, 90 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 45dd5fd1c2bfcb0a01b71a326c7d95b0f9496d99..a63dad80ef461da45d5c41a042ed4f19d8282ef5 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -16,6 +16,84 @@
>  
>  #define GPU_PAS_ID 13
>  
> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
> +{
> +	/* Success if !writedropped0/1 */
> +	if (!(status & mask))
> +		return true;
> +
> +	udelay(10);
> +
> +	/* Try to update fenced register again */
> +	gpu_write(gpu, offset, value);
> +
> +	/* We can't do a posted write here because the power domain could be
> +	 * in collapse state. So use the heaviest barrier instead
> +	 */
> +	mb();
> +	return false;
> +}
> +
> +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
> +{
> +	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
> +	struct msm_gpu *gpu = &adreno_gpu->base;
> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> +	u32 status;
> +
> +	gpu_write(gpu, offset, value);
> +
> +	/* Nothing else to be done in the case of no-GMU */
> +	if (adreno_has_gmu_wrapper(adreno_gpu))
> +		return 0;
> +
> +	/* We can't do a posted write here because the power domain could be
> +	 * in collapse state. So use the heaviest barrier instead
> +	 */

I'm not sure I follow - what's the relationship between the write being
posted and the power domain being collapsed (i.e. the hw not being
powered on)?

Are you trying to get rid of the delay that could happen between this
write leaving the CPU and arriving at the GPU (which would then be
woken up), so that the 1ms poll below has greater chance to succeed
because of how these "special registers" work?

Konrad
Re: [PATCH v2 07/16] drm/msm/adreno: Add fenced regwrite support
Posted by Akhil P Oommen 3 weeks, 3 days ago
On 9/8/2025 9:07 PM, Konrad Dybcio wrote:
> On 9/8/25 10:27 AM, Akhil P Oommen wrote:
>> There are some special registers which are accessible even when GX power
>> domain is collapsed during an IFPC sleep. Accessing these registers
>> wakes up GPU from power collapse and allow programming these registers
>> without additional handshake with GMU. This patch adds support for this
>> special register write sequence.
>>
>> Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
>> ---
>>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 80 ++++++++++++++++++++++++++++++-
>>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  1 +
>>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 ++++----
>>  3 files changed, 90 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 45dd5fd1c2bfcb0a01b71a326c7d95b0f9496d99..a63dad80ef461da45d5c41a042ed4f19d8282ef5 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -16,6 +16,84 @@
>>  
>>  #define GPU_PAS_ID 13
>>  
>> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
>> +{
>> +	/* Success if !writedropped0/1 */
>> +	if (!(status & mask))
>> +		return true;
>> +
>> +	udelay(10);
>> +
>> +	/* Try to update fenced register again */
>> +	gpu_write(gpu, offset, value);
>> +
>> +	/* We can't do a posted write here because the power domain could be
>> +	 * in collapse state. So use the heaviest barrier instead
>> +	 */
>> +	mb();
>> +	return false;
>> +}
>> +
>> +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
>> +{
>> +	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> +	struct msm_gpu *gpu = &adreno_gpu->base;
>> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> +	u32 status;
>> +
>> +	gpu_write(gpu, offset, value);
>> +
>> +	/* Nothing else to be done in the case of no-GMU */
>> +	if (adreno_has_gmu_wrapper(adreno_gpu))
>> +		return 0;
>> +
>> +	/* We can't do a posted write here because the power domain could be
>> +	 * in collapse state. So use the heaviest barrier instead
>> +	 */
> 
> I'm not sure I follow - what's the relationship between the write being
> posted and the power domain being collapsed (i.e. the hw not being
> powered on)?
> 
> Are you trying to get rid of the delay that could happen between this
> write leaving the CPU and arriving at the GPU (which would then be
> woken up), so that the 1ms poll below has greater chance to succeed
> because of how these "special registers" work?

We should strictly ensure that the GX register write is posted first (it
could be posted to the hw or dropped by the fence hw). Otherwise the
fence status register (in CX domain) might incorrectly report that the
register write went through. Ideally, we should do a posted write here,
but we can't do that due to IFPC. A full barrier (DSB SY) will ensure
this ordering requirement.

Another motivation here is to align closely with the downstream sequence
which uses a similar barrier. It will be a super painful debug if this
sequence miss a register write.

-Akhil.

> 
> Konrad