There are some special registers which remain accessible even when the
GX power domain is collapsed during an IFPC sleep. Accessing these
registers wakes the GPU up from power collapse and allows programming
them without an additional handshake with the GMU. Add support for this
special register write sequence.
Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 63 ++++++++++++++++++++++++++++++-
drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 +
drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 +++++-----
3 files changed, 73 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 491fde0083a202bec7c6b3bca88d0e5a717a6560..8c004fc3abd2896d467a9728b34e99e4ed944dc4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -16,6 +16,67 @@
 
 #define GPU_PAS_ID 13
 
+static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
+{
+	/* Success if !writedropped0/1 */
+	if (!(status & mask))
+		return true;
+
+	udelay(10);
+
+	/* Try to update fenced register again */
+	gpu_write(gpu, offset, value);
+	return false;
+}
+
+static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
+{
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+	struct msm_gpu *gpu = &adreno_gpu->base;
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	u32 status;
+
+	gpu_write(gpu, offset, value);
+
+	/* Nothing else to be done in the case of no-GMU */
+	if (adreno_has_gmu_wrapper(adreno_gpu))
+		return 0;
+
+	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+			fence_status_check(gpu, offset, value, status, mask), 0, 1000))
+		return 0;
+
+	dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
+			offset);
+
+	/* Try again for another 1ms before failing */
+	gpu_write(gpu, offset, value);
+	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+			fence_status_check(gpu, offset, value, status, mask), 0, 1000))
+		return 0;
+
+	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
+			offset);
+
+	return -ETIMEDOUT;
+}
+
+int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
+{
+	int ret;
+
+	ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
+	if (ret)
+		return ret;
+
+	if (!is_64b)
+		return 0;
+
+	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
+
+	return ret;
+}
+
 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -86,7 +147,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	/* Update HW if this is the current ring and we are not in preempt*/
 	if (!a6xx_in_preempt(a6xx_gpu)) {
 		if (a6xx_gpu->cur_ring == ring)
-			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
 		else
 			ring->restore_wptr = true;
 	} else {
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 9201a53dd341bf432923ffb44947e015208a3d02..2be036a3faca58b4b559c30881e4b31d5929592a 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -291,5 +291,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
 
 void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
 void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
+int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
 
 #endif /* __A6XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
index 3b17fd2dba89115a8e48ba9469e52e4305b0cdbb..5b0fd510ff58d989ab285f1a2497f6f522a6b187 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
@@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu,
 }
 
 /* Write the most recent wptr for the given ring into the hardware */
-static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
 {
 	unsigned long flags;
 	uint32_t wptr;
@@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	if (ring->restore_wptr) {
 		wptr = get_wptr(ring);
 
-		gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+		a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
 
 		ring->restore_wptr = false;
 	}
@@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu)
 
 	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
 
-	update_wptr(gpu, a6xx_gpu->cur_ring);
+	update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
 
 	set_preempt_state(a6xx_gpu, PREEMPT_NONE);
 
@@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
 	 */
 	if (!ring || (a6xx_gpu->cur_ring == ring)) {
 		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
-		update_wptr(gpu, a6xx_gpu->cur_ring);
+		update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
 		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
 		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
 		return;
@@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
 
 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
 
-	gpu_write64(gpu,
-		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
-		a6xx_gpu->preempt_smmu_iova[ring->id]);
+	a6xx_fenced_write(a6xx_gpu,
+		REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
+		BIT(1), true);
 
-	gpu_write64(gpu,
+	a6xx_fenced_write(a6xx_gpu,
 		REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
-		a6xx_gpu->preempt_iova[ring->id]);
+		a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
 
 	a6xx_gpu->next_ring = ring;
 
@@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
 	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
 
 	/* Trigger the preemption */
-	gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
+	a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
 }
 
 static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
--
2.50.1
On Sun, Jul 20, 2025 at 05:46:08PM +0530, Akhil P Oommen wrote:
> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
> +{
> +	/* Success if !writedropped0/1 */
> +	if (!(status & mask))
> +		return true;
> +
> +	udelay(10);

Why do we need udelay() here? Why can't we use the interval setting
inside gmu_poll_timeout()?

> +	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);

No need for a separate ret assignment.

> -			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
> +			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);

I can't help but notice that we don't handle a6xx_fenced_write() errors.
Is that fine? Or will it result in some sort of crash / reset?

-- 
With best wishes
Dmitry
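For illustration, the shape Dmitry is suggesting would look roughly like
the sketch below. The 10 us poll interval within the same 1 ms window is
an assumed value; note that this variant only re-reads the fence status
between iterations and never re-issues the fenced register write, which
is the crux of Akhil's reply below.

	gpu_write(gpu, offset, value);

	/* sketch: poll every 10 us for up to 1 ms; the write is not retried */
	if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			      !(status & mask), 10, 1000))
		return 0;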
On 7/22/2025 7:09 PM, Dmitry Baryshkov wrote:
>> +	udelay(10);
>
> Why do we need udelay() here? Why can't we use the interval setting
> inside gmu_poll_timeout()?

Then the delay won't be at the right place when we use
gmu_poll_timeout(). We need the below sequence:

1. reg write
2. check for status
   2.1 Done if success
3. wait
4. reg write again
5. goto 2

Another option is to avoid gmu_poll_timeout() and just open-code the
loop here.

>> +	/* Nothing else to be done in the case of no-GMU */
>> +	if (adreno_has_gmu_wrapper(adreno_gpu))
>> +		return 0;

I think we should add an mb() here, like downstream, just to be
cautious.

>> +	ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
>
> No need for a separate ret assignment.

Ack

>> -			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
>> +			a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
>
> I can't help but notice that we don't handle a6xx_fenced_write() errors.
> Is that fine? Or will it result in some sort of crash / reset?

Recover_worker will kick in indirectly here, e.g. due to a hangcheck
timeout. Chances of failure on production devices are low, though.

-Akhil
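A minimal sketch of the open-coded loop mentioned above, assuming
gmu_read() is available for the fence status register; the retry budget
(100 iterations of 10 us, roughly the patch's 1 ms window) and the
function name are illustrative, not the posted code:

	static int fenced_write_open_coded(struct a6xx_gpu *a6xx_gpu, u32 offset,
					   u32 value, u32 mask)
	{
		struct msm_gpu *gpu = &a6xx_gpu->base.base;
		struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
		int i;

		for (i = 0; i < 100; i++) {
			/* steps 1/4: write (or re-write) the fenced register */
			gpu_write(gpu, offset, value);

			/* step 2: success once the writedropped bit is clear */
			if (!(gmu_read(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS) & mask))
				return 0;

			/* step 3: wait before retrying */
			udelay(10);
		}

		return -ETIMEDOUT;
	}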
On 7/22/25 3:39 PM, Dmitry Baryshkov wrote:
> Why do we need udelay() here? Why can't we use the interval setting
> inside gmu_poll_timeout()?

Similarly here:

>> +	dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
>> +			offset);
[...]
>> +	dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
>> +			offset);

We may want to combine the two, so as not to worry the user too much.

If it's going to fail, I would assume it's going to fail both checks
(unless e.g. the bus is so congested that a single write can't go
through to a sleepy GPU across 2 milliseconds, but that's another
issue).

Konrad
On 7/22/2025 8:22 PM, Konrad Dybcio wrote:
> We may want to combine the two, so as not to worry the user too much.
>
> If it's going to fail, I would assume it's going to fail both checks
> (unless e.g. the bus is so congested that a single write can't go
> through to a sleepy GPU across 2 milliseconds, but that's another
> issue).

In case of success, we cannot be sure that the first write went
through, so we should poll separately.

-Akhil
On 7/23/25 11:06 PM, Akhil P Oommen wrote:
> In case of success, we cannot be sure that the first write went
> through, so we should poll separately.

You're writing to it 2 (outside fence_status_check) + 2*1000/10 (inside)
== 202 times; it really had better go through..

If it's just about the write reaching the GPU, you can write it once
and read back the register you've written to; this way you're sure that
the GPU can observe the write.

Konrad
On 7/24/2025 5:16 PM, Konrad Dybcio wrote:
> You're writing to it 2 (outside fence_status_check) + 2*1000/10 (inside)
> == 202 times; it really had better go through..

Consider the following sequence:
1. write reg1 <- suppose this is dropped
2. write reg2 <- and this went through
3. check fence status <- this will show success

> If it's just about the write reaching the GPU, you can write it once
> and read back the register you've written to; this way you're sure that
> the GPU can observe the write.

This is a very unique hw behavior. We can't do a posted write.

-Akhil
On 7/24/25 6:54 PM, Akhil P Oommen wrote:
> Consider the following sequence:
> 1. write reg1 <- suppose this is dropped
> 2. write reg2 <- and this went through
> 3. check fence status <- this will show success

What I'm saying is that fence_status_check() does the same write you
execute in between the polling calls.

Konrad
On 7/29/2025 6:31 PM, Konrad Dybcio wrote:
>>>> +	dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
>>>> +			offset);

This print should come after the 2nd polling. Otherwise the delay it
introduces may allow the GPU to go back into IFPC.

> What I'm saying is that fence_status_check() does the same write you
> execute in between the polling calls.

On second thought, I think it is simpler to just use a single poll of
2ms and measure the time taken using ktime, printing a warning if it
took more than 1ms.

-Akhil
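A rough sketch of that single-poll idea as a fragment of fenced_write(),
reusing the helpers from the patch; the 2 ms window and the ktime
bookkeeping follow the description above, while the exact shape here is
an assumption:

	ktime_t start = ktime_get();

	gpu_write(gpu, offset, value);
	if (gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
			     fence_status_check(gpu, offset, value, status, mask),
			     0, 2000)) {
		dev_err_ratelimited(gmu->dev,
				    "fenced register write (0x%x) fail\n", offset);
		return -ETIMEDOUT;
	}

	/* success, but warn if it took longer than the 1 ms budget */
	if (ktime_us_delta(ktime_get(), start) > 1000)
		dev_err_ratelimited(gmu->dev,
				    "delay in fenced register write (0x%x)\n", offset);

	return 0;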
On 7/30/2025 3:10 AM, Akhil P Oommen wrote:
> On second thought, I think it is simpler to just use a single poll of
> 2ms and measure the time taken using ktime, printing a warning if it
> took more than 1ms.

But then we can't know whether the higher latency measured is because
this thread got scheduled out just before we measured with ktime the
2nd time, so we should rely on gmu_poll_timeout() for accuracy.

We need a warning after 1ms because there is a 1ms timeout in VRM. We
should know if it occurs frequently enough to cause a performance
issue. I will move the prints towards the fn exit.

-Akhil
On 7/29/25 11:49 PM, Akhil P Oommen wrote:
> We need a warning after 1ms because there is a 1ms timeout in VRM. We
> should know if it occurs frequently enough to cause a performance
> issue.

VRM, as in the RPMh component? Please provide more context on how it's
tied together in the commit message; it'll be useful for people down
the line. (I'd assume accessing these special registers invokes some
handler that brings GX back up momentarily, which is why we're polling
in the first place?)

Konrad