[PATCH V1] accel/amdxdna: Remove hardware context status

Lizhi Hou posted 1 patch 4 days, 19 hours ago
drivers/accel/amdxdna/aie2_ctx.c     | 25 ++-----------------------
drivers/accel/amdxdna/aie2_message.c |  3 +++
drivers/accel/amdxdna/amdxdna_ctx.h  |  5 -----
3 files changed, 5 insertions(+), 28 deletions(-)
[PATCH V1] accel/amdxdna: Remove hardware context status
Posted by Lizhi Hou 4 days, 19 hours ago
A newly supported command does not require the hardware context to be
configured upfront. As a result, the existing hardware context status
check incorrectly causes this command to fail.

Remove hardware context status handling entirely. For other commands,
if userspace submits a request without configuring the hardware context
first, the firmware will report an error or time out as appropriate.

Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_ctx.c     | 25 ++-----------------------
 drivers/accel/amdxdna/aie2_message.c |  3 +++
 drivers/accel/amdxdna/amdxdna_ctx.h  |  5 -----
 3 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 208ac5b0579e..db0658fa03b9 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -56,17 +56,6 @@ static void aie2_job_put(struct amdxdna_sched_job *job)
 	kref_put(&job->refcnt, aie2_job_release);
 }
 
-static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
-{
-	 hwctx->old_status = hwctx->status;
-	 hwctx->status = HWCTX_STAT_STOP;
-}
-
-static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
-{
-	hwctx->status = hwctx->old_status;
-}
-
 /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
 static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
 			    struct drm_sched_job *bad_job)
@@ -93,11 +82,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
 		goto out;
 	}
 
-	if (hwctx->status != HWCTX_STAT_READY) {
-		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
-		goto out;
-	}
-
 	ret = aie2_config_cu(hwctx, NULL);
 	if (ret) {
 		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
@@ -149,7 +133,6 @@ static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
 
 	aie2_hwctx_wait_for_idle(hwctx);
 	aie2_hwctx_stop(xdna, hwctx, NULL);
-	aie2_hwctx_status_shift_stop(hwctx);
 
 	return 0;
 }
@@ -171,7 +154,6 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
 {
 	struct amdxdna_dev *xdna = hwctx->client->xdna;
 
-	aie2_hwctx_status_restore(hwctx);
 	return aie2_hwctx_restart(xdna, hwctx);
 }
 
@@ -334,7 +316,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence;
 	int ret;
 
-	if (hwctx->status != HWCTX_STAT_READY)
+	if (!hwctx->priv->mbox_chann)
 		return NULL;
 
 	if (!mmget_not_zero(job->mm))
@@ -716,7 +698,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 	}
 	amdxdna_pm_suspend_put(xdna);
 
-	hwctx->status = HWCTX_STAT_INIT;
 	init_waitqueue_head(&priv->job_free_wq);
 
 	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -760,7 +741,6 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 	/* Request fw to destroy hwctx and cancel the rest pending requests */
 	drm_sched_stop(&hwctx->priv->sched, NULL);
 	aie2_release_resource(hwctx);
-	hwctx->status = HWCTX_STAT_STOP;
 	drm_sched_start(&hwctx->priv->sched, 0);
 
 	mutex_unlock(&xdna->dev_lock);
@@ -805,7 +785,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
 	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
 		return -EINVAL;
 
-	if (hwctx->status != HWCTX_STAT_INIT) {
+	if (hwctx->cus) {
 		XDNA_ERR(xdna, "Not support re-config CU");
 		return -EINVAL;
 	}
@@ -836,7 +816,6 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
 	}
 
 	wmb(); /* To avoid locking in command submit when check status */
-	hwctx->status = HWCTX_STAT_READY;
 
 	return 0;
 
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 578eaa7bf137..53e13858077b 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -493,6 +493,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
 	if (!chann)
 		return -ENODEV;
 
+	if (!hwctx->cus)
+		return 0;
+
 	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
 		XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
 		return -EINVAL;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 4f641926a272..e90204edfb7d 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -107,11 +107,6 @@ struct amdxdna_hwctx {
 	u32				start_col;
 	u32				num_col;
 	u32				num_unused_col;
-#define HWCTX_STAT_INIT  0
-#define HWCTX_STAT_READY 1
-#define HWCTX_STAT_STOP  2
-	u32				status;
-	u32				old_status;
 
 	struct amdxdna_qos_info		     qos;
 	struct amdxdna_hwctx_param_config_cu *cus;
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Remove hardware context status
Posted by Mario Limonciello 4 days, 3 hours ago
On 2/2/26 3:24 PM, Lizhi Hou wrote:
> One newly supported command does not require hardware context configuration
> to be performed upfront. As a result, checking hardware context status
> causes this command to fail incorrectly.
> 
> Remove hardware context status handling entirely. For other commands,
> if userspace submits a request without configuring the hardware context
> first, the firmware will report an error or time out as appropriate.
> 
> Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/aie2_ctx.c     | 25 ++-----------------------
>   drivers/accel/amdxdna/aie2_message.c |  3 +++
>   drivers/accel/amdxdna/amdxdna_ctx.h  |  5 -----
>   3 files changed, 5 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index 208ac5b0579e..db0658fa03b9 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -56,17 +56,6 @@ static void aie2_job_put(struct amdxdna_sched_job *job)
>   	kref_put(&job->refcnt, aie2_job_release);
>   }
>   
> -static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
> -{
> -	 hwctx->old_status = hwctx->status;
> -	 hwctx->status = HWCTX_STAT_STOP;
> -}
> -
> -static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
> -{
> -	hwctx->status = hwctx->old_status;
> -}
> -
>   /* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
>   static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
>   			    struct drm_sched_job *bad_job)
> @@ -93,11 +82,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
>   		goto out;
>   	}
>   
> -	if (hwctx->status != HWCTX_STAT_READY) {
> -		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
> -		goto out;
> -	}
> -
>   	ret = aie2_config_cu(hwctx, NULL);
>   	if (ret) {
>   		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
> @@ -149,7 +133,6 @@ static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
>   
>   	aie2_hwctx_wait_for_idle(hwctx);
>   	aie2_hwctx_stop(xdna, hwctx, NULL);
> -	aie2_hwctx_status_shift_stop(hwctx);
>   
>   	return 0;
>   }
> @@ -171,7 +154,6 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
>   {
>   	struct amdxdna_dev *xdna = hwctx->client->xdna;
>   
> -	aie2_hwctx_status_restore(hwctx);
>   	return aie2_hwctx_restart(xdna, hwctx);
>   }
>   
> @@ -334,7 +316,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
>   	struct dma_fence *fence;
>   	int ret;
>   
> -	if (hwctx->status != HWCTX_STAT_READY)
> +	if (!hwctx->priv->mbox_chann)
>   		return NULL;
>   
>   	if (!mmget_not_zero(job->mm))
> @@ -716,7 +698,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>   	}
>   	amdxdna_pm_suspend_put(xdna);
>   
> -	hwctx->status = HWCTX_STAT_INIT;
>   	init_waitqueue_head(&priv->job_free_wq);
>   
>   	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
> @@ -760,7 +741,6 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>   	/* Request fw to destroy hwctx and cancel the rest pending requests */
>   	drm_sched_stop(&hwctx->priv->sched, NULL);
>   	aie2_release_resource(hwctx);
> -	hwctx->status = HWCTX_STAT_STOP;
>   	drm_sched_start(&hwctx->priv->sched, 0);
>   
>   	mutex_unlock(&xdna->dev_lock);
> @@ -805,7 +785,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
>   	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
>   		return -EINVAL;
>   
> -	if (hwctx->status != HWCTX_STAT_INIT) {
> +	if (hwctx->cus) {
>   		XDNA_ERR(xdna, "Not support re-config CU");
>   		return -EINVAL;
>   	}
> @@ -836,7 +816,6 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
>   	}
>   
>   	wmb(); /* To avoid locking in command submit when check status */
> -	hwctx->status = HWCTX_STAT_READY;
>   
>   	return 0;
>   
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 578eaa7bf137..53e13858077b 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -493,6 +493,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>   	if (!chann)
>   		return -ENODEV;
>   
> +	if (!hwctx->cus)
> +		return 0;
> +
>   	if (hwctx->cus->num_cus > MAX_NUM_CUS) {
>   		XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
>   		return -EINVAL;
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index 4f641926a272..e90204edfb7d 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -107,11 +107,6 @@ struct amdxdna_hwctx {
>   	u32				start_col;
>   	u32				num_col;
>   	u32				num_unused_col;
> -#define HWCTX_STAT_INIT  0
> -#define HWCTX_STAT_READY 1
> -#define HWCTX_STAT_STOP  2
> -	u32				status;
> -	u32				old_status;
>   
>   	struct amdxdna_qos_info		     qos;
>   	struct amdxdna_hwctx_param_config_cu *cus;
Re: [PATCH V1] accel/amdxdna: Remove hardware context status
Posted by Lizhi Hou 3 days, 23 hours ago
Applied to drm-misc-next-fixes

On 2/3/26 06:04, Mario Limonciello wrote:
> On 2/2/26 3:24 PM, Lizhi Hou wrote:
>> One newly supported command does not require hardware context 
>> configuration
>> to be performed upfront. As a result, checking hardware context status
>> causes this command to fail incorrectly.
>>
>> Remove hardware context status handling entirely. For other commands,
>> if userspace submits a request without configuring the hardware context
>> first, the firmware will report an error or time out as appropriate.
>>
>> Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> ---
>>   drivers/accel/amdxdna/aie2_ctx.c     | 25 ++-----------------------
>>   drivers/accel/amdxdna/aie2_message.c |  3 +++
>>   drivers/accel/amdxdna/amdxdna_ctx.h  |  5 -----
>>   3 files changed, 5 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c 
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 208ac5b0579e..db0658fa03b9 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -56,17 +56,6 @@ static void aie2_job_put(struct amdxdna_sched_job 
>> *job)
>>       kref_put(&job->refcnt, aie2_job_release);
>>   }
>>   -static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
>> -{
>> -     hwctx->old_status = hwctx->status;
>> -     hwctx->status = HWCTX_STAT_STOP;
>> -}
>> -
>> -static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
>> -{
>> -    hwctx->status = hwctx->old_status;
>> -}
>> -
>>   /* The bad_job is used in aie2_sched_job_timedout, otherwise, set 
>> it to NULL */
>>   static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct 
>> amdxdna_hwctx *hwctx,
>>                   struct drm_sched_job *bad_job)
>> @@ -93,11 +82,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev 
>> *xdna, struct amdxdna_hwctx *hw
>>           goto out;
>>       }
>>   -    if (hwctx->status != HWCTX_STAT_READY) {
>> -        XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
>> -        goto out;
>> -    }
>> -
>>       ret = aie2_config_cu(hwctx, NULL);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>> @@ -149,7 +133,6 @@ static int aie2_hwctx_suspend_cb(struct 
>> amdxdna_hwctx *hwctx, void *arg)
>>         aie2_hwctx_wait_for_idle(hwctx);
>>       aie2_hwctx_stop(xdna, hwctx, NULL);
>> -    aie2_hwctx_status_shift_stop(hwctx);
>>         return 0;
>>   }
>> @@ -171,7 +154,6 @@ static int aie2_hwctx_resume_cb(struct 
>> amdxdna_hwctx *hwctx, void *arg)
>>   {
>>       struct amdxdna_dev *xdna = hwctx->client->xdna;
>>   -    aie2_hwctx_status_restore(hwctx);
>>       return aie2_hwctx_restart(xdna, hwctx);
>>   }
>>   @@ -334,7 +316,7 @@ aie2_sched_job_run(struct drm_sched_job 
>> *sched_job)
>>       struct dma_fence *fence;
>>       int ret;
>>   -    if (hwctx->status != HWCTX_STAT_READY)
>> +    if (!hwctx->priv->mbox_chann)
>>           return NULL;
>>         if (!mmget_not_zero(job->mm))
>> @@ -716,7 +698,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>       }
>>       amdxdna_pm_suspend_put(xdna);
>>   -    hwctx->status = HWCTX_STAT_INIT;
>>       init_waitqueue_head(&priv->job_free_wq);
>>         XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
>> @@ -760,7 +741,6 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>       /* Request fw to destroy hwctx and cancel the rest pending 
>> requests */
>>       drm_sched_stop(&hwctx->priv->sched, NULL);
>>       aie2_release_resource(hwctx);
>> -    hwctx->status = HWCTX_STAT_STOP;
>>       drm_sched_start(&hwctx->priv->sched, 0);
>>         mutex_unlock(&xdna->dev_lock);
>> @@ -805,7 +785,7 @@ static int aie2_hwctx_cu_config(struct 
>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>       if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
>>           return -EINVAL;
>>   -    if (hwctx->status != HWCTX_STAT_INIT) {
>> +    if (hwctx->cus) {
>>           XDNA_ERR(xdna, "Not support re-config CU");
>>           return -EINVAL;
>>       }
>> @@ -836,7 +816,6 @@ static int aie2_hwctx_cu_config(struct 
>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>       }
>>         wmb(); /* To avoid locking in command submit when check 
>> status */
>> -    hwctx->status = HWCTX_STAT_READY;
>>         return 0;
>>   diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 578eaa7bf137..53e13858077b 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -493,6 +493,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>       if (!chann)
>>           return -ENODEV;
>>   +    if (!hwctx->cus)
>> +        return 0;
>> +
>>       if (hwctx->cus->num_cus > MAX_NUM_CUS) {
>>           XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
>>           return -EINVAL;
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h 
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index 4f641926a272..e90204edfb7d 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -107,11 +107,6 @@ struct amdxdna_hwctx {
>>       u32                start_col;
>>       u32                num_col;
>>       u32                num_unused_col;
>> -#define HWCTX_STAT_INIT  0
>> -#define HWCTX_STAT_READY 1
>> -#define HWCTX_STAT_STOP  2
>> -    u32                status;
>> -    u32                old_status;
>>         struct amdxdna_qos_info             qos;
>>       struct amdxdna_hwctx_param_config_cu *cus;
>