[PATCH V1] accel/amdxdna: Fix potential NULL pointer dereference in context cleanup

Lizhi Hou posted 1 patch 5 days, 4 hours ago
There is a newer version of this series
drivers/accel/amdxdna/aie2_message.c | 52 +++++++++++++++-------------
1 file changed, 28 insertions(+), 24 deletions(-)
[PATCH V1] accel/amdxdna: Fix potential NULL pointer dereference in context cleanup
Posted by Lizhi Hou 5 days, 4 hours ago
aie_destroy_context() is invoked during error handling in
aie2_create_context(). However, aie_destroy_context() assumes that the
context's mailbox channel pointer is non-NULL. If mailbox channel
creation fails, the pointer remains NULL and calling aie_destroy_context()
can lead to a NULL pointer dereference.

In aie2_create_context(), replace aie_destroy_context() with a function
which request firmware to remove the context created previously.

Fixes: be462c97b7df ("accel/amdxdna: Add hardware context")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_message.c | 52 +++++++++++++++-------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 03b75757a6e6..980aef9dc51a 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -192,6 +192,19 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
 	return 0;
 }
 
+static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id)
+{
+	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
+	struct amdxdna_dev *xdna = ndev->xdna;
+	int ret;
+
+	req.context_id = id;
+	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+	if (ret)
+		XDNA_WARN(xdna, "Destroy context failed, ret %d", ret);
+
+	return ret;
+}
 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
 {
 	DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
@@ -214,13 +227,16 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
 		return ret;
 
 	hwctx->fw_ctx_id = resp.context_id;
-	WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
+	if (hwctx->fw_ctx_id == -1) {
+		WARN_ON_ONCE("Unexpected context id");
+		return -EINVAL;
+	}
 
 	if (ndev->force_preempt_enabled) {
 		ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
 		if (ret) {
 			XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
-			return ret;
+			goto del_ctx_req;
 		}
 	}
 
@@ -237,51 +253,39 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
 
 	ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
 	if (ret == -EINVAL) {
-		XDNA_ERR(xdna, "not able to create channel");
-		goto out_destroy_context;
+		XDNA_ERR(xdna, "Alloc IRQ failed %d", ret);
+		goto del_ctx_req;
 	}
 
 	intr_reg = i2x.mb_head_ptr_reg + 4;
 	hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
 							      intr_reg, ret);
 	if (!hwctx->priv->mbox_chann) {
-		XDNA_ERR(xdna, "not able to create channel");
+		XDNA_ERR(xdna, "Not able to create channel");
 		ret = -EINVAL;
-		goto out_destroy_context;
+		goto del_ctx_req;
 	}
 	ndev->hwctx_num++;
 
-	XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
-		 hwctx->name, ret, resp.msix_id);
-	XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
-		 hwctx->fw_ctx_id, hwctx->client->pasid);
+	XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id);
+	XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, hwctx->client->pasid);
 
 	return 0;
 
-out_destroy_context:
-	aie2_destroy_context(ndev, hwctx);
+del_ctx_req:
+	aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
 	return ret;
 }
 
 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
 {
-	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
 	struct amdxdna_dev *xdna = ndev->xdna;
 	int ret;
 
-	if (hwctx->fw_ctx_id == -1)
-		return 0;
-
 	xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
-
-	req.context_id = hwctx->fw_ctx_id;
-	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
-	if (ret)
-		XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
-
+	ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
 	xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
-	XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
-		 hwctx->fw_ctx_id);
+	XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);
 	hwctx->priv->mbox_chann = NULL;
 	hwctx->fw_ctx_id = -1;
 	ndev->hwctx_num--;
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Fix potential NULL pointer dereference in context cleanup
Posted by Mario Limonciello 4 days, 11 hours ago
On 12/10/25 10:56 PM, Lizhi Hou wrote:
> aie_destroy_context() is invoked during error handling in
> aie2_create_context(). However, aie_destroy_context() assumes that the
> context's mailbox channel pointer is non-NULL. If mailbox channel
> creation fails, the pointer remains NULL and calling aie_destroy_context()
> can lead to a NULL pointer dereference.
> 
> In aie2_create_context(), replace aie_destroy_context() with a function
> which request firmware to remove the context created previously.
> 
> Fixes: be462c97b7df ("accel/amdxdna: Add hardware context")
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_message.c | 52 +++++++++++++++-------------
>   1 file changed, 28 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 03b75757a6e6..980aef9dc51a 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -192,6 +192,19 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
>   	return 0;
>   }
>   
> +static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id)
> +{
> +	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
> +	struct amdxdna_dev *xdna = ndev->xdna;
> +	int ret;
> +
> +	req.context_id = id;
> +	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> +	if (ret)
> +		XDNA_WARN(xdna, "Destroy context failed, ret %d", ret);
> +
> +	return ret;
> +}
>   int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
>   {
>   	DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
> @@ -214,13 +227,16 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
>   		return ret;
>   
>   	hwctx->fw_ctx_id = resp.context_id;
> -	WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
> +	if (hwctx->fw_ctx_id == -1) {
> +		WARN_ON_ONCE("Unexpected context id");
> +		return -EINVAL;
> +	}

Is the message super important?  I'd think just do this:

if (WARN_ON_ONCE(hwctx->fw_ctx_id == -1))
	return -EINVAL;>
>   	if (ndev->force_preempt_enabled) {
>   		ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, &hwctx->fw_ctx_id);
>   		if (ret) {
>   			XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
> -			return ret;
> +			goto del_ctx_req;
>   		}
>   	}
>   
> @@ -237,51 +253,39 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
>   
>   	ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
>   	if (ret == -EINVAL) {
> -		XDNA_ERR(xdna, "not able to create channel");
> -		goto out_destroy_context;
> +		XDNA_ERR(xdna, "Alloc IRQ failed %d", ret);
> +		goto del_ctx_req;
>   	}
>   
>   	intr_reg = i2x.mb_head_ptr_reg + 4;
>   	hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
>   							      intr_reg, ret);
>   	if (!hwctx->priv->mbox_chann) {
> -		XDNA_ERR(xdna, "not able to create channel");
> +		XDNA_ERR(xdna, "Not able to create channel");
>   		ret = -EINVAL;
> -		goto out_destroy_context;
> +		goto del_ctx_req;
>   	}
>   	ndev->hwctx_num++;
>   
> -	XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
> -		 hwctx->name, ret, resp.msix_id);
> -	XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
> -		 hwctx->fw_ctx_id, hwctx->client->pasid);
> +	XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id);
> +	XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, hwctx->client->pasid);
>   
>   	return 0;
>   
> -out_destroy_context:
> -	aie2_destroy_context(ndev, hwctx);
> +del_ctx_req:
> +	aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
>   	return ret;
>   }
>   
>   int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
>   {
> -	DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
>   	struct amdxdna_dev *xdna = ndev->xdna;
>   	int ret;
>   
> -	if (hwctx->fw_ctx_id == -1)
> -		return 0;
> -
>   	xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
> -
> -	req.context_id = hwctx->fw_ctx_id;
> -	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> -	if (ret)
> -		XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
> -
> +	ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
>   	xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
> -	XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
> -		 hwctx->fw_ctx_id);
> +	XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);
>   	hwctx->priv->mbox_chann = NULL;
>   	hwctx->fw_ctx_id = -1;
>   	ndev->hwctx_num--;
Re: [PATCH V1] accel/amdxdna: Fix potential NULL pointer dereference in context cleanup
Posted by Lizhi Hou 4 days, 5 hours ago
On 12/11/25 13:50, Mario Limonciello wrote:
> On 12/10/25 10:56 PM, Lizhi Hou wrote:
>> aie_destroy_context() is invoked during error handling in
>> aie2_create_context(). However, aie_destroy_context() assumes that the
>> context's mailbox channel pointer is non-NULL. If mailbox channel
>> creation fails, the pointer remains NULL and calling 
>> aie_destroy_context()
>> can lead to a NULL pointer dereference.
>>
>> In aie2_create_context(), replace aie_destroy_context() with a function
>> which request firmware to remove the context created previously.
>>
>> Fixes: be462c97b7df ("accel/amdxdna: Add hardware context")
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_message.c | 52 +++++++++++++++-------------
>>   1 file changed, 28 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 03b75757a6e6..980aef9dc51a 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -192,6 +192,19 @@ int aie2_query_firmware_version(struct 
>> amdxdna_dev_hdl *ndev,
>>       return 0;
>>   }
>>   +static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, 
>> u32 id)
>> +{
>> +    DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
>> +    struct amdxdna_dev *xdna = ndev->xdna;
>> +    int ret;
>> +
>> +    req.context_id = id;
>> +    ret = aie2_send_mgmt_msg_wait(ndev, &msg);
>> +    if (ret)
>> +        XDNA_WARN(xdna, "Destroy context failed, ret %d", ret);
>> +
>> +    return ret;
>> +}
>>   int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct 
>> amdxdna_hwctx *hwctx)
>>   {
>>       DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
>> @@ -214,13 +227,16 @@ int aie2_create_context(struct amdxdna_dev_hdl 
>> *ndev, struct amdxdna_hwctx *hwct
>>           return ret;
>>         hwctx->fw_ctx_id = resp.context_id;
>> -    WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
>> +    if (hwctx->fw_ctx_id == -1) {
>> +        WARN_ON_ONCE("Unexpected context id");
>> +        return -EINVAL;
>> +    }
>
> Is the message super important?  I'd think just do this:
It indicates there is unexpected hardware issue happened. It should 
never happen.
>
> if (WARN_ON_ONCE(hwctx->fw_ctx_id == -1))
>     return -EINVAL;>

Sure.  I will change it.


Lizhi

>>       if (ndev->force_preempt_enabled) {
>>           ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_FORCE_PREEMPT, 
>> &hwctx->fw_ctx_id);
>>           if (ret) {
>>               XDNA_ERR(xdna, "failed to enable force preempt %d", ret);
>> -            return ret;
>> +            goto del_ctx_req;
>>           }
>>       }
>>   @@ -237,51 +253,39 @@ int aie2_create_context(struct 
>> amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
>>         ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
>>       if (ret == -EINVAL) {
>> -        XDNA_ERR(xdna, "not able to create channel");
>> -        goto out_destroy_context;
>> +        XDNA_ERR(xdna, "Alloc IRQ failed %d", ret);
>> +        goto del_ctx_req;
>>       }
>>         intr_reg = i2x.mb_head_ptr_reg + 4;
>>       hwctx->priv->mbox_chann = 
>> xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
>>                                     intr_reg, ret);
>>       if (!hwctx->priv->mbox_chann) {
>> -        XDNA_ERR(xdna, "not able to create channel");
>> +        XDNA_ERR(xdna, "Not able to create channel");
>>           ret = -EINVAL;
>> -        goto out_destroy_context;
>> +        goto del_ctx_req;
>>       }
>>       ndev->hwctx_num++;
>>   -    XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
>> -         hwctx->name, ret, resp.msix_id);
>> -    XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
>> -         hwctx->fw_ctx_id, hwctx->client->pasid);
>> +    XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, 
>> resp.msix_id);
>> +    XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, 
>> hwctx->client->pasid);
>>         return 0;
>>   -out_destroy_context:
>> -    aie2_destroy_context(ndev, hwctx);
>> +del_ctx_req:
>> +    aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
>>       return ret;
>>   }
>>     int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct 
>> amdxdna_hwctx *hwctx)
>>   {
>> -    DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
>>       struct amdxdna_dev *xdna = ndev->xdna;
>>       int ret;
>>   -    if (hwctx->fw_ctx_id == -1)
>> -        return 0;
>> -
>>       xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
>> -
>> -    req.context_id = hwctx->fw_ctx_id;
>> -    ret = aie2_send_mgmt_msg_wait(ndev, &msg);
>> -    if (ret)
>> -        XDNA_WARN(xdna, "%s destroy context failed, ret %d", 
>> hwctx->name, ret);
>> -
>> +    ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
>> xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
>> -    XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
>> -         hwctx->fw_ctx_id);
>> +    XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);
>>       hwctx->priv->mbox_chann = NULL;
>>       hwctx->fw_ctx_id = -1;
>>       ndev->hwctx_num--;
>