drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++----- drivers/accel/amdxdna/aie2_pci.c | 7 ++----- drivers/accel/amdxdna/aie2_pci.h | 1 + 3 files changed, 19 insertions(+), 10 deletions(-)
mgmt_chann may be set to NULL if the firmware returns an unexpected
error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
pointer dereference in aie2_hw_stop().
Fix this by introducing a dedicated helper to destroy mgmt_chann
and by adding proper NULL checks before accessing it.
Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
drivers/accel/amdxdna/aie2_pci.c | 7 ++-----
drivers/accel/amdxdna/aie2_pci.h | 1 +
3 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 277a27bce850..22e1a85a7ae0 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
return -ENODEV;
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
- if (ret == -ETIME) {
- xdna_mailbox_stop_channel(ndev->mgmt_chann);
- xdna_mailbox_destroy_channel(ndev->mgmt_chann);
- ndev->mgmt_chann = NULL;
- }
+ if (ret == -ETIME)
+ aie2_destroy_mgmt_chann(ndev);
if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
@@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
ndev->exec_msg_ops = &legacy_exec_message_ops;
}
+void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (!ndev->mgmt_chann)
+ return;
+
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+}
+
static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 85079b6fc5d9..977ce21eaf9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
aie2_mgmt_fw_fini(ndev);
- xdna_mailbox_stop_channel(ndev->mgmt_chann);
- xdna_mailbox_destroy_channel(ndev->mgmt_chann);
- ndev->mgmt_chann = NULL;
+ aie2_destroy_mgmt_chann(ndev);
drmm_kfree(&xdna->ddev, ndev->mbox);
ndev->mbox = NULL;
aie2_psp_stop(ndev->psp_hdl);
@@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
return 0;
destroy_mgmt_chann:
- xdna_mailbox_stop_channel(ndev->mgmt_chann);
- xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ aie2_destroy_mgmt_chann(ndev);
stop_psp:
aie2_psp_stop(ndev->psp_hdl);
fini_smu:
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index b20a3661078c..e72311c77996 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
/* aie2_message.c */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
+void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
--
2.34.1
On 2/26/26 3:38 PM, Lizhi Hou wrote:
> mgmt_chann may be set to NULL if the firmware returns an unexpected
> error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
> pointer dereference in aie2_hw_stop().
>
> Fix this by introducing a dedicated helper to destroy mgmt_chann
> and by adding proper NULL checks before accessing it.
>
Is this actually going to fix it? It sounds like a concurrency problem, no?
Do you want a mutex in the helper perhaps?
> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
> drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
> drivers/accel/amdxdna/aie2_pci.c | 7 ++-----
> drivers/accel/amdxdna/aie2_pci.h | 1 +
> 3 files changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 277a27bce850..22e1a85a7ae0 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
> return -ENODEV;
>
> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
> - if (ret == -ETIME) {
> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
> - ndev->mgmt_chann = NULL;
> - }
> + if (ret == -ETIME)
> + aie2_destroy_mgmt_chann(ndev);
>
> if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
> XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
> @@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
> ndev->exec_msg_ops = &legacy_exec_message_ops;
> }
>
> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
> +{
> + struct amdxdna_dev *xdna = ndev->xdna;
> +
> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> +
> + if (!ndev->mgmt_chann)
> + return;
> +
> + xdna_mailbox_stop_channel(ndev->mgmt_chann);
> + xdna_mailbox_destroy_channel(ndev->mgmt_chann);
> + ndev->mgmt_chann = NULL;
> +}
> +
> static inline struct amdxdna_gem_obj *
> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
> {
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 85079b6fc5d9..977ce21eaf9f 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>
> aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
> aie2_mgmt_fw_fini(ndev);
> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
> - ndev->mgmt_chann = NULL;
> + aie2_destroy_mgmt_chann(ndev);
> drmm_kfree(&xdna->ddev, ndev->mbox);
> ndev->mbox = NULL;
> aie2_psp_stop(ndev->psp_hdl);
> @@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
> return 0;
>
> destroy_mgmt_chann:
> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
> + aie2_destroy_mgmt_chann(ndev);
> stop_psp:
> aie2_psp_stop(ndev->psp_hdl);
> fini_smu:
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index b20a3661078c..e72311c77996 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
>
> /* aie2_message.c */
> void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
On 2/27/26 12:11, Mario Limonciello wrote:
> On 2/26/26 3:38 PM, Lizhi Hou wrote:
>> mgmt_chann may be set to NULL if the firmware returns an unexpected
>> error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
>> pointer dereference in aie2_hw_stop().
>>
>> Fix this by introducing a dedicated helper to destroy mgmt_chann
>> and by adding proper NULL checks before accessing it.
>>
>
> Is this actually going to fix it? It sounds like a concurrency
> problem, no?
It is not concurrency issue. The code is protected by dev_lock. That is
why I added
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
in my fix.
This is actually error code path issue. Normally, xdna_send_msg_wait()
will not time out. This only happens with broken firmware. That is why
it was not found before.
Lizhi
>
> Do you want a mutex in the helper perhaps?
>
>> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>> drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
>> drivers/accel/amdxdna/aie2_pci.c | 7 ++-----
>> drivers/accel/amdxdna/aie2_pci.h | 1 +
>> 3 files changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 277a27bce850..22e1a85a7ae0 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct
>> amdxdna_dev_hdl *ndev,
>> return -ENODEV;
>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>> - if (ret == -ETIME) {
>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>> - ndev->mgmt_chann = NULL;
>> - }
>> + if (ret == -ETIME)
>> + aie2_destroy_mgmt_chann(ndev);
>> if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
>> XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
>> @@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
>> ndev->exec_msg_ops = &legacy_exec_message_ops;
>> }
>> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
>> +{
>> + struct amdxdna_dev *xdna = ndev->xdna;
>> +
>> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> +
>> + if (!ndev->mgmt_chann)
>> + return;
>> +
>> + xdna_mailbox_stop_channel(ndev->mgmt_chann);
>> + xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>> + ndev->mgmt_chann = NULL;
>> +}
>> +
>> static inline struct amdxdna_gem_obj *
>> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
>> {
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 85079b6fc5d9..977ce21eaf9f 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>> aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
>> aie2_mgmt_fw_fini(ndev);
>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>> - ndev->mgmt_chann = NULL;
>> + aie2_destroy_mgmt_chann(ndev);
>> drmm_kfree(&xdna->ddev, ndev->mbox);
>> ndev->mbox = NULL;
>> aie2_psp_stop(ndev->psp_hdl);
>> @@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>> return 0;
>> destroy_mgmt_chann:
>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>> + aie2_destroy_mgmt_chann(ndev);
>> stop_psp:
>> aie2_psp_stop(ndev->psp_hdl);
>> fini_smu:
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index b20a3661078c..e72311c77996 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct
>> amdxdna_dev_hdl *ndev,
>> /* aie2_message.c */
>> void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
>> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
>> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type,
>> u64 value);
>
On 2/27/26 9:16 PM, Lizhi Hou wrote:
>
> On 2/27/26 12:11, Mario Limonciello wrote:
>> On 2/26/26 3:38 PM, Lizhi Hou wrote:
>>> mgmt_chann may be set to NULL if the firmware returns an unexpected
>>> error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
>>> pointer dereference in aie2_hw_stop().
>>>
>>> Fix this by introducing a dedicated helper to destroy mgmt_chann
>>> and by adding proper NULL checks before accessing it.
>>>
>>
>> Is this actually going to fix it? It sounds like a concurrency
>> problem, no?
>
> It is not concurrency issue. The code is protected by dev_lock. That is
> why I added
>
> drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>
> in my fix.
>
> This is actually error code path issue. Normally, xdna_send_msg_wait()
> will not time out. This only happens with broken firmware. That is why
> it was not found before.
>
> Lizhi
Got it thanks.
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>
>>
>> Do you want a mutex in the helper perhaps?
>>
>>> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>> ---
>>> drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
>>> drivers/accel/amdxdna/aie2_pci.c | 7 ++-----
>>> drivers/accel/amdxdna/aie2_pci.h | 1 +
>>> 3 files changed, 19 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/
>>> amdxdna/aie2_message.c
>>> index 277a27bce850..22e1a85a7ae0 100644
>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>> @@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct
>>> amdxdna_dev_hdl *ndev,
>>> return -ENODEV;
>>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>> - if (ret == -ETIME) {
>>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>> - ndev->mgmt_chann = NULL;
>>> - }
>>> + if (ret == -ETIME)
>>> + aie2_destroy_mgmt_chann(ndev);
>>> if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
>>> XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
>>> @@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
>>> ndev->exec_msg_ops = &legacy_exec_message_ops;
>>> }
>>> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
>>> +{
>>> + struct amdxdna_dev *xdna = ndev->xdna;
>>> +
>>> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>> +
>>> + if (!ndev->mgmt_chann)
>>> + return;
>>> +
>>> + xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>> + xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>> + ndev->mgmt_chann = NULL;
>>> +}
>>> +
>>> static inline struct amdxdna_gem_obj *
>>> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
>>> {
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>> amdxdna/aie2_pci.c
>>> index 85079b6fc5d9..977ce21eaf9f 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>>> aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
>>> aie2_mgmt_fw_fini(ndev);
>>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>> - ndev->mgmt_chann = NULL;
>>> + aie2_destroy_mgmt_chann(ndev);
>>> drmm_kfree(&xdna->ddev, ndev->mbox);
>>> ndev->mbox = NULL;
>>> aie2_psp_stop(ndev->psp_hdl);
>>> @@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>>> return 0;
>>> destroy_mgmt_chann:
>>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>> + aie2_destroy_mgmt_chann(ndev);
>>> stop_psp:
>>> aie2_psp_stop(ndev->psp_hdl);
>>> fini_smu:
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>> amdxdna/aie2_pci.h
>>> index b20a3661078c..e72311c77996 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>> @@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct
>>> amdxdna_dev_hdl *ndev,
>>> /* aie2_message.c */
>>> void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
>>> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
>>> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>>> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type,
>>> u64 value);
>>
Applied to drm-misc-fixes
On 3/2/26 08:57, Mario Limonciello wrote:
> On 2/27/26 9:16 PM, Lizhi Hou wrote:
>>
>> On 2/27/26 12:11, Mario Limonciello wrote:
>>> On 2/26/26 3:38 PM, Lizhi Hou wrote:
>>>> mgmt_chann may be set to NULL if the firmware returns an unexpected
>>>> error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
>>>> pointer dereference in aie2_hw_stop().
>>>>
>>>> Fix this by introducing a dedicated helper to destroy mgmt_chann
>>>> and by adding proper NULL checks before accessing it.
>>>>
>>>
>>> Is this actually going to fix it? It sounds like a concurrency
>>> problem, no?
>>
>> It is not concurrency issue. The code is protected by dev_lock. That
>> is why I added
>>
>> drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>
>> in my fix.
>>
>> This is actually error code path issue. Normally,
>> xdna_send_msg_wait() will not time out. This only happens with broken
>> firmware. That is why it was not found before.
>>
>> Lizhi
>
> Got it thanks.
>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>>
>>>
>>> Do you want a mutex in the helper perhaps?
>>>
>>>> Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>> ---
>>>> drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
>>>> drivers/accel/amdxdna/aie2_pci.c | 7 ++-----
>>>> drivers/accel/amdxdna/aie2_pci.h | 1 +
>>>> 3 files changed, 19 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/
>>>> amdxdna/aie2_message.c
>>>> index 277a27bce850..22e1a85a7ae0 100644
>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>> @@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct
>>>> amdxdna_dev_hdl *ndev,
>>>> return -ENODEV;
>>>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>>> - if (ret == -ETIME) {
>>>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>>> - ndev->mgmt_chann = NULL;
>>>> - }
>>>> + if (ret == -ETIME)
>>>> + aie2_destroy_mgmt_chann(ndev);
>>>> if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
>>>> XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
>>>> @@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
>>>> ndev->exec_msg_ops = &legacy_exec_message_ops;
>>>> }
>>>> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
>>>> +{
>>>> + struct amdxdna_dev *xdna = ndev->xdna;
>>>> +
>>>> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>> +
>>>> + if (!ndev->mgmt_chann)
>>>> + return;
>>>> +
>>>> + xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>> + xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>>> + ndev->mgmt_chann = NULL;
>>>> +}
>>>> +
>>>> static inline struct amdxdna_gem_obj *
>>>> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
>>>> {
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>>> amdxdna/aie2_pci.c
>>>> index 85079b6fc5d9..977ce21eaf9f 100644
>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>> @@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>>>> aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
>>>> aie2_mgmt_fw_fini(ndev);
>>>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>>> - ndev->mgmt_chann = NULL;
>>>> + aie2_destroy_mgmt_chann(ndev);
>>>> drmm_kfree(&xdna->ddev, ndev->mbox);
>>>> ndev->mbox = NULL;
>>>> aie2_psp_stop(ndev->psp_hdl);
>>>> @@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>>>> return 0;
>>>> destroy_mgmt_chann:
>>>> - xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>> - xdna_mailbox_destroy_channel(ndev->mgmt_chann);
>>>> + aie2_destroy_mgmt_chann(ndev);
>>>> stop_psp:
>>>> aie2_psp_stop(ndev->psp_hdl);
>>>> fini_smu:
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>>> amdxdna/aie2_pci.h
>>>> index b20a3661078c..e72311c77996 100644
>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>> @@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct
>>>> amdxdna_dev_hdl *ndev,
>>>> /* aie2_message.c */
>>>> void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
>>>> +void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
>>>> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>>> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>>>> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type,
>>>> u64 value);
>>>
>
© 2016 - 2026 Red Hat, Inc.