[PATCH V1] accel/amdxdna: Fix clflush buffer size

Lizhi Hou posted 1 patch 1 month, 1 week ago
drivers/accel/amdxdna/aie2_message.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
[PATCH V1] accel/amdxdna: Fix clflush buffer size
Posted by Lizhi Hou 1 month, 1 week ago
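Each of these requests tells the firmware the size of the shared buffer
(req.buf_size, or req.dump_buff_size for the status query), and the
firmware may read or write the entire region. If the CPU flushes only a
subset of it, the remaining cache lines may still hold stale data,
causing the device to see garbage. Flush the full size that is reported
to the firmware instead.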

Fixes: 6e87001fe19f ("accel/amdxdna: Adjust size for copy_to_user()")
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_message.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 6e98af7b74db..a012e7e935ad 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -390,7 +390,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
 	req.num_cols = hweight32(aie_bitmap);
 	req.aie_bitmap = aie_bitmap;
 
-	drm_clflush_virt_range(buff_addr, size); /* device can access */
+	drm_clflush_virt_range(buff_addr, req.dump_buff_size); /* device can access */
 	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
 	if (ret) {
 		XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
@@ -442,7 +442,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
 	req.buf_size = buf_sz;
 	req.type = header->type;
 
-	drm_clflush_virt_range(addr, size); /* device can access */
+	drm_clflush_virt_range(addr, req.buf_size); /* device can access */
 	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
 	if (ret) {
 		XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
@@ -1186,7 +1186,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
 	req.context_id = context_id;
 	req.buf_size = buf_size;
 
-	drm_clflush_virt_range(buf, sizeof(*report));
+	drm_clflush_virt_range(buf, req.buf_size);
 	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
 	if (ret) {
 		XDNA_ERR(xdna, "Get app health failed, ret %d status 0x%x", ret, resp.status);
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Fix clflush buffer size
Posted by Mario Limonciello 1 month ago

On 5/6/26 23:02, Lizhi Hou wrote:
> The firmware is told the buffer is req.buf_size bytes. It may read/write
> the entire region. If the CPU only flushes a subset, the remaining cache
> lines could contain stale data, causing the device to see garbage.
> 
> Fixes: 6e87001fe19f ("accel/amdxdna: Adjust size for copy_to_user()")
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/aie2_message.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 6e98af7b74db..a012e7e935ad 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -390,7 +390,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
>   	req.num_cols = hweight32(aie_bitmap);
>   	req.aie_bitmap = aie_bitmap;
>   
> -	drm_clflush_virt_range(buff_addr, size); /* device can access */
> +	drm_clflush_virt_range(buff_addr, req.dump_buff_size); /* device can access */
>   	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>   	if (ret) {
>   		XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
> @@ -442,7 +442,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
>   	req.buf_size = buf_sz;
>   	req.type = header->type;
>   
> -	drm_clflush_virt_range(addr, size); /* device can access */
> +	drm_clflush_virt_range(addr, req.buf_size); /* device can access */
>   	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>   	if (ret) {
>   		XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
> @@ -1186,7 +1186,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
>   	req.context_id = context_id;
>   	req.buf_size = buf_size;
>   
> -	drm_clflush_virt_range(buf, sizeof(*report));
> +	drm_clflush_virt_range(buf, req.buf_size);
>   	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>   	if (ret) {
>   		XDNA_ERR(xdna, "Get app health failed, ret %d status 0x%x", ret, resp.status);
Re: [PATCH V1] accel/amdxdna: Fix clflush buffer size
Posted by Lizhi Hou 1 month ago
Applied to drm-misc-next

On 5/7/26 09:55, Mario Limonciello wrote:
>
>
> On 5/6/26 23:02, Lizhi Hou wrote:
>> The firmware is told the buffer is req.buf_size bytes. It may read/write
>> the entire region. If the CPU only flushes a subset, the remaining cache
>> lines could contain stale data, causing the device to see garbage.
>>
>> Fixes: 6e87001fe19f ("accel/amdxdna: Adjust size for copy_to_user()")
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> ---
>>   drivers/accel/amdxdna/aie2_message.c | 6 +++---
>>   1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
>> index 6e98af7b74db..a012e7e935ad 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -390,7 +390,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
>>       req.num_cols = hweight32(aie_bitmap);
>>       req.aie_bitmap = aie_bitmap;
>>
>> -    drm_clflush_virt_range(buff_addr, size); /* device can access */
>> +    drm_clflush_virt_range(buff_addr, req.dump_buff_size); /* device can access */
>>       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
>> @@ -442,7 +442,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
>>       req.buf_size = buf_sz;
>>       req.type = header->type;
>>
>> -    drm_clflush_virt_range(addr, size); /* device can access */
>> +    drm_clflush_virt_range(addr, req.buf_size); /* device can access */
>>       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
>> @@ -1186,7 +1186,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
>>       req.context_id = context_id;
>>       req.buf_size = buf_size;
>>
>> -    drm_clflush_virt_range(buf, sizeof(*report));
>> +    drm_clflush_virt_range(buf, req.buf_size);
>>       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Get app health failed, ret %d status 0x%x", ret, resp.status);
>