[v1] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()

[PATCH] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()

Posted by Su Hui 1 week, 1 day ago

Clang static checker (scan-build) warning:
drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2
Argument to kfree() is a constant address (18446744073709551604), which
is not memory allocated by malloc().

There are some problems in hw_engine_group_alloc() and
xe_hw_engine_setup_groups(). First, kfree() can only handle NULL
pointers, not negative error codes. When hw_engine_group_alloc()
fails, there is a bad kfree() call on a negative error code in
xe_hw_engine_setup_groups(). Second, when drmm_add_action_or_reset()
fails, destroy_workqueue() should be called to free 'group->resume_wq'.

Free 'group' and destroy 'group->resume_wq' when hw_engine_group_alloc()
fails, to fix these problems.

Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission")
Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group")
Signed-off-by: Su Hui <suhui@nfschina.com>
---
 drivers/gpu/drm/xe/xe_hw_engine_group.c | 31 ++++++++++++++-----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index 82750520a90a..ee2cb32817fa 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -51,7 +51,7 @@ static struct xe_hw_engine_group *
 hw_engine_group_alloc(struct xe_device *xe)
 {
 	struct xe_hw_engine_group *group;
-	int err;
+	int err = -ENOMEM;
 
 	group = kzalloc(sizeof(*group), GFP_KERNEL);
 	if (!group)
@@ -59,7 +59,7 @@ hw_engine_group_alloc(struct xe_device *xe)
 
 	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
 	if (!group->resume_wq)
-		return ERR_PTR(-ENOMEM);
+		goto free_group;
 
 	init_rwsem(&group->mode_sem);
 	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
@@ -67,9 +67,15 @@ hw_engine_group_alloc(struct xe_device *xe)
 
 	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
 	if (err)
-		return ERR_PTR(err);
+		goto destroy_wq;
 
 	return group;
+
+destroy_wq:
+	destroy_workqueue(group->resume_wq);
+free_group:
+	kfree(group);
+	return ERR_PTR(err);
 }
 
 /**
@@ -87,21 +93,19 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
 	int err;
 
 	group_rcs_ccs = hw_engine_group_alloc(xe);
-	if (IS_ERR(group_rcs_ccs)) {
-		err = PTR_ERR(group_rcs_ccs);
-		goto err_group_rcs_ccs;
-	}
+	if (IS_ERR(group_rcs_ccs))
+		return PTR_ERR(group_rcs_ccs);
 
 	group_bcs = hw_engine_group_alloc(xe);
 	if (IS_ERR(group_bcs)) {
 		err = PTR_ERR(group_bcs);
-		goto err_group_bcs;
+		goto free_group_rcs_ccs;
 	}
 
 	group_vcs_vecs = hw_engine_group_alloc(xe);
 	if (IS_ERR(group_vcs_vecs)) {
 		err = PTR_ERR(group_vcs_vecs);
-		goto err_group_vcs_vecs;
+		goto free_group_bcs;
 	}
 
 	for_each_hw_engine(hwe, gt, id) {
@@ -126,13 +130,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
 
 	return 0;
 
-err_group_vcs_vecs:
-	kfree(group_vcs_vecs);
-err_group_bcs:
+free_group_bcs:
+	destroy_workqueue(group_bcs->resume_wq);
 	kfree(group_bcs);
-err_group_rcs_ccs:
+free_group_rcs_ccs:
+	destroy_workqueue(group_rcs_ccs->resume_wq);
 	kfree(group_rcs_ccs);
-
 	return err;
 }
 
-- 
2.30.2

Re: [PATCH] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()

Posted by Vivekanandan, Balasubramani 1 week, 1 day ago

On 14.11.2024 14:39, Su Hui wrote:
> Clang static checker (scan-build) warning:
> drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2
> Argument to kfree() is a constant address (18446744073709551604), which
> is not memory allocated by malloc().
> 
> There are some problems in hw_engine_group_alloc() and
> xe_hw_engine_setup_groups(). First, kfree() can only handle NULL
> pointers, not negative error codes. When hw_engine_group_alloc()
> fails, there is a bad kfree() call on a negative error code in
> xe_hw_engine_setup_groups(). Second, when drmm_add_action_or_reset()
> fails, destroy_workqueue() should be called to free 'group->resume_wq'.
> 
> Free 'group' and destroy 'group->resume_wq' when hw_engine_group_alloc()
> failed to fix these problems.
> 
> Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission")
> Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group")
> Signed-off-by: Su Hui <suhui@nfschina.com>
> ---
>  drivers/gpu/drm/xe/xe_hw_engine_group.c | 31 ++++++++++++++-----------
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> index 82750520a90a..ee2cb32817fa 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -51,7 +51,7 @@ static struct xe_hw_engine_group *
>  hw_engine_group_alloc(struct xe_device *xe)
>  {
>  	struct xe_hw_engine_group *group;
> -	int err;
> +	int err = -ENOMEM;
>  
>  	group = kzalloc(sizeof(*group), GFP_KERNEL);
>  	if (!group)
> @@ -59,7 +59,7 @@ hw_engine_group_alloc(struct xe_device *xe)
>  
>  	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
>  	if (!group->resume_wq)
> -		return ERR_PTR(-ENOMEM);
> +		goto free_group;
kfree() can be called directly here, followed by a return, instead of
using a goto.

>  
>  	init_rwsem(&group->mode_sem);
>  	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
> @@ -67,9 +67,15 @@ hw_engine_group_alloc(struct xe_device *xe)
>  
>  	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
>  	if (err)
> -		return ERR_PTR(err);
> +		goto destroy_wq;
There is no need to clear the resources on failure, because
drmm_add_action_or_reset takes care of freeing the resources on
failure.

>  
>  	return group;
> +
> +destroy_wq:
> +	destroy_workqueue(group->resume_wq);
> +free_group:
> +	kfree(group);
> +	return ERR_PTR(err);
>  }
>  
>  /**
> @@ -87,21 +93,19 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  	int err;
>  
>  	group_rcs_ccs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_rcs_ccs)) {
> -		err = PTR_ERR(group_rcs_ccs);
> -		goto err_group_rcs_ccs;
> -	}
> +	if (IS_ERR(group_rcs_ccs))
> +		return PTR_ERR(group_rcs_ccs);
>  
>  	group_bcs = hw_engine_group_alloc(xe);
>  	if (IS_ERR(group_bcs)) {
>  		err = PTR_ERR(group_bcs);
> -		goto err_group_bcs;
> +		goto free_group_rcs_ccs;
There is no need to free the memory here or in the following lines, as
it is already managed through the drmm_add_action_or_reset() call in
hw_engine_group_alloc().
We can simply return the error code.

>  	}
>  
>  	group_vcs_vecs = hw_engine_group_alloc(xe);
>  	if (IS_ERR(group_vcs_vecs)) {
>  		err = PTR_ERR(group_vcs_vecs);
> -		goto err_group_vcs_vecs;
> +		goto free_group_bcs;
>  	}
>  
>  	for_each_hw_engine(hwe, gt, id) {
> @@ -126,13 +130,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  
>  	return 0;
>  
> -err_group_vcs_vecs:
> -	kfree(group_vcs_vecs);
> -err_group_bcs:
> +free_group_bcs:
> +	destroy_workqueue(group_bcs->resume_wq);
>  	kfree(group_bcs);
> -err_group_rcs_ccs:
> +free_group_rcs_ccs:
> +	destroy_workqueue(group_rcs_ccs->resume_wq);
>  	kfree(group_rcs_ccs);
> -
All these kfree statements are not required.

Regards,
Bala
>  	return err;
>  }
>  
> -- 
> 2.30.2
>

Re: [PATCH] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()

Posted by Su Hui 1 week, 1 day ago

On 2024/11/14 15:45, Vivekanandan, Balasubramani wrote:
> On 14.11.2024 14:39, Su Hui wrote:
>>
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
>> index 82750520a90a..ee2cb32817fa 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
>> @@ -51,7 +51,7 @@ static struct xe_hw_engine_group *
>>   hw_engine_group_alloc(struct xe_device *xe)
>>   {
>>   	struct xe_hw_engine_group *group;
>> -	int err;
>> +	int err = -ENOMEM;
>>   
>>   	group = kzalloc(sizeof(*group), GFP_KERNEL);
>>   	if (!group)
>> @@ -59,7 +59,7 @@ hw_engine_group_alloc(struct xe_device *xe)
>>   
>>   	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
>>   	if (!group->resume_wq)
>> -		return ERR_PTR(-ENOMEM);
>> +		goto free_group;
> kfree can be directly called from here followed by return, instead of a
> goto.
Agreed.
>>   
>>   	init_rwsem(&group->mode_sem);
>>   	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
>> @@ -67,9 +67,15 @@ hw_engine_group_alloc(struct xe_device *xe)
>>   
>>   	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
>>   	if (err)
>> -		return ERR_PTR(err);
>> +		goto destroy_wq;
> There is no need to clear the resources on failure, because
> drmm_add_action_or_reset takes care of freeing the resources on
> failure.
Oh, my fault, I missed this function.
>>   
>>   
>>   /**
>> @@ -87,21 +93,19 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>>   	int err;
>>   
>>   	group_rcs_ccs = hw_engine_group_alloc(xe);
>> -	if (IS_ERR(group_rcs_ccs)) {
>> -		err = PTR_ERR(group_rcs_ccs);
>> -		goto err_group_rcs_ccs;
>> -	}
>> +	if (IS_ERR(group_rcs_ccs))
>> +		return PTR_ERR(group_rcs_ccs);
>>   
>>   	group_bcs = hw_engine_group_alloc(xe);
>>   	if (IS_ERR(group_bcs)) {
>>   		err = PTR_ERR(group_bcs);
>> -		goto err_group_bcs;
>> +		goto free_group_rcs_ccs;
> No need of freeing the memory here and in the following lines as we have
> managed it through the drmm_add_action_or_reset call in
> hw_engine_group_alloc.
> We can simply return the error code.
Got it.
>
>>   
>> -err_group_vcs_vecs:
>> -	kfree(group_vcs_vecs);
>> -err_group_bcs:
>> +free_group_bcs:
>> +	destroy_workqueue(group_bcs->resume_wq);
>>   	kfree(group_bcs);
>> -err_group_rcs_ccs:
>> +free_group_rcs_ccs:
>> +	destroy_workqueue(group_rcs_ccs->resume_wq);
>>   	kfree(group_rcs_ccs);
>> -
> All these kfree statements are not required.
Agreed too. Thanks for your review.
I will send a v2 patch to remove these kfree() calls if there are no
further suggestions.

Regards,
Su Hui