[PATCH v2] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()

Su Hui posted 1 patch 1 week ago
drivers/gpu/drm/xe/xe_hw_engine_group.c | 32 +++++++------------------
1 file changed, 9 insertions(+), 23 deletions(-)
[PATCH v2] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()
Posted by Su Hui 1 week ago
Clang static checker(scan-build) warning:
drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2
Argument to kfree() is a constant address (18446744073709551604), which
is not memory allocated by malloc().

kfree() can only handle NULL pointers, not negative error codes.
When hw_engine_group_alloc() fails, there is a bad kfree() call on the
negative error code in xe_hw_engine_setup_groups().

Free 'group' when alloc_workqueue() fails in hw_engine_group_alloc(), and
remove the wrong kfree() calls in xe_hw_engine_setup_groups() to fix this
problem. It's safe to remove these kfree() calls because
drmm_add_action_or_reset() can free the groups by calling
hw_engine_group_free().

Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission")
Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group")
Signed-off-by: Su Hui <suhui@nfschina.com>
---
v2:
 - remove wrong destroy_workqueue() and kfree() in v1 patch
v1:
 - https://lore.kernel.org/all/20241114063942.3448607-1-suhui@nfschina.com/

 drivers/gpu/drm/xe/xe_hw_engine_group.c | 32 +++++++------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index 82750520a90a..3bfa002734ad 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -58,8 +58,10 @@ hw_engine_group_alloc(struct xe_device *xe)
 		return ERR_PTR(-ENOMEM);
 
 	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
-	if (!group->resume_wq)
+	if (!group->resume_wq) {
+		kfree(group);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	init_rwsem(&group->mode_sem);
 	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
@@ -84,25 +86,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
 	enum xe_hw_engine_id id;
 	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
 	struct xe_device *xe = gt_to_xe(gt);
-	int err;
 
 	group_rcs_ccs = hw_engine_group_alloc(xe);
-	if (IS_ERR(group_rcs_ccs)) {
-		err = PTR_ERR(group_rcs_ccs);
-		goto err_group_rcs_ccs;
-	}
+	if (IS_ERR(group_rcs_ccs))
+		return PTR_ERR(group_rcs_ccs);
 
 	group_bcs = hw_engine_group_alloc(xe);
-	if (IS_ERR(group_bcs)) {
-		err = PTR_ERR(group_bcs);
-		goto err_group_bcs;
-	}
+	if (IS_ERR(group_bcs))
+		return PTR_ERR(group_bcs);
 
 	group_vcs_vecs = hw_engine_group_alloc(xe);
-	if (IS_ERR(group_vcs_vecs)) {
-		err = PTR_ERR(group_vcs_vecs);
-		goto err_group_vcs_vecs;
-	}
+	if (IS_ERR(group_vcs_vecs))
+		return PTR_ERR(group_vcs_vecs);
 
 	for_each_hw_engine(hwe, gt, id) {
 		switch (hwe->class) {
@@ -125,15 +120,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
 	}
 
 	return 0;
-
-err_group_vcs_vecs:
-	kfree(group_vcs_vecs);
-err_group_bcs:
-	kfree(group_bcs);
-err_group_rcs_ccs:
-	kfree(group_rcs_ccs);
-
-	return err;
 }
 
 /**
-- 
2.30.2

Re: [PATCH v2] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()
Posted by Matthew Brost 6 days, 22 hours ago
On Fri, Nov 15, 2024 at 10:49:42AM +0800, Su Hui wrote:
> Clang static checker(scan-build) warning:
> drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2
> Argument to kfree() is a constant address (18446744073709551604), which
> is not memory allocated by malloc().
> 
> kfree() can only handle NULL pointers instead of negitave error codes.
> When hw_engine_group_alloc() failed, there is a bad kfree call for
> negitave error codes in xe_hw_engine_setup_groups().
> 
> Free 'group' when alloc_workqueue() failed in hw_engine_group_alloc(), and
> remove wrong kfree() in xe_hw_engine_setup_groups() to fix this problem.
> It's safe to remove these kfree() because drmm_add_action_or_reset()
> can free these by calling hw_engine_group_free().
> 
> Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission")
> Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group")
> Signed-off-by: Su Hui <suhui@nfschina.com>

Thanks for the fix. Looks correct to me.
Reviewed-by: Matthew Brost <matthew.brost@intel.com>

> ---
> v2:
>  - remove wrong destroy_workqueue() and kfree() in v1 patch
> v1:
>  - https://lore.kernel.org/all/20241114063942.3448607-1-suhui@nfschina.com/
> 
>  drivers/gpu/drm/xe/xe_hw_engine_group.c | 32 +++++++------------------
>  1 file changed, 9 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> index 82750520a90a..3bfa002734ad 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -58,8 +58,10 @@ hw_engine_group_alloc(struct xe_device *xe)
>  		return ERR_PTR(-ENOMEM);
>  
>  	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
> -	if (!group->resume_wq)
> +	if (!group->resume_wq) {
> +		kfree(group);
>  		return ERR_PTR(-ENOMEM);
> +	}
>  
>  	init_rwsem(&group->mode_sem);
>  	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
> @@ -84,25 +86,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  	enum xe_hw_engine_id id;
>  	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
>  	struct xe_device *xe = gt_to_xe(gt);
> -	int err;
>  
>  	group_rcs_ccs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_rcs_ccs)) {
> -		err = PTR_ERR(group_rcs_ccs);
> -		goto err_group_rcs_ccs;
> -	}
> +	if (IS_ERR(group_rcs_ccs))
> +		return PTR_ERR(group_rcs_ccs);
>  
>  	group_bcs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_bcs)) {
> -		err = PTR_ERR(group_bcs);
> -		goto err_group_bcs;
> -	}
> +	if (IS_ERR(group_bcs))
> +		return PTR_ERR(group_bcs);
>  
>  	group_vcs_vecs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_vcs_vecs)) {
> -		err = PTR_ERR(group_vcs_vecs);
> -		goto err_group_vcs_vecs;
> -	}
> +	if (IS_ERR(group_vcs_vecs))
> +		return PTR_ERR(group_vcs_vecs);
>  
>  	for_each_hw_engine(hwe, gt, id) {
>  		switch (hwe->class) {
> @@ -125,15 +120,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  	}
>  
>  	return 0;
> -
> -err_group_vcs_vecs:
> -	kfree(group_vcs_vecs);
> -err_group_bcs:
> -	kfree(group_bcs);
> -err_group_rcs_ccs:
> -	kfree(group_rcs_ccs);
> -
> -	return err;
>  }
>  
>  /**
> -- 
> 2.30.2
> 
Re: [PATCH v2] drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups()
Posted by Vivekanandan, Balasubramani 1 week ago
On 15.11.2024 10:49, Su Hui wrote:
> Clang static checker(scan-build) warning:
> drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2
> Argument to kfree() is a constant address (18446744073709551604), which
> is not memory allocated by malloc().
> 
> kfree() can only handle NULL pointers instead of negitave error codes.
> When hw_engine_group_alloc() failed, there is a bad kfree call for
> negitave error codes in xe_hw_engine_setup_groups().
> 
> Free 'group' when alloc_workqueue() failed in hw_engine_group_alloc(), and
> remove wrong kfree() in xe_hw_engine_setup_groups() to fix this problem.
> It's safe to remove these kfree() because drmm_add_action_or_reset()
> can free these by calling hw_engine_group_free().
> 
> Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission")
> Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group")
> Signed-off-by: Su Hui <suhui@nfschina.com>
> ---
> v2:
>  - remove wrong destroy_workqueue() and kfree() in v1 patch
> v1:
>  - https://lore.kernel.org/all/20241114063942.3448607-1-suhui@nfschina.com/

Looks good to me.

Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>

Regards,
Bala
> 
>  drivers/gpu/drm/xe/xe_hw_engine_group.c | 32 +++++++------------------
>  1 file changed, 9 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> index 82750520a90a..3bfa002734ad 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -58,8 +58,10 @@ hw_engine_group_alloc(struct xe_device *xe)
>  		return ERR_PTR(-ENOMEM);
>  
>  	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
> -	if (!group->resume_wq)
> +	if (!group->resume_wq) {
> +		kfree(group);
>  		return ERR_PTR(-ENOMEM);
> +	}
>  
>  	init_rwsem(&group->mode_sem);
>  	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
> @@ -84,25 +86,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  	enum xe_hw_engine_id id;
>  	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
>  	struct xe_device *xe = gt_to_xe(gt);
> -	int err;
>  
>  	group_rcs_ccs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_rcs_ccs)) {
> -		err = PTR_ERR(group_rcs_ccs);
> -		goto err_group_rcs_ccs;
> -	}
> +	if (IS_ERR(group_rcs_ccs))
> +		return PTR_ERR(group_rcs_ccs);
>  
>  	group_bcs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_bcs)) {
> -		err = PTR_ERR(group_bcs);
> -		goto err_group_bcs;
> -	}
> +	if (IS_ERR(group_bcs))
> +		return PTR_ERR(group_bcs);
>  
>  	group_vcs_vecs = hw_engine_group_alloc(xe);
> -	if (IS_ERR(group_vcs_vecs)) {
> -		err = PTR_ERR(group_vcs_vecs);
> -		goto err_group_vcs_vecs;
> -	}
> +	if (IS_ERR(group_vcs_vecs))
> +		return PTR_ERR(group_vcs_vecs);
>  
>  	for_each_hw_engine(hwe, gt, id) {
>  		switch (hwe->class) {
> @@ -125,15 +120,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
>  	}
>  
>  	return 0;
> -
> -err_group_vcs_vecs:
> -	kfree(group_vcs_vecs);
> -err_group_bcs:
> -	kfree(group_bcs);
> -err_group_rcs_ccs:
> -	kfree(group_rcs_ccs);
> -
> -	return err;
>  }
>  
>  /**
> -- 
> 2.30.2
>