drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)
A potential NULL pointer dereference may occur when accessing
tmp_mqd->cp_hqd_pq_control without verifying that tmp_mqd is non-NULL.
This may happen if mqd_backup[mqd_idx] is unexpectedly NULL.
Although a NULL check for mqd_backup[mqd_idx] existed previously, it was
moved to a position after the dereference in a recent commit, which
renders it ineffective.
Add an explicit NULL check for tmp_mqd before dereferencing its members.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Cc: stable@vger.kernel.org # v5.13+
Fixes: a330b52a9e59 ("drm/amdgpu: Init the cp MQD if it's not be initialized before")
Signed-off-by: Alexey Nepomnyashih <sdl@nppct.ru>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d7db4cb907ae..134cab16a00d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3817,10 +3817,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
* check mqd->cp_hqd_pq_control since this value should not be 0
*/
tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
- if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
+ if (amdgpu_in_reset(adev) && tmp_mqd && tmp_mqd->cp_hqd_pq_control) {
/* for GPU_RESET case , reset MQD to a clean status */
- if (adev->gfx.kiq[0].mqd_backup)
- memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
+ memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
@@ -3863,7 +3862,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
*/
tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
- if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
+ if (!restore && tmp_mqd && (!tmp_mqd->cp_hqd_pq_control ||
(!amdgpu_in_reset(adev) && !adev->in_suspend))) {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
@@ -3874,8 +3873,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
} else {
/* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
--
2.43.0
On Sat, May 24, 2025 at 2:14 AM Alexey Nepomnyashih <sdl@nppct.ru> wrote:
>
> A potential NULL pointer dereference may occur when accessing
> tmp_mqd->cp_hqd_pq_control without verifying that tmp_mqd is non-NULL.
> This may happen if mqd_backup[mqd_idx] is unexpectedly NULL.
>
> Although a NULL check for mqd_backup[mqd_idx] existed previously, it was
> moved to a position after the dereference in a recent commit, which
> renders it ineffective.
I don't think it's possible for mqd_backup to be NULL at this point.
We would have failed earlier in init if the mqd backup allocation
failed.
Alex
>
> Add an explicit NULL check for tmp_mqd before dereferencing its members.
>
> Found by Linux Verification Center (linuxtesting.org) with SVACE.
>
> Cc: stable@vger.kernel.org # v5.13+
> Fixes: a330b52a9e59 ("drm/amdgpu: Init the cp MQD if it's not be initialized before")
> Signed-off-by: Alexey Nepomnyashih <sdl@nppct.ru>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 ++++------
> 1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index d7db4cb907ae..134cab16a00d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3817,10 +3817,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
> * check mqd->cp_hqd_pq_control since this value should not be 0
> */
> tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
> - if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
> + if (amdgpu_in_reset(adev) && tmp_mqd && tmp_mqd->cp_hqd_pq_control) {
> /* for GPU_RESET case , reset MQD to a clean status */
> - if (adev->gfx.kiq[0].mqd_backup)
> - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
> + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
>
> /* reset ring buffer */
> ring->wptr = 0;
> @@ -3863,7 +3862,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
> */
> tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
>
> - if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
> + if (!restore && tmp_mqd && (!tmp_mqd->cp_hqd_pq_control ||
> (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
> memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
> ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
> @@ -3874,8 +3873,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
> soc15_grbm_select(adev, 0, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
>
> - if (adev->gfx.mec.mqd_backup[mqd_idx])
> - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
> + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
> } else {
> /* restore MQD to a clean status */
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> --
> 2.43.0
>
> On Sat, May 24, 2025 at 2:14 AM Alexey Nepomnyashih <sdl@nppct.ru> wrote: >> A potential NULL pointer dereference may occur when accessing >> tmp_mqd->cp_hqd_pq_control without verifying that tmp_mqd is non-NULL. >> This may happen if mqd_backup[mqd_idx] is unexpectedly NULL. >> >> Although a NULL check for mqd_backup[mqd_idx] existed previously, it was >> moved to a position after the dereference in a recent commit, which >> renders it ineffective. > I don't think it's possible for mqd_backup to be NULL at this point. > We would have failed earlier in init if the mqd backup allocation > failed. > > Alex In scenarios such as GPU reset or power management resume, there is no strict guarantee that amdgpu_gfx_mqd_sw_init() (via ->sw_init()) is invoked before gfx_v9_0_kiq_init_queue(). As a result, mqd_backup[] may remain uninitialized, and dereferencing it without a NULL check can lead to a crash. Most other uses of mqd_backup[] in the driver explicitly check for NULL, indicating that uninitialized entries are an expected condition and should be handled accordingly. Alexey
On Wed, Jun 4, 2025 at 3:30 PM SDL <sdl@nppct.ru> wrote: > > > > On Sat, May 24, 2025 at 2:14 AM Alexey Nepomnyashih <sdl@nppct.ru> wrote: > >> A potential NULL pointer dereference may occur when accessing > >> tmp_mqd->cp_hqd_pq_control without verifying that tmp_mqd is non-NULL. > >> This may happen if mqd_backup[mqd_idx] is unexpectedly NULL. > >> > >> Although a NULL check for mqd_backup[mqd_idx] existed previously, it was > >> moved to a position after the dereference in a recent commit, which > >> renders it ineffective. > > I don't think it's possible for mqd_backup to be NULL at this point. > > We would have failed earlier in init if the mqd backup allocation > > failed. > > > > Alex > In scenarios such as GPU reset or power management resume, there is no > strict > guarantee that amdgpu_gfx_mqd_sw_init() (via ->sw_init()) is invoked before > gfx_v9_0_kiq_init_queue(). As a result, mqd_backup[] may remain > uninitialized, > and dereferencing it without a NULL check can lead to a crash. > > Most other uses of mqd_backup[] in the driver explicitly check for NULL, > indicating that uninitialized entries are an expected condition and > should be handled > accordingly. sw_init() is only called once at driver load time. everything is allocated at that point. If that fails, the driver would not have loaded in the first place. I don't think it's possible for it to be NULL. Alex > > Alexey
04.06.2025 22:34, Alex Deucher пишет: > On Wed, Jun 4, 2025 at 3:30 PM SDL <sdl@nppct.ru> wrote: >> >>> On Sat, May 24, 2025 at 2:14 AM Alexey Nepomnyashih <sdl@nppct.ru> wrote: >>>> A potential NULL pointer dereference may occur when accessing >>>> tmp_mqd->cp_hqd_pq_control without verifying that tmp_mqd is non-NULL. >>>> This may happen if mqd_backup[mqd_idx] is unexpectedly NULL. >>>> >>>> Although a NULL check for mqd_backup[mqd_idx] existed previously, it was >>>> moved to a position after the dereference in a recent commit, which >>>> renders it ineffective. >>> I don't think it's possible for mqd_backup to be NULL at this point. >>> We would have failed earlier in init if the mqd backup allocation >>> failed. >>> >>> Alex >> In scenarios such as GPU reset or power management resume, there is no >> strict >> guarantee that amdgpu_gfx_mqd_sw_init() (via ->sw_init()) is invoked before >> gfx_v9_0_kiq_init_queue(). As a result, mqd_backup[] may remain >> uninitialized, >> and dereferencing it without a NULL check can lead to a crash. >> >> Most other uses of mqd_backup[] in the driver explicitly check for NULL, >> indicating that uninitialized entries are an expected condition and >> should be handled >> accordingly. > sw_init() is only called once at driver load time. everything is > allocated at that point. If that fails, the driver would not have > loaded in the first place. I don't think it's possible for it to be > NULL. > > Alex Thanks for the review! I agree with your point. Alexey
© 2016 - 2025 Red Hat, Inc.