[PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems

Richard Gong posted 1 patch 4 years ago
drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
[PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Richard Gong 4 years ago
Active State Power Management (ASPM) feature is enabled since kernel 5.14.
There are some AMD GFX cards (such as WX3200 and RX640) that won't work
with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
video/display output, Intel Alder Lake based systems will hang during
suspend/resume.

The issue was initially reported on one system (Dell Precision 3660 with
BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
Lake based systems.

Add extra check to disable ASPM on Intel Alder Lake based systems.

Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Richard Gong <richard.gong@amd.com>
---
v4: s/CONFIG_X86_64/CONFIG_X86
    enhanced check logic
v3: s/intel_core_asom_chk/aspm_support_quirk_check
    correct build error with W=1 option
v2: correct commit description
    move the check from chip family to problematic platform
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..b33e0a9bee65 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,6 +81,10 @@
 #include "mxgpu_vi.h"
 #include "amdgpu_dm.h"
 
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
 #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
 #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
 		WREG32_PCIE(ixPCIE_LC_CNTL, data);
 }
 
+static bool aspm_support_quirk_check(void)
+{
+	if (IS_ENABLED(CONFIG_X86)) {
+		struct cpuinfo_x86 *c = &cpu_data(0);
+
+		return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+	}
+
+	return true;
+}
+
 static void vi_program_aspm(struct amdgpu_device *adev)
 {
 	u32 data, data1, orig;
 	bool bL1SS = false;
 	bool bClkReqSupport = true;
 
-	if (!amdgpu_device_should_use_aspm(adev))
+	if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
 		return;
 
 	if (adev->flags & AMD_IS_APU ||
-- 
2.25.1
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Nathan Chancellor 4 years ago
Hi Richard,

On Tue, Apr 12, 2022 at 04:50:00PM -0500, Richard Gong wrote:
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD GFX cards (such as WX3200 and RX640) that won't work
> with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
> video/display output, Intel Alder Lake based systems will hang during
> suspend/resume.
> 
> The issue was initially reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
> Lake based systems.
> 
> Add extra check to disable ASPM on Intel Alder Lake based systems.
> 
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Richard Gong <richard.gong@amd.com>
> ---
> v4: s/CONFIG_X86_64/CONFIG_X86
>     enhanced check logic
> v3: s/intel_core_asom_chk/aspm_support_quirk_check
>     correct build error with W=1 option
> v2: correct commit description
>     move the check from chip family to problematic platform
> ---
>  drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..b33e0a9bee65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
>  #include "mxgpu_vi.h"
>  #include "amdgpu_dm.h"
>  
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
>  #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
>  #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
>  #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>  		WREG32_PCIE(ixPCIE_LC_CNTL, data);
>  }
>  
> +static bool aspm_support_quirk_check(void)
> +{
> +	if (IS_ENABLED(CONFIG_X86)) {
> +		struct cpuinfo_x86 *c = &cpu_data(0);
> +
> +		return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +	}

I have not seen this reported by a bot, sorry if it is a duplicate. This
breaks non-x86 builds (arm64 allmodconfig for example):

drivers/gpu/drm/amd/amdgpu/vi.c:1144:28: error: implicit declaration of function 'cpu_data' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
                struct cpuinfo_x86 *c = &cpu_data(0);
                                         ^
drivers/gpu/drm/amd/amdgpu/vi.c:1144:27: error: cannot take the address of an rvalue of type 'int'
                struct cpuinfo_x86 *c = &cpu_data(0);
                                        ^~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/vi.c:1146:13: error: incomplete definition of type 'struct cpuinfo_x86'
                return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
                         ~^
drivers/gpu/drm/amd/amdgpu/vi.c:1144:10: note: forward declaration of 'struct cpuinfo_x86'
                struct cpuinfo_x86 *c = &cpu_data(0);
                       ^
drivers/gpu/drm/amd/amdgpu/vi.c:1146:28: error: incomplete definition of type 'struct cpuinfo_x86'
                return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
                                        ~^
drivers/gpu/drm/amd/amdgpu/vi.c:1144:10: note: forward declaration of 'struct cpuinfo_x86'
                struct cpuinfo_x86 *c = &cpu_data(0);
                       ^
drivers/gpu/drm/amd/amdgpu/vi.c:1146:43: error: use of undeclared identifier 'INTEL_FAM6_ALDERLAKE'
                return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
                                                        ^
5 errors generated.

'struct cpuinfo_x86' is only defined for CONFIG_X86 so this section
needs to guarded with the preprocessor, which is how it was done in v2.
Please go back to that.

Cheers,
Nathan
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Gong, Richard 4 years ago
Hi Nathan,

On 4/13/2022 10:40 AM, Nathan Chancellor wrote:
> Hi Richard,
>
> On Tue, Apr 12, 2022 at 04:50:00PM -0500, Richard Gong wrote:
>> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
>> There are some AMD GFX cards (such as WX3200 and RX640) that won't work
>> with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
>> video/display output, Intel Alder Lake based systems will hang during
>> suspend/resume.
>>
>> The issue was initially reported on one system (Dell Precision 3660 with
>> BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
>> Lake based systems.
>>
>> Add extra check to disable ASPM on Intel Alder Lake based systems.
>>
>> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
>> Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F1885&amp;data=04%7C01%7Crichard.gong%40amd.com%7C35699b2c088747daedf508da1d63f1f3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637854612351767549%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=lzgZ3bV0PLFFl9uo3wt6N1dOoZpU2DqpddAk%2BTX8rEI%3D&amp;reserved=0
>> Reported-by: kernel test robot <lkp@intel.com>
>> Signed-off-by: Richard Gong <richard.gong@amd.com>
>> ---
>> v4: s/CONFIG_X86_64/CONFIG_X86
>>      enhanced check logic
>> v3: s/intel_core_asom_chk/aspm_support_quirk_check
>>      correct build error with W=1 option
>> v2: correct commit description
>>      move the check from chip family to problematic platform
>> ---
>>   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>>   1 file changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
>> index 039b90cdc3bc..b33e0a9bee65 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>> @@ -81,6 +81,10 @@
>>   #include "mxgpu_vi.h"
>>   #include "amdgpu_dm.h"
>>   
>> +#if IS_ENABLED(CONFIG_X86)
>> +#include <asm/intel-family.h>
>> +#endif
>> +
>>   #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
>>   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
>>   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
>> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>>   		WREG32_PCIE(ixPCIE_LC_CNTL, data);
>>   }
>>   
>> +static bool aspm_support_quirk_check(void)
>> +{
>> +	if (IS_ENABLED(CONFIG_X86)) {
>> +		struct cpuinfo_x86 *c = &cpu_data(0);
>> +
>> +		return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>> +	}
> I have not seen this reported by a bot, sorry if it is a duplicate. This
> breaks non-x86 builds (arm64 allmodconfig for example):
>
> drivers/gpu/drm/amd/amdgpu/vi.c:1144:28: error: implicit declaration of function 'cpu_data' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
>                  struct cpuinfo_x86 *c = &cpu_data(0);
>                                           ^
> drivers/gpu/drm/amd/amdgpu/vi.c:1144:27: error: cannot take the address of an rvalue of type 'int'
>                  struct cpuinfo_x86 *c = &cpu_data(0);
>                                          ^~~~~~~~~~~~
> drivers/gpu/drm/amd/amdgpu/vi.c:1146:13: error: incomplete definition of type 'struct cpuinfo_x86'
>                  return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>                           ~^
> drivers/gpu/drm/amd/amdgpu/vi.c:1144:10: note: forward declaration of 'struct cpuinfo_x86'
>                  struct cpuinfo_x86 *c = &cpu_data(0);
>                         ^
> drivers/gpu/drm/amd/amdgpu/vi.c:1146:28: error: incomplete definition of type 'struct cpuinfo_x86'
>                  return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>                                          ~^
> drivers/gpu/drm/amd/amdgpu/vi.c:1144:10: note: forward declaration of 'struct cpuinfo_x86'
>                  struct cpuinfo_x86 *c = &cpu_data(0);
>                         ^
> drivers/gpu/drm/amd/amdgpu/vi.c:1146:43: error: use of undeclared identifier 'INTEL_FAM6_ALDERLAKE'
>                  return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>                                                          ^
> 5 errors generated.
>
> 'struct cpuinfo_x86' is only defined for CONFIG_X86 so this section
> needs to guarded with the preprocessor, which is how it was done in v2.
> Please go back to that.

Thanks, I will do that.

Regards,

Richard

>
> Cheers,
> Nathan
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Paul Menzel 4 years ago
Dear Richard,


Thank you for sending out v4.

Am 12.04.22 um 23:50 schrieb Richard Gong:
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD GFX cards (such as WX3200 and RX640) that won't work
> with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
> video/display output, Intel Alder Lake based systems will hang during
> suspend/resume.

I am still not clear, what “hang during suspend/resume” means. I guess 
suspending works fine? During resume (S3 or S0ix?), where does it hang? 
The system is functional, but there are only display problems?

> The issue was initially reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
> Lake based systems.
> 
> Add extra check to disable ASPM on Intel Alder Lake based systems.
> 
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <lkp@intel.com>

This tag is a little confusing. Maybe clarify that it was for an issue 
in a previous patch iteration?

> Signed-off-by: Richard Gong <richard.gong@amd.com>
> ---
> v4: s/CONFIG_X86_64/CONFIG_X86
>      enhanced check logic
> v3: s/intel_core_asom_chk/aspm_support_quirk_check
>      correct build error with W=1 option
> v2: correct commit description
>      move the check from chip family to problematic platform
> ---
>   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>   1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..b33e0a9bee65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
>   #include "mxgpu_vi.h"
>   #include "amdgpu_dm.h"
>   
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
>   #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
>   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
>   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>   		WREG32_PCIE(ixPCIE_LC_CNTL, data);
>   }
>   
> +static bool aspm_support_quirk_check(void)
> +{
> +	if (IS_ENABLED(CONFIG_X86)) {
> +		struct cpuinfo_x86 *c = &cpu_data(0);
> +
> +		return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +	}
> +
> +	return true;
> +}
> +
>   static void vi_program_aspm(struct amdgpu_device *adev)
>   {
>   	u32 data, data1, orig;
>   	bool bL1SS = false;
>   	bool bClkReqSupport = true;
>   
> -	if (!amdgpu_device_should_use_aspm(adev))
> +	if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
>   		return;

Can users still forcefully enable ASPM with the parameter `amdgpu.aspm`?

>   
>   	if (adev->flags & AMD_IS_APU ||

If I remember correctly, there were also newer cards, where ASPM worked 
with Intel Alder Lake, right? Can only the problematic generations for 
WX3200 and RX640 be excluded from ASPM?


Kind regards,

Paul
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Alex Deucher 4 years ago
On Wed, Apr 13, 2022 at 3:43 AM Paul Menzel <pmenzel@molgen.mpg.de> wrote:
>
> Dear Richard,
>
>
> Thank you for sending out v4.
>
> Am 12.04.22 um 23:50 schrieb Richard Gong:
> > Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> > There are some AMD GFX cards (such as WX3200 and RX640) that won't work
> > with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
> > video/display output, Intel Alder Lake based systems will hang during
> > suspend/resume.
>
> I am still not clear, what “hang during suspend/resume” means. I guess
> suspending works fine? During resume (S3 or S0ix?), where does it hang?
> The system is functional, but there are only display problems?
>
> > The issue was initially reported on one system (Dell Precision 3660 with
> > BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
> > Lake based systems.
> >
> > Add extra check to disable ASPM on Intel Alder Lake based systems.
> >
> > Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> > Reported-by: kernel test robot <lkp@intel.com>
>
> This tag is a little confusing. Maybe clarify that it was for an issue
> in a previous patch iteration?
>
> > Signed-off-by: Richard Gong <richard.gong@amd.com>
> > ---
> > v4: s/CONFIG_X86_64/CONFIG_X86
> >      enhanced check logic
> > v3: s/intel_core_asom_chk/aspm_support_quirk_check
> >      correct build error with W=1 option
> > v2: correct commit description
> >      move the check from chip family to problematic platform
> > ---
> >   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> >   1 file changed, 16 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> > index 039b90cdc3bc..b33e0a9bee65 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> > @@ -81,6 +81,10 @@
> >   #include "mxgpu_vi.h"
> >   #include "amdgpu_dm.h"
> >
> > +#if IS_ENABLED(CONFIG_X86)
> > +#include <asm/intel-family.h>
> > +#endif
> > +
> >   #define ixPCIE_LC_L1_PM_SUBSTATE    0x100100C6
> >   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK    0x00000001L
> >   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK        0x00000002L
> > @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
> >               WREG32_PCIE(ixPCIE_LC_CNTL, data);
> >   }
> >
> > +static bool aspm_support_quirk_check(void)
> > +{
> > +     if (IS_ENABLED(CONFIG_X86)) {
> > +             struct cpuinfo_x86 *c = &cpu_data(0);
> > +
> > +             return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> > +     }
> > +
> > +     return true;
> > +}
> > +
> >   static void vi_program_aspm(struct amdgpu_device *adev)
> >   {
> >       u32 data, data1, orig;
> >       bool bL1SS = false;
> >       bool bClkReqSupport = true;
> >
> > -     if (!amdgpu_device_should_use_aspm(adev))
> > +     if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
> >               return;
>
> Can users still forcefully enable ASPM with the parameter `amdgpu.aspm`?
>
> >
> >       if (adev->flags & AMD_IS_APU ||
>
> If I remember correctly, there were also newer cards, where ASPM worked
> with Intel Alder Lake, right? Can only the problematic generations for
> WX3200 and RX640 be excluded from ASPM?

This patch only disables it for the generation that was problematic.

Alex

>
>
> Kind regards,
>
> Paul
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Paul Menzel 4 years ago
[Cc: -kernel test robot <lkp@intel.com>]

Dear Alex, dear Richard,


Am 13.04.22 um 15:00 schrieb Alex Deucher:
> On Wed, Apr 13, 2022 at 3:43 AM Paul Menzel wrote:

>> Thank you for sending out v4.
>>
>> Am 12.04.22 um 23:50 schrieb Richard Gong:
>>> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
>>> There are some AMD GFX cards (such as WX3200 and RX640) that won't work
>>> with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
>>> video/display output, Intel Alder Lake based systems will hang during
>>> suspend/resume.
>>
>> I am still not clear, what “hang during suspend/resume” means. I guess
>> suspending works fine? During resume (S3 or S0ix?), where does it hang?
>> The system is functional, but there are only display problems?
>>
>>> The issue was initially reported on one system (Dell Precision 3660 with
>>> BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
>>> Lake based systems.
>>>
>>> Add extra check to disable ASPM on Intel Alder Lake based systems.
>>>
>>> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
>>> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
>>> Reported-by: kernel test robot <lkp@intel.com>
>>
>> This tag is a little confusing. Maybe clarify that it was for an issue
>> in a previous patch iteration?
>>
>>> Signed-off-by: Richard Gong <richard.gong@amd.com>
>>> ---
>>> v4: s/CONFIG_X86_64/CONFIG_X86
>>>       enhanced check logic
>>> v3: s/intel_core_asom_chk/aspm_support_quirk_check
>>>       correct build error with W=1 option
>>> v2: correct commit description
>>>       move the check from chip family to problematic platform
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>>>    1 file changed, 16 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
>>> index 039b90cdc3bc..b33e0a9bee65 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>>> @@ -81,6 +81,10 @@
>>>    #include "mxgpu_vi.h"
>>>    #include "amdgpu_dm.h"
>>>
>>> +#if IS_ENABLED(CONFIG_X86)
>>> +#include <asm/intel-family.h>
>>> +#endif
>>> +
>>>    #define ixPCIE_LC_L1_PM_SUBSTATE    0x100100C6
>>>    #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK    0x00000001L
>>>    #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK        0x00000002L
>>> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>>>                WREG32_PCIE(ixPCIE_LC_CNTL, data);
>>>    }
>>>
>>> +static bool aspm_support_quirk_check(void)
>>> +{
>>> +     if (IS_ENABLED(CONFIG_X86)) {
>>> +             struct cpuinfo_x86 *c = &cpu_data(0);
>>> +
>>> +             return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>>> +     }
>>> +
>>> +     return true;
>>> +}
>>> +
>>>    static void vi_program_aspm(struct amdgpu_device *adev)
>>>    {
>>>        u32 data, data1, orig;
>>>        bool bL1SS = false;
>>>        bool bClkReqSupport = true;
>>>
>>> -     if (!amdgpu_device_should_use_aspm(adev))
>>> +     if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
>>>                return;
>>
>> Can users still forcefully enable ASPM with the parameter `amdgpu.aspm`?
>>
>>>
>>>        if (adev->flags & AMD_IS_APU ||
>>
>> If I remember correctly, there were also newer cards, where ASPM worked
>> with Intel Alder Lake, right? Can only the problematic generations for
>> WX3200 and RX640 be excluded from ASPM?
> 
> This patch only disables it for the generation that was problematic.

Could that please be made clear in the commit message summary, and message?

Loosely related, is there a public (or internal issue) to analyze how to 
get ASPM working for VI generation devices with Intel Alder Lake?


Kind regards,

Paul
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Alex Deucher 4 years ago
On Thu, Apr 14, 2022 at 3:52 AM Paul Menzel <pmenzel@molgen.mpg.de> wrote:
>
> [Cc: -kernel test robot <lkp@intel.com>]
>
> Dear Alex, dear Richard,
>
>
> Am 13.04.22 um 15:00 schrieb Alex Deucher:
> > On Wed, Apr 13, 2022 at 3:43 AM Paul Menzel wrote:
>
> >> Thank you for sending out v4.
> >>
> >> Am 12.04.22 um 23:50 schrieb Richard Gong:
> >>> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> >>> There are some AMD GFX cards (such as WX3200 and RX640) that won't work
> >>> with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
> >>> video/display output, Intel Alder Lake based systems will hang during
> >>> suspend/resume.
> >>
> >> I am still not clear, what “hang during suspend/resume” means. I guess
> >> suspending works fine? During resume (S3 or S0ix?), where does it hang?
> >> The system is functional, but there are only display problems?
> >>
> >>> The issue was initially reported on one system (Dell Precision 3660 with
> >>> BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
> >>> Lake based systems.
> >>>
> >>> Add extra check to disable ASPM on Intel Alder Lake based systems.
> >>>
> >>> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> >>> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> >>> Reported-by: kernel test robot <lkp@intel.com>
> >>
> >> This tag is a little confusing. Maybe clarify that it was for an issue
> >> in a previous patch iteration?
> >>
> >>> Signed-off-by: Richard Gong <richard.gong@amd.com>
> >>> ---
> >>> v4: s/CONFIG_X86_64/CONFIG_X86
> >>>       enhanced check logic
> >>> v3: s/intel_core_asom_chk/aspm_support_quirk_check
> >>>       correct build error with W=1 option
> >>> v2: correct commit description
> >>>       move the check from chip family to problematic platform
> >>> ---
> >>>    drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> >>>    1 file changed, 16 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> >>> index 039b90cdc3bc..b33e0a9bee65 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> >>> @@ -81,6 +81,10 @@
> >>>    #include "mxgpu_vi.h"
> >>>    #include "amdgpu_dm.h"
> >>>
> >>> +#if IS_ENABLED(CONFIG_X86)
> >>> +#include <asm/intel-family.h>
> >>> +#endif
> >>> +
> >>>    #define ixPCIE_LC_L1_PM_SUBSTATE    0x100100C6
> >>>    #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK    0x00000001L
> >>>    #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK        0x00000002L
> >>> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
> >>>                WREG32_PCIE(ixPCIE_LC_CNTL, data);
> >>>    }
> >>>
> >>> +static bool aspm_support_quirk_check(void)
> >>> +{
> >>> +     if (IS_ENABLED(CONFIG_X86)) {
> >>> +             struct cpuinfo_x86 *c = &cpu_data(0);
> >>> +
> >>> +             return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> >>> +     }
> >>> +
> >>> +     return true;
> >>> +}
> >>> +
> >>>    static void vi_program_aspm(struct amdgpu_device *adev)
> >>>    {
> >>>        u32 data, data1, orig;
> >>>        bool bL1SS = false;
> >>>        bool bClkReqSupport = true;
> >>>
> >>> -     if (!amdgpu_device_should_use_aspm(adev))
> >>> +     if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
> >>>                return;
> >>
> >> Can users still forcefully enable ASPM with the parameter `amdgpu.aspm`?
> >>
> >>>
> >>>        if (adev->flags & AMD_IS_APU ||
> >>
> >> If I remember correctly, there were also newer cards, where ASPM worked
> >> with Intel Alder Lake, right? Can only the problematic generations for
> >> WX3200 and RX640 be excluded from ASPM?
> >
> > This patch only disables it for the generation that was problematic.
>
> Could that please be made clear in the commit message summary, and message?

Sure.  Richard, please add that this only disables ASPM on VI parts
when in an alderlake system.

>
> Loosely related, is there a public (or internal issue) to analyze how to
> get ASPM working for VI generation devices with Intel Alder Lake?

We'd need support from Intel.  I'm not sure where things currently stand.

Alex
RE: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Limonciello, Mario 4 years ago
[Public]

 
> On Wed, Apr 13, 2022 at 3:43 AM Paul Menzel <pmenzel@molgen.mpg.de>
> wrote:
> >
> > Dear Richard,
> >
> >
> > Thank you for sending out v4.
> >
> > Am 12.04.22 um 23:50 schrieb Richard Gong:
> > > Active State Power Management (ASPM) feature is enabled since kernel
> 5.14.
> > > There are some AMD GFX cards (such as WX3200 and RX640) that won't
> work
> > > with ASPM-enabled Intel Alder Lake based systems. Using these GFX
> cards as
> > > video/display output, Intel Alder Lake based systems will hang during
> > > suspend/resume.
> >
> > I am still not clear, what "hang during suspend/resume" means. I guess
> > suspending works fine? During resume (S3 or S0ix?), where does it hang?
> > The system is functional, but there are only display problems?

I believe Intel would need to identify the state of the SOC to determine where
the PCIE problem actually occurs; on the way down or up.

As he said in the commit message it results in a hang.

> >
> > > The issue was initially reported on one system (Dell Precision 3660 with
> > > BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
> > > Lake based systems.
> > >
> > > Add extra check to disable ASPM on Intel Alder Lake based systems.
> > >
> > > Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> > > Link:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitla
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1885&amp;data=04%7C01%7Cmario.limonciello%40amd.com%
> 7Cfe4b6b553c3b47c1288f08da1d4da9c8%7C3dd8961fe4884e608e11a82d994e
> 183d%7C0%7C0%7C637854516675116782%7CUnknown%7CTWFpbGZsb3d8ey
> JWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%
> 7C3000&amp;sdata=gvFmP1HQP%2FyzLfT0gYYCupAQBIG%2FtiDYelQNqTLAx
> ck%3D&amp;reserved=0
> > > Reported-by: kernel test robot <lkp@intel.com>
> >
> > This tag is a little confusing. Maybe clarify that it was for an issue
> > in a previous patch iteration?
> >
> > > Signed-off-by: Richard Gong <richard.gong@amd.com>
> > > ---
> > > v4: s/CONFIG_X86_64/CONFIG_X86
> > >      enhanced check logic
> > > v3: s/intel_core_asom_chk/aspm_support_quirk_check
> > >      correct build error with W=1 option
> > > v2: correct commit description
> > >      move the check from chip family to problematic platform
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> > >   1 file changed, 16 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
> b/drivers/gpu/drm/amd/amdgpu/vi.c
> > > index 039b90cdc3bc..b33e0a9bee65 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> > > @@ -81,6 +81,10 @@
> > >   #include "mxgpu_vi.h"
> > >   #include "amdgpu_dm.h"
> > >
> > > +#if IS_ENABLED(CONFIG_X86)
> > > +#include <asm/intel-family.h>
> > > +#endif
> > > +
> > >   #define ixPCIE_LC_L1_PM_SUBSTATE    0x100100C6
> > >   #define
> PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK
> 0x00000001L
> > >   #define
> PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK
> 0x00000002L
> > > @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct
> amdgpu_device *adev)
> > >               WREG32_PCIE(ixPCIE_LC_CNTL, data);
> > >   }
> > >
> > > +static bool aspm_support_quirk_check(void)
> > > +{
> > > +     if (IS_ENABLED(CONFIG_X86)) {
> > > +             struct cpuinfo_x86 *c = &cpu_data(0);
> > > +
> > > +             return !(c->x86 == 6 && c->x86_model ==
> INTEL_FAM6_ALDERLAKE);

Don't you need to check x86_vendor?  Although extremely unlikely if you don't
check x86_vendor nothing to stop another X86 manufacturer from having a
design that has same model # as INTEL_FAM6_ALDERLAKE.

> > > +     }
> > > +
> > > +     return true;
> > > +}
> > > +
> > >   static void vi_program_aspm(struct amdgpu_device *adev)
> > >   {
> > >       u32 data, data1, orig;
> > >       bool bL1SS = false;
> > >       bool bClkReqSupport = true;
> > >
> > > -     if (!amdgpu_device_should_use_aspm(adev))
> > > +     if (!amdgpu_device_should_use_aspm(adev) ||
> !aspm_support_quirk_check())
> > >               return;
> >
> > Can users still forcefully enable ASPM with the parameter `amdgpu.aspm`?

amdgpu.aspm is module wide not just for one card.  That is it covers all AMD GPU cards
in the system.  If it's set to 1 or pcie_aspm_enabled returns true it will enable for other
GPUs besides these.

There is the possibility to move this quirk check within " amdgpu_device_should_use_aspm"
and only match this combination when set to amdgpu.aspm is set to "-1" (the default), something
like this:

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1335,6 +1335,8 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
        default:
                return false;
        }
+       if (amdgpu_device_is_quirked_for_aspm(adev))
+               return false;
        return pcie_aspm_enabled(adev->pdev);
 }


> >
> > >
> > >       if (adev->flags & AMD_IS_APU ||
> >
> > If I remember correctly, there were also newer cards, where ASPM worked
> > with Intel Alder Lake, right? Can only the problematic generations for
> > WX3200 and RX640 be excluded from ASPM?
> 
> This patch only disables it for the generation that was problematic.
> 
> Alex
> 
> >
> >
> > Kind regards,
> >
> > Paul
Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems
Posted by Lazar, Lijo 4 years ago

On 4/13/2022 3:20 AM, Richard Gong wrote:
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD GFX cards (such as WX3200 and RX640) that won't work
> with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
> video/display output, Intel Alder Lake based systems will hang during
> suspend/resume.
> 
> The issue was initially reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
> Lake based systems.
> 
> Add extra check to disable ASPM on Intel Alder Lake based systems.
> 
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Richard Gong <richard.gong@amd.com>

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>

Thanks,
Lijo

> ---
> v4: s/CONFIG_X86_64/CONFIG_X86
>      enhanced check logic
> v3: s/intel_core_asom_chk/aspm_support_quirk_check
>      correct build error with W=1 option
> v2: correct commit description
>      move the check from chip family to problematic platform
> ---
>   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>   1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..b33e0a9bee65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
>   #include "mxgpu_vi.h"
>   #include "amdgpu_dm.h"
>   
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
>   #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
>   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
>   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>   		WREG32_PCIE(ixPCIE_LC_CNTL, data);
>   }
>   
> +static bool aspm_support_quirk_check(void)
> +{
> +	if (IS_ENABLED(CONFIG_X86)) {
> +		struct cpuinfo_x86 *c = &cpu_data(0);
> +
> +		return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +	}
> +
> +	return true;
> +}
> +
>   static void vi_program_aspm(struct amdgpu_device *adev)
>   {
>   	u32 data, data1, orig;
>   	bool bL1SS = false;
>   	bool bClkReqSupport = true;
>   
> -	if (!amdgpu_device_should_use_aspm(adev))
> +	if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
>   		return;
>   
>   	if (adev->flags & AMD_IS_APU ||
>