[PATCH 1/3] x86/MCE/AMD: Split amd_mce_is_memory_error()

Yazen Ghannam posted 3 patches 2 years, 8 months ago
[PATCH 1/3] x86/MCE/AMD: Split amd_mce_is_memory_error()
Posted by Yazen Ghannam 2 years, 8 months ago
Define helper functions for legacy and SMCA systems in order to reuse
individual checks in later changes.

Describe what each function is checking for, and correct the XEC bitmask
for SMCA.

No functional change intended.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mce/amd.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5e74610b39e7..1ccfb0c9257f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -713,17 +713,37 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		deferred_error_interrupt_enable(c);
 }
 
-bool amd_mce_is_memory_error(struct mce *m)
+/*
+ * DRAM ECC errors are reported in the Northbridge (bank 4) with
+ * Extended Error Code 8.
+ */
+static bool legacy_mce_is_memory_error(struct mce *m)
+{
+	return m->bank == 4 && XEC(m->status, 0x1f) == 8;
+}
+
+/*
+ * DRAM ECC errors are reported in Unified Memory Controllers with
+ * Extended Error Code 0.
+ */
+static bool smca_mce_is_memory_error(struct mce *m)
 {
 	enum smca_bank_types bank_type;
-	/* ErrCodeExt[20:16] */
-	u8 xec = (m->status >> 16) & 0x1f;
+
+	if (XEC(m->status, 0x3f))
+		return false;
 
 	bank_type = smca_get_bank_type(m->extcpu, m->bank);
+
+	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
+}
+
+bool amd_mce_is_memory_error(struct mce *m)
+{
 	if (mce_flags.smca)
-		return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
+		return smca_mce_is_memory_error(m);
 
-	return m->bank == 4 && xec == 0x8;
+	return legacy_mce_is_memory_error(m);
 }
 
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
-- 
2.34.1
Re: [PATCH 1/3] x86/MCE/AMD: Split amd_mce_is_memory_error()
Posted by Shuai Xue 2 years, 8 months ago

On 2023/6/13 22:11, Yazen Ghannam wrote:
> Define helper functions for legacy and SMCA systems in order to reuse
> individual checks in later changes.
> 
> Describe what each function is checking for, and correct the XEC bitmask
> for SMCA.
> 
> No functional change intended.
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
>  arch/x86/kernel/cpu/mce/amd.c | 30 +++++++++++++++++++++++++-----
>  1 file changed, 25 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index 5e74610b39e7..1ccfb0c9257f 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -713,17 +713,37 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
>  		deferred_error_interrupt_enable(c);
>  }
>  
> -bool amd_mce_is_memory_error(struct mce *m)
> +/*
> + * DRAM ECC errors are reported in the Northbridge (bank 4) with
> + * Extended Error Code 8.
> + */
> +static bool legacy_mce_is_memory_error(struct mce *m)
> +{
> +	return m->bank == 4 && XEC(m->status, 0x1f) == 8;
> +}
> +
> +/*
> + * DRAM ECC errors are reported in Unified Memory Controllers with
> + * Extended Error Code 0.
> + */
> +static bool smca_mce_is_memory_error(struct mce *m)
>  {
>  	enum smca_bank_types bank_type;
> -	/* ErrCodeExt[20:16] */
> -	u8 xec = (m->status >> 16) & 0x1f;
> +
> +	if (XEC(m->status, 0x3f))
> +		return false;
>  
>  	bank_type = smca_get_bank_type(m->extcpu, m->bank);
> +
> +	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
> +}
> +
> +bool amd_mce_is_memory_error(struct mce *m)
> +{
>  	if (mce_flags.smca)
> -		return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
> +		return smca_mce_is_memory_error(m);
>  
> -	return m->bank == 4 && xec == 0x8;
> +	return legacy_mce_is_memory_error(m);
>  }
>  
>  static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)

Hi, Yazen,

Which tree are you working on? This patch can not be applied to Linus master ?
(commit b6dad5178ceaf23f369c3711062ce1f2afc33644)

Thanks.

Best Regards,
Shuai
Re: [PATCH 1/3] x86/MCE/AMD: Split amd_mce_is_memory_error()
Posted by Yazen Ghannam 2 years, 7 months ago
On 6/13/2023 10:06 PM, Shuai Xue wrote:
> 
> 
> On 2023/6/13 22:11, Yazen Ghannam wrote:
>> Define helper functions for legacy and SMCA systems in order to reuse
>> individual checks in later changes.
>>
>> Describe what each function is checking for, and correct the XEC bitmask
>> for SMCA.
>>
>> No functional change intended.
>>
>> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
>> ---
>>   arch/x86/kernel/cpu/mce/amd.c | 30 +++++++++++++++++++++++++-----
>>   1 file changed, 25 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
>> index 5e74610b39e7..1ccfb0c9257f 100644
>> --- a/arch/x86/kernel/cpu/mce/amd.c
>> +++ b/arch/x86/kernel/cpu/mce/amd.c
>> @@ -713,17 +713,37 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
>>   		deferred_error_interrupt_enable(c);
>>   }
>>   
>> -bool amd_mce_is_memory_error(struct mce *m)
>> +/*
>> + * DRAM ECC errors are reported in the Northbridge (bank 4) with
>> + * Extended Error Code 8.
>> + */
>> +static bool legacy_mce_is_memory_error(struct mce *m)
>> +{
>> +	return m->bank == 4 && XEC(m->status, 0x1f) == 8;
>> +}
>> +
>> +/*
>> + * DRAM ECC errors are reported in Unified Memory Controllers with
>> + * Extended Error Code 0.
>> + */
>> +static bool smca_mce_is_memory_error(struct mce *m)
>>   {
>>   	enum smca_bank_types bank_type;
>> -	/* ErrCodeExt[20:16] */
>> -	u8 xec = (m->status >> 16) & 0x1f;
>> +
>> +	if (XEC(m->status, 0x3f))
>> +		return false;
>>   
>>   	bank_type = smca_get_bank_type(m->extcpu, m->bank);
>> +
>> +	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
>> +}
>> +
>> +bool amd_mce_is_memory_error(struct mce *m)
>> +{
>>   	if (mce_flags.smca)
>> -		return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
>> +		return smca_mce_is_memory_error(m);
>>   
>> -	return m->bank == 4 && xec == 0x8;
>> +	return legacy_mce_is_memory_error(m);
>>   }
>>   
>>   static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
> 
> Hi, Yazen,
> 
> Which tree are you working on? This patch can not be applied to Linus master ?
> (commit b6dad5178ceaf23f369c3711062ce1f2afc33644)
> 

Hi Shuai,

I'm using tip/master as the base.
https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/log/

Sorry, I forgot to mention this in the cover letter.

Thanks,
Yazen
Re: [PATCH 1/3] x86/MCE/AMD: Split amd_mce_is_memory_error()
Posted by Shuai Xue 2 years, 7 months ago

On 2023/6/14 23:06, Yazen Ghannam wrote:
> On 6/13/2023 10:06 PM, Shuai Xue wrote:
>>
>>
>> On 2023/6/13 22:11, Yazen Ghannam wrote:
>>> Define helper functions for legacy and SMCA systems in order to reuse
>>> individual checks in later changes.
>>>
>>> Describe what each function is checking for, and correct the XEC bitmask
>>> for SMCA.
>>>
>>> No functional change intended.
>>>
>>> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
>>> ---
>>>   arch/x86/kernel/cpu/mce/amd.c | 30 +++++++++++++++++++++++++-----
>>>   1 file changed, 25 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
>>> index 5e74610b39e7..1ccfb0c9257f 100644
>>> --- a/arch/x86/kernel/cpu/mce/amd.c
>>> +++ b/arch/x86/kernel/cpu/mce/amd.c
>>> @@ -713,17 +713,37 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
>>>           deferred_error_interrupt_enable(c);
>>>   }
>>>   -bool amd_mce_is_memory_error(struct mce *m)
>>> +/*
>>> + * DRAM ECC errors are reported in the Northbridge (bank 4) with
>>> + * Extended Error Code 8.
>>> + */
>>> +static bool legacy_mce_is_memory_error(struct mce *m)
>>> +{
>>> +    return m->bank == 4 && XEC(m->status, 0x1f) == 8;
>>> +}
>>> +
>>> +/*
>>> + * DRAM ECC errors are reported in Unified Memory Controllers with
>>> + * Extended Error Code 0.
>>> + */
>>> +static bool smca_mce_is_memory_error(struct mce *m)
>>>   {
>>>       enum smca_bank_types bank_type;
>>> -    /* ErrCodeExt[20:16] */
>>> -    u8 xec = (m->status >> 16) & 0x1f;
>>> +
>>> +    if (XEC(m->status, 0x3f))
>>> +        return false;
>>>         bank_type = smca_get_bank_type(m->extcpu, m->bank);
>>> +
>>> +    return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
>>> +}
>>> +
>>> +bool amd_mce_is_memory_error(struct mce *m)
>>> +{
>>>       if (mce_flags.smca)
>>> -        return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
>>> +        return smca_mce_is_memory_error(m);
>>>   -    return m->bank == 4 && xec == 0x8;
>>> +    return legacy_mce_is_memory_error(m);
>>>   }
>>>     static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
>>
>> Hi, Yazen,
>>
>> Which tree are you working on? This patch can not be applied to Linus master ?
>> (commit b6dad5178ceaf23f369c3711062ce1f2afc33644)
>>
> 
> Hi Shuai,
> 
> I'm using tip/master as the base.
> https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/log/
> 
> Sorry, I forgot to mention this in the cover letter.

Ok. This patch itself looks good to me.

Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>

Thanks.
Shuai
Re: [PATCH 1/3] x86/MCE/AMD: Split amd_mce_is_memory_error()
Posted by Yazen Ghannam 2 years, 7 months ago
On 6/14/2023 10:03 PM, Shuai Xue wrote:
> 
> 
> On 2023/6/14 23:06, Yazen Ghannam wrote:
>> On 6/13/2023 10:06 PM, Shuai Xue wrote:
>>>
>>>
>>> On 2023/6/13 22:11, Yazen Ghannam wrote:
>>>> Define helper functions for legacy and SMCA systems in order to reuse
>>>> individual checks in later changes.
>>>>
>>>> Describe what each function is checking for, and correct the XEC bitmask
>>>> for SMCA.
>>>>
>>>> No functional change intended.
>>>>
>>>> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
>>>> ---
>>>>    arch/x86/kernel/cpu/mce/amd.c | 30 +++++++++++++++++++++++++-----
>>>>    1 file changed, 25 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
>>>> index 5e74610b39e7..1ccfb0c9257f 100644
>>>> --- a/arch/x86/kernel/cpu/mce/amd.c
>>>> +++ b/arch/x86/kernel/cpu/mce/amd.c
>>>> @@ -713,17 +713,37 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
>>>>            deferred_error_interrupt_enable(c);
>>>>    }
>>>>    -bool amd_mce_is_memory_error(struct mce *m)
>>>> +/*
>>>> + * DRAM ECC errors are reported in the Northbridge (bank 4) with
>>>> + * Extended Error Code 8.
>>>> + */
>>>> +static bool legacy_mce_is_memory_error(struct mce *m)
>>>> +{
>>>> +    return m->bank == 4 && XEC(m->status, 0x1f) == 8;
>>>> +}
>>>> +
>>>> +/*
>>>> + * DRAM ECC errors are reported in Unified Memory Controllers with
>>>> + * Extended Error Code 0.
>>>> + */
>>>> +static bool smca_mce_is_memory_error(struct mce *m)
>>>>    {
>>>>        enum smca_bank_types bank_type;
>>>> -    /* ErrCodeExt[20:16] */
>>>> -    u8 xec = (m->status >> 16) & 0x1f;
>>>> +
>>>> +    if (XEC(m->status, 0x3f))
>>>> +        return false;
>>>>          bank_type = smca_get_bank_type(m->extcpu, m->bank);
>>>> +
>>>> +    return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
>>>> +}
>>>> +
>>>> +bool amd_mce_is_memory_error(struct mce *m)
>>>> +{
>>>>        if (mce_flags.smca)
>>>> -        return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
>>>> +        return smca_mce_is_memory_error(m);
>>>>    -    return m->bank == 4 && xec == 0x8;
>>>> +    return legacy_mce_is_memory_error(m);
>>>>    }
>>>>      static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
>>>
>>> Hi, Yazen,
>>>
>>> Which tree are you working on? This patch can not be applied to Linus master ?
>>> (commit b6dad5178ceaf23f369c3711062ce1f2afc33644)
>>>
>>
>> Hi Shuai,
>>
>> I'm using tip/master as the base.
>> https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/log/
>>
>> Sorry, I forgot to mention this in the cover letter.
> 
> Ok. This patch itself looks good to me.
> 
> Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
> 

Thank you!

-Yazen

[tip: ras/core] x86/MCE/AMD: Split amd_mce_is_memory_error()
Posted by tip-bot2 for Yazen Ghannam 2 years, 3 months ago
The following commit has been merged into the ras/core branch of tip:

Commit-ID:     495a91d0998367f4f079593f491bdfe8ef06838e
Gitweb:        https://git.kernel.org/tip/495a91d0998367f4f079593f491bdfe8ef06838e
Author:        Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate:    Tue, 13 Jun 2023 09:11:40 -05:00
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Mon, 16 Oct 2023 15:04:53 +02:00

x86/MCE/AMD: Split amd_mce_is_memory_error()

Define helper functions for legacy and SMCA systems in order to reuse
individual checks in later changes.

Describe what each function is checking for, and correct the XEC bitmask
for SMCA.

No functional change intended.

  [ bp: Use "else in amd_mce_is_memory_error() to make the conditional
    balanced, for readability. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230613141142.36801-2-yazen.ghannam@amd.com
---
 arch/x86/kernel/cpu/mce/amd.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index c267f43..c069934 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -713,17 +713,37 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		deferred_error_interrupt_enable(c);
 }
 
-bool amd_mce_is_memory_error(struct mce *m)
+/*
+ * DRAM ECC errors are reported in the Northbridge (bank 4) with
+ * Extended Error Code 8.
+ */
+static bool legacy_mce_is_memory_error(struct mce *m)
+{
+	return m->bank == 4 && XEC(m->status, 0x1f) == 8;
+}
+
+/*
+ * DRAM ECC errors are reported in Unified Memory Controllers with
+ * Extended Error Code 0.
+ */
+static bool smca_mce_is_memory_error(struct mce *m)
 {
 	enum smca_bank_types bank_type;
-	/* ErrCodeExt[20:16] */
-	u8 xec = (m->status >> 16) & 0x1f;
+
+	if (XEC(m->status, 0x3f))
+		return false;
 
 	bank_type = smca_get_bank_type(m->extcpu, m->bank);
-	if (mce_flags.smca)
-		return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
 
-	return m->bank == 4 && xec == 0x8;
+	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
+}
+
+bool amd_mce_is_memory_error(struct mce *m)
+{
+	if (mce_flags.smca)
+		return smca_mce_is_memory_error(m);
+	else
+		return legacy_mce_is_memory_error(m);
 }
 
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)