[PATCH v5 03/20] x86/mce/amd: Remove smca_banks_map

Yazen Ghannam posted 20 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v5 03/20] x86/mce/amd: Remove smca_banks_map
Posted by Yazen Ghannam 1 month, 1 week ago
The MCx_MISC0[BlkPtr] field was used on legacy systems to hold a
register offset for the next MCx_MISC* register. In this way, an
implementation-specific number of registers can be discovered at
runtime.

The MCAX/SMCA register space simplifies this by always including
the MCx_MISC[1-4] registers. The MCx_MISC0[BlkPtr] field is used to
indicate (true/false) whether any MCx_MISC[1-4] registers are present.

Currently, MCx_MISC0[BlkPtr] is checked early and cached to be used
during sysfs init later. This is unnecessary as the MCx_MISC0 register
is read again later anyway.

Remove the smca_banks_map variable as it is effectively redundant, and
use a direct register/bit check instead.

Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lore.kernel.org/r/20250624-wip-mca-updates-v4-7-236dd74f645f@amd.com
    
    v4->v5:
    * Keep MCx_MISC0[BlkPtr] check to be compliant with uarch.
    
    v3->v4:
    * No change.
    
    v2->v3:
    * Minor edit in commit message.
    * Added tags from Qiuxu and Tony.
    
    v1->v2:
    * New in v2.

 arch/x86/kernel/cpu/mce/amd.c | 34 +++-------------------------------
 1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f429451cafc8..580682af432d 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -252,9 +252,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
  */
 static DEFINE_PER_CPU(u64, bank_map);
 
-/* Map of banks that have more than MCA_MISC0 available. */
-static DEFINE_PER_CPU(u64, smca_misc_banks_map);
-
 static void amd_threshold_interrupt(void);
 static void amd_deferred_error_interrupt(void);
 
@@ -264,28 +261,6 @@ static void default_deferred_error_interrupt(void)
 }
 void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 
-static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
-{
-	u32 low, high;
-
-	/*
-	 * For SMCA enabled processors, BLKPTR field of the first MISC register
-	 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
-	 */
-	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-		return;
-
-	if (!(low & MCI_CONFIG_MCAX))
-		return;
-
-	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
-		return;
-
-	if (low & MASK_BLKPTR_LO)
-		per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
-
-}
-
 static void smca_configure(unsigned int bank, unsigned int cpu)
 {
 	u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
@@ -326,8 +301,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 		wrmsr(smca_config, low, high);
 	}
 
-	smca_set_misc_banks_map(bank, cpu);
-
 	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
 		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
 		return;
@@ -525,13 +498,12 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 smca_get_block_address(unsigned int bank, unsigned int block,
-				  unsigned int cpu)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block, u32 low)
 {
 	if (!block)
 		return MSR_AMD64_SMCA_MCx_MISC(bank);
 
-	if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
+	if (!(low & MASK_BLKPTR_LO))
 		return 0;
 
 	return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
@@ -547,7 +519,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 		return addr;
 
 	if (mce_flags.smca)
-		return smca_get_block_address(bank, block, cpu);
+		return smca_get_block_address(bank, block, low);
 
 	/* Fall back to method we used for older processors: */
 	switch (block) {

-- 
2.51.0
Re: [PATCH v5 03/20] x86/mce/amd: Remove smca_banks_map
Posted by Borislav Petkov 1 month, 1 week ago
On Mon, Aug 25, 2025 at 05:33:00PM +0000, Yazen Ghannam wrote:
> The MCx_MISC0[BlkPtr] field was used on legacy systems to hold a
> register offset for the next MCx_MISC* register. In this way, an
> implementation-specific number of registers can be discovered at
> runtime.
> 
> The MCAX/SMCA register space simplifies this by always including
> the MCx_MISC[1-4] registers. The MCx_MISC0[BlkPtr] field is used to
> indicate (true/false) whether any MCx_MISC[1-4] registers are present.
> 
> Currently, MCx_MISC0[BlkPtr] is checked early and cached to be used
> during sysfs init later. This is unnecessary as the MCx_MISC0 register
> is read again later anyway.
> 
> Remove the smca_banks_map variable as it is effectively redundant, and
> use a direct register/bit check instead.
> 
> Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Tested-by: Tony Luck <tony.luck@intel.com>
> Reviewed-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> ---
> 
> Notes:
>     Link:
>     https://lore.kernel.org/r/20250624-wip-mca-updates-v4-7-236dd74f645f@amd.com
>     
>     v4->v5:
>     * Keep MCx_MISC0[BlkPtr] check to be compliant with uarch.

I'm not sure I understand what that means...?

Anyway, some more cleanup ontop:

---
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 580682af432d..7e36bc0d0e6c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -498,17 +498,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 smca_get_block_address(unsigned int bank, unsigned int block, u32 low)
-{
-	if (!block)
-		return MSR_AMD64_SMCA_MCx_MISC(bank);
-
-	if (!(low & MASK_BLKPTR_LO))
-		return 0;
-
-	return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
-}
-
 static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 			     unsigned int bank, unsigned int block,
 			     unsigned int cpu)
@@ -518,8 +507,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 	if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
 		return addr;
 
-	if (mce_flags.smca)
-		return smca_get_block_address(bank, block, low);
+	if (mce_flags.smca) {
+		if (!block)
+			return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+		if (!(low & MASK_BLKPTR_LO))
+			return 0;
+
+		return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+	}
 
 	/* Fall back to method we used for older processors: */
 	switch (block) {


-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v5 03/20] x86/mce/amd: Remove smca_banks_map
Posted by Yazen Ghannam 1 month, 1 week ago
On Mon, Aug 25, 2025 at 08:19:38PM +0200, Borislav Petkov wrote:
> On Mon, Aug 25, 2025 at 05:33:00PM +0000, Yazen Ghannam wrote:
> > The MCx_MISC0[BlkPtr] field was used on legacy systems to hold a
> > register offset for the next MCx_MISC* register. In this way, an
> > implementation-specific number of registers can be discovered at
> > runtime.
> > 
> > The MCAX/SMCA register space simplifies this by always including
> > the MCx_MISC[1-4] registers. The MCx_MISC0[BlkPtr] field is used to
> > indicate (true/false) whether any MCx_MISC[1-4] registers are present.
> > 
> > Currently, MCx_MISC0[BlkPtr] is checked early and cached to be used
> > during sysfs init later. This is unnecessary as the MCx_MISC0 register
> > is read again later anyway.
> > 
> > Remove the smca_banks_map variable as it is effectively redundant, and
> > use a direct register/bit check instead.
> > 
> > Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > Tested-by: Tony Luck <tony.luck@intel.com>
> > Reviewed-by: Tony Luck <tony.luck@intel.com>
> > Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> > ---
> > 
> > Notes:
> >     Link:
> >     https://lore.kernel.org/r/20250624-wip-mca-updates-v4-7-236dd74f645f@amd.com
> >     
> >     v4->v5:
> >     * Keep MCx_MISC0[BlkPtr] check to be compliant with uarch.
> 
> I'm not sure I understand what that means...?

I completely removed the check below in previous revisions. But I put
it back to make sure we follow the microarchitecture guidelines, i.e.
the procedure(s) in documentation (APM, PPR, etc.).

	if (!(low & MASK_BLKPTR_LO))
		return 0;

> 
> Anyway, some more cleanup ontop:
> 
> ---
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index 580682af432d..7e36bc0d0e6c 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -498,17 +498,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
>  	wrmsr(MSR_CU_DEF_ERR, low, high);
>  }
>  
> -static u32 smca_get_block_address(unsigned int bank, unsigned int block, u32 low)
> -{
> -	if (!block)
> -		return MSR_AMD64_SMCA_MCx_MISC(bank);
> -
> -	if (!(low & MASK_BLKPTR_LO))
> -		return 0;
> -
> -	return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
> -}
> -
>  static u32 get_block_address(u32 current_addr, u32 low, u32 high,
>  			     unsigned int bank, unsigned int block,
>  			     unsigned int cpu)
> @@ -518,8 +507,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
>  	if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
>  		return addr;
>  
> -	if (mce_flags.smca)
> -		return smca_get_block_address(bank, block, low);
> +	if (mce_flags.smca) {
> +		if (!block)
> +			return MSR_AMD64_SMCA_MCx_MISC(bank);
> +
> +		if (!(low & MASK_BLKPTR_LO))
> +			return 0;
> +
> +		return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
> +	}
>  
>  	/* Fall back to method we used for older processors: */
>  	switch (block) {
> 
> 
> -- 

Looks good to me.

Thanks,
Yazen
[tip: ras/core] x86/mce/amd: Remove smca_banks_map
Posted by tip-bot2 for Yazen Ghannam 4 weeks ago
The following commit has been merged into the ras/core branch of tip:

Commit-ID:     b249288abde5190bb113ea5acef8af4ceac4957c
Gitweb:        https://git.kernel.org/tip/b249288abde5190bb113ea5acef8af4ceac4957c
Author:        Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate:    Mon, 25 Aug 2025 17:33:00 
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Fri, 05 Sep 2025 12:41:48 +02:00

x86/mce/amd: Remove smca_banks_map

The MCx_MISC0[BlkPtr] field was used on legacy systems to hold a register
offset for the next MCx_MISC* register. In this way, an implementation-specific
number of registers can be discovered at runtime.

The MCAX/SMCA register space simplifies this by always including the
MCx_MISC[1-4] registers. The MCx_MISC0[BlkPtr] field is used to indicate
(true/false) whether any MCx_MISC[1-4] registers are present.

Currently, MCx_MISC0[BlkPtr] is checked early and cached to be used during
sysfs init later. This is unnecessary as the MCx_MISC0 register is read again
later anyway.

Remove the smca_banks_map variable as it is effectively redundant, and use
a direct register/bit check instead.

  [ bp: Zap smca_get_block_address() too. ]

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250825-wip-mca-updates-v5-3-865768a2eef8@amd.com
---
 arch/x86/kernel/cpu/mce/amd.c | 50 ++++++----------------------------
 1 file changed, 9 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f429451..7e36bc0 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -252,9 +252,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
  */
 static DEFINE_PER_CPU(u64, bank_map);
 
-/* Map of banks that have more than MCA_MISC0 available. */
-static DEFINE_PER_CPU(u64, smca_misc_banks_map);
-
 static void amd_threshold_interrupt(void);
 static void amd_deferred_error_interrupt(void);
 
@@ -264,28 +261,6 @@ static void default_deferred_error_interrupt(void)
 }
 void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 
-static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
-{
-	u32 low, high;
-
-	/*
-	 * For SMCA enabled processors, BLKPTR field of the first MISC register
-	 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
-	 */
-	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
-		return;
-
-	if (!(low & MCI_CONFIG_MCAX))
-		return;
-
-	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
-		return;
-
-	if (low & MASK_BLKPTR_LO)
-		per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
-
-}
-
 static void smca_configure(unsigned int bank, unsigned int cpu)
 {
 	u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
@@ -326,8 +301,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 		wrmsr(smca_config, low, high);
 	}
 
-	smca_set_misc_banks_map(bank, cpu);
-
 	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
 		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
 		return;
@@ -525,18 +498,6 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 smca_get_block_address(unsigned int bank, unsigned int block,
-				  unsigned int cpu)
-{
-	if (!block)
-		return MSR_AMD64_SMCA_MCx_MISC(bank);
-
-	if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
-		return 0;
-
-	return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
-}
-
 static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 			     unsigned int bank, unsigned int block,
 			     unsigned int cpu)
@@ -546,8 +507,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 	if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
 		return addr;
 
-	if (mce_flags.smca)
-		return smca_get_block_address(bank, block, cpu);
+	if (mce_flags.smca) {
+		if (!block)
+			return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+		if (!(low & MASK_BLKPTR_LO))
+			return 0;
+
+		return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+	}
 
 	/* Fall back to method we used for older processors: */
 	switch (block) {