[PATCH v3 14/17] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems

Yazen Ghannam posted 17 patches 8 months, 1 week ago
There is a newer version of this series
[PATCH v3 14/17] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems
Posted by Yazen Ghannam 8 months, 1 week ago
Scalable MCA systems have a per-CPU register that gives the APIC LVT
offset for the thresholding and deferred error interrupts.

Currently, this register is read once to set up the deferred error
interrupt and then read again for each thresholding block. Furthermore,
the APIC LVT registers are configured each time, but they only need to
be configured once per CPU.

Move the APIC LVT setup to the early part of CPU init, so that the
registers are set up once. Also, this ensures that the kernel is ready
to service the interrupts before the individual error sources (each MCA
bank) are enabled.

Apply this change only to SMCA systems to avoid breaking any legacy
behavior. The deferred error interrupt is technically advertised by the
SUCCOR feature. However, this was first made available on SMCA systems.
Therefore, only set up the deferred error interrupt on SMCA systems and
simplify the code.

Tested-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lore.kernel.org/r/20250213-wip-mca-updates-v2-14-3636547fe05f@amd.com
    
    v2->v3:
    * Add tags from Tony.
    
    v1->v2:
    * Use new per-CPU struct.
    * Don't set up interrupt vectors.

 arch/x86/kernel/cpu/mce/amd.c | 113 ++++++++++++++++++------------------------
 1 file changed, 48 insertions(+), 65 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 62c4fe98d02a..9e226bdbdc40 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -43,9 +43,6 @@
 /* Deferred error settings */
 #define MSR_CU_DEF_ERR		0xC0000410
 #define MASK_DEF_LVTOFF		0x000000F0
-#define MASK_DEF_INT_TYPE	0x00000006
-#define DEF_LVT_OFF		0x2
-#define DEF_INT_TYPE_APIC	0x2
 
 /* Scalable MCA: */
 
@@ -57,6 +54,8 @@ static bool thresholding_irq_en;
 struct mce_amd_cpu_data {
 	mce_banks_t     thr_intr_banks;
 	mce_banks_t     dfr_intr_banks;
+	bool		thr_intr_en;
+	bool		dfr_intr_en;
 };
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
@@ -271,6 +270,7 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 
 static void smca_configure(unsigned int bank, unsigned int cpu)
 {
+	struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
 	u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
 	const struct smca_hwid *s_hwid;
 	unsigned int i, hwid_mcatype;
@@ -301,8 +301,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
 		 * APIC based interrupt. First, check that no interrupt has been
 		 * set.
 		 */
-		if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
-			__set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
+		if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) {
+			__set_bit(bank, data->dfr_intr_banks);
 			high |= BIT(5);
 		}
 
@@ -378,6 +378,14 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
 {
 	int msr = (hi & MASK_LVTOFF_HI) >> 20;
 
+	/*
+	 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+	 * the BIOS provides the value. The original field where LVT offset
+	 * was set is reserved. Return early here:
+	 */
+	if (mce_flags.smca)
+		return false;
+
 	if (apic < 0) {
 		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
 		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
@@ -386,14 +394,6 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
 	}
 
 	if (apic != msr) {
-		/*
-		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
-		 * the BIOS provides the value. The original field where LVT offset
-		 * was set is reserved. Return early here:
-		 */
-		if (mce_flags.smca)
-			return false;
-
 		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
 		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
 		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -474,41 +474,6 @@ static int setup_APIC_mce_threshold(int reserved, int new)
 	return reserved;
 }
 
-static int setup_APIC_deferred_error(int reserved, int new)
-{
-	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
-					      APIC_EILVT_MSG_FIX, 0))
-		return new;
-
-	return reserved;
-}
-
-static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
-{
-	u32 low = 0, high = 0;
-	int def_offset = -1, def_new;
-
-	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
-		return;
-
-	def_new = (low & MASK_DEF_LVTOFF) >> 4;
-	if (!(low & MASK_DEF_LVTOFF)) {
-		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
-		def_new = DEF_LVT_OFF;
-		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
-	}
-
-	def_offset = setup_APIC_deferred_error(def_offset, def_new);
-	if ((def_offset == def_new) &&
-	    (deferred_error_int_vector != amd_deferred_error_interrupt))
-		deferred_error_int_vector = amd_deferred_error_interrupt;
-
-	if (!mce_flags.smca)
-		low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
-
-	wrmsr(MSR_CU_DEF_ERR, low, high);
-}
-
 static u32 smca_get_block_address(unsigned int bank, unsigned int block,
 				  unsigned int cpu)
 {
@@ -551,7 +516,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 			int offset, u32 misc_high)
 {
 	unsigned int cpu = smp_processor_id();
-	u32 smca_low, smca_high;
 	struct threshold_block b;
 	int new;
 
@@ -571,18 +535,10 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 	__set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
 	b.interrupt_enable = 1;
 
-	if (!mce_flags.smca) {
-		new = (misc_high & MASK_LVTOFF_HI) >> 20;
-		goto set_offset;
-	}
-
-	/* Gather LVT offset for thresholding: */
-	if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
-		goto out;
-
-	new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+	if (mce_flags.smca)
+		goto done;
 
-set_offset:
+	new = (misc_high & MASK_LVTOFF_HI) >> 20;
 	offset = setup_APIC_mce_threshold(offset, new);
 	if (offset == new)
 		thresholding_irq_en = true;
@@ -590,7 +546,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 done:
 	mce_threshold_block_init(&b, offset);
 
-out:
 	return offset;
 }
 
@@ -659,6 +614,32 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
 		wrmsrq(MSR_K7_HWCR, hwcr);
 }
 
+/*
+ * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is
+ * ready to send interrupts.
+ *
+ * Individual error sources are enabled later during per-bank init.
+ */
+static void smca_enable_interrupt_vectors(void)
+{
+	struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
+	u64 mca_intr_cfg, offset;
+
+	if (!mce_flags.smca || !mce_flags.succor)
+		return;
+
+	if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg))
+		return;
+
+	offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12;
+	if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0))
+		data->thr_intr_en = true;
+
+	offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
+	if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0))
+		data->dfr_intr_en = true;
+}
+
 static void amd_apply_quirks(struct cpuinfo_x86 *c)
 {
 	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
@@ -690,11 +671,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 
 	amd_apply_quirks(c);
 	mce_flags.amd_threshold	 = 1;
+	smca_enable_interrupt_vectors();
 
 	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
-		if (mce_flags.smca)
+		if (mce_flags.smca) {
 			smca_configure(bank, cpu);
 
+			if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en)
+				continue;
+		}
+
 		disable_err_thresholding(c, bank);
 
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -715,9 +701,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			offset = prepare_threshold_block(bank, block, address, offset, high);
 		}
 	}
-
-	if (mce_flags.succor)
-		deferred_error_interrupt_enable(c);
 }
 
 void mce_smca_cpu_init(void)

-- 
2.49.0
Re: [PATCH v3 14/17] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems
Posted by Borislav Petkov 7 months, 2 weeks ago
On Tue, Apr 15, 2025 at 02:55:09PM +0000, Yazen Ghannam wrote:
> -static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
> -{
> -	u32 low = 0, high = 0;
> -	int def_offset = -1, def_new;
> -
> -	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
> -		return;
> -
> -	def_new = (low & MASK_DEF_LVTOFF) >> 4;
> -	if (!(low & MASK_DEF_LVTOFF)) {
> -		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");

I'm not sure why it is ok to remove that one.

/me goes and digs into lore...

Here's why we did it back then:

https://lore.kernel.org/all/5547906E.3060701@amd.com/

and apparently that was for some bulldozer BIOS.

How can we trust Zen BIOS all of a sudden?

;-)

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v3 14/17] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems
Posted by Yazen Ghannam 7 months, 2 weeks ago
On Wed, May 07, 2025 at 09:35:39PM +0200, Borislav Petkov wrote:
> On Tue, Apr 15, 2025 at 02:55:09PM +0000, Yazen Ghannam wrote:
> > -static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
> > -{
> > -	u32 low = 0, high = 0;
> > -	int def_offset = -1, def_new;
> > -
> > -	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
> > -		return;
> > -
> > -	def_new = (low & MASK_DEF_LVTOFF) >> 4;
> > -	if (!(low & MASK_DEF_LVTOFF)) {
> > -		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
> 
> I'm not sure why it is ok to remove that one.
> 
> /me goes and digs into lore...
> 
> Here's why we did it back then:
> 
> https://lore.kernel.org/all/5547906E.3060701@amd.com/
> 
> and apparently that was for some bulldozer BIOS.
> 
> How can we trust Zen BIOS all of a sudden?
> 
> ;-)
> 

Let me flip it around. Why is this check needed at all? Was there ever a
real issue to resolve? It seems to me the deferred error updates are
just following what other code did.

I figure the reason to have the platform give the offset to the OS is so
the OS doesn't hard code it (in case it needs to change). These offsets
were hard coded in the past (conflict between IBS/THR), and it caused
problems when the offsets switched in the hardware. The registers that
give the offsets were introduced soon after, I think.

So the checks we do are defeating the purpose. The OS is still hard
coding the offsets. The goal of this change is to follow the intent of
the design. Sometimes we need to let go and trust [the BIOS]. ;)

Now we could update the checks to verify that an offset is not used for
multiple interrupt sources.

Let's follow up with the design folks to be sure.

Thanks,
Yazen
Re: [PATCH v3 14/17] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems
Posted by Borislav Petkov 7 months, 2 weeks ago
On Thu, May 08, 2025 at 11:53:00AM -0400, Yazen Ghannam wrote:
> Let me flip it around. Why is this check needed at all?

As I said above, some BIOS f*ckup.

> Was there ever a real issue to resolve?

Not that I remember...

> It seems to me the deferred error updates are just following what other code
> did.

Let's search the web for it:

* https://bbs.archlinux.org/viewtopic.php?id=299379

- silly guests, who cares

* https://gitlab.com/qemu-project/qemu/-/issues/2571

- another misguided qemu...

Aha:

https://lore.kernel.org/lkml/20241219124426.325747-1-pbonzini@redhat.com

the usual virt silly stuff.

> I figure the reason to have the platform give the offset to the OS is so
> the OS doesn't hard code it (in case it needs to change). These offsets
> were hard coded in the past (conflict between IBS/THR), and it caused
> problems when the offsets switched in the hardware. The registers that
> give the offsets were introduced soon after, I think.

Right.

> So the checks we do are defeating the purpose. The OS is still hard
> coding the offsets. The goal of this change is to follow the intent of
> the design. Sometimes we need to let go and trust [the BIOS]. ;)

Look at you being silly :-P

> Now we could update the checks to verify that an offset is not used for
> multiple interrupt sources.

... or, we won't do anything until someone in BIOS f*cks up again.
 
> Let's follow up with the design folks to be sure.

Yah, sounds like we will have to verify them after all. You can see how
universally widespread the trust in BIOS is...

:-P

In any case, whatever you do, when you axe off stuff, write in the commit
message why you do so. Silently removing it is making me want to know why it
is ok now.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v3 14/17] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems
Posted by Yazen Ghannam 7 months, 1 week ago
On Fri, May 09, 2025 at 04:08:21PM +0200, Borislav Petkov wrote:
> On Thu, May 08, 2025 at 11:53:00AM -0400, Yazen Ghannam wrote:
> > Let me flip it around. Why is this check needed at all?
> 
> As I said above, some BIOS f*ckup.
> 
> > Was there ever a real issue to resolve?
> 
> Not that I remember...
> 
> > It seems to me the deferred error updates are just following what other code
> > did.
> 
> Let's search the web for it:
> 
> * https://bbs.archlinux.org/viewtopic.php?id=299379
> 
> - silly guests, who cares
> 
> * https://gitlab.com/qemu-project/qemu/-/issues/2571
> 
> - another misguided qemu...
> 
> Aha:
> 
> https://lore.kernel.org/lkml/20241219124426.325747-1-pbonzini@redhat.com
> 
> the usual virt silly stuff.
> 
> > I figure the reason to have the platform give the offset to the OS is so
> > the OS doesn't hard code it (in case it needs to change). These offsets
> > were hard coded in the past (conflict between IBS/THR), and it caused
> > problems when the offsets switched in the hardware. The registers that
> > give the offsets were introduced soon after, I think.
> 
> Right.
> 
> > So the checks we do are defeating the purpose. The OS is still hard
> > coding the offsets. The goal of this change is to follow the intent of
> > the design. Sometimes we need to let go and trust [the BIOS]. ;)
> 
> Look at you being silly :-P
> 
> > Now we could update the checks to verify that an offset is not used for
> > multiple interrupt sources.
> 
> ... or, we won't do anything until someone in BIOS f*cks up again.
>  
> > Let's follow up with the design folks to be sure.
> 
> Yah, sounds like we will have to verify them after all. You can see how
> universally widespread the trust in BIOS is...
> 
> :-P
> 
> In any case, whatever you do, when you axe off stuff, write in the commit
> message why you do so. Silently removing it is making me want to know why it
> is ok now.
> 

Right, it sounds like we should take the values from the platform and
just make sure they aren't used for multiple sources. In other words, we
don't hard code the offsets, and we verify that each source has a unique
offset.

I agree we can leave this for now. So I'll drop this part from the patch.

I think this topic can be a separate set, and it should cover all APIC
LVT sources including IBS. I'll add it to the to-do list. :)

Thanks,
Yazen