AMD systems optionally support a deferred error interrupt. The interrupt
should be used as another signal to trigger MCA polling. This is similar
to how other MCA interrupts are handled.
Deferred errors do not require any special handling related to the
interrupt, e.g. resetting or rearming the interrupt, etc.
However, Scalable MCA systems include a pair of registers, MCA_DESTAT
and MCA_DEADDR, that should be checked for valid errors. This check
should be done whenever MCA registers are polled. Currently, the
deferred error interrupt does this check, but the MCA polling function
does not.
Call the MCA polling function when handling the deferred error
interrupt. This keeps all "polling" cases in a common function.
Call the polling function only for banks that have the deferred error
interrupt enabled.
Add an SMCA status check helper. This will do the same status check and
register clearing that the interrupt handler has done. And it extends
the common polling flow to find AMD deferred errors.
Add a flag to poll for Deferred errors, similar to MCP_UC for
uncorrectable errors. This will do checks specific to deferred errors
and fall back to common UC/CE checks otherwise.
Also, clear the MCA_DESTAT register at the end of the handler rather
than the beginning. This maintains the procedure that the 'status'
register must be cleared as the final step.
Remove old code whose functionality is already covered in the common MCA
code.
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
Notes:
Link:
https://lore.kernel.org/r/20250908-wip-mca-updates-v6-9-eef5d6c74b9c@amd.com
v6->v7:
* Rework DFR error handling to avoid reporting bogus errors.
* Clear MCA_DESTAT at the end of handler. (Nikolay)
* Link: https://lore.kernel.org/r/20250915010010.3547-1-spasswolf@web.de
v5->v6:
* Move status clearing code to new helper.
v4->v5:
* No change.
v3->v4:
* Add kflag for checking DFR registers.
v2->v3:
* Add tags from Qiuxu and Tony.
v1->v2:
* Keep code comment.
* Log directly from helper function rather than pass values.
Link:
https://lore.kernel.org/r/20250213-wip-mca-updates-v2-13-3636547fe05f@amd.com
arch/x86/include/asm/mce.h | 7 +++
arch/x86/kernel/cpu/mce/amd.c | 111 +++++------------------------------------
arch/x86/kernel/cpu/mce/core.c | 51 ++++++++++++++++++-
3 files changed, 70 insertions(+), 99 deletions(-)
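(Aside: a condensed sketch of how machine_check_poll() honors a
caller-supplied bank mask, which is the mechanism the per-bank filtering
described in the commit message relies on. The loop is abridged from
arch/x86/kernel/cpu/mce/core.c; everything elided is marked as such.)

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		/* Skip banks that are disabled or not requested by the caller. */
		if (!mce_banks[i].ctl || !test_bit(i, *b))
			continue;

		/* ... read MCA_STATUS and decide via should_log_poll_error() ... */
	}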
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 31e3cb550fb3..1482648c8508 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -165,6 +165,12 @@
*/
#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+/*
+ * Indicates that handler should check and clear Deferred error registers
+ * rather than common ones.
+ */
+#define MCE_CHECK_DFR_REGS BIT_ULL(8)
+
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
@@ -293,6 +299,7 @@ enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */
+ MCP_DFR = BIT(3), /* log deferred errors */
};
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index ac6a98aa7bc2..64aa7ecfd332 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -56,6 +56,7 @@ static bool thresholding_irq_en;
struct mce_amd_cpu_data {
mce_banks_t thr_intr_banks;
+ mce_banks_t dfr_intr_banks;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
@@ -300,8 +301,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
* APIC based interrupt. First, check that no interrupt has been
* set.
*/
- if ((low & BIT(5)) && !((high >> 5) & 0x3))
+ if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
+ __set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
high |= BIT(5);
+ }
this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
@@ -792,37 +795,6 @@ bool amd_mce_usable_address(struct mce *m)
return false;
}
-static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
-{
- struct mce_hw_err err;
- struct mce *m = &err.m;
-
- mce_prep_record(&err);
-
- m->status = status;
- m->misc = misc;
- m->bank = bank;
- m->tsc = rdtsc();
-
- if (m->status & MCI_STATUS_ADDRV) {
- m->addr = addr;
-
- smca_extract_err_addr(m);
- }
-
- if (mce_flags.smca) {
- rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
-
- if (m->status & MCI_STATUS_SYNDV) {
- rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
- rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
- rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
- }
- }
-
- mce_log(&err);
-}
-
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
@@ -832,75 +804,10 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
apic_eoi();
}
-/*
- * Returns true if the logged error is deferred. False, otherwise.
- */
-static inline bool
-_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
-{
- u64 status, addr = 0;
-
- rdmsrq(msr_stat, status);
- if (!(status & MCI_STATUS_VAL))
- return false;
-
- if (status & MCI_STATUS_ADDRV)
- rdmsrq(msr_addr, addr);
-
- __log_error(bank, status, addr, misc);
-
- wrmsrq(msr_stat, 0);
-
- return status & MCI_STATUS_DEFERRED;
-}
-
-static bool _log_error_deferred(unsigned int bank, u32 misc)
-{
- if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
- mca_msr_reg(bank, MCA_ADDR), misc))
- return false;
-
- /*
- * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
- * Return true here to avoid accessing these registers.
- */
- if (!mce_flags.smca)
- return true;
-
- /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
- wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
- return true;
-}
-
-/*
- * We have three scenarios for checking for Deferred errors:
- *
- * 1) Non-SMCA systems check MCA_STATUS and log error if found.
- * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
- * clear MCA_DESTAT.
- * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
- * log it.
- */
-static void log_error_deferred(unsigned int bank)
-{
- if (_log_error_deferred(bank, 0))
- return;
-
- /*
- * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
- * for a valid error.
- */
- _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
- MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
-}
-
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- unsigned int bank;
-
- for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
- log_error_deferred(bank);
+ machine_check_poll(MCP_TIMESTAMP | MCP_DFR, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
}
static void reset_block(struct threshold_block *block)
@@ -952,6 +859,14 @@ void amd_clear_bank(struct mce *m)
{
amd_reset_thr_limit(m->bank);
+ /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
+ if (m->status & MCI_STATUS_DEFERRED)
+ mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
+
+ /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ return;
+
mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 460e90a1a0b1..39725df7d35c 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
+ else
+ m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
/*
* Mask the reported address by the reported granularity.
@@ -714,6 +717,42 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
+/*
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ * clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ * log it.
+ */
+static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+{
+ struct mce *m = &err->m;
+
+ /*
+ * If the MCA_STATUS register has a deferred error, then continue using it as
+ * the status register.
+ *
+ * MCA_DESTAT will be cleared at the end of the handler.
+ */
+ if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
+ return true;
+
+ /*
+ * If the MCA_DESTAT register has a deferred error, then use it instead.
+ *
+ * MCA_STATUS will not be cleared at the end of the handler.
+ */
+ m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
+ if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
+ m->kflags |= MCE_CHECK_DFR_REGS;
+ return true;
+ }
+
+ return false;
+}
+
/*
* Newer Intel systems that support software error
* recovery need to make additional checks. Other
@@ -740,10 +779,17 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
{
struct mce *m = &err->m;
+ if (flags & MCP_DFR)
+ return smca_should_log_poll_error(flags, err);
+
/* If this entry is not valid, ignore it. */
if (!(m->status & MCI_STATUS_VAL))
return false;
+ /* Ignore deferred errors if not looking for them (MCP_DFR not set). */
+ if (m->status & MCI_STATUS_DEFERRED)
+ return false;
+
/*
* If we are logging everything (at CPU online) or this
* is a corrected error, then we must log it.
@@ -1878,6 +1924,9 @@ static void __mcheck_cpu_init_prepare_banks(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
+
+ if (mce_flags.smca)
+ machine_check_poll(MCP_DFR | MCP_QUEUE_LOG, &all_banks);
}
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
--
2.51.0
On Thu, Oct 16, 2025 at 04:37:47PM +0000, Yazen Ghannam wrote:
> @@ -1878,6 +1924,9 @@ static void __mcheck_cpu_init_prepare_banks(void)
>
> bitmap_fill(all_banks, MAX_NR_BANKS);
> machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
> +
> + if (mce_flags.smca)
> + machine_check_poll(MCP_DFR | MCP_QUEUE_LOG, &all_banks);
So you're going to run the poll again just for DFR errors?!
What for?
I think this is enough:
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 1482648c8508..7d6588195d56 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -299,7 +299,6 @@ enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */
- MCP_DFR = BIT(3), /* log deferred errors */
};
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 64aa7ecfd332..d9f9ee7db5c8 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -807,7 +807,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- machine_check_poll(MCP_TIMESTAMP | MCP_DFR, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
+ machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
}
static void reset_block(struct threshold_block *block)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 39725df7d35c..7be062429ce3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -779,17 +779,13 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
{
struct mce *m = &err->m;
- if (flags & MCP_DFR)
+ if (mce_flags.smca)
return smca_should_log_poll_error(flags, err);
/* If this entry is not valid, ignore it. */
if (!(m->status & MCI_STATUS_VAL))
return false;
- /* Ignore deferred errors if not looking for them (MCP_DFR not set). */
- if (m->status & MCI_STATUS_DEFERRED)
- return false;
-
/*
* If we are logging everything (at CPU online) or this
* is a corrected error, then we must log it.
@@ -1924,9 +1920,6 @@ static void __mcheck_cpu_init_prepare_banks(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
-
- if (mce_flags.smca)
- machine_check_poll(MCP_DFR | MCP_QUEUE_LOG, &all_banks);
}
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, Oct 24, 2025 at 05:03:33PM +0200, Borislav Petkov wrote:
> On Thu, Oct 16, 2025 at 04:37:47PM +0000, Yazen Ghannam wrote:
> > @@ -1878,6 +1924,9 @@ static void __mcheck_cpu_init_prepare_banks(void)
> >
> > bitmap_fill(all_banks, MAX_NR_BANKS);
> > machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
> > +
> > + if (mce_flags.smca)
> > + machine_check_poll(MCP_DFR | MCP_QUEUE_LOG, &all_banks);
>
> So you're going to run the poll again just for DFR errors?!
>
> What for?
Yeah, I guess I went too far with trying to catch bogus errors.
>
> I think this is enough:
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 1482648c8508..7d6588195d56 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -299,7 +299,6 @@ enum mcp_flags {
> MCP_TIMESTAMP = BIT(0), /* log time stamp */
> MCP_UC = BIT(1), /* log uncorrected errors */
> MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */
> - MCP_DFR = BIT(3), /* log deferred errors */
> };
>
> void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index 64aa7ecfd332..d9f9ee7db5c8 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -807,7 +807,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
> /* APIC interrupt handler for deferred errors */
> static void amd_deferred_error_interrupt(void)
> {
> - machine_check_poll(MCP_TIMESTAMP | MCP_DFR, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
> + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
> }
>
> static void reset_block(struct threshold_block *block)
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 39725df7d35c..7be062429ce3 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -779,17 +779,13 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> {
> struct mce *m = &err->m;
>
> - if (flags & MCP_DFR)
> + if (mce_flags.smca)
> return smca_should_log_poll_error(flags, err);
>
> /* If this entry is not valid, ignore it. */
> if (!(m->status & MCI_STATUS_VAL))
> return false;
>
> - /* Ignore deferred errors if not looking for them (MCP_DFR not set). */
> - if (m->status & MCI_STATUS_DEFERRED)
> - return false;
> -
> /*
> * If we are logging everything (at CPU online) or this
> * is a corrected error, then we must log it.
> @@ -1924,9 +1920,6 @@ static void __mcheck_cpu_init_prepare_banks(void)
>
> bitmap_fill(all_banks, MAX_NR_BANKS);
> machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
> -
> - if (mce_flags.smca)
> - machine_check_poll(MCP_DFR | MCP_QUEUE_LOG, &all_banks);
> }
>
> for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
>
>
>
This looks good to me.
Should I send another revision?
Thanks,
Yazen
On Fri, Oct 24, 2025 at 04:30:12PM -0400, Yazen Ghannam wrote:
> Should I send another revision?
Nah, I'm not done simplifying this yet. :-P
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, Oct 24, 2025 at 11:27:23PM +0200, Borislav Petkov wrote:
> On Fri, Oct 24, 2025 at 04:30:12PM -0400, Yazen Ghannam wrote:
> > Should I send another revision?
>
> Nah, I'm not done simplifying this yet. :-P
Yeah, no, looks ok now:
---
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Thu, 16 Oct 2025 16:37:47 +0000
Subject: [PATCH] x86/mce: Unify AMD DFR handler with MCA Polling
AMD systems optionally support a deferred error interrupt. The interrupt
should be used as another signal to trigger MCA polling. This is similar to
how other MCA interrupts are handled.
Deferred errors do not require any special handling related to the interrupt,
e.g. resetting or rearming the interrupt, etc.
However, Scalable MCA systems include a pair of registers, MCA_DESTAT and
MCA_DEADDR, that should be checked for valid errors. This check should be done
whenever MCA registers are polled. Currently, the deferred error interrupt
does this check, but the MCA polling function does not.
Call the MCA polling function when handling the deferred error interrupt. This
keeps all "polling" cases in a common function.
Add an SMCA status check helper. This will do the same status check and
register clearing that the interrupt handler has done. And it extends the
common polling flow to find AMD deferred errors.
Clear the MCA_DESTAT register at the end of the handler rather than the
beginning. This maintains the procedure that the 'status' register must be
cleared as the final step.
[ bp: Zap commit message pieces explaining what the patch does;
zap unnecessary special-casing of deferred errors. ]
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/all/20251016-wip-mca-updates-v7-0-5c139a4062cb@amd.com
---
arch/x86/include/asm/mce.h | 6 ++
arch/x86/kernel/cpu/mce/amd.c | 111 ++++-----------------------------
arch/x86/kernel/cpu/mce/core.c | 44 ++++++++++++-
3 files changed, 62 insertions(+), 99 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 31e3cb550fb3..7d6588195d56 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -165,6 +165,12 @@
*/
#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+/*
+ * Indicates that handler should check and clear Deferred error registers
+ * rather than common ones.
+ */
+#define MCE_CHECK_DFR_REGS BIT_ULL(8)
+
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index ac6a98aa7bc2..d9f9ee7db5c8 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -56,6 +56,7 @@ static bool thresholding_irq_en;
struct mce_amd_cpu_data {
mce_banks_t thr_intr_banks;
+ mce_banks_t dfr_intr_banks;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
@@ -300,8 +301,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
* APIC based interrupt. First, check that no interrupt has been
* set.
*/
- if ((low & BIT(5)) && !((high >> 5) & 0x3))
+ if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
+ __set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
high |= BIT(5);
+ }
this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
@@ -792,37 +795,6 @@ bool amd_mce_usable_address(struct mce *m)
return false;
}
-static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
-{
- struct mce_hw_err err;
- struct mce *m = &err.m;
-
- mce_prep_record(&err);
-
- m->status = status;
- m->misc = misc;
- m->bank = bank;
- m->tsc = rdtsc();
-
- if (m->status & MCI_STATUS_ADDRV) {
- m->addr = addr;
-
- smca_extract_err_addr(m);
- }
-
- if (mce_flags.smca) {
- rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
-
- if (m->status & MCI_STATUS_SYNDV) {
- rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
- rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
- rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
- }
- }
-
- mce_log(&err);
-}
-
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
@@ -832,75 +804,10 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
apic_eoi();
}
-/*
- * Returns true if the logged error is deferred. False, otherwise.
- */
-static inline bool
-_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
-{
- u64 status, addr = 0;
-
- rdmsrq(msr_stat, status);
- if (!(status & MCI_STATUS_VAL))
- return false;
-
- if (status & MCI_STATUS_ADDRV)
- rdmsrq(msr_addr, addr);
-
- __log_error(bank, status, addr, misc);
-
- wrmsrq(msr_stat, 0);
-
- return status & MCI_STATUS_DEFERRED;
-}
-
-static bool _log_error_deferred(unsigned int bank, u32 misc)
-{
- if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
- mca_msr_reg(bank, MCA_ADDR), misc))
- return false;
-
- /*
- * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
- * Return true here to avoid accessing these registers.
- */
- if (!mce_flags.smca)
- return true;
-
- /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
- wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
- return true;
-}
-
-/*
- * We have three scenarios for checking for Deferred errors:
- *
- * 1) Non-SMCA systems check MCA_STATUS and log error if found.
- * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
- * clear MCA_DESTAT.
- * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
- * log it.
- */
-static void log_error_deferred(unsigned int bank)
-{
- if (_log_error_deferred(bank, 0))
- return;
-
- /*
- * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
- * for a valid error.
- */
- _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
- MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
-}
-
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- unsigned int bank;
-
- for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
- log_error_deferred(bank);
+ machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
}
static void reset_block(struct threshold_block *block)
@@ -952,6 +859,14 @@ void amd_clear_bank(struct mce *m)
{
amd_reset_thr_limit(m->bank);
+ /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
+ if (m->status & MCI_STATUS_DEFERRED)
+ mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
+
+ /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ return;
+
mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 460e90a1a0b1..7be062429ce3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
+ else
+ m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
/*
* Mask the reported address by the reported granularity.
@@ -714,6 +717,42 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
+/*
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ * clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ * log it.
+ */
+static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+{
+ struct mce *m = &err->m;
+
+ /*
+ * If the MCA_STATUS register has a deferred error, then continue using it as
+ * the status register.
+ *
+ * MCA_DESTAT will be cleared at the end of the handler.
+ */
+ if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
+ return true;
+
+ /*
+ * If the MCA_DESTAT register has a deferred error, then use it instead.
+ *
+ * MCA_STATUS will not be cleared at the end of the handler.
+ */
+ m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
+ if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
+ m->kflags |= MCE_CHECK_DFR_REGS;
+ return true;
+ }
+
+ return false;
+}
+
/*
* Newer Intel systems that support software error
* recovery need to make additional checks. Other
@@ -740,6 +779,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
{
struct mce *m = &err->m;
+ if (mce_flags.smca)
+ return smca_should_log_poll_error(flags, err);
+
/* If this entry is not valid, ignore it. */
if (!(m->status & MCI_STATUS_VAL))
return false;
--
2.51.0
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Sat, Oct 25, 2025 at 05:03:04PM +0200, Borislav Petkov wrote:
> On Fri, Oct 24, 2025 at 11:27:23PM +0200, Borislav Petkov wrote:
> > On Fri, Oct 24, 2025 at 04:30:12PM -0400, Yazen Ghannam wrote:
> > > Should I send another revision?
> >
> > Nah, I'm not done simplifying this yet. :-P
>
> Yeah, no, looks ok now:
>
> ---
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> Date: Thu, 16 Oct 2025 16:37:47 +0000
> Subject: [PATCH] x86/mce: Unify AMD DFR handler with MCA Polling
>
> AMD systems optionally support a deferred error interrupt. The interrupt
> should be used as another signal to trigger MCA polling. This is similar to
> how other MCA interrupts are handled.
>
> Deferred errors do not require any special handling related to the interrupt,
> e.g. resetting or rearming the interrupt, etc.
>
> However, Scalable MCA systems include a pair of registers, MCA_DESTAT and
> MCA_DEADDR, that should be checked for valid errors. This check should be done
> whenever MCA registers are polled. Currently, the deferred error interrupt
> does this check, but the MCA polling function does not.
>
> Call the MCA polling function when handling the deferred error interrupt. This
> keeps all "polling" cases in a common function.
>
> Add an SMCA status check helper. This will do the same status check and
> register clearing that the interrupt handler has done. And it extends the
> common polling flow to find AMD deferred errors.
>
> Clear the MCA_DESTAT register at the end of the handler rather than the
> beginning. This maintains the procedure that the 'status' register must be
> cleared as the final step.
>
> [ bp: Zap commit message pieces explaining what the patch does;
> zap unnecessary special-casing of deferred errors. ]
>
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
> Link: https://lore.kernel.org/all/20251016-wip-mca-updates-v7-0-5c139a4062cb@amd.com
> ---
> arch/x86/include/asm/mce.h | 6 ++
> arch/x86/kernel/cpu/mce/amd.c | 111 ++++-----------------------------
> arch/x86/kernel/cpu/mce/core.c | 44 ++++++++++++-
> 3 files changed, 62 insertions(+), 99 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 31e3cb550fb3..7d6588195d56 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -165,6 +165,12 @@
> */
> #define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
>
> +/*
> + * Indicates that handler should check and clear Deferred error registers
> + * rather than common ones.
> + */
> +#define MCE_CHECK_DFR_REGS BIT_ULL(8)
> +
> /*
> * This structure contains all data related to the MCE log. Also
> * carries a signature to make it easier to find from external
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index ac6a98aa7bc2..d9f9ee7db5c8 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -56,6 +56,7 @@ static bool thresholding_irq_en;
>
> struct mce_amd_cpu_data {
> mce_banks_t thr_intr_banks;
> + mce_banks_t dfr_intr_banks;
> };
>
> static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
> @@ -300,8 +301,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
> * APIC based interrupt. First, check that no interrupt has been
> * set.
> */
> - if ((low & BIT(5)) && !((high >> 5) & 0x3))
> + if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
> + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
> high |= BIT(5);
> + }
>
> this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
>
> @@ -792,37 +795,6 @@ bool amd_mce_usable_address(struct mce *m)
> return false;
> }
>
> -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
> -{
> - struct mce_hw_err err;
> - struct mce *m = &err.m;
> -
> - mce_prep_record(&err);
> -
> - m->status = status;
> - m->misc = misc;
> - m->bank = bank;
> - m->tsc = rdtsc();
> -
> - if (m->status & MCI_STATUS_ADDRV) {
> - m->addr = addr;
> -
> - smca_extract_err_addr(m);
> - }
> -
> - if (mce_flags.smca) {
> - rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
> -
> - if (m->status & MCI_STATUS_SYNDV) {
> - rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
> - rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
> - rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
> - }
> - }
> -
> - mce_log(&err);
> -}
> -
> DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
> {
> trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
> @@ -832,75 +804,10 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
> apic_eoi();
> }
>
> -/*
> - * Returns true if the logged error is deferred. False, otherwise.
> - */
> -static inline bool
> -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
> -{
> - u64 status, addr = 0;
> -
> - rdmsrq(msr_stat, status);
> - if (!(status & MCI_STATUS_VAL))
> - return false;
> -
> - if (status & MCI_STATUS_ADDRV)
> - rdmsrq(msr_addr, addr);
> -
> - __log_error(bank, status, addr, misc);
> -
> - wrmsrq(msr_stat, 0);
> -
> - return status & MCI_STATUS_DEFERRED;
> -}
> -
> -static bool _log_error_deferred(unsigned int bank, u32 misc)
> -{
> - if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
> - mca_msr_reg(bank, MCA_ADDR), misc))
> - return false;
> -
> - /*
> - * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
> - * Return true here to avoid accessing these registers.
> - */
> - if (!mce_flags.smca)
> - return true;
> -
> - /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
> - wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
> - return true;
> -}
> -
> -/*
> - * We have three scenarios for checking for Deferred errors:
> - *
> - * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> - * clear MCA_DESTAT.
> - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> - * log it.
> - */
> -static void log_error_deferred(unsigned int bank)
> -{
> - if (_log_error_deferred(bank, 0))
> - return;
> -
> - /*
> - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
> - * for a valid error.
> - */
> - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
> - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
> -}
> -
> /* APIC interrupt handler for deferred errors */
> static void amd_deferred_error_interrupt(void)
> {
> - unsigned int bank;
> -
> - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
> - log_error_deferred(bank);
> + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
> }
>
> static void reset_block(struct threshold_block *block)
> @@ -952,6 +859,14 @@ void amd_clear_bank(struct mce *m)
> {
> amd_reset_thr_limit(m->bank);
>
> + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
> + if (m->status & MCI_STATUS_DEFERRED)
> + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
> +
> + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
> + if (m->kflags & MCE_CHECK_DFR_REGS)
> + return;
> +
> mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
> }
>
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 460e90a1a0b1..7be062429ce3 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
> m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
>
> if (m->status & MCI_STATUS_ADDRV) {
> - m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
> + if (m->kflags & MCE_CHECK_DFR_REGS)
> + m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
> + else
> + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
>
> /*
> * Mask the reported address by the reported granularity.
> @@ -714,6 +717,42 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
>
> DEFINE_PER_CPU(unsigned, mce_poll_count);
>
> +/*
> + * We have three scenarios for checking for Deferred errors:
> + *
> + * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> + * clear MCA_DESTAT.
> + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> + * log it.
> + */
> +static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> +{
> + struct mce *m = &err->m;
> +
> + /*
> + * If the MCA_STATUS register has a deferred error, then continue using it as
> + * the status register.
> + *
> + * MCA_DESTAT will be cleared at the end of the handler.
> + */
> + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
> + return true;
> +
> + /*
> + * If the MCA_DESTAT register has a deferred error, then use it instead.
> + *
> + * MCA_STATUS will not be cleared at the end of the handler.
> + */
> + m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
> + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
> + m->kflags |= MCE_CHECK_DFR_REGS;
> + return true;
> + }
> +
> + return false;
> +}
> +
No, this still isn't right. Sorry, I had a brain freeze before.
This function only returns true for valid deferred errors. Other errors
return false.
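(To make the gap concrete, a hypothetical case; the flag names are from
<asm/mce.h>, but the scenario is illustrative and not from the patch:)

	/* A corrected error sitting in MCA_STATUS: VAL=1, UC=0, DEFERRED=0. */
	m->status = MCI_STATUS_VAL | MCI_STATUS_EN;

	/*
	 * (VAL && DEFERRED) is false, so the first check does not log it.
	 * The helper then overwrites m->status with the MCA_DESTAT contents
	 * and returns false, silently dropping the valid corrected error.
	 */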
> /*
> * Newer Intel systems that support software error
> * recovery need to make additional checks. Other
> @@ -740,6 +779,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> {
> struct mce *m = &err->m;
>
> + if (mce_flags.smca)
> + return smca_should_log_poll_error(flags, err);
> +
This will never find corrected errors or uncorrected (non-deferred)
errors. That's one of the reasons to add the MCP_DFR flag.
Otherwise, we'd need to include some of the same checks from below.
> /* If this entry is not valid, ignore it. */
> if (!(m->status & MCI_STATUS_VAL))
> return false;
> --
Thanks,
Yazen
On Mon, Oct 27, 2025 at 09:35:42AM -0400, Yazen Ghannam wrote:
> On Sat, Oct 25, 2025 at 05:03:04PM +0200, Borislav Petkov wrote:
> > On Fri, Oct 24, 2025 at 11:27:23PM +0200, Borislav Petkov wrote:
> > > On Fri, Oct 24, 2025 at 04:30:12PM -0400, Yazen Ghannam wrote:
> > > > Should I send another revision?
> > >
> > > Nah, I'm not done simplifying this yet. :-P
> >
> > Yeah, no, looks ok now:
> >
Here's another fixup. I also simplified the function parameters and
tweaked the code comments.
Thanks,
Yazen
---
arch/x86/kernel/cpu/mce/core.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7be062429ce3..eaee48b8b339 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -726,21 +726,18 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
* 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
* log it.
*/
-static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+static bool smca_should_log_poll_error(struct mce *m)
{
- struct mce *m = &err->m;
-
/*
- * If the MCA_STATUS register has a deferred error, then continue using it as
- * the status register.
- *
- * MCA_DESTAT will be cleared at the end of the handler.
+ * If MCA_STATUS happens to have a deferred error, then MCA_DESTAT will
+ * be cleared at the end of the handler.
*/
- if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
+ if (m->status & MCI_STATUS_VAL)
return true;
/*
- * If the MCA_DESTAT register has a deferred error, then use it instead.
+ * Use the MCA_DESTAT register if it has a deferred error. The redundant
+ * status bit check is to filter out any bogus errors.
*
* MCA_STATUS will not be cleared at the end of the handler.
*/
@@ -780,7 +777,7 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
struct mce *m = &err->m;
if (mce_flags.smca)
- return smca_should_log_poll_error(flags, err);
+ return smca_should_log_poll_error(m);
/* If this entry is not valid, ignore it. */
if (!(m->status & MCI_STATUS_VAL))
--
2.51.1
On Mon, Oct 27, 2025 at 10:11:39AM -0400, Yazen Ghannam wrote:
> /*
> - * If the MCA_STATUS register has a deferred error, then continue using it as
> - * the status register.
> - *
> - * MCA_DESTAT will be cleared at the end of the handler.
> + * If MCA_STATUS happens to have a deferred error, then MCA_DESTAT will
> + * be cleared at the end of the handler.
> */
> - if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
> + if (m->status & MCI_STATUS_VAL)
> return true;
I'm still confused by those comments - we check VAL but we talk about
deferred...
>
> /*
> - * If the MCA_DESTAT register has a deferred error, then use it instead.
> + * Use the MCA_DESTAT register if it has a deferred error.
This one...
> The redundant
> + * status bit check is to filter out any bogus errors.
... probably only confuses. No need to mention it.
> *
> * MCA_STATUS will not be cleared at the end of the handler.
> */
> @@ -780,7 +777,7 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> struct mce *m = &err->m;
>
> if (mce_flags.smca)
> - return smca_should_log_poll_error(flags, err);
> + return smca_should_log_poll_error(m);
>
> /* If this entry is not valid, ignore it. */
> if (!(m->status & MCI_STATUS_VAL))
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Oct 28, 2025 at 04:22:31PM +0100, Borislav Petkov wrote:
> On Mon, Oct 27, 2025 at 10:11:39AM -0400, Yazen Ghannam wrote:
> >  	/*
> > -	 * If the MCA_STATUS register has a deferred error, then continue using it as
> > -	 * the status register.
> > -	 *
> > -	 * MCA_DESTAT will be cleared at the end of the handler.
> > +	 * If MCA_STATUS happens to have a deferred error, then MCA_DESTAT will
> > +	 * be cleared at the end of the handler.
> >  	 */
> > -	if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
> > +	if (m->status & MCI_STATUS_VAL)
> >  		return true;
>
> I'm still confused by those comments - we check VAL but we talk about
> deferred...

Yes, fair point. How about this?

	/*
	 * If MCA_STATUS has a valid error of any type, then use it.
	 *
	 * If the error happens to be a deferred error, then the copy
	 * saved in MCA_DESTAT will be cleared at the end of the
	 * handler.
	 *
	 * If MCA_STATUS does not have a valid error, then check
	 * MCA_DESTAT for a valid deferred error.
	 */

> >
> >  	/*
> > -	 * If the MCA_DESTAT register has a deferred error, then use it instead.
> > +	 * Use the MCA_DESTAT register if it has a deferred error.
>
> This one...
>
> > The redundant
> > +	 * status bit check is to filter out any bogus errors.
>
> ... probably only confuses. No need to mention it.

Okay, agreed. I think this entire second comment can be removed.

Thanks,
Yazen
On Tue, Oct 28, 2025 at 11:42:58AM -0400, Yazen Ghannam wrote:
> Yes, fair point. How about this?
>
> /*
> * If MCA_STATUS has a valid error of any type, then use it.
> *
> * If the error happens to be a deferred error, then the copy
> * saved in MCA_DESTAT will be cleared at the end of the
> * handler.
> *
> * If MCA_STATUS does not have a valid error, then check
> * MCA_DESTAT for a valid deferred error.
> */
Well, we already have this at the top:
/*
* We have three scenarios for checking for Deferred errors:
*
* 1) Non-SMCA systems check MCA_STATUS and log error if found.
* 2) SMCA systems check MCA_STATUS. If error is found then log it and also
* clear MCA_DESTAT.
* 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
* log it.
*/
and that is good enough IMO. The rest people can read out from the code.
> Okay, agreed. I think this entire second comment can be removed.
Gone.
IOW, this:
/*
* We have three scenarios for checking for Deferred errors:
*
* 1) Non-SMCA systems check MCA_STATUS and log error if found.
* 2) SMCA systems check MCA_STATUS. If error is found then log it and also
* clear MCA_DESTAT.
* 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
* log it.
*/
static bool smca_should_log_poll_error(struct mce *m)
{
if (m->status & MCI_STATUS_VAL)
return true;
m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
m->kflags |= MCE_CHECK_DFR_REGS;
return true;
}
return false;
}
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Oct 28, 2025 at 06:46:56PM +0100, Borislav Petkov wrote:
> On Tue, Oct 28, 2025 at 11:42:58AM -0400, Yazen Ghannam wrote:
> > Yes, fair point. How about this?
> >
> > /*
> > * If MCA_STATUS has a valid error of any type, then use it.
> > *
> > * If the error happens to be a deferred error, then the copy
> > * saved in MCA_DESTAT will be cleared at the end of the
> > * handler.
> > *
> > * If MCA_STATUS does not have a valid error, then check
> > * MCA_DESTAT for a valid deferred error.
> > */
>
> Well, we already have this at the top:
>
> /*
> * We have three scenarios for checking for Deferred errors:
> *
> * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> * clear MCA_DESTAT.
> * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> * log it.
> */
>
> and that is good enough IMO. The rest people can read out from the code.
Okay, sounds good.
>
> > Okay, agreed. I think this entire second comment can be removed.
>
> Gone.
>
> IOW, this:
>
> /*
> * We have three scenarios for checking for Deferred errors:
> *
> * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> * clear MCA_DESTAT.
> * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> * log it.
> */
> static bool smca_should_log_poll_error(struct mce *m)
> {
> if (m->status & MCI_STATUS_VAL)
> return true;
>
> m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
> if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
> m->kflags |= MCE_CHECK_DFR_REGS;
> return true;
> }
>
> return false;
> }
>
Yep, that's it. Much cleaner. :)
Thanks,
Yazen
On Tue, Oct 28, 2025 at 04:37:19PM -0400, Yazen Ghannam wrote:
> Yep, that's it. Much cleaner. :)
:-)
Final version:
From dd705221b2b3fb06fd2dc25dd51a8aaa1b1bd6d5 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Thu, 16 Oct 2025 16:37:48 +0000
Subject: [PATCH] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA
systems
Scalable MCA systems have a per-CPU register that gives the APIC LVT offset
for the thresholding and deferred error interrupts.
Currently, this register is read once to set up the deferred error interrupt
and then read again for each thresholding block. Furthermore, the APIC LVT
registers are configured each time, but they only need to be configured once
per-CPU.
Move the APIC LVT setup to the early part of CPU init, so that the registers
are set up once. Also, this ensures that the kernel is ready to service the
interrupts before the individual error sources (each MCA bank) are enabled.
Apply this change only to SMCA systems to avoid breaking any legacy behavior.
The deferred error interrupt is technically advertised by the SUCCOR feature.
However, this was first made available on SMCA systems. Therefore, only set
up the deferred error interrupt on SMCA systems and simplify the code.
Guidance from hardware designers is that the LVT offsets provided from the
platform should be used. The kernel should not try to enforce specific values.
However, the kernel should check that an LVT offset is not reused for multiple
sources.
Therefore, remove the extra checking and value enforcement from the MCE code.
The "reuse/conflict" case is already handled in setup_APIC_eilvt().
[ bp: Simplify and drop some comments. ]
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/all/20251016-wip-mca-updates-v7-0-5c139a4062cb@amd.com
---
arch/x86/kernel/cpu/mce/amd.c | 121 +++++++++++++++------------------
arch/x86/kernel/cpu/mce/core.c | 19 +-----
2 files changed, 56 insertions(+), 84 deletions(-)
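(Aside: a condensed sketch of the per-CPU setup the message above describes,
mirroring the smca_enable_interrupt_vectors() hunk below. setup_APIC_eilvt()
returns 0 on success and non-zero on an offset conflict, which is the
"reuse/conflict" handling referred to above.)

	u64 mca_intr_cfg, offset;

	if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg))
		return;

	/* Use the platform-provided LVT offset; do not enforce a value. */
	offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
	if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0))
		data->dfr_intr_en = 1;	/* vector ready; banks enabled later */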
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index d9f9ee7db5c8..0f4a01056ad3 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -43,9 +43,6 @@
/* Deferred error settings */
#define MSR_CU_DEF_ERR 0xC0000410
#define MASK_DEF_LVTOFF 0x000000F0
-#define MASK_DEF_INT_TYPE 0x00000006
-#define DEF_LVT_OFF 0x2
-#define DEF_INT_TYPE_APIC 0x2
/* Scalable MCA: */
@@ -57,6 +54,10 @@ static bool thresholding_irq_en;
struct mce_amd_cpu_data {
mce_banks_t thr_intr_banks;
mce_banks_t dfr_intr_banks;
+
+ u32 thr_intr_en: 1,
+ dfr_intr_en: 1,
+ __resv: 30;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
@@ -271,6 +272,7 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
static void smca_configure(unsigned int bank, unsigned int cpu)
{
+ struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
const struct smca_hwid *s_hwid;
unsigned int i, hwid_mcatype;
@@ -301,8 +303,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
* APIC based interrupt. First, check that no interrupt has been
* set.
*/
- if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
- __set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
+ if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) {
+ __set_bit(bank, data->dfr_intr_banks);
high |= BIT(5);
}
@@ -377,6 +379,14 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
int msr = (hi & MASK_LVTOFF_HI) >> 20;
+ /*
+ * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+ * the BIOS provides the value. The original field where LVT offset
+ * was set is reserved. Return early here:
+ */
+ if (mce_flags.smca)
+ return false;
+
if (apic < 0) {
pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
@@ -385,14 +395,6 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
}
if (apic != msr) {
- /*
- * On SMCA CPUs, LVT offset is programmed at a different MSR, and
- * the BIOS provides the value. The original field where LVT offset
- * was set is reserved. Return early here:
- */
- if (mce_flags.smca)
- return false;
-
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -473,41 +475,6 @@ static int setup_APIC_mce_threshold(int reserved, int new)
return reserved;
}
-static int setup_APIC_deferred_error(int reserved, int new)
-{
- if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
- APIC_EILVT_MSG_FIX, 0))
- return new;
-
- return reserved;
-}
-
-static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
-{
- u32 low = 0, high = 0;
- int def_offset = -1, def_new;
-
- if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
- return;
-
- def_new = (low & MASK_DEF_LVTOFF) >> 4;
- if (!(low & MASK_DEF_LVTOFF)) {
- pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
- def_new = DEF_LVT_OFF;
- low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
- }
-
- def_offset = setup_APIC_deferred_error(def_offset, def_new);
- if ((def_offset == def_new) &&
- (deferred_error_int_vector != amd_deferred_error_interrupt))
- deferred_error_int_vector = amd_deferred_error_interrupt;
-
- if (!mce_flags.smca)
- low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
-
- wrmsr(MSR_CU_DEF_ERR, low, high);
-}
-
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block,
unsigned int cpu)
@@ -543,12 +510,10 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
return addr;
}
-static int
-prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
- int offset, u32 misc_high)
+static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+ int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
- u32 smca_low, smca_high;
struct threshold_block b;
int new;
@@ -568,18 +533,10 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
__set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
b.interrupt_enable = 1;
- if (!mce_flags.smca) {
- new = (misc_high & MASK_LVTOFF_HI) >> 20;
- goto set_offset;
- }
-
- /* Gather LVT offset for thresholding: */
- if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
- goto out;
-
- new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+ if (mce_flags.smca)
+ goto done;
-set_offset:
+ new = (misc_high & MASK_LVTOFF_HI) >> 20;
offset = setup_APIC_mce_threshold(offset, new);
if (offset == new)
thresholding_irq_en = true;
@@ -587,7 +544,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
done:
mce_threshold_block_init(&b, offset);
-out:
return offset;
}
@@ -678,6 +634,32 @@ static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
mce_banks[0].ctl = 0;
}
+/*
+ * Enable the APIC LVT interrupt vectors once per-CPU. This should be done
+ * before hardware is ready to send interrupts.
+ *
+ * Individual error sources are enabled later during per-bank init.
+ */
+static void smca_enable_interrupt_vectors(void)
+{
+ struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
+ u64 mca_intr_cfg, offset;
+
+ if (!mce_flags.smca || !mce_flags.succor)
+ return;
+
+ if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg))
+ return;
+
+ offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12;
+ if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0))
+ data->thr_intr_en = 1;
+
+ offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
+ if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0))
+ data->dfr_intr_en = 1;
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -689,10 +671,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
mce_flags.amd_threshold = 1;
+ smca_enable_interrupt_vectors();
+
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
- if (mce_flags.smca)
+ if (mce_flags.smca) {
smca_configure(bank, cpu);
+ if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en)
+ continue;
+ }
+
disable_err_thresholding(c, bank);
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -713,9 +701,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
offset = prepare_threshold_block(bank, block, address, offset, high);
}
}
-
- if (mce_flags.succor)
- deferred_error_interrupt_enable(c);
}
void smca_bsp_init(void)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7be062429ce3..4aff14e04287 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -726,24 +726,11 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
* 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
* log it.
*/
-static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+static bool smca_should_log_poll_error(struct mce *m)
{
- struct mce *m = &err->m;
-
- /*
- * If the MCA_STATUS register has a deferred error, then continue using it as
- * the status register.
- *
- * MCA_DESTAT will be cleared at the end of the handler.
- */
- if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
+ if (m->status & MCI_STATUS_VAL)
return true;
- /*
- * If the MCA_DESTAT register has a deferred error, then use it instead.
- *
- * MCA_STATUS will not be cleared at the end of the handler.
- */
m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
m->kflags |= MCE_CHECK_DFR_REGS;
@@ -780,7 +767,7 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
struct mce *m = &err->m;
if (mce_flags.smca)
- return smca_should_log_poll_error(flags, err);
+ return smca_should_log_poll_error(m);
/* If this entry is not valid, ignore it. */
if (!(m->status & MCI_STATUS_VAL))
--
2.51.0
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Oct 29, 2025 at 12:18:25AM +0100, Borislav Petkov wrote:
> On Tue, Oct 28, 2025 at 04:37:19PM -0400, Yazen Ghannam wrote:
> > Yep, that's it. Much cleaner. :)
>
> :-)
>
> Final version:
>
> From dd705221b2b3fb06fd2dc25dd51a8aaa1b1bd6d5 Mon Sep 17 00:00:00 2001
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> Date: Thu, 16 Oct 2025 16:37:48 +0000
> Subject: [PATCH] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA
> systems
>
^^^ This is the following patch.
https://lore.kernel.org/all/20251016-wip-mca-updates-v7-3-5c139a4062cb@amd.com/
Why apply the fix ups to that?
Thanks,
Yazen
[...]
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 7be062429ce3..4aff14e04287 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -726,24 +726,11 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
> * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> * log it.
> */
> -static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> +static bool smca_should_log_poll_error(struct mce *m)
> {
> - struct mce *m = &err->m;
> -
> - /*
> - * If the MCA_STATUS register has a deferred error, then continue using it as
> - * the status register.
> - *
> - * MCA_DESTAT will be cleared at the end of the handler.
> - */
> - if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
> + if (m->status & MCI_STATUS_VAL)
> return true;
>
> - /*
> - * If the MCA_DESTAT register has a deferred error, then use it instead.
> - *
> - * MCA_STATUS will not be cleared at the end of the handler.
> - */
> m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
> if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
> m->kflags |= MCE_CHECK_DFR_REGS;
> @@ -780,7 +767,7 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> struct mce *m = &err->m;
>
> if (mce_flags.smca)
> - return smca_should_log_poll_error(flags, err);
> + return smca_should_log_poll_error(m);
>
> /* If this entry is not valid, ignore it. */
> if (!(m->status & MCI_STATUS_VAL))
> --
On Wed, Oct 29, 2025 at 11:09:12AM -0400, Yazen Ghannam wrote:
> Why apply the fix ups to that?
Too many patches flying back'n'forth. :-\
Lemme finish going through them and you can send a new set, pls.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette