From: Lyle Li <LyleLi@zhaoxin.com>
Zhaoxin consists of two vendors, X86_VENDOR_ZHAOXIN and
X86_VENDOR_CENTAUR, so add the centaur vendor to support
Zhaoxin MCA in mce/core.c and mce/intel.c.
Signed-off-by: Lyle Li <LyleLi@zhaoxin.com>
Reviewed-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
---
arch/x86/kernel/cpu/mce/core.c | 42 ++++++++++++++++-----------------
arch/x86/kernel/cpu/mce/intel.c | 3 ++-
2 files changed, 23 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index ad0623b65..b7b98c33a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -496,6 +496,7 @@ bool mce_usable_address(struct mce *m)
case X86_VENDOR_INTEL:
case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
return intel_mce_usable_address(m);
default:
@@ -513,6 +514,7 @@ bool mce_is_memory_error(struct mce *m)
case X86_VENDOR_INTEL:
case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
*
@@ -1247,7 +1249,8 @@ static noinstr bool mce_check_crashing_cpu(void)
mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS);
- if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
if (mcgstatus & MCG_STATUS_LMCES)
return false;
}
@@ -1521,7 +1524,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
* on Intel, Zhaoxin only.
*/
if (m.cpuvendor == X86_VENDOR_INTEL ||
- m.cpuvendor == X86_VENDOR_ZHAOXIN)
+ m.cpuvendor == X86_VENDOR_ZHAOXIN ||
+ m.cpuvendor == X86_VENDOR_CENTAUR)
lmce = m.mcgstatus & MCG_STATUS_LMCES;
/*
@@ -1970,6 +1974,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
}
}
+ if (c->x86_vendor == X86_VENDOR_CENTAUR) {
+ /*
+ * All newer Centaur CPUs support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
+ c->x86 > 6) {
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+ }
+ }
+
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
if (cfg->bootlog != 0)
@@ -2012,21 +2028,6 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
}
}
-static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
-{
- struct mca_config *cfg = &mca_cfg;
-
- /*
- * All newer Centaur CPUs support MCE broadcasting. Enable
- * synchronization with a one second timeout.
- */
- if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
- c->x86 > 6) {
- if (cfg->monarch_timeout < 0)
- cfg->monarch_timeout = USEC_PER_SEC;
- }
-}
-
static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
@@ -2072,9 +2073,6 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_CENTAUR:
- mce_centaur_feature_init(c);
- break;
-
case X86_VENDOR_ZHAOXIN:
mce_zhaoxin_feature_init(c);
break;
@@ -2092,6 +2090,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
mce_zhaoxin_feature_clear(c);
break;
@@ -2401,7 +2400,8 @@ static void vendor_disable_error_reporting(void)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN)
+ boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
return;
mce_disable_error_reporting();
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f6103e6bf..b7e67f4f7 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -88,7 +88,8 @@ static int cmci_supported(int *banks)
* makes sure none of the backdoors are entered otherwise.
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
- boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
+ boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
return 0;
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
--
2.34.1
On Mon, Sep 09, 2024 at 06:43:47PM +0800, Tony W Wang-oc wrote: > From: Lyle Li <LyleLi@zhaoxin.com> > > Zhaoxin consists of two vendors, X86_VENDOR_ZHAOXIN and > X86_VENDOR_CENTAUR, so add the centaur vendor to support > Zhaoxin MCA in mce/core.c and mce/intel.c. > > Signed-off-by: Lyle Li <LyleLi@zhaoxin.com> > Reviewed-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com> If you're sending someone else's patch, then you need to add your SOB too. Please educate yourself on the process: https://kernel.org/doc/html/latest/process/submitting-patches.html -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Sep 09, 2024 at 06:43:47PM +0800, Tony W Wang-oc wrote: > @@ -1970,6 +1974,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) > } > } > > + if (c->x86_vendor == X86_VENDOR_CENTAUR) { > + /* > + * All newer Centaur CPUs support MCE broadcasting. Enable > + * synchronization with a one second timeout. > + */ > + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || > + c->x86 > 6) { > + if (cfg->monarch_timeout < 0) > + cfg->monarch_timeout = USEC_PER_SEC; > + } > + } So if centaur == zhaoxin, why aren't you moving this hunk to mce_zhaoxin_feature_init() instead? -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 2024/9/13 22:27, Borislav Petkov wrote: > > > [这封邮件来自外部发件人 谨防风险] > > On Mon, Sep 09, 2024 at 06:43:47PM +0800, Tony W Wang-oc wrote: >> @@ -1970,6 +1974,18 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) >> } >> } >> >> + if (c->x86_vendor == X86_VENDOR_CENTAUR) { >> + /* >> + * All newer Centaur CPUs support MCE broadcasting. Enable >> + * synchronization with a one second timeout. >> + */ >> + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >> + c->x86 > 6) { >> + if (cfg->monarch_timeout < 0) >> + cfg->monarch_timeout = USEC_PER_SEC; >> + } >> + } > > So if centaur == zhaoxin, why aren't you moving this hunk to > mce_zhaoxin_feature_init() instead? > > Ok, will adjust the patch in the next version. Sincerely! TonyWWang-oc
On 9/13/24 07:27, Borislav Petkov wrote: >> + if (c->x86_vendor == X86_VENDOR_CENTAUR) { >> + /* >> + * All newer Centaur CPUs support MCE broadcasting. Enable >> + * synchronization with a one second timeout. >> + */ >> + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >> + c->x86 > 6) { >> + if (cfg->monarch_timeout < 0) >> + cfg->monarch_timeout = USEC_PER_SEC; >> + } >> + } > So if centaur == zhaoxin, why aren't you moving this hunk to > mce_zhaoxin_feature_init() instead? The centaur and zhaoxin logic is also _really_ close here: > if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { > if (cfg->monarch_timeout < 0) > cfg->monarch_timeout = USEC_PER_SEC; > } vs > if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || > c->x86 > 6) { > if (cfg->monarch_timeout < 0) > cfg->monarch_timeout = USEC_PER_SEC; > } I'd just randomly guess that the zhaoxin version is buggy because it doesn't do a c->x86 check before the "(c->x86_model == 0x19 || c->x86_model == 0x1f)". So instead of copying and pasting the same block over and over, can we consolidate it a bit? foo() { /* Older CPUs do not do MCE broadcast: */ if (c->x86 < 6) return; /* All newer ones do: */ if (c->x86 > 6) goto mce_broadcast; /* Family 6 is mixed: */ if (c->x86_vendor == X86_VENDOR_CENTAUR) { if (c->x86_model == 0xf && c->x86_stepping >= 0xe) goto mce_broadcast; } else if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { if (c->x86_model == 0x19 || c->x86_model == 0x1f)) goto mce_broadcast; } return; mce_broadcast: if (cfg->monarch_timeout < 0) cfg->monarch_timeout = USEC_PER_SEC; } Heck, the Intel code can even go in there I think. Wouldn't that tell the story a bit better?
On 2024/9/13 23:47, Dave Hansen wrote: > > > [这封邮件来自外部发件人 谨防风险] > > On 9/13/24 07:27, Borislav Petkov wrote: >>> + if (c->x86_vendor == X86_VENDOR_CENTAUR) { >>> + /* >>> + * All newer Centaur CPUs support MCE broadcasting. Enable >>> + * synchronization with a one second timeout. >>> + */ >>> + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >>> + c->x86 > 6) { >>> + if (cfg->monarch_timeout < 0) >>> + cfg->monarch_timeout = USEC_PER_SEC; >>> + } >>> + } >> So if centaur == zhaoxin, why aren't you moving this hunk to >> mce_zhaoxin_feature_init() instead? > > The centaur and zhaoxin logic is also _really_ close here: > >> if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) { >> if (cfg->monarch_timeout < 0) >> cfg->monarch_timeout = USEC_PER_SEC; >> } > > vs > >> if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || >> c->x86 > 6) { >> if (cfg->monarch_timeout < 0) >> cfg->monarch_timeout = USEC_PER_SEC; >> } > > I'd just randomly guess that the zhaoxin version is buggy because it > doesn't do a c->x86 check before the "(c->x86_model == 0x19 || > c->x86_model == 0x1f)". > Yes, the check for c->x86 == 6 is omitted in the zhaoxin version. > So instead of copying and pasting the same block over and over, can we > consolidate it a bit? > > foo() > { > /* Older CPUs do not do MCE broadcast: */ > if (c->x86 < 6) > return; > /* All newer ones do: */ > if (c->x86 > 6) > goto mce_broadcast; > > /* Family 6 is mixed: */ > if (c->x86_vendor == X86_VENDOR_CENTAUR) { > if (c->x86_model == 0xf && > c->x86_stepping >= 0xe) > goto mce_broadcast; > } else if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { > if (c->x86_model == 0x19 || > c->x86_model == 0x1f)) > goto mce_broadcast; > } > > return; > > mce_broadcast: > if (cfg->monarch_timeout < 0) > cfg->monarch_timeout = USEC_PER_SEC; > } > Thank you! That makes more sense, and will adopt it into zhaoxin.c Sincerely! TonyWWang-oc
© 2016 - 2024 Red Hat, Inc.