From: Tony Luck <tony.luck@intel.com>
Split each vendor-specific part of __mcheck_cpu_apply_quirks() into its own helper function.
Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
Changes in v3:
- Newly added.
arch/x86/kernel/cpu/mce/core.c | 194 ++++++++++++++++++---------------
1 file changed, 106 insertions(+), 88 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 57c05015f984..bb8b1000fa0a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1880,101 +1880,119 @@ static void __mcheck_cpu_check_banks(void)
}
}
+static void apply_quirks_amd(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ struct mca_config *cfg = &mca_cfg;
+
+ /* This should be disabled by the BIOS, but isn't always */
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+ /*
+ * disable GART TBL walk error reporting, which
+ * trips off incorrectly with the IOMMU & 3ware
+ * & Cerberus:
+ */
+ clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+ }
+ if (c->x86 < 0x11 && cfg->bootlog < 0) {
+ /*
+ * Lots of broken BIOS around that don't clear them
+ * by default and leave crap in there. Don't log:
+ */
+ cfg->bootlog = 0;
+ }
+ /*
+ * Various K7s with broken bank 0 around. Always disable
+ * by default.
+ */
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
+ mce_banks[0].ctl = 0;
+
+ /*
+ * overflow_recov is supported for F15h Models 00h-0fh
+ * even though we don't have a CPUID bit for it.
+ */
+ if (c->x86 == 0x15 && c->x86_model <= 0xf)
+ mce_flags.overflow_recov = 1;
+
+ if (c->x86 >= 0x17 && c->x86 <= 0x1A)
+ mce_flags.zen_ifu_quirk = 1;
+}
+
+static void apply_quirks_intel(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ struct mca_config *cfg = &mca_cfg;
+
+ /*
+ * SDM documents that on family 6 bank 0 should not be written
+ * because it aliases to another special BIOS controlled
+ * register.
+ * But it's not aliased anymore on model 0x1a+
+ * Don't ignore bank 0 completely because there could be a
+ * valid event later, merely don't write CTL0.
+ */
+ if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
+ mce_banks[0].init = false;
+
+ /*
+ * All newer Intel systems support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
+ cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+
+ /*
+ * There are also broken BIOSes on some Pentium M and
+ * earlier systems:
+ */
+ if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
+ cfg->bootlog = 0;
+
+ if (c->x86_vfm == INTEL_SANDYBRIDGE_X)
+ mce_flags.snb_ifu_quirk = 1;
+
+ /*
+ * Skylake, Cascade Lake and Cooper Lake require a quirk on
+ * rep movs.
+ */
+ if (c->x86_vfm == INTEL_SKYLAKE_X)
+ mce_flags.skx_repmov_quirk = 1;
+}
+
+static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
+{
+ struct mca_config *cfg = &mca_cfg;
+
+ /*
+ * All newer Zhaoxin CPUs support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+ }
+}
+
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
- if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
+ switch (c->x86_vendor) {
+ case X86_VENDOR_UNKNOWN:
pr_info("unknown CPU type - not enabling MCE support\n");
return -EOPNOTSUPP;
- }
-
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
- /*
- * disable GART TBL walk error reporting, which
- * trips off incorrectly with the IOMMU & 3ware
- * & Cerberus:
- */
- clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
- }
- if (c->x86 < 0x11 && cfg->bootlog < 0) {
- /*
- * Lots of broken BIOS around that don't clear them
- * by default and leave crap in there. Don't log:
- */
- cfg->bootlog = 0;
- }
- /*
- * Various K7s with broken bank 0 around. Always disable
- * by default.
- */
- if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
- mce_banks[0].ctl = 0;
-
- /*
- * overflow_recov is supported for F15h Models 00h-0fh
- * even though we don't have a CPUID bit for it.
- */
- if (c->x86 == 0x15 && c->x86_model <= 0xf)
- mce_flags.overflow_recov = 1;
-
- if (c->x86 >= 0x17 && c->x86 <= 0x1A)
- mce_flags.zen_ifu_quirk = 1;
-
- }
-
- if (c->x86_vendor == X86_VENDOR_INTEL) {
- /*
- * SDM documents that on family 6 bank 0 should not be written
- * because it aliases to another special BIOS controlled
- * register.
- * But it's not aliased anymore on model 0x1a+
- * Don't ignore bank 0 completely because there could be a
- * valid event later, merely don't write CTL0.
- */
-
- if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
- mce_banks[0].init = false;
-
- /*
- * All newer Intel systems support MCE broadcasting. Enable
- * synchronization with a one second timeout.
- */
- if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
- cfg->monarch_timeout < 0)
- cfg->monarch_timeout = USEC_PER_SEC;
-
- /*
- * There are also broken BIOSes on some Pentium M and
- * earlier systems:
- */
- if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
- cfg->bootlog = 0;
-
- if (c->x86_vfm == INTEL_SANDYBRIDGE_X)
- mce_flags.snb_ifu_quirk = 1;
-
- /*
- * Skylake, Cascacde Lake and Cooper Lake require a quirk on
- * rep movs.
- */
- if (c->x86_vfm == INTEL_SKYLAKE_X)
- mce_flags.skx_repmov_quirk = 1;
- }
-
- if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
- /*
- * All newer Zhaoxin CPUs support MCE broadcasting. Enable
- * synchronization with a one second timeout.
- */
- if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
- if (cfg->monarch_timeout < 0)
- cfg->monarch_timeout = USEC_PER_SEC;
- }
+ case X86_VENDOR_AMD:
+ apply_quirks_amd(c);
+ break;
+ case X86_VENDOR_INTEL:
+ apply_quirks_intel(c);
+ break;
+ case X86_VENDOR_ZHAOXIN:
+ apply_quirks_zhaoxin(c);
+ break;
}
if (cfg->monarch_timeout < 0)
--
2.17.1
On Fri, Oct 25, 2024 at 10:45:58AM +0800, Qiuxu Zhuo wrote:
> From: Tony Luck <tony.luck@intel.com>
>
> Split each vendor specific part into its own helper function.
>
> Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> ---
> Changes in v3:
> - Newly added.
>
> arch/x86/kernel/cpu/mce/core.c | 194 ++++++++++++++++++---------------
> 1 file changed, 106 insertions(+), 88 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 57c05015f984..bb8b1000fa0a 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1880,101 +1880,119 @@ static void __mcheck_cpu_check_banks(void)
> }
> }
>
> +static void apply_quirks_amd(struct cpuinfo_x86 *c)
> +{
> + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> + struct mca_config *cfg = &mca_cfg;
> +
> + /* This should be disabled by the BIOS, but isn't always */
> + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
> + /*
> + * disable GART TBL walk error reporting, which
> + * trips off incorrectly with the IOMMU & 3ware
> + * & Cerberus:
> + */
> + clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
> + }
Newline here please.
> + if (c->x86 < 0x11 && cfg->bootlog < 0) {
> + /*
> + * Lots of broken BIOS around that don't clear them
> + * by default and leave crap in there. Don't log:
> + */
> + cfg->bootlog = 0;
> + }
And here.
> + /*
> + * Various K7s with broken bank 0 around. Always disable
> + * by default.
> + */
> + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
> + mce_banks[0].ctl = 0;
> +
> + /*
> + * overflow_recov is supported for F15h Models 00h-0fh
> + * even though we don't have a CPUID bit for it.
> + */
> + if (c->x86 == 0x15 && c->x86_model <= 0xf)
> + mce_flags.overflow_recov = 1;
> +
> + if (c->x86 >= 0x17 && c->x86 <= 0x1A)
> + mce_flags.zen_ifu_quirk = 1;
> +}
> +
> +static void apply_quirks_intel(struct cpuinfo_x86 *c)
> +{
> + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> + struct mca_config *cfg = &mca_cfg;
Is there a benefit to this pointer? We use mca_cfg.FIELD in most other
places.
Thanks,
Yazen
Hi Yazen,
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> [...]
> > +static void apply_quirks_amd(struct cpuinfo_x86 *c) {
> > + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> > + struct mca_config *cfg = &mca_cfg;
> > +
> > + /* This should be disabled by the BIOS, but isn't always */
> > + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
> > + /*
> > + * disable GART TBL walk error reporting, which
> > + * trips off incorrectly with the IOMMU & 3ware
> > + * & Cerberus:
> > + */
> > + clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
> > + }
>
> Newline here please.
OK.
Will update it in next version.
> > + if (c->x86 < 0x11 && cfg->bootlog < 0) {
> > + /*
> > + * Lots of broken BIOS around that don't clear them
> > + * by default and leave crap in there. Don't log:
> > + */
> > + cfg->bootlog = 0;
> > + }
>
> And here.
And will update it in next version.
> [...]
> > +static void apply_quirks_intel(struct cpuinfo_x86 *c) {
> > + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> > + struct mca_config *cfg = &mca_cfg;
>
> Is there a benefit to this pointer? We use mca_cfg.FIELD in most other places.
This could make the diff smaller for easier review, and I also believe that fewer direct
uses of global variables in functions are better. Additionally, since 'mca_cfg' is used
multiple times in the function, the local variable 'cfg' is shorter and more convenient to use.
[ Certainly, if the global variable 'mca_cfg' is only used once in the function, directly
using it might be more convenient. ]
Just from my perspective, no strong preference. 😊
-Qiuxu
On Wed, Oct 30, 2024 at 01:39:43AM +0000, Zhuo, Qiuxu wrote:
[...]
Thanks Qiuxu.
>
> > > +static void apply_quirks_intel(struct cpuinfo_x86 *c) {
> > > + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> > > + struct mca_config *cfg = &mca_cfg;
> >
> > Is there a benefit to this pointer? We use mca_cfg.FIELD in most other places.
>
> This could make the diff smaller for easier review, and I also believe that fewer direct
> uses of global variables in functions are better. Additionally, there are multiple uses of
> 'mca_cfg' in the function, the local variable 'cfg' is shorter and more convenient to use.
>
I don't think it would make the diff smaller here since the code is
already being moved.
Though you could say this is a separate logical change compared to just
moving the code as-is.
Also, I don't think the "shorter, more convenient" idea holds. It's not
that much shorter. And there are already cases of using the global
variables "mca_cfg" and "mce_flags".
Why is "...fewer direct uses of global variables in functions..." better?
> [ Certainly, if the global variable 'mca_cfg' is only used once in the function, directly
> using it might be more convenient. ]
>
There is one such case in your patch.
> Just from my perspective, no strong preference. 😊
>
Same here. I just figured this suggestion would be another possible
cleanup. :)
Thanks,
Yazen
Hi Yazen,
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> [...]
> > Just from my perspective, no strong preference. 😊
>
> Same here. I just figured this suggestion would be another possible
> cleanup. :)
Thanks for your suggestion. Yes, it does save 3 lines of code.
Either the current patch or your suggestion is OK with me.
Hi @Boris,
may I know which option is OK with you:
Option A (current patch):
struct mca_config *cfg = &mca_cfg;
and then use 'cfg' in apply_quirks_{amd, intel, zhaoxin}()
Option B (suggested by Yazen):
Directly use 'mca_cfg' in apply_quirks_{amd, intel, zhaoxin}()
Thanks!
-Qiuxu
© 2016 - 2026 Red Hat, Inc.