[PATCH] Handle Ice Lake MONITOR erratum

Dave Hansen posted 1 patch 7 months, 3 weeks ago
b/arch/x86/include/asm/mwait.h |    3 +++
b/arch/x86/kernel/cpu/intel.c  |   17 ++++++++++++++---
2 files changed, 17 insertions(+), 3 deletions(-)
[PATCH] Handle Ice Lake MONITOR erratum
Posted by Dave Hansen 7 months, 3 weeks ago

From: Dave Hansen <dave.hansen@linux.intel.com>

Andrew Cooper reported some boot issues on Ice Lake servers when
running Xen that he tracked down to MWAIT not waking up. Do the safe
thing and consider them buggy since there's a published erratum.
Note: I've seen no reports of this occurring on Linux.

Add Ice Lake servers to the list of shaky MONITOR implementations with
no workaround available. Also, before the if() gets too unwieldy, move
it over to an x86_cpu_id array. Additionally, add a comment to the
X86_BUG_MONITOR consumption site to make it clear how and why affected
CPUs get IPIs to wake them up.

There is no equivalent erratum for the "Xeon D" Ice Lakes so
INTEL_ICELAKE_D is not affected.

The erratum is called ICX143 in the "3rd Gen Intel Xeon Scalable
Processors, Codename Ice Lake Specification Update". It is Intel
document 637780, currently available here:

	https://cdrdv2.intel.com/v1/dl/getContent/637780

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org

---

 b/arch/x86/include/asm/mwait.h |    3 +++
 b/arch/x86/kernel/cpu/intel.c  |   17 ++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff -puN arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug arch/x86/kernel/cpu/intel.c
--- a/arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug	2025-04-18 13:54:46.022590596 -0700
+++ b/arch/x86/kernel/cpu/intel.c	2025-04-18 15:15:19.374365069 -0700
@@ -513,6 +513,19 @@ static void init_intel_misc_features(str
 }
 
 /*
+ * These CPUs have buggy MWAIT/MONITOR implementations that
+ * usually manifest as hangs or stalls at boot.
+ */
+#define MWAIT_VFM(_vfm)	\
+	X86_MATCH_VFM_FEATURE(_vfm, X86_FEATURE_MWAIT, 0)
+static const struct x86_cpu_id monitor_bug_list[] = {
+	MWAIT_VFM(INTEL_ATOM_GOLDMONT),
+	MWAIT_VFM(INTEL_LUNARLAKE_M),
+	MWAIT_VFM(INTEL_ICELAKE_X),	/* Erratum ICX143 */
+	{},
+};
+
+/*
  * This is a list of Intel CPUs that are known to suffer from downclocking when
  * ZMM registers (512-bit vectors) are used.  On these CPUs, when the kernel
  * executes SIMD-optimized code such as cryptography functions or CRCs, it
@@ -565,9 +578,7 @@ static void init_intel(struct cpuinfo_x8
 	     c->x86_vfm == INTEL_WESTMERE_EX))
 		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
 
-	if (boot_cpu_has(X86_FEATURE_MWAIT) &&
-	    (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
-	     c->x86_vfm == INTEL_LUNARLAKE_M))
+	if (x86_match_cpu(monitor_bug_list))
 		set_cpu_bug(c, X86_BUG_MONITOR);
 
 #ifdef CONFIG_X86_64
diff -puN arch/x86/include/asm/mwait.h~ICX-MONITOR-bug arch/x86/include/asm/mwait.h
--- a/arch/x86/include/asm/mwait.h~ICX-MONITOR-bug	2025-04-18 15:17:18.353749634 -0700
+++ b/arch/x86/include/asm/mwait.h	2025-04-18 15:20:06.037927656 -0700
@@ -110,6 +110,9 @@ static __always_inline void __sti_mwait(
  * through MWAIT. Whenever someone changes need_resched, we would be woken
  * up from MWAIT (without an IPI).
  *
+ * Buggy (X86_BUG_MONITOR) CPUs will never set the polling bit and will
+ * always be sent IPIs.
+ *
  * New with Core Duo processors, MWAIT can take some hints based on CPU
  * capability.
  */
_
Re: [PATCH] Handle Ice Lake MONITOR erratum
Posted by Ingo Molnar 7 months, 3 weeks ago
* Dave Hansen <dave.hansen@linux.intel.com> wrote:

> 
> From: Dave Hansen <dave.hansen@linux.intel.com>
> 
> Andrew Cooper reported some boot issues on Ice Lake servers when
> running Xen that he tracked down to MWAIT not waking up. Do the safe
> thing and consider them buggy since there's a published erratum.
> Note: I've seen no reports of this occurring on Linux.
> 
> Add Ice Lake servers to the list of shaky MONITOR implementations with
> no workaround available. Also, before the if() gets too unwieldy, move
> it over to a x86_cpu_id array. Additionally, add a comment to the
> X86_BUG_MONITOR consumption site to make it clear how and why affected
> CPUs get IPIs to wake them up.
> 
> There is no equivalent erratum for the "Xeon D" Ice Lakes so
> INTEL_ICELAKE_D is not affected.
> 
> The erratum is called ICX143 in the "3rd Gen Intel Xeon Scalable
> Processors, Codename Ice Lake Specification Update". It is Intel
> document 637780, currently available here:
> 
> 	https://cdrdv2.intel.com/v1/dl/getContent/637780
> 
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: stable@vger.kernel.org
> 
> ---
> 
>  b/arch/x86/include/asm/mwait.h |    3 +++
>  b/arch/x86/kernel/cpu/intel.c  |   17 ++++++++++++++---
>  2 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff -puN arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug arch/x86/kernel/cpu/intel.c
> --- a/arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug	2025-04-18 13:54:46.022590596 -0700
> +++ b/arch/x86/kernel/cpu/intel.c	2025-04-18 15:15:19.374365069 -0700
> @@ -513,6 +513,19 @@ static void init_intel_misc_features(str
>  }
>  
>  /*
> + * These CPUs have buggy MWAIT/MONITOR implementations that
> + * usually manifest as hangs or stalls at boot.
> + */
> +#define MWAIT_VFM(_vfm)	\
> +	X86_MATCH_VFM_FEATURE(_vfm, X86_FEATURE_MWAIT, 0)
> +static const struct x86_cpu_id monitor_bug_list[] = {
> +	MWAIT_VFM(INTEL_ATOM_GOLDMONT),
> +	MWAIT_VFM(INTEL_LUNARLAKE_M),
> +	MWAIT_VFM(INTEL_ICELAKE_X),	/* Erratum ICX143 */
> +	{},
> +};

While it's just an internal helper, macro names should still be 
intuitive:

  s/MWAIT_VFM
   /VFM_MWAIT_BUG

or so?

Thanks,

	Ingo
Re: [PATCH] Handle Ice Lake MONITOR erratum
Posted by Dave Hansen 7 months, 3 weeks ago
On 4/21/25 23:46, Ingo Molnar wrote:
>>  /*
>> + * These CPUs have buggy MWAIT/MONITOR implementations that
>> + * usually manifest as hangs or stalls at boot.
>> + */
>> +#define MWAIT_VFM(_vfm)	\
>> +	X86_MATCH_VFM_FEATURE(_vfm, X86_FEATURE_MWAIT, 0)
>> +static const struct x86_cpu_id monitor_bug_list[] = {
>> +	MWAIT_VFM(INTEL_ATOM_GOLDMONT),
>> +	MWAIT_VFM(INTEL_LUNARLAKE_M),
>> +	MWAIT_VFM(INTEL_ICELAKE_X),	/* Erratum ICX143 */
>> +	{},
>> +};
> While it's just an internal helper, macro names should still be 
> intuitive:
> 
>   s/MWAIT_VFM
>    /VFM_MWAIT_BUG

The current convention is to end with the thing that's being matched,
like "_FEATURE" or "_VFM" in the X86_MATCH*() macros. That's why I
ordered it the way I did.

As for including "BUG", the _macro_ doesn't match CPUs with the bug.
It's just matching CPUs with the specified VFM that have MWAIT. It could
(theoretically) get used for non-bug things so I don't think it's
intuitive to put "BUG" in the name.

But, honestly, we have a ton of these one-off x86_cpu_id macros around.
They have lots of pretty silly naming, but they're mostly only used at
one isolated site so I don't worry about the naming _too_ much.
Re: [PATCH] Handle Ice Lake MONITOR erratum
Posted by Ingo Molnar 7 months, 3 weeks ago
* Dave Hansen <dave.hansen@intel.com> wrote:

> On 4/21/25 23:46, Ingo Molnar wrote:
> >>  /*
> >> + * These CPUs have buggy MWAIT/MONITOR implementations that
> >> + * usually manifest as hangs or stalls at boot.
> >> + */
> >> +#define MWAIT_VFM(_vfm)	\
> >> +	X86_MATCH_VFM_FEATURE(_vfm, X86_FEATURE_MWAIT, 0)
> >> +static const struct x86_cpu_id monitor_bug_list[] = {
> >> +	MWAIT_VFM(INTEL_ATOM_GOLDMONT),
> >> +	MWAIT_VFM(INTEL_LUNARLAKE_M),
> >> +	MWAIT_VFM(INTEL_ICELAKE_X),	/* Erratum ICX143 */
> >> +	{},
> >> +};
> > While it's just an internal helper, macro names should still be 
> > intuitive:
> > 
> >   s/MWAIT_VFM
> >    /VFM_MWAIT_BUG
> 
> The current convention is to end with the thing that's being matched,
> like "_FEATURE" or "_VFM" in the X86_MATCH*() macros. That's why I
> ordered it the way I did.
> 
> As for including "BUG", the _macro_ doesn't match CPUs with the bug.
> It's just matching CPUs with the specified VFM that have MWAIT. It could
> (theoretically) get used for non-bug things so I don't think it's
> intuitive to put "BUG" in the name.

Oh, that makes sense - objection withdrawn.

Thanks,

	Ingo
Re: [PATCH] Handle Ice Lake MONITOR erratum
Posted by srinivas pandruvada 7 months, 3 weeks ago
On Mon, 2025-04-21 at 12:22 -0700, Dave Hansen wrote:
> 
> From: Dave Hansen <dave.hansen@linux.intel.com>
> 
> Andrew Cooper reported some boot issues on Ice Lake servers when
> running Xen that he tracked down to MWAIT not waking up. Do the safe
> thing and consider them buggy since there's a published erratum.
> Note: I've seen no reports of this occurring on Linux.
> 
> Add Ice Lake servers to the list of shaky MONITOR implementations
> with
> no workaround available. Also, before the if() gets too unwieldy,
> move
> it over to a x86_cpu_id array. Additionally, add a comment to the
> X86_BUG_MONITOR consumption site to make it clear how and why
> affected
> CPUs get IPIs to wake them up.
> 
> There is no equivalent erratum for the "Xeon D" Ice Lakes so
> INTEL_ICELAKE_D is not affected.
> 
> The erratum is called ICX143 in the "3rd Gen Intel Xeon Scalable
> Processors, Codename Ice Lake Specification Update". It is Intel
> document 637780, currently available here:
> 
> 	https://cdrdv2.intel.com/v1/dl/getContent/637780
> 
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: stable@vger.kernel.org
> 
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

Thanks,
Srinivas

> ---
> 
>  b/arch/x86/include/asm/mwait.h |    3 +++
>  b/arch/x86/kernel/cpu/intel.c  |   17 ++++++++++++++---
>  2 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff -puN arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug
> arch/x86/kernel/cpu/intel.c
> --- a/arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug	2025-04-18
> 13:54:46.022590596 -0700
> +++ b/arch/x86/kernel/cpu/intel.c	2025-04-18
> 15:15:19.374365069 -0700
> @@ -513,6 +513,19 @@ static void init_intel_misc_features(str
>  }
>  
>  /*
> + * These CPUs have buggy MWAIT/MONITOR implementations that
> + * usually manifest as hangs or stalls at boot.
> + */
> +#define MWAIT_VFM(_vfm)	\
> +	X86_MATCH_VFM_FEATURE(_vfm, X86_FEATURE_MWAIT, 0)
> +static const struct x86_cpu_id monitor_bug_list[] = {
> +	MWAIT_VFM(INTEL_ATOM_GOLDMONT),
> +	MWAIT_VFM(INTEL_LUNARLAKE_M),
> +	MWAIT_VFM(INTEL_ICELAKE_X),	/* Erratum ICX143 */
> +	{},
> +};
> +
> +/*
>   * This is a list of Intel CPUs that are known to suffer from
> downclocking when
>   * ZMM registers (512-bit vectors) are used.  On these CPUs, when
> the kernel
>   * executes SIMD-optimized code such as cryptography functions or
> CRCs, it
> @@ -565,9 +578,7 @@ static void init_intel(struct cpuinfo_x8
>  	     c->x86_vfm == INTEL_WESTMERE_EX))
>  		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
>  
> -	if (boot_cpu_has(X86_FEATURE_MWAIT) &&
> -	    (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
> -	     c->x86_vfm == INTEL_LUNARLAKE_M))
> +	if (x86_match_cpu(monitor_bug_list))
>  		set_cpu_bug(c, X86_BUG_MONITOR);
>  
>  #ifdef CONFIG_X86_64
> diff -puN arch/x86/include/asm/mwait.h~ICX-MONITOR-bug
> arch/x86/include/asm/mwait.h
> --- a/arch/x86/include/asm/mwait.h~ICX-MONITOR-bug	2025-04-18
> 15:17:18.353749634 -0700
> +++ b/arch/x86/include/asm/mwait.h	2025-04-18
> 15:20:06.037927656 -0700
> @@ -110,6 +110,9 @@ static __always_inline void __sti_mwait(
>   * through MWAIT. Whenever someone changes need_resched, we would be
> woken
>   * up from MWAIT (without an IPI).
>   *
> + * Buggy (X86_BUG_MONITOR) CPUs will never set the polling bit and
> will
> + * always be sent IPIs.
> + *
>   * New with Core Duo processors, MWAIT can take some hints based on
> CPU
>   * capability.
>   */
> _
Re: [PATCH] Handle Ice Lake MONITOR erratum
Posted by Andrew Cooper 7 months, 3 weeks ago
On 21/04/2025 8:22 pm, Dave Hansen wrote:
> From: Dave Hansen <dave.hansen@linux.intel.com>
>
> Andrew Cooper reported some boot issues on Ice Lake servers when
> running Xen that he tracked down to MWAIT not waking up. Do the safe
> thing and consider them buggy since there's a published erratum.
> Note: I've seen no reports of this occurring on Linux.
>
> Add Ice Lake servers to the list of shaky MONITOR implementations with
> no workaround available. Also, before the if() gets too unwieldy, move
> it over to a x86_cpu_id array. Additionally, add a comment to the
> X86_BUG_MONITOR consumption site to make it clear how and why affected
> CPUs get IPIs to wake them up.
>
> There is no equivalent erratum for the "Xeon D" Ice Lakes so
> INTEL_ICELAKE_D is not affected.
>
> The erratum is called ICX143 in the "3rd Gen Intel Xeon Scalable
> Processors, Codename Ice Lake Specification Update". It is Intel
> document 637780, currently available here:
>
> 	https://cdrdv2.intel.com/v1/dl/getContent/637780
>
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: stable@vger.kernel.org

CC Roger/Frediano, who did most of the work here.  (I mostly just talked
to people).

https://lore.kernel.org/xen-devel/20250417161913.14661-1-roger.pau@citrix.com/T/#u

~Andrew

>
> ---
>
>  b/arch/x86/include/asm/mwait.h |    3 +++
>  b/arch/x86/kernel/cpu/intel.c  |   17 ++++++++++++++---
>  2 files changed, 17 insertions(+), 3 deletions(-)
>
> diff -puN arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug arch/x86/kernel/cpu/intel.c
> --- a/arch/x86/kernel/cpu/intel.c~ICX-MONITOR-bug	2025-04-18 13:54:46.022590596 -0700
> +++ b/arch/x86/kernel/cpu/intel.c	2025-04-18 15:15:19.374365069 -0700
> @@ -513,6 +513,19 @@ static void init_intel_misc_features(str
>  }
>  
>  /*
> + * These CPUs have buggy MWAIT/MONITOR implementations that
> + * usually manifest as hangs or stalls at boot.
> + */
> +#define MWAIT_VFM(_vfm)	\
> +	X86_MATCH_VFM_FEATURE(_vfm, X86_FEATURE_MWAIT, 0)
> +static const struct x86_cpu_id monitor_bug_list[] = {
> +	MWAIT_VFM(INTEL_ATOM_GOLDMONT),
> +	MWAIT_VFM(INTEL_LUNARLAKE_M),
> +	MWAIT_VFM(INTEL_ICELAKE_X),	/* Erratum ICX143 */
> +	{},
> +};
> +
> +/*
>   * This is a list of Intel CPUs that are known to suffer from downclocking when
>   * ZMM registers (512-bit vectors) are used.  On these CPUs, when the kernel
>   * executes SIMD-optimized code such as cryptography functions or CRCs, it
> @@ -565,9 +578,7 @@ static void init_intel(struct cpuinfo_x8
>  	     c->x86_vfm == INTEL_WESTMERE_EX))
>  		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
>  
> -	if (boot_cpu_has(X86_FEATURE_MWAIT) &&
> -	    (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
> -	     c->x86_vfm == INTEL_LUNARLAKE_M))
> +	if (x86_match_cpu(monitor_bug_list))
>  		set_cpu_bug(c, X86_BUG_MONITOR);
>  
>  #ifdef CONFIG_X86_64
> diff -puN arch/x86/include/asm/mwait.h~ICX-MONITOR-bug arch/x86/include/asm/mwait.h
> --- a/arch/x86/include/asm/mwait.h~ICX-MONITOR-bug	2025-04-18 15:17:18.353749634 -0700
> +++ b/arch/x86/include/asm/mwait.h	2025-04-18 15:20:06.037927656 -0700
> @@ -110,6 +110,9 @@ static __always_inline void __sti_mwait(
>   * through MWAIT. Whenever someone changes need_resched, we would be woken
>   * up from MWAIT (without an IPI).
>   *
> + * Buggy (X86_BUG_MONITOR) CPUs will never set the polling bit and will
> + * always be sent IPIs.
> + *
>   * New with Core Duo processors, MWAIT can take some hints based on CPU
>   * capability.
>   */
> _