From: Len Brown <len.brown@intel.com>
Under some conditions, MONITOR wakeups on Lunar Lake processors
can be lost, resulting in significant user-visible delays.
Add LunarLake to X86_BUG_MONITOR so that wake_up_idle_cpu()
always sends an IPI, avoiding this potential delay.
Also update the X86_BUG_MONITOR workaround to handle
the new smp_kick_mwait_play_dead() path.
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219364
Cc: stable@vger.kernel.org # 6.11
Signed-off-by: Len Brown <len.brown@intel.com>
---
arch/x86/kernel/cpu/intel.c | 3 ++-
arch/x86/kernel/smpboot.c | 3 +++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e7656cbef68d..aa63f5f780a0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -586,7 +586,8 @@ static void init_intel(struct cpuinfo_x86 *c)
c->x86_vfm == INTEL_WESTMERE_EX))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M))
set_cpu_bug(c, X86_BUG_MONITOR);
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 766f092dab80..910cb2d72c13 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void)
for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) {
/* Bring it out of mwait */
WRITE_ONCE(md->control, newstate);
+ /* If MONITOR unreliable, send IPI */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR))
+ __apic_send_IPI(cpu, RESCHEDULE_VECTOR);
udelay(5);
}
--
2.43.0
On Fri, Nov 08 2024 at 08:49, Len Brown wrote: > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c > index e7656cbef68d..aa63f5f780a0 100644 > --- a/arch/x86/kernel/cpu/intel.c > +++ b/arch/x86/kernel/cpu/intel.c > @@ -586,7 +586,8 @@ static void init_intel(struct cpuinfo_x86 *c) > c->x86_vfm == INTEL_WESTMERE_EX)) > set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); > > - if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) > + if (boot_cpu_has(X86_FEATURE_MWAIT) && > + (c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M)) This indentation is bogus. > set_cpu_bug(c, X86_BUG_MONITOR); > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index 766f092dab80..910cb2d72c13 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) > for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { > /* Bring it out of mwait */ > WRITE_ONCE(md->control, newstate); > + /* If MONITOR unreliable, send IPI */ > + if (boot_cpu_has_bug(X86_BUG_MONITOR)) > + __apic_send_IPI(cpu, RESCHEDULE_VECTOR); How is this supposed to work? The local APIC of the offline CPU is shut down and only responds to INIT, NMI, SMI, and SIPI. Even if the APIC would react to the IPI, then the offline CPU would not notice as is has interrupts disabled when it reaches mwait_play_dead(). Seriously? Thanks, tglx
On Fri, Nov 08, 2024 at 08:49:31AM -0500, Len Brown wrote: > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index 766f092dab80..910cb2d72c13 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) > for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { > /* Bring it out of mwait */ > WRITE_ONCE(md->control, newstate); > + /* If MONITOR unreliable, send IPI */ > + if (boot_cpu_has_bug(X86_BUG_MONITOR)) > + __apic_send_IPI(cpu, RESCHEDULE_VECTOR); > udelay(5); > } Going over that code again, mwait_play_dead() is doing __mwait(.exc=0) with IRQs disabled. So that IPI you're trying to send there won't do no nothing :-/ Now that comment there says MCE/NMI/SMI are still open (non-maskable etc.) so perhaps prod it on the NMI vector? This does seem to suggest the above code path wasn't actually tested. Perhaps mark your local machine with BUG_MONITOR, remove the md->control WRITE_ONCE() and try kexec to test it? Thomas, any other thoughts?
It was folly to respond to review feedback by attempting to add bits to the kexec path to this simple patch. So that misguided hunk is removed. Here is the 1-line patch that addresses the problem that users see in the field today. If kexec proves to be an issue on LNL, that can be addressed in a subsequent patch. thanks, -Len
From: Len Brown <len.brown@intel.com>
Under some conditions, MONITOR wakeups on Lunar Lake processors
can be lost, resulting in significant user-visible delays.
Add LunarLake to X86_BUG_MONITOR so that wake_up_idle_cpu()
always sends an IPI, avoiding this potential delay.
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219364
Cc: stable@vger.kernel.org # 6.11
Signed-off-by: Len Brown <len.brown@intel.com>
---
arch/x86/kernel/cpu/intel.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e7656cbef68d..284cd561499c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -586,7 +586,9 @@ static void init_intel(struct cpuinfo_x86 *c)
c->x86_vfm == INTEL_WESTMERE_EX))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (c->x86_vfm == INTEL_ATOM_GOLDMONT
+ || c->x86_vfm == INTEL_LUNARLAKE_M))
set_cpu_bug(c, X86_BUG_MONITOR);
#ifdef CONFIG_X86_64
--
2.43.0
On Tue, Nov 12, 2024 at 6:37 AM Len Brown <lenb@kernel.org> wrote: > > From: Len Brown <len.brown@intel.com> > > Under some conditions, MONITOR wakeups on Lunar Lake processors > can be lost, resulting in significant user-visible delays. > > Add LunarLake to X86_BUG_MONITOR so that wake_up_idle_cpu() > always sends an IPI, avoiding this potential delay. > > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219364 > > Cc: stable@vger.kernel.org # 6.11 > Signed-off-by: Len Brown <len.brown@intel.com> So again Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> and see one super-minor nit below. > --- > arch/x86/kernel/cpu/intel.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c > index e7656cbef68d..284cd561499c 100644 > --- a/arch/x86/kernel/cpu/intel.c > +++ b/arch/x86/kernel/cpu/intel.c > @@ -586,7 +586,9 @@ static void init_intel(struct cpuinfo_x86 *c) > c->x86_vfm == INTEL_WESTMERE_EX)) > set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); > > - if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) > + if (boot_cpu_has(X86_FEATURE_MWAIT) && > + (c->x86_vfm == INTEL_ATOM_GOLDMONT > + || c->x86_vfm == INTEL_LUNARLAKE_M)) I would put the || at the end of the previous line, that is > + (c->x86_vfm == INTEL_ATOM_GOLDMONT || > + c->x86_vfm == INTEL_LUNARLAKE_M)) > set_cpu_bug(c, X86_BUG_MONITOR); > > #ifdef CONFIG_X86_64 > -- > 2.43.0 >
On Mon, Nov 11 2024 at 17:23, Peter Zijlstra wrote: > On Fri, Nov 08, 2024 at 08:49:31AM -0500, Len Brown wrote: >> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c >> index 766f092dab80..910cb2d72c13 100644 >> --- a/arch/x86/kernel/smpboot.c >> +++ b/arch/x86/kernel/smpboot.c >> @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) >> for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { >> /* Bring it out of mwait */ >> WRITE_ONCE(md->control, newstate); >> + /* If MONITOR unreliable, send IPI */ >> + if (boot_cpu_has_bug(X86_BUG_MONITOR)) >> + __apic_send_IPI(cpu, RESCHEDULE_VECTOR); >> udelay(5); >> } > > Going over that code again, mwait_play_dead() is doing __mwait(.exc=0) > with IRQs disabled. And the APIC is shut down. So it won't react on the IPI either. > So that IPI you're trying to send there won't do no nothing :-/ > > Now that comment there says MCE/NMI/SMI are still open (non-maskable > etc.) so perhaps prod it on the NMI vector? > > This does seem to suggest the above code path wasn't actually tested. I'm not sure whether that's just a suggestion :) > Perhaps mark your local machine with BUG_MONITOR, remove the md->control > WRITE_ONCE() and try kexec to test it? > > Thomas, any other thoughts? NMI should work. See exc_nmi(): if (arch_cpu_is_offline(smp_processor_id())) { if (microcode_nmi_handler_enabled()) microcode_offline_nmi_handler(); return; } Thanks, tglx
On Fri, Nov 8, 2024 at 2:52 PM Len Brown <lenb@kernel.org> wrote: > > From: Len Brown <len.brown@intel.com> > > Under some conditions, MONITOR wakeups on Lunar Lake processors > can be lost, resulting in significant user-visible delays. > > Add LunarLake to X86_BUG_MONITOR so that wake_up_idle_cpu() > always sends an IPI, avoiding this potential delay. > > Also update the X86_BUG_MONITOR workaround to handle > the new smp_kick_mwait_play_dead() path. > > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219364 > > Cc: stable@vger.kernel.org # 6.11 > Signed-off-by: Len Brown <len.brown@intel.com> Overall Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > --- > arch/x86/kernel/cpu/intel.c | 3 ++- > arch/x86/kernel/smpboot.c | 3 +++ > 2 files changed, 5 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c > index e7656cbef68d..aa63f5f780a0 100644 > --- a/arch/x86/kernel/cpu/intel.c > +++ b/arch/x86/kernel/cpu/intel.c > @@ -586,7 +586,8 @@ static void init_intel(struct cpuinfo_x86 *c) > c->x86_vfm == INTEL_WESTMERE_EX)) > set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); > > - if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) > + if (boot_cpu_has(X86_FEATURE_MWAIT) && > + (c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M)) > set_cpu_bug(c, X86_BUG_MONITOR); > > #ifdef CONFIG_X86_64 > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index 766f092dab80..910cb2d72c13 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -1377,6 +1377,9 @@ void smp_kick_mwait_play_dead(void) > for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { > /* Bring it out of mwait */ > WRITE_ONCE(md->control, newstate); > + /* If MONITOR unreliable, send IPI */ > + if (boot_cpu_has_bug(X86_BUG_MONITOR)) > + __apic_send_IPI(cpu, RESCHEDULE_VECTOR); The __apic_send_IPI() call could be wrapped into something like __native_smp_send_reschedule() to underline the analogy between this and what happens in native_smp_send_reschedule(). It is still fine as is though IMV. > udelay(5); > } > > --
© 2016 - 2024 Red Hat, Inc.