[RFC PATCH 22/32] x86/fred: FRED initialization code

Xin Li posted 32 patches 2 years, 9 months ago
There is a newer version of this series
[RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by Xin Li 2 years, 9 months ago
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>

The code to initialize FRED when it's available and _not_ disabled.

cpu_init_fred_exceptions() is the core function to initialize FRED,
which
  1. Sets up FRED entrypoints for events happening in ring 0 and 3.
  2. Sets up a default stack for event handling.
  3. Sets up dedicated event stacks for DB/NMI/MC/DF, equivalent to
     the IDT IST stacks.
  4. Forces 32-bit system calls to use "int $0x80" only.
  5. Enables FRED and invalidtes IDT.

When the FRED is used, cpu_init_exception_handling() initializes FRED
through calling cpu_init_fred_exceptions(), otherwise it sets up TSS
IST and loads IDT.

As FRED uses the ring 3 FRED entrypoint for SYSCALL and SYSENTER,
it skips setting up SYSCALL/SYSENTER related MSRs, e.g., MSR_LSTAR.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Co-developed-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
 arch/x86/include/asm/fred.h  | 14 +++++++
 arch/x86/include/asm/traps.h |  2 +
 arch/x86/kernel/Makefile     |  1 +
 arch/x86/kernel/cpu/common.c | 74 +++++++++++++++++++++++-------------
 arch/x86/kernel/fred.c       | 73 +++++++++++++++++++++++++++++++++++
 arch/x86/kernel/irqinit.c    |  7 +++-
 arch/x86/kernel/traps.c      | 16 +++++++-
 7 files changed, 157 insertions(+), 30 deletions(-)
 create mode 100644 arch/x86/kernel/fred.c

diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 01678ced5451..b6308e351e14 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -97,8 +97,22 @@ DECLARE_FRED_HANDLER(fred_exc_debug);
 DECLARE_FRED_HANDLER(fred_exc_page_fault);
 DECLARE_FRED_HANDLER(fred_exc_machine_check);
 
+/*
+ * The actual assembly entry and exit points
+ */
+extern __visible void fred_entrypoint_user(void);
+
+/*
+ * Initialization
+ */
+void cpu_init_fred_exceptions(void);
+void fred_setup_apic(void);
+
 #endif /* __ASSEMBLY__ */
 
+#else
+#define cpu_init_fred_exceptions() BUG()
+#define fred_setup_apic() BUG()
 #endif /* CONFIG_X86_FRED */
 
 #endif /* ASM_X86_FRED_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 77ffc580e821..963c51e680bd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -56,6 +56,8 @@ void __noreturn handle_stack_overflow(struct pt_regs *regs,
 	void f (struct pt_regs *regs)
 typedef DECLARE_SYSTEM_INTERRUPT_HANDLER((*system_interrupt_handler));
 
+system_interrupt_handler get_system_interrupt_handler(unsigned int i);
+
 int exc_raise_irq(struct pt_regs *regs, u32 vector);
 
 int external_interrupt(struct pt_regs *regs, unsigned int vector);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f901658d9f7c..1d9e669e288b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -48,6 +48,7 @@ obj-y			+= process_$(BITS).o signal.o
 obj-$(CONFIG_COMPAT)	+= signal_compat.o
 obj-y			+= traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o dumpstack.o nmi.o
+obj-$(CONFIG_X86_FRED)	+= fred.o
 obj-$(CONFIG_MODIFY_LDT_SYSCALL)	+= ldt.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 05a5538052ad..5de68356fe62 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -57,6 +57,7 @@
 #include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
+#include <asm/fred.h>
 #include <asm/uv/uv.h>
 #include <asm/sigframe.h>
 #include <asm/traps.h>
@@ -2034,28 +2035,6 @@ static void wrmsrl_cstar(unsigned long val)
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
-
-#ifdef CONFIG_IA32_EMULATION
-	wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
-	/*
-	 * This only works on Intel CPUs.
-	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
-	 * This does not cause SYSENTER to jump to the wrong location, because
-	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
-	 */
-	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
-		    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
-#else
-	wrmsrl_cstar((unsigned long)ignore_sysret);
-	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
-#endif
-
 	/*
 	 * Flags to clear on syscall; clear as much as possible
 	 * to minimize user space-kernel interference.
@@ -2066,6 +2045,41 @@ void syscall_init(void)
 	       X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF|
 	       X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_RF|
 	       X86_EFLAGS_AC|X86_EFLAGS_ID);
+
+	/*
+	 * The default user and kernel segments
+	 */
+	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+
+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+		/* Both sysexit and sysret cause #UD when FRED is enabled */
+		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+		wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+	} else {
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+
+#ifdef CONFIG_IA32_EMULATION
+		wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
+		/*
+		 * This only works on Intel CPUs.
+		 * On AMD CPUs these MSRs are 32-bit, CPU truncates
+		 * MSR_IA32_SYSENTER_EIP.
+		 * This does not cause SYSENTER to jump to the wrong
+		 * location, because AMD doesn't allow SYSENTER in
+		 * long mode (either 32- or 64-bit).
+		 */
+		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+		wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+			    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
+		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+#else
+		wrmsrl_cstar((unsigned long)ignore_sysret);
+		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+		wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+#endif
+	}
 }
 
 #else	/* CONFIG_X86_64 */
@@ -2214,18 +2228,24 @@ void cpu_init_exception_handling(void)
 	/* paranoid_entry() gets the CPU number from the GDT */
 	setup_getcpu(cpu);
 
-	/* IST vectors need TSS to be set up. */
-	tss_setup_ist(tss);
+	/* Set up the TSS */
 	tss_setup_io_bitmap(tss);
 	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-
 	load_TR_desc();
 
 	/* GHCB needs to be setup to handle #VC. */
 	setup_ghcb();
 
-	/* Finally load the IDT */
-	load_current_idt();
+	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+		/* Set up FRED exception handling */
+		cpu_init_fred_exceptions();
+	} else {
+		/* IST vectors need TSS to be set up. */
+		tss_setup_ist(tss);
+
+		/* Finally load the IDT */
+		load_current_idt();
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
new file mode 100644
index 000000000000..827b58fd98d4
--- /dev/null
+++ b/arch/x86/kernel/fred.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <asm/desc.h>
+#include <asm/fred.h>
+#include <asm/tlbflush.h>	/* For cr4_set_bits() */
+#include <asm/traps.h>
+
+/*
+ * Initialize FRED on this CPU. This cannot be __init as it is called
+ * during CPU hotplug.
+ */
+void cpu_init_fred_exceptions(void)
+{
+	wrmsrl(MSR_IA32_FRED_CONFIG,
+	       FRED_CONFIG_ENTRYPOINT(fred_entrypoint_user) |
+	       FRED_CONFIG_REDZONE(8) | /* Reserve for CALL emulation */
+	       FRED_CONFIG_INT_STKLVL(0));
+
+	wrmsrl(MSR_IA32_FRED_STKLVLS,
+	       FRED_STKLVL(X86_TRAP_DB,  1) |
+	       FRED_STKLVL(X86_TRAP_NMI, 2) |
+	       FRED_STKLVL(X86_TRAP_MC,  2) |
+	       FRED_STKLVL(X86_TRAP_DF,  3));
+
+	/* The FRED equivalents to IST stacks... */
+	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
+	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
+	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
+
+	/* Not used with FRED */
+	wrmsrl(MSR_LSTAR, 0ULL);
+	wrmsrl(MSR_CSTAR, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS,  (u64)GDT_ENTRY_INVALID_SEG);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+
+	/* Enable FRED */
+	cr4_set_bits(X86_CR4_FRED);
+	idt_invalidate();	/* Any further IDT use is a bug */
+
+	/* Use int $0x80 for 32-bit system calls in FRED mode */
+	setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+	setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+}
+
+/*
+ * Initialize system vectors from a FRED perspective, so
+ * lapic_assign_system_vectors() can do its job.
+ */
+void __init fred_setup_apic(void)
+{
+	int i;
+
+	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+		set_bit(i, system_vectors);
+
+	/*
+	 * Don't set the non assigned system vectors in the
+	 * system_vectors bitmap. Otherwise they show up in
+	 * /proc/interrupts.
+	 */
+#ifdef CONFIG_SMP
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, system_vectors);
+#endif
+
+	for (i = 0; i < NR_SYSTEM_VECTORS; i++) {
+		if (get_system_interrupt_handler(i) != NULL) {
+			set_bit(i + FIRST_SYSTEM_VECTOR, system_vectors);
+		}
+	}
+
+	/* The rest are fair game... */
+}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index beb1bada1b0a..bb59661f0278 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -28,6 +28,7 @@
 #include <asm/setup.h>
 #include <asm/i8259.h>
 #include <asm/traps.h>
+#include <asm/fred.h>
 #include <asm/prom.h>
 
 /*
@@ -94,7 +95,11 @@ void __init native_init_IRQ(void)
 	/* Execute any quirks before the call gates are initialised: */
 	x86_init.irqs.pre_vector_init();
 
-	idt_setup_apic_and_irq_gates();
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		fred_setup_apic();
+	else
+		idt_setup_apic_and_irq_gates();
+
 	lapic_assign_system_vectors();
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b0ee83bab9e6..36a15df9b5e5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1518,12 +1518,21 @@ static system_interrupt_handler system_interrupt_handlers[NR_SYSTEM_VECTORS] = {
 
 #undef SYSV
 
+system_interrupt_handler get_system_interrupt_handler(unsigned int i)
+{
+	if (i >= NR_SYSTEM_VECTORS)
+		return NULL;
+
+	return system_interrupt_handlers[i];
+}
+
 void __init install_system_interrupt_handler(unsigned int n, const void *asm_addr, const void *addr)
 {
 	BUG_ON(n < FIRST_SYSTEM_VECTOR);
 
 	system_interrupt_handlers[n - FIRST_SYSTEM_VECTOR] = (system_interrupt_handler)addr;
-	alloc_intr_gate(n, asm_addr);
+	if (!cpu_feature_enabled(X86_FEATURE_FRED))
+		alloc_intr_gate(n, asm_addr);
 }
 
 #ifndef CONFIG_X86_LOCAL_APIC
@@ -1591,7 +1600,10 @@ void __init trap_init(void)
 
 	/* Initialize TSS before setting up traps so ISTs work */
 	cpu_init_exception_handling();
+
 	/* Setup traps as cpu_init() might #GP */
-	idt_setup_traps();
+	if (!cpu_feature_enabled(X86_FEATURE_FRED))
+		idt_setup_traps();
+
 	cpu_init();
 }
-- 
2.34.1
Re: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by Peter Zijlstra 2 years, 9 months ago
On Mon, Dec 19, 2022 at 10:36:48PM -0800, Xin Li wrote:

> +	wrmsrl(MSR_IA32_FRED_STKLVLS,
> +	       FRED_STKLVL(X86_TRAP_DB,  1) |
> +	       FRED_STKLVL(X86_TRAP_NMI, 2) |
> +	       FRED_STKLVL(X86_TRAP_MC,  2) |
> +	       FRED_STKLVL(X86_TRAP_DF,  3));
> +
> +	/* The FRED equivalents to IST stacks... */
> +	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
> +	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
> +	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));

Not quite.. IIRC fred only switches to another stack when the level of
the exception is higher. Specifically, if we trigger #DB while inside
#NMI we will not switch to the #DB stack (since 1 < 2).

Now, as mentioned elsewhere, it all nests a lot saner, but stack
exhaustion is still a thing, given the above, what happens when a #DB
hits an #NMI which tickles a #VE or something?

I don't think we've increased the exception stack size, but perhaps we
should for FRED?
Re: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by Andrew Cooper 2 years, 9 months ago
On 20/12/2022 9:45 am, Peter Zijlstra wrote:
> On Mon, Dec 19, 2022 at 10:36:48PM -0800, Xin Li wrote:
>
>> +	wrmsrl(MSR_IA32_FRED_STKLVLS,
>> +	       FRED_STKLVL(X86_TRAP_DB,  1) |
>> +	       FRED_STKLVL(X86_TRAP_NMI, 2) |
>> +	       FRED_STKLVL(X86_TRAP_MC,  2) |
>> +	       FRED_STKLVL(X86_TRAP_DF,  3));
>> +
>> +	/* The FRED equivalents to IST stacks... */
>> +	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
>> +	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
>> +	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
> Not quite.. IIRC fred only switches to another stack when the level of
> the exception is higher. Specifically, if we trigger #DB while inside
> #NMI we will not switch to the #DB stack (since 1 < 2).

There needs to be a new stack for #DF, and just possibly one for #MC. 
NMI and #DB do not need separate stacks under FRED.

> Now, as mentioned elsewhere, it all nests a lot saner, but stack
> exhaustion is still a thing, given the above, what happens when a #DB
> hits an #NMI which tickles a #VE or something?
>
> I don't think we've increased the exception stack size, but perhaps we
> should for FRED?

Not sure if it matters too much - it doesn't seem usefully different to
IDT delivery.  #DB shouldn't get too deep, and NMI gets properly
inhibited now.

~Andrew
Re: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by H. Peter Anvin 2 years, 8 months ago
On December 20, 2022 1:55:31 AM PST, Andrew Cooper <Andrew.Cooper3@citrix.com> wrote:
>On 20/12/2022 9:45 am, Peter Zijlstra wrote:
>> On Mon, Dec 19, 2022 at 10:36:48PM -0800, Xin Li wrote:
>>
>>> +	wrmsrl(MSR_IA32_FRED_STKLVLS,
>>> +	       FRED_STKLVL(X86_TRAP_DB,  1) |
>>> +	       FRED_STKLVL(X86_TRAP_NMI, 2) |
>>> +	       FRED_STKLVL(X86_TRAP_MC,  2) |
>>> +	       FRED_STKLVL(X86_TRAP_DF,  3));
>>> +
>>> +	/* The FRED equivalents to IST stacks... */
>>> +	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
>>> +	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
>>> +	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
>> Not quite.. IIRC fred only switches to another stack when the level of
>> the exception is higher. Specifically, if we trigger #DB while inside
>> #NMI we will not switch to the #DB stack (since 1 < 2).
>
>There needs to be a new stack for #DF, and just possibly one for #MC. 
>NMI and #DB do not need separate stacks under FRED.
>
>> Now, as mentioned elsewhere, it all nests a lot saner, but stack
>> exhaustion is still a thing, given the above, what happens when a #DB
>> hits an #NMI which tickles a #VE or something?
>>
>> I don't think we've increased the exception stack size, but perhaps we
>> should for FRED?
>
>Not sure if it matters too much - it doesn't seem usefully different to
>IDT delivery.  #DB shouldn't get too deep, and NMI gets properly
>inhibited now.
>
>~Andrew
>

I still don't think you want to take #DB or – especially – NMI on the task stack while in the kernel. In fact, the plan is to get rid of the software irqstack handling, too, but at tglx's request that will be a later changeset (correctness first, then optimization.)
Re: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by Peter Zijlstra 2 years, 9 months ago
On Tue, Dec 20, 2022 at 09:55:31AM +0000, Andrew Cooper wrote:
> On 20/12/2022 9:45 am, Peter Zijlstra wrote:
> > On Mon, Dec 19, 2022 at 10:36:48PM -0800, Xin Li wrote:
> >
> >> +	wrmsrl(MSR_IA32_FRED_STKLVLS,
> >> +	       FRED_STKLVL(X86_TRAP_DB,  1) |
> >> +	       FRED_STKLVL(X86_TRAP_NMI, 2) |
> >> +	       FRED_STKLVL(X86_TRAP_MC,  2) |
> >> +	       FRED_STKLVL(X86_TRAP_DF,  3));
> >> +
> >> +	/* The FRED equivalents to IST stacks... */
> >> +	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
> >> +	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
> >> +	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
> > Not quite.. IIRC fred only switches to another stack when the level of
> > the exception is higher. Specifically, if we trigger #DB while inside
> > #NMI we will not switch to the #DB stack (since 1 < 2).
> 
> There needs to be a new stack for #DF, and just possibly one for #MC. 
> NMI and #DB do not need separate stacks under FRED.

True, there is very little need to use additional stacks with FRED.

> > Now, as mentioned elsewhere, it all nests a lot saner, but stack
> > exhaustion is still a thing, given the above, what happens when a #DB
> > hits an #NMI which tickles a #VE or something?
> >
> > I don't think we've increased the exception stack size, but perhaps we
> > should for FRED?
> 
> Not sure if it matters too much - it doesn't seem usefully different to
> IDT delivery.  #DB shouldn't get too deep, and NMI gets properly
> inhibited now.

Both #DB and #NMI can end up in perf, and all that goes quite deep :/
RE: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by Li, Xin3 2 years, 9 months ago
> > >> +	wrmsrl(MSR_IA32_FRED_STKLVLS,
> > >> +	       FRED_STKLVL(X86_TRAP_DB,  1) |
> > >> +	       FRED_STKLVL(X86_TRAP_NMI, 2) |
> > >> +	       FRED_STKLVL(X86_TRAP_MC,  2) |
> > >> +	       FRED_STKLVL(X86_TRAP_DF,  3));
> > >> +
> > >> +	/* The FRED equivalents to IST stacks... */
> > >> +	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
> > >> +	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
> > >> +	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
> > > Not quite.. IIRC fred only switches to another stack when the level
> > > of the exception is higher. Specifically, if we trigger #DB while
> > > inside #NMI we will not switch to the #DB stack (since 1 < 2).

Yes, current stack level can only grow higher.

> >
> > There needs to be a new stack for #DF, and just possibly one for #MC.
> > NMI and #DB do not need separate stacks under FRED.
> 
> True, there is very little need to use additional stacks with FRED.

Pretty much.

#DB/NMI from a ring 3 context uses CSL 0, and their CSLs increase only
when happening from a ring 0 context.

> 
> > > Now, as mentioned elsewhere, it all nests a lot saner, but stack
> > > exhaustion is still a thing, given the above, what happens when a
> > > #DB hits an #NMI which tickles a #VE or something?
> > >
> > > I don't think we've increased the exception stack size, but perhaps
> > > we should for FRED?
> >
> > Not sure if it matters too much - it doesn't seem usefully different
> > to IDT delivery.  #DB shouldn't get too deep, and NMI gets properly
> > inhibited now.
> 
> Both #DB and #NMI can end up in perf, and all that goes quite deep :/

Can you please elaborate it a bit?
RE: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by H. Peter Anvin 2 years, 9 months ago
On December 20, 2022 9:28:52 PM PST, "Li, Xin3" <xin3.li@intel.com> wrote:
>> > >> +	wrmsrl(MSR_IA32_FRED_STKLVLS,
>> > >> +	       FRED_STKLVL(X86_TRAP_DB,  1) |
>> > >> +	       FRED_STKLVL(X86_TRAP_NMI, 2) |
>> > >> +	       FRED_STKLVL(X86_TRAP_MC,  2) |
>> > >> +	       FRED_STKLVL(X86_TRAP_DF,  3));
>> > >> +
>> > >> +	/* The FRED equivalents to IST stacks... */
>> > >> +	wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
>> > >> +	wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
>> > >> +	wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
>> > > Not quite.. IIRC fred only switches to another stack when the level
>> > > of the exception is higher. Specifically, if we trigger #DB while
>> > > inside #NMI we will not switch to the #DB stack (since 1 < 2).
>
>Yes, current stack level can only grow higher.
>
>> >
>> > There needs to be a new stack for #DF, and just possibly one for #MC.
>> > NMI and #DB do not need separate stacks under FRED.
>> 
>> True, there is very little need to use additional stacks with FRED.
>
>Pretty much.
>
>#DB/NMI from a ring 3 context uses CSL 0, and their CSLs increase only
>when happening from a ring 0 context.
>
>> 
>> > > Now, as mentioned elsewhere, it all nests a lot saner, but stack
>> > > exhaustion is still a thing, given the above, what happens when a
>> > > #DB hits an #NMI which tickles a #VE or something?
>> > >
>> > > I don't think we've increased the exception stack size, but perhaps
>> > > we should for FRED?
>> >
>> > Not sure if it matters too much - it doesn't seem usefully different
>> > to IDT delivery.  #DB shouldn't get too deep, and NMI gets properly
>> > inhibited now.
>> 
>> Both #DB and #NMI can end up in perf, and all that goes quite deep :/
>
>Can you please elaborate it a bit?
>

Right, this is one major reason for putting #DB/NMI in a separate stack level.
Re: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by Peter Zijlstra 2 years, 8 months ago
On Tue, Dec 20, 2022 at 09:44:33PM -0800, H. Peter Anvin wrote:

> Right, this is one major reason for putting #DB/NMI in a separate stack level.

But with the IST they each have their own stack, with FRED they'll share
this small stack.
Re: [RFC PATCH 22/32] x86/fred: FRED initialization code
Posted by H. Peter Anvin 2 years, 8 months ago
On December 22, 2022 5:09:28 AM PST, Peter Zijlstra <peterz@infradead.org> wrote:
>On Tue, Dec 20, 2022 at 09:44:33PM -0800, H. Peter Anvin wrote:
>
>> Right, this is one major reason for putting #DB/NMI in a separate stack level.
>
>But with the IST they each have their own stack, with FRED they'll share
>this small stack.

So make the stack larger if you think we need it. You still end up using less total stack space with FRED.