With the shadow stack and exception handling adjustments in place, we can now
activate FRED when appropriate. Note that opt_fred is still disabled by
default.
Introduce init_fred() to set up all the MSRs relevant for FRED. FRED uses
MSR_STAR (entries from Ring3 only), and MSR_FRED_SSP_SL0 aliases MSR_PL0_SSP
when CET-SS is active. Otherwise, they're all new MSRs.
With init_fred() existing, load_system_tables() and legacy_syscall_init()
should only be used when setting up IDT delivery. Insert ASSERT()s to this
effect, and adjust the various *_init() functions to make this property true.
Per the documentation, percpu_early_traps_init() is responsible for switching
off the boot GDT, which needs doing even in FRED mode.
Finally, set CR4.FRED in traps_init()/percpu_early_traps_init().
Xen can now boot in FRED mode up until starting a PV guest, where it faults
because IRET is not permitted to change privilege.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
v3:
* Fix poisoning of SL1 pointers.
* Adjust bsp_traps_reinit(). It probably doesn't matter.
v2:
* Explain the lack of BUG_ON()
* Poison SL1
In principle we can stop allocating the IDT and TSS for CPUs now, although I
want to get shutdown and kexec working before making this optimisation, in
case there's something I've overlooked.
---
xen/arch/x86/include/asm/current.h | 3 ++
xen/arch/x86/include/asm/traps.h | 2 +
xen/arch/x86/traps-setup.c | 83 ++++++++++++++++++++++++++++--
3 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h
index 62817e8476ec..6139980ab115 100644
--- a/xen/arch/x86/include/asm/current.h
+++ b/xen/arch/x86/include/asm/current.h
@@ -23,6 +23,9 @@
* 2 - NMI IST stack
* 1 - #MC IST stack
* 0 - IST Shadow Stacks (4x 1k, read-only)
+ *
+ * In FRED mode, #DB and NMI do not need special stacks, so their IST stacks
+ * are unused.
*/
/*
diff --git a/xen/arch/x86/include/asm/traps.h b/xen/arch/x86/include/asm/traps.h
index 73097e957d05..5d7504bc44d1 100644
--- a/xen/arch/x86/include/asm/traps.h
+++ b/xen/arch/x86/include/asm/traps.h
@@ -16,6 +16,8 @@ void traps_init(void);
void bsp_traps_reinit(void);
void percpu_traps_init(void);
+void nocall entry_FRED_R3(void);
+
extern unsigned int ler_msr;
const char *vector_name(unsigned int vec);
diff --git a/xen/arch/x86/traps-setup.c b/xen/arch/x86/traps-setup.c
index d77be8f83921..d937209ae606 100644
--- a/xen/arch/x86/traps-setup.c
+++ b/xen/arch/x86/traps-setup.c
@@ -59,6 +59,8 @@ static void load_system_tables(void)
.limit = sizeof(bsp_idt) - 1,
};
+ ASSERT(opt_fred == 0);
+
/*
* Set up the TSS. Warning - may be live, and the NMI/#MC must remain
* valid on every instruction boundary. (Note: these are all
@@ -191,6 +193,8 @@ static void legacy_syscall_init(void)
unsigned char *stub_page;
unsigned int offset;
+ ASSERT(opt_fred == 0);
+
/* No PV guests? No need to set up SYSCALL/SYSENTER infrastructure. */
if ( !IS_ENABLED(CONFIG_PV) )
return;
@@ -268,6 +272,52 @@ static void __init init_ler(void)
setup_force_cpu_cap(X86_FEATURE_XEN_LBR);
}
+/*
+ * Set up all MSRs relevant for FRED event delivery.
+ *
+ * Xen does not use any of the optional config in MSR_FRED_CONFIG, so all that
+ * is needed is the entrypoint.
+ *
+ * Because FRED always provides a good stack, NMI and #DB do not need any
+ * special treatment. Only #DF needs another stack level, and #MC for the
+ * off chance that Xen's main stack suffers an uncorrectable error.
+ *
+ * This leaves Stack Level 1 unused. It is pointed at #DB's stacks, but with
+ * the regular and shadow stacks swapped, as poison to guarantee that any use
+ * escalates to #DF.
+ *
+ * FRED reuses MSR_STAR to provide the segment selector values to load on
+ * entry from Ring3. Entry from Ring0 leaves %cs and %ss unmodified.
+ */
+static void init_fred(void)
+{
+ unsigned long stack_top = get_stack_bottom() & ~(STACK_SIZE - 1);
+
+ ASSERT(opt_fred == 1);
+
+ wrmsrns(MSR_STAR, XEN_MSR_STAR);
+ wrmsrns(MSR_FRED_CONFIG, (unsigned long)entry_FRED_R3);
+
+ /*
+ * MSR_FRED_RSP_* all come with a 64-byte alignment check, avoiding the
+ * need for an explicit BUG_ON().
+ */
+ wrmsrns(MSR_FRED_RSP_SL0, (unsigned long)(&get_cpu_info()->_fred + 1));
+ wrmsrns(MSR_FRED_RSP_SL1, stack_top + (IST_DB * IST_SHSTK_SIZE)); /* Poison */
+ wrmsrns(MSR_FRED_RSP_SL2, stack_top + (1 + IST_MCE) * PAGE_SIZE);
+ wrmsrns(MSR_FRED_RSP_SL3, stack_top + (1 + IST_DF) * PAGE_SIZE);
+ wrmsrns(MSR_FRED_STK_LVLS, ((2UL << (X86_EXC_MC * 2)) |
+ (3UL << (X86_EXC_DF * 2))));
+
+ if ( cpu_has_xen_shstk )
+ {
+ wrmsrns(MSR_FRED_SSP_SL0, stack_top + (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE);
+ wrmsrns(MSR_FRED_SSP_SL1, stack_top + (1 + IST_DB) * PAGE_SIZE); /* Poison */
+ wrmsrns(MSR_FRED_SSP_SL2, stack_top + (IST_MCE * IST_SHSTK_SIZE));
+ wrmsrns(MSR_FRED_SSP_SL3, stack_top + (IST_DF * IST_SHSTK_SIZE));
+ }
+}
+
/*
* Configure basic exception handling. This is prior to parsing the command
* line or configuring a console, and needs to be as simple as possible.
@@ -329,16 +379,20 @@ void __init traps_init(void)
printk(XENLOG_INFO "Disabling PV32 due to FRED\n");
}
#endif
+
+ init_fred();
+ set_in_cr4(X86_CR4_FRED);
+
setup_force_cpu_cap(X86_FEATURE_XEN_FRED);
printk("Using FRED event delivery\n");
}
else
{
+ load_system_tables();
+
printk("Using IDT event delivery\n");
}
- load_system_tables();
-
init_ler();
/* Cache {,compat_}gdt_l1e now that physically relocation is done. */
@@ -356,7 +410,11 @@ void __init traps_init(void)
*/
void __init bsp_traps_reinit(void)
{
- load_system_tables();
+ if ( opt_fred )
+ init_fred();
+ else
+ load_system_tables();
+
percpu_traps_init();
}
@@ -366,7 +424,8 @@ void __init bsp_traps_reinit(void)
*/
void percpu_traps_init(void)
{
- legacy_syscall_init();
+ if ( !opt_fred )
+ legacy_syscall_init();
if ( cpu_has_xen_lbr )
wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
@@ -381,7 +440,21 @@ void percpu_traps_init(void)
*/
void asmlinkage percpu_early_traps_init(void)
{
- load_system_tables();
+ if ( opt_fred )
+ {
+ const seg_desc_t *gdt = this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY;
+ const struct desc_ptr gdtr = {
+ .base = (unsigned long)gdt,
+ .limit = LAST_RESERVED_GDT_BYTE,
+ };
+
+ lgdt(&gdtr);
+
+ init_fred();
+ write_cr4(read_cr4() | X86_CR4_FRED);
+ }
+ else
+ load_system_tables();
}
static void __init __maybe_unused build_assertions(void)
--
2.39.5