When FRED is active, hardware automatically swaps GS when changing privilege,
and the SWAPGS instruction is disallowed.
For native OSes using GS as the thread local pointer this is a massive
improvement on the pre-FRED architecture, but under Xen it makes handling PV
guests more complicated. Specifically, it means that GS_BASE and GS_SHADOW
are the opposite way around in FRED mode, as opposed to IDT mode.
This leads to the following changes:
* In load_segments(), we have to load both GSes. Account for this in the
SWAP() condition and avoid the path with SWAPGS.
* In save_segments(), we need to read GS_SHADOW rather than GS_BASE.
* In toggle_guest_mode(), we need to emulate SWAPGS.
* In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
take FRED into account when choosing which base to update.
SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED),
so under FRED it needs to be just a MOV %gs. Simply skip the SWAPGS instructions.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
v3:
* Rename things
v2:
* New
I think this functions, but it's not ideal. The conditions are asymmetric and
awkward.
---
xen/arch/x86/domain.c | 22 +++++++++++++++++-----
xen/arch/x86/pv/domain.c | 22 ++++++++++++++++++++--
xen/arch/x86/pv/misc-hypercalls.c | 16 ++++++++++------
3 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 8089ff929bf7..ce08f91be3af 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1819,9 +1819,10 @@ static void load_segments(struct vcpu *n)
/*
* Figure out which way around gsb/gss want to be. gsb needs to be
- * the active context, and gss needs to be the inactive context.
+ * the active context, and gss needs to be the inactive context,
+ * unless we're in FRED mode where they're reversed.
*/
- if ( !(n->arch.flags & TF_kernel_mode) )
+ if ( !(n->arch.flags & TF_kernel_mode) ^ opt_fred )
SWAP(gsb, gss);
if ( using_svm() && (n->arch.pv.fs | n->arch.pv.gs) <= 3 )
@@ -1842,7 +1843,9 @@ static void load_segments(struct vcpu *n)
if ( !fs_gs_done && !compat )
{
- if ( read_cr4() & X86_CR4_FSGSBASE )
+ unsigned long cr4 = read_cr4();
+
+ if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
{
__wrgsbase(gss);
__wrfsbase(n->arch.pv.fs_base);
@@ -1959,6 +1962,9 @@ static void load_segments(struct vcpu *n)
* Guests however cannot use SWAPGS, so there is no mechanism to modify the
* inactive GS base behind Xen's back. Therefore, Xen's copy of the inactive
* GS base is still accurate, and doesn't need reading back from hardware.
+ *
+ * Under FRED, hardware automatically swaps GS for us, so SHADOW_GS is the
+ * active GS from the guest's point of view.
*/
static void save_segments(struct vcpu *v)
{
@@ -1974,12 +1980,18 @@ static void save_segments(struct vcpu *v)
if ( read_cr4() & X86_CR4_FSGSBASE )
{
fs_base = __rdfsbase();
- gs_base = __rdgsbase();
+ if ( opt_fred )
+ gs_base = rdmsr(MSR_SHADOW_GS_BASE);
+ else
+ gs_base = __rdgsbase();
}
else
{
fs_base = rdmsr(MSR_FS_BASE);
- gs_base = rdmsr(MSR_GS_BASE);
+ if ( opt_fred )
+ gs_base = rdmsr(MSR_SHADOW_GS_BASE);
+ else
+ gs_base = rdmsr(MSR_GS_BASE);
}
v->arch.pv.fs_base = fs_base;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 9c4785c187dd..369af444c29b 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -14,9 +14,10 @@
#include <asm/cpufeature.h>
#include <asm/fsgsbase.h>
#include <asm/invpcid.h>
-#include <asm/spec_ctrl.h>
#include <asm/pv/domain.h>
#include <asm/shadow.h>
+#include <asm/spec_ctrl.h>
+#include <asm/traps.h>
#ifdef CONFIG_PV32
int8_t __read_mostly opt_pv32 = -1;
@@ -480,11 +481,28 @@ void toggle_guest_mode(struct vcpu *v)
* subsequent context switch won't bother re-reading it.
*/
gs_base = read_gs_base();
+
+ /*
+ * In FRED mode, not only are the two GSes the other way around (i.e. we
+ * want to read GS_SHADOW here), the SWAPGS instruction is disallowed so
+ * we have to emulate it.
+ */
+ if ( opt_fred )
+ {
+ unsigned long gs_shadow = rdmsr(MSR_SHADOW_GS_BASE);
+
+ wrmsrns(MSR_SHADOW_GS_BASE, gs_base);
+ write_gs_base(gs_shadow);
+
+ gs_base = gs_shadow;
+ }
+ else
+ asm volatile ( "swapgs" );
+
if ( v->arch.flags & TF_kernel_mode )
v->arch.pv.gs_base_kernel = gs_base;
else
v->arch.pv.gs_base_user = gs_base;
- asm volatile ( "swapgs" );
_toggle_guest_pt(v);
diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
index 4c2abeb4add8..2c9cf50638db 100644
--- a/xen/arch/x86/pv/misc-hypercalls.c
+++ b/xen/arch/x86/pv/misc-hypercalls.c
@@ -11,6 +11,7 @@
#include <asm/debugreg.h>
#include <asm/fsgsbase.h>
+#include <asm/traps.h>
long do_set_debugreg(int reg, unsigned long value)
{
@@ -192,11 +193,12 @@ long do_set_segment_base(unsigned int which, unsigned long base)
case SEGBASE_GS_USER:
v->arch.pv.gs_base_user = base;
- write_gs_shadow(base);
- break;
-
+ fallthrough;
case SEGBASE_GS_KERNEL:
- write_gs_base(base);
+ if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
+ write_gs_base(base);
+ else
+ write_gs_shadow(base);
break;
}
break;
@@ -209,7 +211,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
* We wish to update the user %gs from the GDT/LDT. Currently, the
* guest kernel's GS_BASE is in context.
*/
- asm volatile ( "swapgs" );
+ if ( !opt_fred )
+ asm volatile ( "swapgs" );
if ( sel > 3 )
/* Fix up RPL for non-NUL selectors. */
@@ -247,7 +250,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
/* Update the cache of the inactive base, as read from the GDT/LDT. */
v->arch.pv.gs_base_user = read_gs_base();
- asm volatile ( safe_swapgs );
+ if ( !opt_fred )
+ asm volatile ( safe_swapgs );
break;
}
--
2.39.5