When FRED is active, hardware automatically swaps GS when changing privilege,
and the SWAPGS instruction is disallowed.
For native OSes using GS as the thread local pointer this is a massive
improvement on the pre-FRED architecture, but under Xen it makes handling PV
guests more complicated. Specifically, it means that GS_BASE and GS_SHADOW
are the opposite way around in FRED mode, as opposed to IDT mode.
This leads to the following changes:
* In load_segments(), we have to load both GSes. Account for this in the
SWAP() condition and avoid the path with SWAPGS.
* In save_segments(), we need to read GS_SHADOW rather than GS_BASE.
* In toggle_guest_mode(), we need to emulate SWAPGS.
* In {read,write}_msr() which access the live registers, GS_SHADOW and
GS_BASE need swapping.
* In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
take FRED into account when choosing which base to update.
SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED)
so under FRED needs to be just a MOV %gs. Simply skip the SWAPGSes.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
v4:
* Adjust GS accesses for emulated {RD,WR}MSR too.
---
xen/arch/x86/domain.c | 16 +++++++++++-----
xen/arch/x86/pv/domain.c | 22 ++++++++++++++++++++--
xen/arch/x86/pv/emul-priv-op.c | 24 +++++++++++++++---------
xen/arch/x86/pv/misc-hypercalls.c | 16 ++++++++++------
4 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e658c2d647b7..9c1f6ef76d52 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1791,9 +1791,10 @@ static void load_segments(struct vcpu *n)
/*
* Figure out which way around gsb/gss want to be. gsb needs to be
- * the active context, and gss needs to be the inactive context.
+ * the active context, and gss needs to be the inactive context,
+ * unless we're in FRED mode where they're reversed.
*/
- if ( !(n->arch.flags & TF_kernel_mode) )
+ if ( !(n->arch.flags & TF_kernel_mode) ^ opt_fred )
SWAP(gsb, gss);
if ( using_svm() && (n->arch.pv.fs | n->arch.pv.gs) <= 3 )
@@ -1814,7 +1815,9 @@ static void load_segments(struct vcpu *n)
if ( !fs_gs_done && !compat )
{
- if ( read_cr4() & X86_CR4_FSGSBASE )
+ unsigned long cr4 = read_cr4();
+
+ if ( !(cr4 & X86_CR4_FRED) && (cr4 & X86_CR4_FSGSBASE) )
{
__wrgsbase(gss);
__wrfsbase(n->arch.pv.fs_base);
@@ -1931,6 +1934,9 @@ static void load_segments(struct vcpu *n)
* Guests however cannot use SWAPGS, so there is no mechanism to modify the
* inactive GS base behind Xen's back. Therefore, Xen's copy of the inactive
* GS base is still accurate, and doesn't need reading back from hardware.
+ *
+ * Under FRED, hardware automatically swaps GS for us, so SHADOW_GS is the
+ * active GS from the guest's point of view.
*/
static void save_segments(struct vcpu *v)
{
@@ -1946,12 +1952,12 @@ static void save_segments(struct vcpu *v)
if ( read_cr4() & X86_CR4_FSGSBASE )
{
fs_base = __rdfsbase();
- gs_base = __rdgsbase();
+ gs_base = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : __rdgsbase();
}
else
{
fs_base = rdmsr(MSR_FS_BASE);
- gs_base = rdmsr(MSR_GS_BASE);
+ gs_base = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : rdmsr(MSR_GS_BASE);
}
v->arch.pv.fs_base = fs_base;
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index d16583a7454d..b85abb5ed903 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -14,9 +14,10 @@
#include <asm/cpufeature.h>
#include <asm/fsgsbase.h>
#include <asm/invpcid.h>
-#include <asm/spec_ctrl.h>
#include <asm/pv/domain.h>
#include <asm/shadow.h>
+#include <asm/spec_ctrl.h>
+#include <asm/traps.h>
#ifdef CONFIG_PV32
int8_t __read_mostly opt_pv32 = -1;
@@ -514,11 +515,28 @@ void toggle_guest_mode(struct vcpu *v)
* subsequent context switch won't bother re-reading it.
*/
gs_base = read_gs_base();
+
+ /*
+ * In FRED mode, not only are the two GSes the other way around (i.e. we
+ * want to read GS_SHADOW here), the SWAPGS instruction is disallowed so
+ * we have to emulate it.
+ */
+ if ( opt_fred )
+ {
+ unsigned long gs_shadow = rdmsr(MSR_SHADOW_GS_BASE);
+
+ wrmsrns(MSR_SHADOW_GS_BASE, gs_base);
+ write_gs_base(gs_shadow);
+
+ gs_base = gs_shadow;
+ }
+ else
+ asm volatile ( "swapgs" );
+
if ( v->arch.flags & TF_kernel_mode )
v->arch.pv.gs_base_kernel = gs_base;
else
v->arch.pv.gs_base_user = gs_base;
- asm volatile ( "swapgs" );
_toggle_guest_pt(v);
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 87d3bbcf901f..81153084129a 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -25,6 +25,7 @@
#include <asm/pv/traps.h>
#include <asm/shared.h>
#include <asm/stubs.h>
+#include <asm/traps.h>
#include <xsm/xsm.h>
@@ -926,7 +927,7 @@ static int cf_check read_msr(
case MSR_GS_BASE:
if ( !cp->extd.lm )
break;
- *val = read_gs_base();
+ *val = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : read_gs_base();
return X86EMUL_OKAY;
case MSR_SHADOW_GS_BASE:
@@ -1066,17 +1067,22 @@ static int cf_check write_msr(
if ( !cp->extd.lm || !is_canonical_address(val) )
break;
- if ( reg == MSR_FS_BASE )
- write_fs_base(val);
- else if ( reg == MSR_GS_BASE )
- write_gs_base(val);
- else if ( reg == MSR_SHADOW_GS_BASE )
+ switch ( reg )
{
- write_gs_shadow(val);
+ case MSR_FS_BASE:
+ write_fs_base(val);
+ break;
+
+ case MSR_SHADOW_GS_BASE:
curr->arch.pv.gs_base_user = val;
+ fallthrough;
+ case MSR_GS_BASE:
+ if ( (reg == MSR_GS_BASE) ^ opt_fred )
+ write_gs_base(val);
+ else
+ write_gs_shadow(val);
+ break;
}
- else
- ASSERT_UNREACHABLE();
return X86EMUL_OKAY;
case MSR_EFER:
diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
index 4c2abeb4add8..2c9cf50638db 100644
--- a/xen/arch/x86/pv/misc-hypercalls.c
+++ b/xen/arch/x86/pv/misc-hypercalls.c
@@ -11,6 +11,7 @@
#include <asm/debugreg.h>
#include <asm/fsgsbase.h>
+#include <asm/traps.h>
long do_set_debugreg(int reg, unsigned long value)
{
@@ -192,11 +193,12 @@ long do_set_segment_base(unsigned int which, unsigned long base)
case SEGBASE_GS_USER:
v->arch.pv.gs_base_user = base;
- write_gs_shadow(base);
- break;
-
+ fallthrough;
case SEGBASE_GS_KERNEL:
- write_gs_base(base);
+ if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
+ write_gs_base(base);
+ else
+ write_gs_shadow(base);
break;
}
break;
@@ -209,7 +211,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
* We wish to update the user %gs from the GDT/LDT. Currently, the
* guest kernel's GS_BASE is in context.
*/
- asm volatile ( "swapgs" );
+ if ( !opt_fred )
+ asm volatile ( "swapgs" );
if ( sel > 3 )
/* Fix up RPL for non-NUL selectors. */
@@ -247,7 +250,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
/* Update the cache of the inactive base, as read from the GDT/LDT. */
v->arch.pv.gs_base_user = read_gs_base();
- asm volatile ( safe_swapgs );
+ if ( !opt_fred )
+ asm volatile ( safe_swapgs );
break;
}
--
2.39.5
On 28.02.2026 00:16, Andrew Cooper wrote:
> When FRED is active, hardware automatically swaps GS when changing privilege,
> and the SWAPGS instruction is disallowed.
>
> For native OSes using GS as the thread local pointer this is a massive
> improvement on the pre-FRED architecture, but under Xen it makes handling PV
> guests more complicated. Specifically, it means that GS_BASE and GS_SHADOW
> are the opposite way around in FRED mode, as opposed to IDT mode.
>
> This leads to the following changes:
>
> * In load_segments(), we have to load both GSes. Account for this in the
> SWAP() condition and avoid the path with SWAPGS.
>
> * In save_segments(), we need to read GS_SHADOW rather than GS_BASE.
>
> * In toggle_guest_mode(), we need to emulate SWAPGS.
>
> * In {read,write}_msr() which access the live registers, GS_SHADOW and
> GS_BASE need swapping.
>
> * In do_set_segment_base(), merge the SEGBASE_GS_{USER,KERNEL} cases and
> take FRED into account when choosing which base to update.
>
> SEGBASE_GS_USER_SEL was already an LKGS invocation (decades before FRED)
> so under FRED needs to be just a MOV %gs. Simply skip the SWAPGSes.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
>
> v4:
> * Adjust GS accesses for emulated {RD,WR}MSR too.
I think this ...
> @@ -926,7 +927,7 @@ static int cf_check read_msr(
> case MSR_GS_BASE:
> if ( !cp->extd.lm )
> break;
> - *val = read_gs_base();
> + *val = opt_fred ? rdmsr(MSR_SHADOW_GS_BASE) : read_gs_base();
> return X86EMUL_OKAY;
... calls for a comment both here and ...
> @@ -1066,17 +1067,22 @@ static int cf_check write_msr(
> if ( !cp->extd.lm || !is_canonical_address(val) )
> break;
>
> - if ( reg == MSR_FS_BASE )
> - write_fs_base(val);
> - else if ( reg == MSR_GS_BASE )
> - write_gs_base(val);
> - else if ( reg == MSR_SHADOW_GS_BASE )
> + switch ( reg )
> {
> - write_gs_shadow(val);
> + case MSR_FS_BASE:
> + write_fs_base(val);
> + break;
> +
> + case MSR_SHADOW_GS_BASE:
> curr->arch.pv.gs_base_user = val;
> + fallthrough;
> + case MSR_GS_BASE:
> + if ( (reg == MSR_GS_BASE) ^ opt_fred )
> + write_gs_base(val);
> + else
> + write_gs_shadow(val);
> + break;
... here, much like you do about everywhere else.
> @@ -192,11 +193,12 @@ long do_set_segment_base(unsigned int which, unsigned long base)
>
> case SEGBASE_GS_USER:
> v->arch.pv.gs_base_user = base;
> - write_gs_shadow(base);
> - break;
> -
> + fallthrough;
> case SEGBASE_GS_KERNEL:
> - write_gs_base(base);
> + if ( (which == SEGBASE_GS_KERNEL) ^ opt_fred )
> + write_gs_base(base);
> + else
> + write_gs_shadow(base);
> break;
> }
> break;
Same perhaps here, and ...
> @@ -209,7 +211,8 @@ long do_set_segment_base(unsigned int which, unsigned long base)
> * We wish to update the user %gs from the GDT/LDT. Currently, the
> * guest kernel's GS_BASE is in context.
> */
> - asm volatile ( "swapgs" );
> + if ( !opt_fred )
> + asm volatile ( "swapgs" );
... the comment in context could also do with inserting "unless using FRED"
or some such.
Jan
© 2016 - 2026 Red Hat, Inc.