With lazy FPU removed, fpu_initialised and fpu_dirtied are always set to
true in vcpu_restore_fpu() so remove them and adjust the code
accordingly.
No functional change intended.
Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
---
xen/arch/x86/domctl.c | 3 +--
xen/arch/x86/hvm/emulate.c | 6 +-----
xen/arch/x86/hvm/hvm.c | 15 ++++++---------
xen/arch/x86/hvm/vlapic.c | 3 ---
xen/arch/x86/i387.c | 31 ++----------------------------
xen/arch/x86/include/asm/hvm/hvm.h | 1 -
xen/arch/x86/include/asm/xstate.h | 11 -----------
xen/arch/x86/xstate.c | 21 +++++---------------
xen/common/domain.c | 2 --
xen/include/xen/sched.h | 4 ----
10 files changed, 15 insertions(+), 82 deletions(-)
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 942f41c584d4..d9b08182ac1d 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1409,8 +1409,7 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
else
c(flags = 0);
- if ( v->fpu_initialised )
- c(flags |= VGCF_i387_valid);
+ c(flags |= VGCF_i387_valid);
if ( !(v->pause_flags & VPF_down) )
c(flags |= VGCF_online);
if ( !compat )
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 2daea084f15c..48c7320360c7 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2517,9 +2517,7 @@ static int cf_check hvmemul_get_fpu(
{
struct vcpu *curr = current;
- if ( !curr->fpu_dirtied )
- alternative_vcall(hvm_funcs.fpu_dirty_intercept);
- else if ( type == X86EMUL_FPU_fpu )
+ if ( type == X86EMUL_FPU_fpu )
{
/* Has a fastpath for `current`, so there's no actual map */
const struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(curr);
@@ -2537,8 +2535,6 @@ static int cf_check hvmemul_get_fpu(
* masking of all exceptions by FNSTENV.)
*/
save_fpu_enable();
- curr->fpu_initialised = true;
- curr->fpu_dirtied = true;
if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
{
uint16_t fcw;
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4d37a93c57a5..d4ba82845146 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -866,6 +866,7 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
.dr7 = v->arch.dr7,
.msr_efer = v->arch.hvm.guest_efer,
};
+ const struct xsave_struct *xsave_area;
/*
* We don't need to save state for a vcpu that is down; the restore
@@ -933,15 +934,11 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
ctxt.ldtr_base = seg.base;
ctxt.ldtr_arbytes = seg.attr;
- if ( v->fpu_initialised )
- {
- const struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(v);
-
- BUILD_BUG_ON(sizeof(ctxt.fpu_regs) != sizeof(xsave_area->fpu_sse));
- memcpy(ctxt.fpu_regs, &xsave_area->fpu_sse, sizeof(ctxt.fpu_regs));
- VCPU_UNMAP_XSAVE_AREA(v, xsave_area);
- ctxt.flags = XEN_X86_FPU_INITIALISED;
- }
+ xsave_area = VCPU_MAP_XSAVE_AREA(v);
+ BUILD_BUG_ON(sizeof(ctxt.fpu_regs) != sizeof(xsave_area->fpu_sse));
+ memcpy(ctxt.fpu_regs, &xsave_area->fpu_sse, sizeof(ctxt.fpu_regs));
+ VCPU_UNMAP_XSAVE_AREA(v, xsave_area);
+ ctxt.flags = XEN_X86_FPU_INITIALISED;
return hvm_save_entry(CPU, v->vcpu_id, h, &ctxt);
}
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index 79697487ba90..885f5d304b2f 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -276,7 +276,6 @@ static void vlapic_init_sipi_one(struct vcpu *target, uint32_t icr)
switch ( icr & APIC_DM_MASK )
{
case APIC_DM_INIT: {
- bool fpu_initialised;
int rc;
/* No work on INIT de-assert for P4-type APIC. */
@@ -289,10 +288,8 @@ static void vlapic_init_sipi_one(struct vcpu *target, uint32_t icr)
hvm_vcpu_down(target);
domain_lock(target->domain);
/* Reset necessary VCPU state. This does not include FPU state. */
- fpu_initialised = target->fpu_initialised;
rc = vcpu_reset(target);
ASSERT(!rc);
- target->fpu_initialised = fpu_initialised;
vlapic_do_init(vcpu_vlapic(target));
domain_unlock(target->domain);
break;
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 88018397b1ad..5e893a2aab94 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -110,22 +110,7 @@ static inline void fpu_fxrstor(struct vcpu *v)
static inline uint64_t vcpu_xsave_mask(const struct vcpu *v)
{
- if ( v->fpu_dirtied )
- return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY;
-
- ASSERT(v->arch.nonlazy_xstate_used);
-
- /*
- * The offsets of components which live in the extended region of
- * compact xsave area are not fixed. Xsave area may be overwritten
- * when a xsave with v->fpu_dirtied set is followed by one with
- * v->fpu_dirtied clear.
- * In such case, if hypervisor uses compact xsave area and guest
- * has ever used lazy states (checking xcr0_accum excluding
- * XSTATE_FP_SSE), vcpu_xsave_mask will return XSTATE_ALL. Otherwise
- * return XSTATE_NONLAZY.
- */
- return xstate_all(v) ? XSTATE_ALL : XSTATE_NONLAZY;
+ return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY;
}
/* Save x87 extended state */
@@ -201,19 +186,11 @@ void vcpu_restore_fpu(struct vcpu *v)
/* Avoid recursion */
clts();
- /*
- * When saving full state even with !v->fpu_dirtied (see vcpu_xsave_mask()
- * above) we also need to restore full state, to prevent subsequently
- * saving state belonging to another vCPU.
- */
if ( cpu_has_xsave )
fpu_xrstor(v, XSTATE_ALL);
else
fpu_fxrstor(v);
- v->fpu_initialised = 1;
- v->fpu_dirtied = 1;
-
/* Xen doesn't need TS set, but the guest might. */
if ( is_pv_vcpu(v) && (v->arch.pv.ctrlreg[0] & X86_CR0_TS) )
stts();
@@ -225,7 +202,7 @@ void vcpu_restore_fpu(struct vcpu *v)
*/
static bool _vcpu_save_fpu(struct vcpu *v)
{
- if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used )
+ if ( !v->arch.nonlazy_xstate_used )
return false;
ASSERT(!is_idle_vcpu(v));
@@ -238,8 +215,6 @@ static bool _vcpu_save_fpu(struct vcpu *v)
else
fpu_fxsave(v);
- v->fpu_dirtied = 0;
-
return true;
}
@@ -265,7 +240,6 @@ void vcpu_reset_fpu(struct vcpu *v)
{
struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(v);
- v->fpu_initialised = false;
*xsave_area = (struct xsave_struct) {
.xsave_hdr.xstate_bv = X86_XCR0_X87,
};
@@ -282,7 +256,6 @@ void vcpu_setup_fpu(struct vcpu *v, const void *data)
{
struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(v);
- v->fpu_initialised = true;
*xsave_area = (struct xsave_struct) {
.fpu_sse = *(const fpusse_t*)data,
.xsave_hdr.xstate_bv = XSTATE_FP_SSE,
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index a9425c8cffe8..846b91ebefcc 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -184,7 +184,6 @@ struct hvm_function_table {
/* Instruction intercepts: non-void return values are X86EMUL codes. */
void (*wbinvd_intercept)(void);
- void (*fpu_dirty_intercept)(void);
int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
void (*handle_cd)(struct vcpu *v, unsigned long value);
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index e3b9745543d7..ca38c43ec1c3 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -132,17 +132,6 @@ xsave_area_compressed(const struct xsave_struct *xsave_area)
return xsave_area->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED;
}
-static inline bool xstate_all(const struct vcpu *v)
-{
- /*
- * XSTATE_FP_SSE may be excluded, because the offsets of XSTATE_FP_SSE
- * (in the legacy region of xsave area) are fixed, so saving
- * XSTATE_FP_SSE will not cause overwriting problem with XSAVES/XSAVEC.
- */
- return xsave_area_compressed(v->arch.xsave_area) &&
- (v->arch.xcr0_accum & XSTATE_LAZY & ~XSTATE_FP_SSE);
-}
-
/*
* Fetch a pointer to a vCPU's XSAVE area
*
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index e990abc9d18c..11d390cac985 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -994,28 +994,17 @@ int handle_xsetbv(u32 index, u64 new_bv)
if ( new_bv & XSTATE_NONLAZY )
curr->arch.nonlazy_xstate_used = 1;
- mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY;
+ mask &= ~XSTATE_FP_SSE;
if ( mask )
{
unsigned long cr0 = read_cr0();
+ /* Has a fastpath for `current`, so there's no actual map */
+ struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(curr);
clts();
- if ( curr->fpu_dirtied )
- {
- /* Has a fastpath for `current`, so there's no actual map */
- struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(curr);
- asm ( "stmxcsr %0" : "=m" (xsave_area->fpu_sse.mxcsr) );
- VCPU_UNMAP_XSAVE_AREA(curr, xsave_area);
- }
- else if ( xstate_all(curr) )
- {
- /* See the comment in i387.c:vcpu_restore_fpu_eager(). */
- mask |= XSTATE_LAZY;
- curr->fpu_initialised = 1;
- curr->fpu_dirtied = 1;
- cr0 &= ~X86_CR0_TS;
- }
+ asm ( "stmxcsr %0" : "=m" (xsave_area->fpu_sse.mxcsr) );
+ VCPU_UNMAP_XSAVE_AREA(curr, xsave_area);
xrstor(curr, mask);
if ( cr0 & X86_CR0_TS )
write_cr0(cr0);
diff --git a/xen/common/domain.c b/xen/common/domain.c
index ab910fcf9306..30cfea30459a 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1824,8 +1824,6 @@ int vcpu_reset(struct vcpu *v)
clear_bit(v->vcpu_id, d->poll_mask);
v->poll_evtchn = 0;
- v->fpu_initialised = 0;
- v->fpu_dirtied = 0;
v->is_initialised = 0;
if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 40a35fc15c65..212c7d765c3e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -212,10 +212,6 @@ struct vcpu
struct guest_area runstate_guest_area;
unsigned int new_state;
- /* Has the FPU been initialised? */
- bool fpu_initialised;
- /* Has the FPU been used since it was last saved? */
- bool fpu_dirtied;
/* Initialization completed for this VCPU? */
bool is_initialised;
/* Currently running on a CPU? */
--
2.53.0
On 19/03/2026 1:29 pm, Ross Lagerwall wrote:
> With lazy FPU removed, fpu_initialised and fpu_dirtied are always set to
> true in vcpu_restore_fpu() so remove them and adjust the code
> accordingly.
>
> No functional change intended.
>
> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
> ---
> xen/arch/x86/domctl.c | 3 +--
> xen/arch/x86/hvm/emulate.c | 6 +-----
> xen/arch/x86/hvm/hvm.c | 15 ++++++---------
> xen/arch/x86/hvm/vlapic.c | 3 ---
> xen/arch/x86/i387.c | 31 ++----------------------------
> xen/arch/x86/include/asm/hvm/hvm.h | 1 -
> xen/arch/x86/include/asm/xstate.h | 11 -----------
> xen/arch/x86/xstate.c | 21 +++++---------------
> xen/common/domain.c | 2 --
> xen/include/xen/sched.h | 4 ----
> 10 files changed, 15 insertions(+), 82 deletions(-)
>
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 942f41c584d4..d9b08182ac1d 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1409,8 +1409,7 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
> c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
> else
> c(flags = 0);
> - if ( v->fpu_initialised )
> - c(flags |= VGCF_i387_valid);
> + c(flags |= VGCF_i387_valid);
This is an API/ABI change. Previously, creating a vCPU and instantly
getting state will hand back a record with !VGCF_i387_valid.
It's fine - I've done a bunch of API/ABI changes in the FRED work, but
it at least needs calling out in the commit message.
We have had a lot of cases where calling arch_{get,set}_info_guest()
without an intervening __context_switch() would lead to subtle
differences. Generally I've been moving in the direction of
architectural behaviour and not worrying about API/ABI changes which
would occur naturally from running the vCPU.
That said, I think d1895441b3bad (2007) was the removal of the final
consumer of VGCF_i387_valid in Xen. We don't even have a conditional
reset of state based on its absence, and of course it's documented in
the usual place, so it's really unclear what the purpose of this flag
ever was. [edit, see below]
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 79697487ba90..885f5d304b2f 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -289,10 +288,8 @@ static void vlapic_init_sipi_one(struct vcpu *target, uint32_t icr)
> hvm_vcpu_down(target);
> domain_lock(target->domain);
> /* Reset necessary VCPU state. This does not include FPU state. */
> - fpu_initialised = target->fpu_initialised;
> rc = vcpu_reset(target);
> ASSERT(!rc);
> - target->fpu_initialised = fpu_initialised;
> vlapic_do_init(vcpu_vlapic(target));
This whole code block irks me. x86 has two architectural events, #RESET
and #INIT which are well defined, and this is using the former to mean
the latter.
We are going to need to fix this, and it's going to be some fairly
invasive renaming, but the result will be better. [edit, see below]
> diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
> index 88018397b1ad..5e893a2aab94 100644
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -265,7 +240,6 @@ void vcpu_reset_fpu(struct vcpu *v)
> {
> struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(v);
>
> - v->fpu_initialised = false;
> *xsave_area = (struct xsave_struct) {
> .xsave_hdr.xstate_bv = X86_XCR0_X87,
> };
> @@ -282,7 +256,6 @@ void vcpu_setup_fpu(struct vcpu *v, const void *data)
> {
> struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(v);
>
> - v->fpu_initialised = true;
> *xsave_area = (struct xsave_struct) {
> .fpu_sse = *(const fpusse_t*)data,
> .xsave_hdr.xstate_bv = XSTATE_FP_SSE,
Hmm, looking at the callers of these two, we find that Xen has
VGCF_I387_VALID too, and does have a consumer of this flag. (This needs
deleting for sanity reasons.)
It also means that this patch does introduce a bug here. Calling
arch_get_info_guest() prior to scheduling will hand back a block of all
0's, claiming it to be valid.
We need to arrange for vcpu_reset_fpu() to be called during vCPU
construction (i.e. so we've never got a bad FPU state), before this
patch will be safe.
~Andrew
© 2016 - 2026 Red Hat, Inc.