fpu_ctxt is either a pointer to the legacy x87/SSE save area (used by
FXSAVE) or a pointer aliased with xsave_area that points to its fpu_sse
subfield. That subfield sits at the base of the XSAVE area and is identical
in size and layout to the legacy buffer.
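
A minimal sketch (not part of the patch) of the layout invariant being
relied on, expressed as compile-time checks. The field and struct names
follow the existing Xen headers, but where such checks would live is left
open; they are illustrative only:

    /*
     * fpu_sse is the first member of the XSAVE area and spans the
     * 512-byte legacy FXSAVE image, so a pointer to the XSAVE area
     * can stand in for the old fpu_ctxt pointer.
     */
    BUILD_BUG_ON(offsetof(struct xsave_struct, fpu_sse) != 0);
    BUILD_BUG_ON(sizeof(((struct xsave_struct *)NULL)->fpu_sse) != 512);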

This patch merges the two pointers in struct arch_vcpu into a single XSAVE
area. In the very rare case where the host doesn't support XSAVE, this
merely trades a small amount of wasted memory for considerably simpler
code.
While at it, deduplicate the setup logic in vcpu_init_fpu() and integrate
it into xstate_alloc_save_area().
Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
--
v4:
* Amend commit message with extra note about deduping vcpu_init_fpu()
* Remove comment on top of cpu_user_regs (though I really think there
ought to be a credible one, in one form or another).
* Remove cast from blk.c so FXSAVE_AREA is "void *"
* Simplify comment in xstate_alloc_save_area() for the "host has no
XSAVE" case.
---
xen/arch/x86/domctl.c | 6 ++++-
xen/arch/x86/hvm/emulate.c | 4 +--
xen/arch/x86/hvm/hvm.c | 6 ++++-
xen/arch/x86/i387.c | 45 +++++--------------------------
xen/arch/x86/include/asm/domain.h | 6 -----
xen/arch/x86/x86_emulate/blk.c | 2 +-
xen/arch/x86/xstate.c | 12 ++++++---
7 files changed, 28 insertions(+), 53 deletions(-)
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 96d816cf1a7d..2d115395da90 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1379,7 +1379,11 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
#define c(fld) (c.nat->fld)
#endif
- memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
+ BUILD_BUG_ON(sizeof(c.nat->fpu_ctxt) !=
+ sizeof(v->arch.xsave_area->fpu_sse));
+ memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
+ sizeof(c.nat->fpu_ctxt));
+
if ( is_pv_domain(d) )
c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
else
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index aa97ca1cbffd..f2bc6967dfcb 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2371,7 +2371,7 @@ static int cf_check hvmemul_get_fpu(
alternative_vcall(hvm_funcs.fpu_dirty_intercept);
else if ( type == X86EMUL_FPU_fpu )
{
- const fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
+ const fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
/*
* Latch current register state so that we can back out changes
@@ -2411,7 +2411,7 @@ static void cf_check hvmemul_put_fpu(
if ( aux )
{
- fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
+ fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
bool dval = aux->dval;
int mode = hvm_guest_x86_mode(curr);
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7b2e1c9813d6..77fe282118f7 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -914,7 +914,11 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
if ( v->fpu_initialised )
{
- memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs));
+ BUILD_BUG_ON(sizeof(ctxt.fpu_regs) !=
+ sizeof(v->arch.xsave_area->fpu_sse));
+ memcpy(ctxt.fpu_regs, &v->arch.xsave_area->fpu_sse,
+ sizeof(ctxt.fpu_regs));
+
ctxt.flags = XEN_X86_FPU_INITIALISED;
}
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 134e0bece519..fbb9d3584a3d 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -39,7 +39,7 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
/* Restore x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxrstor(struct vcpu *v)
{
- const fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
+ const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
/*
* Some CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -151,7 +151,7 @@ static inline void fpu_xsave(struct vcpu *v)
/* Save x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxsave(struct vcpu *v)
{
- fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
+ fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
unsigned int fip_width = v->domain->arch.x87_fip_width;
if ( fip_width != 4 )
@@ -212,7 +212,7 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
* above) we also need to restore full state, to prevent subsequently
* saving state belonging to another vCPU.
*/
- if ( v->arch.fully_eager_fpu || (v->arch.xsave_area && xstate_all(v)) )
+ if ( v->arch.fully_eager_fpu || xstate_all(v) )
{
if ( cpu_has_xsave )
fpu_xrstor(v, XSTATE_ALL);
@@ -299,44 +299,14 @@ void save_fpu_enable(void)
/* Initialize FPU's context save area */
int vcpu_init_fpu(struct vcpu *v)
{
- int rc;
-
v->arch.fully_eager_fpu = opt_eager_fpu;
-
- if ( (rc = xstate_alloc_save_area(v)) != 0 )
- return rc;
-
- if ( v->arch.xsave_area )
- v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
- else
- {
- BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
- v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
- __alignof(v->arch.xsave_area->fpu_sse));
- if ( v->arch.fpu_ctxt )
- {
- fpusse_t *fpu_sse = v->arch.fpu_ctxt;
-
- fpu_sse->fcw = FCW_DEFAULT;
- fpu_sse->mxcsr = MXCSR_DEFAULT;
- }
- else
- rc = -ENOMEM;
- }
-
- return rc;
+ return xstate_alloc_save_area(v);
}
void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
const void *data, unsigned int fcw_default)
{
- /*
- * For the entire function please note that vcpu_init_fpu() (above) points
- * v->arch.fpu_ctxt into v->arch.xsave_area when XSAVE is available. Hence
- * accesses through both pointers alias one another, and the shorter form
- * is used here.
- */
- fpusse_t *fpu_sse = v->arch.fpu_ctxt;
+ fpusse_t *fpu_sse = &v->arch.xsave_area->fpu_sse;
ASSERT(!xsave_area || xsave_area == v->arch.xsave_area);
@@ -373,10 +343,7 @@ void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
/* Free FPU's context save area */
void vcpu_destroy_fpu(struct vcpu *v)
{
- if ( v->arch.xsave_area )
- xstate_free_save_area(v);
- else
- xfree(v->arch.fpu_ctxt);
+ xstate_free_save_area(v);
}
/*
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 5219c4fb0f69..b79d6badd71c 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -591,12 +591,6 @@ struct pv_vcpu
struct arch_vcpu
{
- /*
- * guest context (mirroring struct vcpu_guest_context) common
- * between pv and hvm guests
- */
-
- void *fpu_ctxt;
struct cpu_user_regs user_regs;
/* Debug registers. */
diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
index e790f4f90056..08a05f8453f7 100644
--- a/xen/arch/x86/x86_emulate/blk.c
+++ b/xen/arch/x86/x86_emulate/blk.c
@@ -11,7 +11,7 @@
!defined(X86EMUL_NO_SIMD)
# ifdef __XEN__
# include <asm/xstate.h>
-# define FXSAVE_AREA current->arch.fpu_ctxt
+# define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)
# else
# define FXSAVE_AREA get_fpu_save_area()
# endif
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 57a0749f0d54..af9e345a7ace 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -508,9 +508,15 @@ int xstate_alloc_save_area(struct vcpu *v)
unsigned int size;
if ( !cpu_has_xsave )
- return 0;
-
- if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
+ {
+ /*
+ * On non-XSAVE systems, we allocate an XSTATE buffer for simplicity.
+ * XSTATE is backwards compatible to FXSAVE, and only one cacheline
+ * larger.
+ */
+ size = XSTATE_AREA_MIN_SIZE;
+ }
+ else if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
{
size = xsave_cntxt_size;
BUG_ON(size < XSTATE_AREA_MIN_SIZE);
--
2.46.0
On Mon, Oct 7, 2024 at 4:52 PM Alejandro Vallejo <alejandro.vallejo@cloud.com> wrote:
> [...]
>
> diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
> index e790f4f90056..08a05f8453f7 100644
> --- a/xen/arch/x86/x86_emulate/blk.c
> +++ b/xen/arch/x86/x86_emulate/blk.c
> @@ -11,7 +11,7 @@
>     !defined(X86EMUL_NO_SIMD)
> # ifdef __XEN__
> # include <asm/xstate.h>
> -# define FXSAVE_AREA current->arch.fpu_ctxt
> +# define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)

Could you use "struct x86_fxsr *" instead of "void *"?
Maybe adding another "struct x86_fxsr fxsr" inside the anonymous
fpu_sse union would help here.

> # else
> # define FXSAVE_AREA get_fpu_save_area()
> # endif

Frediano
On Tue Oct 8, 2024 at 8:47 AM BST, Frediano Ziglio wrote:
> On Mon, Oct 7, 2024 at 4:52 PM Alejandro Vallejo
> <alejandro.vallejo@cloud.com> wrote:
> > [...]
> > -# define FXSAVE_AREA current->arch.fpu_ctxt
> > +# define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)
>
> Could you use "struct x86_fxsr *" instead of "void *"?
> Maybe adding another "struct x86_fxsr fxsr" inside the anonymous
> fpu_sse union would help here.

I did in v3, and Andrew suggested to keep the (void *). See:

https://lore.kernel.org/xen-devel/2b42323a-961a-4dd8-8cde-f4b19eac0dc5@citrix.com/

Cheers,
Alejandro