[PATCH v4 1/2] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu

Alejandro Vallejo posted 2 patches 1 month, 2 weeks ago
[PATCH v4 1/2] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Alejandro Vallejo 1 month, 2 weeks ago
fpu_ctxt is either a pointer to the legacy x87/SSE save area (used by FXSAVE) or
a pointer aliased with xsave_area that points to its fpu_sse subfield. Such
subfield is at the base and is identical in size and layout to the legacy
buffer.

This patch merges the 2 pointers in the arch_vcpu into a single XSAVE area. In
the very rare case in which the host doesn't support XSAVE all we're doing is
wasting a tiny amount of memory and trading those for a lot more simplicity in
the code.

While at it, dedup the setup logic in vcpu_init_fpu() and integrate it
into xstate_alloc_save_area().

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
--
v4:
  * Amend commit message with extra note about deduping vcpu_init_fpu()
  * Remove comment on top of cpu_user_regs (though I really think there
    ought to be a credible one, in one form or another).
  * Remove cast from blk.c so FXSAVE_AREA is "void *"
  * Simplify comment in xstate_alloc_save_area() for the "host has no
    XSAVE" case.
---
 xen/arch/x86/domctl.c             |  6 ++++-
 xen/arch/x86/hvm/emulate.c        |  4 +--
 xen/arch/x86/hvm/hvm.c            |  6 ++++-
 xen/arch/x86/i387.c               | 45 +++++--------------------------
 xen/arch/x86/include/asm/domain.h |  6 -----
 xen/arch/x86/x86_emulate/blk.c    |  2 +-
 xen/arch/x86/xstate.c             | 12 ++++++---
 7 files changed, 28 insertions(+), 53 deletions(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 96d816cf1a7d..2d115395da90 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1379,7 +1379,11 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
 #define c(fld) (c.nat->fld)
 #endif
 
-    memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
+    BUILD_BUG_ON(sizeof(c.nat->fpu_ctxt) !=
+                 sizeof(v->arch.xsave_area->fpu_sse));
+    memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
+           sizeof(c.nat->fpu_ctxt));
+
     if ( is_pv_domain(d) )
         c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
     else
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index aa97ca1cbffd..f2bc6967dfcb 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2371,7 +2371,7 @@ static int cf_check hvmemul_get_fpu(
         alternative_vcall(hvm_funcs.fpu_dirty_intercept);
     else if ( type == X86EMUL_FPU_fpu )
     {
-        const fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
+        const fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
 
         /*
          * Latch current register state so that we can back out changes
@@ -2411,7 +2411,7 @@ static void cf_check hvmemul_put_fpu(
 
     if ( aux )
     {
-        fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
+        fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
         bool dval = aux->dval;
         int mode = hvm_guest_x86_mode(curr);
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7b2e1c9813d6..77fe282118f7 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -914,7 +914,11 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
 
     if ( v->fpu_initialised )
     {
-        memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs));
+        BUILD_BUG_ON(sizeof(ctxt.fpu_regs) !=
+                     sizeof(v->arch.xsave_area->fpu_sse));
+        memcpy(ctxt.fpu_regs, &v->arch.xsave_area->fpu_sse,
+               sizeof(ctxt.fpu_regs));
+
         ctxt.flags = XEN_X86_FPU_INITIALISED;
     }
 
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 134e0bece519..fbb9d3584a3d 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -39,7 +39,7 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
 /* Restore x87 FPU, MMX, SSE and SSE2 state */
 static inline void fpu_fxrstor(struct vcpu *v)
 {
-    const fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
+    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
 
     /*
      * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -151,7 +151,7 @@ static inline void fpu_xsave(struct vcpu *v)
 /* Save x87 FPU, MMX, SSE and SSE2 state */
 static inline void fpu_fxsave(struct vcpu *v)
 {
-    fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
+    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
     unsigned int fip_width = v->domain->arch.x87_fip_width;
 
     if ( fip_width != 4 )
@@ -212,7 +212,7 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
      * above) we also need to restore full state, to prevent subsequently
      * saving state belonging to another vCPU.
      */
-    if ( v->arch.fully_eager_fpu || (v->arch.xsave_area && xstate_all(v)) )
+    if ( v->arch.fully_eager_fpu || xstate_all(v) )
     {
         if ( cpu_has_xsave )
             fpu_xrstor(v, XSTATE_ALL);
@@ -299,44 +299,14 @@ void save_fpu_enable(void)
 /* Initialize FPU's context save area */
 int vcpu_init_fpu(struct vcpu *v)
 {
-    int rc;
-
     v->arch.fully_eager_fpu = opt_eager_fpu;
-
-    if ( (rc = xstate_alloc_save_area(v)) != 0 )
-        return rc;
-
-    if ( v->arch.xsave_area )
-        v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
-    else
-    {
-        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
-        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
-                                    __alignof(v->arch.xsave_area->fpu_sse));
-        if ( v->arch.fpu_ctxt )
-        {
-            fpusse_t *fpu_sse = v->arch.fpu_ctxt;
-
-            fpu_sse->fcw = FCW_DEFAULT;
-            fpu_sse->mxcsr = MXCSR_DEFAULT;
-        }
-        else
-            rc = -ENOMEM;
-    }
-
-    return rc;
+    return xstate_alloc_save_area(v);
 }
 
 void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
                     const void *data, unsigned int fcw_default)
 {
-    /*
-     * For the entire function please note that vcpu_init_fpu() (above) points
-     * v->arch.fpu_ctxt into v->arch.xsave_area when XSAVE is available. Hence
-     * accesses through both pointers alias one another, and the shorter form
-     * is used here.
-     */
-    fpusse_t *fpu_sse = v->arch.fpu_ctxt;
+    fpusse_t *fpu_sse = &v->arch.xsave_area->fpu_sse;
 
     ASSERT(!xsave_area || xsave_area == v->arch.xsave_area);
 
@@ -373,10 +343,7 @@ void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
 /* Free FPU's context save area */
 void vcpu_destroy_fpu(struct vcpu *v)
 {
-    if ( v->arch.xsave_area )
-        xstate_free_save_area(v);
-    else
-        xfree(v->arch.fpu_ctxt);
+    xstate_free_save_area(v);
 }
 
 /*
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 5219c4fb0f69..b79d6badd71c 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -591,12 +591,6 @@ struct pv_vcpu
 
 struct arch_vcpu
 {
-    /*
-     * guest context (mirroring struct vcpu_guest_context) common
-     * between pv and hvm guests
-     */
-
-    void              *fpu_ctxt;
     struct cpu_user_regs user_regs;
 
     /* Debug registers. */
diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
index e790f4f90056..08a05f8453f7 100644
--- a/xen/arch/x86/x86_emulate/blk.c
+++ b/xen/arch/x86/x86_emulate/blk.c
@@ -11,7 +11,7 @@
     !defined(X86EMUL_NO_SIMD)
 # ifdef __XEN__
 #  include <asm/xstate.h>
-#  define FXSAVE_AREA current->arch.fpu_ctxt
+#  define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)
 # else
 #  define FXSAVE_AREA get_fpu_save_area()
 # endif
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 57a0749f0d54..af9e345a7ace 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -508,9 +508,15 @@ int xstate_alloc_save_area(struct vcpu *v)
     unsigned int size;
 
     if ( !cpu_has_xsave )
-        return 0;
-
-    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
+    {
+        /*
+         * On non-XSAVE systems, we allocate an XSTATE buffer for simplicity.
+         * XSTATE is backwards compatible to FXSAVE, and only one cacheline
+         * larger.
+         */
+        size = XSTATE_AREA_MIN_SIZE;
+    }
+    else if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
     {
         size = xsave_cntxt_size;
         BUG_ON(size < XSTATE_AREA_MIN_SIZE);
-- 
2.46.0
Re: [PATCH v4 1/2] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Frediano Ziglio 1 month, 2 weeks ago
On Mon, Oct 7, 2024 at 4:52 PM Alejandro Vallejo
<alejandro.vallejo@cloud.com> wrote:
>
> fpu_ctxt is either a pointer to the legacy x87/SSE save area (used by FXSAVE) or
> a pointer aliased with xsave_area that points to its fpu_sse subfield. Such
> subfield is at the base and is identical in size and layout to the legacy
> buffer.
>
> This patch merges the 2 pointers in the arch_vcpu into a single XSAVE area. In
> the very rare case in which the host doesn't support XSAVE all we're doing is
> wasting a tiny amount of memory and trading those for a lot more simplicity in
> the code.
>
> While at it, dedup the setup logic in vcpu_init_fpu() and integrate it
> into xstate_alloc_save_area().
>
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> --
> v4:
>   * Amend commit message with extra note about deduping vcpu_init_fpu()
>   * Remove comment on top of cpu_user_regs (though I really think there
>     ought to be a credible one, in one form or another).
>   * Remove cast from blk.c so FXSAVE_AREA is "void *"
>   * Simplify comment in xstate_alloc_save_area() for the "host has no
>     XSAVE" case.
> ---
>  xen/arch/x86/domctl.c             |  6 ++++-
>  xen/arch/x86/hvm/emulate.c        |  4 +--
>  xen/arch/x86/hvm/hvm.c            |  6 ++++-
>  xen/arch/x86/i387.c               | 45 +++++--------------------------
>  xen/arch/x86/include/asm/domain.h |  6 -----
>  xen/arch/x86/x86_emulate/blk.c    |  2 +-
>  xen/arch/x86/xstate.c             | 12 ++++++---
>  7 files changed, 28 insertions(+), 53 deletions(-)
>
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 96d816cf1a7d..2d115395da90 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1379,7 +1379,11 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
>  #define c(fld) (c.nat->fld)
>  #endif
>
> -    memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
> +    BUILD_BUG_ON(sizeof(c.nat->fpu_ctxt) !=
> +                 sizeof(v->arch.xsave_area->fpu_sse));
> +    memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
> +           sizeof(c.nat->fpu_ctxt));
> +
>      if ( is_pv_domain(d) )
>          c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
>      else
> diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
> index aa97ca1cbffd..f2bc6967dfcb 100644
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -2371,7 +2371,7 @@ static int cf_check hvmemul_get_fpu(
>          alternative_vcall(hvm_funcs.fpu_dirty_intercept);
>      else if ( type == X86EMUL_FPU_fpu )
>      {
> -        const fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
> +        const fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
>
>          /*
>           * Latch current register state so that we can back out changes
> @@ -2411,7 +2411,7 @@ static void cf_check hvmemul_put_fpu(
>
>      if ( aux )
>      {
> -        fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
> +        fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
>          bool dval = aux->dval;
>          int mode = hvm_guest_x86_mode(curr);
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 7b2e1c9813d6..77fe282118f7 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -914,7 +914,11 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
>
>      if ( v->fpu_initialised )
>      {
> -        memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs));
> +        BUILD_BUG_ON(sizeof(ctxt.fpu_regs) !=
> +                     sizeof(v->arch.xsave_area->fpu_sse));
> +        memcpy(ctxt.fpu_regs, &v->arch.xsave_area->fpu_sse,
> +               sizeof(ctxt.fpu_regs));
> +
>          ctxt.flags = XEN_X86_FPU_INITIALISED;
>      }
>
> diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
> index 134e0bece519..fbb9d3584a3d 100644
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -39,7 +39,7 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
>  /* Restore x87 FPU, MMX, SSE and SSE2 state */
>  static inline void fpu_fxrstor(struct vcpu *v)
>  {
> -    const fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
> +    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>
>      /*
>       * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
> @@ -151,7 +151,7 @@ static inline void fpu_xsave(struct vcpu *v)
>  /* Save x87 FPU, MMX, SSE and SSE2 state */
>  static inline void fpu_fxsave(struct vcpu *v)
>  {
> -    fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
> +    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>      unsigned int fip_width = v->domain->arch.x87_fip_width;
>
>      if ( fip_width != 4 )
> @@ -212,7 +212,7 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
>       * above) we also need to restore full state, to prevent subsequently
>       * saving state belonging to another vCPU.
>       */
> -    if ( v->arch.fully_eager_fpu || (v->arch.xsave_area && xstate_all(v)) )
> +    if ( v->arch.fully_eager_fpu || xstate_all(v) )
>      {
>          if ( cpu_has_xsave )
>              fpu_xrstor(v, XSTATE_ALL);
> @@ -299,44 +299,14 @@ void save_fpu_enable(void)
>  /* Initialize FPU's context save area */
>  int vcpu_init_fpu(struct vcpu *v)
>  {
> -    int rc;
> -
>      v->arch.fully_eager_fpu = opt_eager_fpu;
> -
> -    if ( (rc = xstate_alloc_save_area(v)) != 0 )
> -        return rc;
> -
> -    if ( v->arch.xsave_area )
> -        v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
> -    else
> -    {
> -        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
> -        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
> -                                    __alignof(v->arch.xsave_area->fpu_sse));
> -        if ( v->arch.fpu_ctxt )
> -        {
> -            fpusse_t *fpu_sse = v->arch.fpu_ctxt;
> -
> -            fpu_sse->fcw = FCW_DEFAULT;
> -            fpu_sse->mxcsr = MXCSR_DEFAULT;
> -        }
> -        else
> -            rc = -ENOMEM;
> -    }
> -
> -    return rc;
> +    return xstate_alloc_save_area(v);
>  }
>
>  void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
>                      const void *data, unsigned int fcw_default)
>  {
> -    /*
> -     * For the entire function please note that vcpu_init_fpu() (above) points
> -     * v->arch.fpu_ctxt into v->arch.xsave_area when XSAVE is available. Hence
> -     * accesses through both pointers alias one another, and the shorter form
> -     * is used here.
> -     */
> -    fpusse_t *fpu_sse = v->arch.fpu_ctxt;
> +    fpusse_t *fpu_sse = &v->arch.xsave_area->fpu_sse;
>
>      ASSERT(!xsave_area || xsave_area == v->arch.xsave_area);
>
> @@ -373,10 +343,7 @@ void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
>  /* Free FPU's context save area */
>  void vcpu_destroy_fpu(struct vcpu *v)
>  {
> -    if ( v->arch.xsave_area )
> -        xstate_free_save_area(v);
> -    else
> -        xfree(v->arch.fpu_ctxt);
> +    xstate_free_save_area(v);
>  }
>
>  /*
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index 5219c4fb0f69..b79d6badd71c 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -591,12 +591,6 @@ struct pv_vcpu
>
>  struct arch_vcpu
>  {
> -    /*
> -     * guest context (mirroring struct vcpu_guest_context) common
> -     * between pv and hvm guests
> -     */
> -
> -    void              *fpu_ctxt;
>      struct cpu_user_regs user_regs;
>
>      /* Debug registers. */
> diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
> index e790f4f90056..08a05f8453f7 100644
> --- a/xen/arch/x86/x86_emulate/blk.c
> +++ b/xen/arch/x86/x86_emulate/blk.c
> @@ -11,7 +11,7 @@
>      !defined(X86EMUL_NO_SIMD)
>  # ifdef __XEN__
>  #  include <asm/xstate.h>
> -#  define FXSAVE_AREA current->arch.fpu_ctxt
> +#  define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)

Could you use "struct x86_fxsr *" instead of "void*" ?
Maybe adding another "struct x86_fxsr fxsr" inside the anonymous
fpu_sse union would help here.

>  # else
>  #  define FXSAVE_AREA get_fpu_save_area()
>  # endif
> diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
> index 57a0749f0d54..af9e345a7ace 100644
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -508,9 +508,15 @@ int xstate_alloc_save_area(struct vcpu *v)
>      unsigned int size;
>
>      if ( !cpu_has_xsave )
> -        return 0;
> -
> -    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
> +    {
> +        /*
> +         * On non-XSAVE systems, we allocate an XSTATE buffer for simplicity.
> +         * XSTATE is backwards compatible to FXSAVE, and only one cacheline
> +         * larger.
> +         */
> +        size = XSTATE_AREA_MIN_SIZE;
> +    }
> +    else if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
>      {
>          size = xsave_cntxt_size;
>          BUG_ON(size < XSTATE_AREA_MIN_SIZE);

Frediano
Re: [PATCH v4 1/2] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Alejandro Vallejo 1 month, 2 weeks ago
On Tue Oct 8, 2024 at 8:47 AM BST, Frediano Ziglio wrote:
> On Mon, Oct 7, 2024 at 4:52 PM Alejandro Vallejo
> <alejandro.vallejo@cloud.com> wrote:
> >
> > fpu_ctxt is either a pointer to the legacy x87/SSE save area (used by FXSAVE) or
> > a pointer aliased with xsave_area that points to its fpu_sse subfield. Such
> > subfield is at the base and is identical in size and layout to the legacy
> > buffer.
> >
> > This patch merges the 2 pointers in the arch_vcpu into a single XSAVE area. In
> > the very rare case in which the host doesn't support XSAVE all we're doing is
> > wasting a tiny amount of memory and trading those for a lot more simplicity in
> > the code.
> >
> > While at it, dedup the setup logic in vcpu_init_fpu() and integrate it
> > into xstate_alloc_save_area().
> >
> > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> > --
> > v4:
> >   * Amend commit message with extra note about deduping vcpu_init_fpu()
> >   * Remove comment on top of cpu_user_regs (though I really think there
> >     ought to be a credible one, in one form or another).
> >   * Remove cast from blk.c so FXSAVE_AREA is "void *"
> >   * Simplify comment in xstate_alloc_save_area() for the "host has no
> >     XSAVE" case.
> > ---
> >  xen/arch/x86/domctl.c             |  6 ++++-
> >  xen/arch/x86/hvm/emulate.c        |  4 +--
> >  xen/arch/x86/hvm/hvm.c            |  6 ++++-
> >  xen/arch/x86/i387.c               | 45 +++++--------------------------
> >  xen/arch/x86/include/asm/domain.h |  6 -----
> >  xen/arch/x86/x86_emulate/blk.c    |  2 +-
> >  xen/arch/x86/xstate.c             | 12 ++++++---
> >  7 files changed, 28 insertions(+), 53 deletions(-)
> >
> > diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
> > index e790f4f90056..08a05f8453f7 100644
> > --- a/xen/arch/x86/x86_emulate/blk.c
> > +++ b/xen/arch/x86/x86_emulate/blk.c
> > @@ -11,7 +11,7 @@
> >      !defined(X86EMUL_NO_SIMD)
> >  # ifdef __XEN__
> >  #  include <asm/xstate.h>
> > -#  define FXSAVE_AREA current->arch.fpu_ctxt
> > +#  define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)
>
> Could you use "struct x86_fxsr *" instead of "void*" ?
> Maybe adding another "struct x86_fxsr fxsr" inside the anonymous
> fpu_sse union would help here.
>

I did in v3, and Andrew suggested to keep the (void *). See:

  https://lore.kernel.org/xen-devel/2b42323a-961a-4dd8-8cde-f4b19eac0dc5@citrix.com/

Cheers,
Alejandro