[PATCH for-4.20 3/4] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu

Alejandro Vallejo posted 4 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH for-4.20 3/4] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Alejandro Vallejo 2 months, 1 week ago
fpu_ctxt is either a pointer to the legacy x87/SSE save area (used by FXSAVE) or
a pointer aliased with xsave_area that points to its fpu_sse subfield. That
subfield sits at the base of the XSAVE area and is identical in size and layout
to the legacy buffer.

This patch merges the 2 pointers in the arch_vcpu into a single XSAVE area. In
the very rare case in which the host doesn't support XSAVE all we're doing is
wasting a tiny amount of memory in exchange for a lot more simplicity in the
code.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
 xen/arch/x86/domctl.c             |  3 ++-
 xen/arch/x86/hvm/emulate.c        |  4 +--
 xen/arch/x86/hvm/hvm.c            |  3 ++-
 xen/arch/x86/i387.c               | 45 +++++--------------------------
 xen/arch/x86/include/asm/domain.h |  7 +----
 xen/arch/x86/x86_emulate/blk.c    |  3 ++-
 xen/arch/x86/xstate.c             | 13 ++++++---
 7 files changed, 25 insertions(+), 53 deletions(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 9190e11faaa3..7b04b584c540 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1343,7 +1343,8 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
 #define c(fld) (c.nat->fld)
 #endif
 
-    memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
+    memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
+           sizeof(c.nat->fpu_ctxt));
     if ( is_pv_domain(d) )
         c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
     else
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 65ee70ce67db..72a8136a9bbf 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2364,7 +2364,7 @@ static int cf_check hvmemul_get_fpu(
         alternative_vcall(hvm_funcs.fpu_dirty_intercept);
     else if ( type == X86EMUL_FPU_fpu )
     {
-        const fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
+        const fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
 
         /*
          * Latch current register state so that we can back out changes
@@ -2404,7 +2404,7 @@ static void cf_check hvmemul_put_fpu(
 
     if ( aux )
     {
-        fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
+        fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
         bool dval = aux->dval;
         int mode = hvm_guest_x86_mode(curr);
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7f4b627b1f5f..09b1426ee314 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -916,7 +916,8 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
 
     if ( v->fpu_initialised )
     {
-        memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs));
+        memcpy(ctxt.fpu_regs, &v->arch.xsave_area->fpu_sse,
+               sizeof(ctxt.fpu_regs));
         ctxt.flags = XEN_X86_FPU_INITIALISED;
     }
 
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 89804435b659..a964b84757ec 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -39,7 +39,7 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
 /* Restore x87 FPU, MMX, SSE and SSE2 state */
 static inline void fpu_fxrstor(struct vcpu *v)
 {
-    const fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
+    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
 
     /*
      * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -152,7 +152,7 @@ static inline void fpu_xsave(struct vcpu *v)
 /* Save x87 FPU, MMX, SSE and SSE2 state */
 static inline void fpu_fxsave(struct vcpu *v)
 {
-    fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
+    fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
     unsigned int fip_width = v->domain->arch.x87_fip_width;
 
     if ( fip_width != 4 )
@@ -219,7 +219,7 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
      * above) we also need to restore full state, to prevent subsequently
      * saving state belonging to another vCPU.
      */
-    if ( v->arch.fully_eager_fpu || (v->arch.xsave_area && xstate_all(v)) )
+    if ( v->arch.fully_eager_fpu || xstate_all(v) )
     {
         if ( cpu_has_xsave )
             fpu_xrstor(v, XSTATE_ALL);
@@ -306,44 +306,14 @@ void save_fpu_enable(void)
 /* Initialize FPU's context save area */
 int vcpu_init_fpu(struct vcpu *v)
 {
-    int rc;
-
     v->arch.fully_eager_fpu = opt_eager_fpu;
-
-    if ( (rc = xstate_alloc_save_area(v)) != 0 )
-        return rc;
-
-    if ( v->arch.xsave_area )
-        v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
-    else
-    {
-        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
-        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
-                                    __alignof(v->arch.xsave_area->fpu_sse));
-        if ( v->arch.fpu_ctxt )
-        {
-            fpusse_t *fpu_sse = v->arch.fpu_ctxt;
-
-            fpu_sse->fcw = FCW_DEFAULT;
-            fpu_sse->mxcsr = MXCSR_DEFAULT;
-        }
-        else
-            rc = -ENOMEM;
-    }
-
-    return rc;
+    return xstate_alloc_save_area(v);
 }
 
 void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
                     const void *data, unsigned int fcw_default)
 {
-    /*
-     * For the entire function please note that vcpu_init_fpu() (above) points
-     * v->arch.fpu_ctxt into v->arch.xsave_area when XSAVE is available. Hence
-     * accesses through both pointers alias one another, and the shorter form
-     * is used here.
-     */
-    fpusse_t *fpu_sse = v->arch.fpu_ctxt;
+    fpusse_t *fpu_sse = &v->arch.xsave_area->fpu_sse;
 
     ASSERT(!xsave_area || xsave_area == v->arch.xsave_area);
 
@@ -380,10 +350,7 @@ void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
 /* Free FPU's context save area */
 void vcpu_destroy_fpu(struct vcpu *v)
 {
-    if ( v->arch.xsave_area )
-        xstate_free_save_area(v);
-    else
-        xfree(v->arch.fpu_ctxt);
+    xstate_free_save_area(v);
 }
 
 /*
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index f5daeb182baa..4740a1244fbb 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -591,12 +591,7 @@ struct pv_vcpu
 
 struct arch_vcpu
 {
-    /*
-     * guest context (mirroring struct vcpu_guest_context) common
-     * between pv and hvm guests
-     */
-
-    void              *fpu_ctxt;
+    /* Fixed point registers */
     struct cpu_user_regs user_regs;
 
     /* Debug registers. */
diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
index e790f4f90056..8160da07295b 100644
--- a/xen/arch/x86/x86_emulate/blk.c
+++ b/xen/arch/x86/x86_emulate/blk.c
@@ -11,7 +11,8 @@
     !defined(X86EMUL_NO_SIMD)
 # ifdef __XEN__
 #  include <asm/xstate.h>
-#  define FXSAVE_AREA current->arch.fpu_ctxt
+#  define FXSAVE_AREA ((struct x86_fxsr *) \
+                           (void*)&current->arch.xsave_area->fpu_sse)
 # else
 #  define FXSAVE_AREA get_fpu_save_area()
 # endif
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 5c4144d55e89..850ee31bd18c 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -507,9 +507,16 @@ int xstate_alloc_save_area(struct vcpu *v)
     unsigned int size;
 
     if ( !cpu_has_xsave )
-        return 0;
-
-    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
+    {
+        /*
+         * This is bigger than FXSAVE_SIZE by 64 bytes, but it helps treating
+         * the FPU state uniformly as an XSAVE buffer even if XSAVE is not
+         * available in the host. Note the alignment restrictions of the XSAVE
+         * area are stricter than those of the FXSAVE area.
+         */
+        size = XSTATE_AREA_MIN_SIZE;
+    }
+    else if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
     {
         size = xsave_cntxt_size;
         BUG_ON(size < XSTATE_AREA_MIN_SIZE);
-- 
2.34.1
Re: [PATCH for-4.20 3/4] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Jan Beulich 2 months ago
On 09.07.2024 17:52, Alejandro Vallejo wrote:
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1343,7 +1343,8 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
>  #define c(fld) (c.nat->fld)
>  #endif
>  
> -    memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
> +    memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
> +           sizeof(c.nat->fpu_ctxt));

Now that the middle argument has proper type, maybe take the opportunity
and add BUILD_BUG_ON(sizeof(...) != sizeof(...))? (Also in e.g.
hvm_save_cpu_ctxt() then.)

> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -591,12 +591,7 @@ struct pv_vcpu
>  
>  struct arch_vcpu
>  {
> -    /*
> -     * guest context (mirroring struct vcpu_guest_context) common
> -     * between pv and hvm guests
> -     */
> -
> -    void              *fpu_ctxt;
> +    /* Fixed point registers */
>      struct cpu_user_regs user_regs;

Not exactly, no. Selector registers are there as well for example, which
I wouldn't consider "fixed point" ones. I wonder why the existing comment
cannot simply be kept, perhaps extended to mention that fpu_ctxt now lives
elsewhere.

> --- a/xen/arch/x86/x86_emulate/blk.c
> +++ b/xen/arch/x86/x86_emulate/blk.c
> @@ -11,7 +11,8 @@
>      !defined(X86EMUL_NO_SIMD)
>  # ifdef __XEN__
>  #  include <asm/xstate.h>
> -#  define FXSAVE_AREA current->arch.fpu_ctxt
> +#  define FXSAVE_AREA ((struct x86_fxsr *) \
> +                           (void*)&current->arch.xsave_area->fpu_sse)

Nit: Blank missing before the * in (void*).

> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -507,9 +507,16 @@ int xstate_alloc_save_area(struct vcpu *v)
>      unsigned int size;
>  
>      if ( !cpu_has_xsave )
> -        return 0;
> -
> -    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
> +    {
> +        /*
> +         * This is bigger than FXSAVE_SIZE by 64 bytes, but it helps treating
> +         * the FPU state uniformly as an XSAVE buffer even if XSAVE is not
> +         * available in the host. Note the alignment restriction of the XSAVE
> +         * area are stricter than those of the FXSAVE area.
> +         */
> +        size = XSTATE_AREA_MIN_SIZE;

What exactly would break if just (a little over) 512 bytes worth were allocated
when there's no XSAVE? If it was exactly 512, something like xstate_all() would
need to apply a little more care, I guess. Yet for that having just always-zero
xstate_bv and xcomp_bv there would already suffice (e.g. using
offsetof(..., xsave_hdr.reserved) here, to cover further fields gaining meaning
down the road). Remember that due to xmalloc() overhead and the 64-byte-aligned
requirement, you can only have 6 of them in a page the way you do it, when the
alternative way 7 would fit (if I got my math right).

Jan
Re: [PATCH for-4.20 3/4] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Alejandro Vallejo 2 months ago
On Thu Jul 18, 2024 at 12:49 PM BST, Jan Beulich wrote:
> On 09.07.2024 17:52, Alejandro Vallejo wrote:
> > --- a/xen/arch/x86/domctl.c
> > +++ b/xen/arch/x86/domctl.c
> > @@ -1343,7 +1343,8 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
> >  #define c(fld) (c.nat->fld)
> >  #endif
> >  
> > -    memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
> > +    memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
> > +           sizeof(c.nat->fpu_ctxt));
>
> Now that the middle argument has proper type, maybe take the opportunity
> and add BUILD_BUG_ON(sizeof(...) == sizeof(...))? (Also in e.g.
> hvm_save_cpu_ctxt() then.)

Sure.

>
> > --- a/xen/arch/x86/include/asm/domain.h
> > +++ b/xen/arch/x86/include/asm/domain.h
> > @@ -591,12 +591,7 @@ struct pv_vcpu
> >  
> >  struct arch_vcpu
> >  {
> > -    /*
> > -     * guest context (mirroring struct vcpu_guest_context) common
> > -     * between pv and hvm guests
> > -     */
> > -
> > -    void              *fpu_ctxt;
> > +    /* Fixed point registers */
> >      struct cpu_user_regs user_regs;
>
> Not exactly, no. Selector registers are there as well for example, which
> I wouldn't consider "fixed point" ones. I wonder why the existing comment
> cannot simply be kept, perhaps extended to mention that fpu_ctxt now lives
> elsewhere.

Would you prefer "general purpose registers"? It's not quite that either, but
it's arguably closer. I can part with the comment altogether but I'd rather
leave a token amount of information to say "non-FPU register state" (but not
that, because that would be a terrible description). 

I'd rather update it to something that better reflects reality, as I found it
quite misleading when reading through. I initially thought it may have been
related to struct layout (as in C-style single-level inheritance), but as it
turns out it's merely establishing a vague relationship between arch_vcpu and
vcpu_guest_context. I can believe once upon a time the relationship was closer
than it is now, but with the guest context missing AVX state, MSR state and
other bits and pieces I thought it better to avoid such confusions for future
navigators down the line so limit its description to the line below.

>
> > --- a/xen/arch/x86/x86_emulate/blk.c
> > +++ b/xen/arch/x86/x86_emulate/blk.c
> > @@ -11,7 +11,8 @@
> >      !defined(X86EMUL_NO_SIMD)
> >  # ifdef __XEN__
> >  #  include <asm/xstate.h>
> > -#  define FXSAVE_AREA current->arch.fpu_ctxt
> > +#  define FXSAVE_AREA ((struct x86_fxsr *) \
> > +                           (void*)&current->arch.xsave_area->fpu_sse)
>
> Nit: Blank missing after before *.

Heh, took me a while looking at x86_fxsr to realise you mean the void pointer.

Ack.

>
> > --- a/xen/arch/x86/xstate.c
> > +++ b/xen/arch/x86/xstate.c
> > @@ -507,9 +507,16 @@ int xstate_alloc_save_area(struct vcpu *v)
> >      unsigned int size;
> >  
> >      if ( !cpu_has_xsave )
> > -        return 0;
> > -
> > -    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
> > +    {
> > +        /*
> > +         * This is bigger than FXSAVE_SIZE by 64 bytes, but it helps treating
> > +         * the FPU state uniformly as an XSAVE buffer even if XSAVE is not
> > +         * available in the host. Note the alignment restriction of the XSAVE
> > +         * area are stricter than those of the FXSAVE area.
> > +         */
> > +        size = XSTATE_AREA_MIN_SIZE;
>
> What exactly would break if just (a little over) 512 bytes worth were allocated
> when there's no XSAVE? If it was exactly 512, something like xstate_all() would
> need to apply a little more care, I guess. Yet for that having just always-zero
> xstate_bv and xcomp_bv there would already suffice (e.g. using
> offsetof(..., xsave_hdr.reserved) here, to cover further fields gaining meaning
> down the road). Remember that due to xmalloc() overhead and the 64-byte-aligned
> requirement, you can only have 6 of them in a page the way you do it, when the
> alternative way 7 would fit (if I got my math right).
>
> Jan

I'm slightly confused.

XSTATE_AREA_MIN_SIZE is already 512 + 64 to account for the XSAVE header,
including its reserved fields. Did you mean something else?

    #define XSAVE_HDR_SIZE            64
    #define XSAVE_SSE_OFFSET          160
    #define XSTATE_YMM_SIZE           256
    #define FXSAVE_SIZE               512
    #define XSAVE_HDR_OFFSET          FXSAVE_SIZE
    #define XSTATE_AREA_MIN_SIZE      (FXSAVE_SIZE + XSAVE_HDR_SIZE)

Part of the rationale is to simplify other bits of code that are currently
conditionalized on v->arch.xsave_area being NULL. And for that the full xsave
header must be present (even if unused because !cpu_has_xsave)

Do you mean something else?

Cheers,
Alejandro
Re: [PATCH for-4.20 3/4] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Jan Beulich 1 month, 4 weeks ago
On 18.07.2024 18:54, Alejandro Vallejo wrote:
> On Thu Jul 18, 2024 at 12:49 PM BST, Jan Beulich wrote:
>> On 09.07.2024 17:52, Alejandro Vallejo wrote:
>>> --- a/xen/arch/x86/include/asm/domain.h
>>> +++ b/xen/arch/x86/include/asm/domain.h
>>> @@ -591,12 +591,7 @@ struct pv_vcpu
>>>  
>>>  struct arch_vcpu
>>>  {
>>> -    /*
>>> -     * guest context (mirroring struct vcpu_guest_context) common
>>> -     * between pv and hvm guests
>>> -     */
>>> -
>>> -    void              *fpu_ctxt;
>>> +    /* Fixed point registers */
>>>      struct cpu_user_regs user_regs;
>>
>> Not exactly, no. Selector registers are there as well for example, which
>> I wouldn't consider "fixed point" ones. I wonder why the existing comment
>> cannot simply be kept, perhaps extended to mention that fpu_ctxt now lives
>> elsewhere.
> 
> Would you prefer "general purpose registers"? It's not quite that either, but
> it's arguably closer. I can part with the comment altogether but I'd rather
> leave a token amount of information to say "non-FPU register state" (but not
> that, because that would be a terrible description). 
> 
> I'd rather update it to something that better reflects reality, as I found it
> quite misleading when reading through. I initially thought it may have been
> related to struct layout (as in C-style single-level inheritance), but as it
> turns out it's merely establishing a vague relationship between arch_vcpu and
> vcpu_guest_context. I can believe once upon a time the relationship was closer
> than it is now, but with the guest context missing AVX state, MSR state and
> other bits and pieces I thought it better to avoid such confusions for future
> navigators down the line so limit its description to the line below.

As said, I'd prefer if you amended the existing comment. Properly describing
what's in cpu_user_regs isn't quite as easy in only very few words. Neither
"fixed point register" nor "general purpose registers" really covers it. And
I'd really like to avoid having potentially confusing comments.

>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -507,9 +507,16 @@ int xstate_alloc_save_area(struct vcpu *v)
>>>      unsigned int size;
>>>  
>>>      if ( !cpu_has_xsave )
>>> -        return 0;
>>> -
>>> -    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
>>> +    {
>>> +        /*
>>> +         * This is bigger than FXSAVE_SIZE by 64 bytes, but it helps treating
>>> +         * the FPU state uniformly as an XSAVE buffer even if XSAVE is not
>>> +         * available in the host. Note the alignment restriction of the XSAVE
>>> +         * area are stricter than those of the FXSAVE area.
>>> +         */
>>> +        size = XSTATE_AREA_MIN_SIZE;
>>
>> What exactly would break if just (a little over) 512 bytes worth were allocated
>> when there's no XSAVE? If it was exactly 512, something like xstate_all() would
>> need to apply a little more care, I guess. Yet for that having just always-zero
>> xstate_bv and xcomp_bv there would already suffice (e.g. using
>> offsetof(..., xsave_hdr.reserved) here, to cover further fields gaining meaning
>> down the road). Remember that due to xmalloc() overhead and the 64-byte-aligned
>> requirement, you can only have 6 of them in a page the way you do it, when the
>> alternative way 7 would fit (if I got my math right).
> 
> I'm slightly confused.
> 
> XSTATE_AREA_MIN_SIZE is already 512 + 64 to account for the XSAVE header,
> including its reserved fields. Did you mean something else?

No, I didn't. I've in fact commented on it precisely because it is the value
you name. That's larger than necessary, and when suitably shrunk - as said -
one more of these structures could fit in a page (assuming they were all
allocated back-to-back, which isn't quite true right now, but other
intervening allocations may or may not take space from the same page, so
chances are still that the ones here all might come from one page as long as
there's space left).

>     #define XSAVE_HDR_SIZE            64
>     #define XSAVE_SSE_OFFSET          160
>     #define XSTATE_YMM_SIZE           256
>     #define FXSAVE_SIZE               512
>     #define XSAVE_HDR_OFFSET          FXSAVE_SIZE
>     #define XSTATE_AREA_MIN_SIZE      (FXSAVE_SIZE + XSAVE_HDR_SIZE)
> 
> Part of the rationale is to simplify other bits of code that are currently
> conditionalized on v->xsave_header being NULL. And for that the full xsave
> header must be present (even if unused because !cpu_xsave)

But that's my point: The reserved[] part doesn't need to be there; it's
not being accessed anywhere, I don't think.

Jan
Re: [PATCH for-4.20 3/4] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
Posted by Alejandro Vallejo 1 month, 1 week ago
Hi. Sorry, I've been busy and couldn't get back to this sooner.

On Fri Jul 19, 2024 at 10:14 AM BST, Jan Beulich wrote:
> On 18.07.2024 18:54, Alejandro Vallejo wrote:
> > On Thu Jul 18, 2024 at 12:49 PM BST, Jan Beulich wrote:
> >> On 09.07.2024 17:52, Alejandro Vallejo wrote:
> >>> --- a/xen/arch/x86/include/asm/domain.h
> >>> +++ b/xen/arch/x86/include/asm/domain.h
> >>> @@ -591,12 +591,7 @@ struct pv_vcpu
> >>>  
> >>>  struct arch_vcpu
> >>>  {
> >>> -    /*
> >>> -     * guest context (mirroring struct vcpu_guest_context) common
> >>> -     * between pv and hvm guests
> >>> -     */
> >>> -
> >>> -    void              *fpu_ctxt;
> >>> +    /* Fixed point registers */
> >>>      struct cpu_user_regs user_regs;
> >>
> >> Not exactly, no. Selector registers are there as well for example, which
> >> I wouldn't consider "fixed point" ones. I wonder why the existing comment
> >> cannot simply be kept, perhaps extended to mention that fpu_ctxt now lives
> >> elsewhere.
> > 
> > Would you prefer "general purpose registers"? It's not quite that either, but
> > it's arguably closer. I can part with the comment altogether but I'd rather
> > leave a token amount of information to say "non-FPU register state" (but not
> > that, because that would be a terrible description). 
> > 
> > I'd rather update it to something that better reflects reality, as I found it
> > quite misleading when reading through. I initially thought it may have been
> > related to struct layout (as in C-style single-level inheritance), but as it
> > turns out it's merely establishing a vague relationship between arch_vcpu and
> > vcpu_guest_context. I can believe once upon a time the relationship was closer
> > than it is now, but with the guest context missing AVX state, MSR state and
> > other bits and pieces I thought it better to avoid such confusions for future
> > navigators down the line so limit its description to the line below.
>
> As said, I'd prefer if you amended the existing comment. Properly describing
> what's in cpu_user_regs isn't quite as easy in only very few words. Neither
> "fixed point register" nor "general purpose registers" really covers it. And
> I'd really like to avoid having potentially confusing comments.

Sure.

>
> >>> --- a/xen/arch/x86/xstate.c
> >>> +++ b/xen/arch/x86/xstate.c
> >>> @@ -507,9 +507,16 @@ int xstate_alloc_save_area(struct vcpu *v)
> >>>      unsigned int size;
> >>>  
> >>>      if ( !cpu_has_xsave )
> >>> -        return 0;
> >>> -
> >>> -    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
> >>> +    {
> >>> +        /*
> >>> +         * This is bigger than FXSAVE_SIZE by 64 bytes, but it helps treating
> >>> +         * the FPU state uniformly as an XSAVE buffer even if XSAVE is not
> >>> +         * available in the host. Note the alignment restriction of the XSAVE
> >>> +         * area are stricter than those of the FXSAVE area.
> >>> +         */
> >>> +        size = XSTATE_AREA_MIN_SIZE;
> >>
> >> What exactly would break if just (a little over) 512 bytes worth were allocated
> >> when there's no XSAVE? If it was exactly 512, something like xstate_all() would
> >> need to apply a little more care, I guess. Yet for that having just always-zero
> >> xstate_bv and xcomp_bv there would already suffice (e.g. using
> >> offsetof(..., xsave_hdr.reserved) here, to cover further fields gaining meaning
> >> down the road). Remember that due to xmalloc() overhead and the 64-byte-aligned
> >> requirement, you can only have 6 of them in a page the way you do it, when the
> >> alternative way 7 would fit (if I got my math right).
> > 
> > I'm slightly confused.
> > 
> > XSTATE_AREA_MIN_SIZE is already 512 + 64 to account for the XSAVE header,
> > including its reserved fields. Did you mean something else?
>
> No, I didn't. I've in fact commented on it precisely because it is the value
> you name. That's larger than necessary, and when suitably shrunk - as said -
> one more of these structures could fit in a page (assuming they were all
> allocated back-to-back, which isn't quite true right now, but other
> intervening allocations may or may not take space from the same page, so
> chances are still that the ones here all might come from one page as long as
> there's space left).
>
> >     #define XSAVE_HDR_SIZE            64
> >     #define XSAVE_SSE_OFFSET          160
> >     #define XSTATE_YMM_SIZE           256
> >     #define FXSAVE_SIZE               512
> >     #define XSAVE_HDR_OFFSET          FXSAVE_SIZE
> >     #define XSTATE_AREA_MIN_SIZE      (FXSAVE_SIZE + XSAVE_HDR_SIZE)
> > 
> > Part of the rationale is to simplify other bits of code that are currently
> > conditionalized on v->xsave_header being NULL. And for that the full xsave
> > header must be present (even if unused because !cpu_xsave)
>
> But that's my point: The reserved[] part doesn't need to be there; it's
> not being accessed anywhere, I don't think.
>
> Jan

The reserved octets want to be in the structure so we can zero them out on
machines with XSAVE. Once they are it's quite dangerous to not allocate space
for them, as doing something like "*xsave_area = *foo;" would overflow the
allocation silently (that's in fact what the next patch does).

Combined with the fact that (a) not having XSAVE is fairly rare and (b) all of
this is bound to end up round to the page anyway once we get Address Space
Isolation going... it really doesn't motivate me to save a handful of octets
per vCPU. The cost in memory would be negligible even if we had tens of
thousands of them running concurrently.

Cheers,
Alejandro