[PATCH v3 11/12] x86/fpu: Pass explicit xsave areas to fpu_(f)xrstor()

Alejandro Vallejo posted 12 patches 3 weeks, 3 days ago
[PATCH v3 11/12] x86/fpu: Pass explicit xsave areas to fpu_(f)xrstor()
Posted by Alejandro Vallejo 3 weeks, 3 days ago
No functional change.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
v2->v3:
  * const-ified v in fpu_xrstor()
  * Removed v in fpu_fxrstor()

v1->v2:
  * const-ified v in xrstor()
    ( it was incorrectly reported in v2 as it being fpu_xrstor() )
---
 xen/arch/x86/i387.c               | 26 ++++++++++++++++----------
 xen/arch/x86/include/asm/xstate.h |  2 +-
 xen/arch/x86/xstate.c             | 10 ++++++----
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 11d06f921269..943ae668606f 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -20,7 +20,8 @@
 /*     FPU Restore Functions   */
 /*******************************/
 /* Restore x87 extended state */
-static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
+static inline void fpu_xrstor(const struct vcpu *v,
+                              struct xsave_struct *xsave_area, uint64_t mask)
 {
     bool ok;
 
@@ -30,16 +31,14 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
      */
     ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
     ASSERT(ok);
-    xrstor(v, mask);
+    xrstor(v, xsave_area, mask);
     ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
     ASSERT(ok);
 }
 
 /* Restore x87 FPU, MMX, SSE and SSE2 state */
-static inline void fpu_fxrstor(struct vcpu *v)
+static inline void fpu_fxrstor(const fpusse_t *fpu_ctxt)
 {
-    const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
-
     /*
      * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
      * is pending. Clear the x87 state here by setting it to fixed
@@ -195,6 +194,8 @@ static inline void fpu_fxsave(const struct vcpu *v, fpusse_t *fpu_ctxt)
 /* Restore FPU state whenever VCPU is schduled in. */
 void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
 {
+    struct xsave_struct *xsave_area;
+
     /* Restore nonlazy extended state (i.e. parts not tracked by CR0.TS). */
     if ( !v->arch.fully_eager_fpu && !v->arch.nonlazy_xstate_used )
         goto maybe_stts;
@@ -209,12 +210,13 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
      * above) we also need to restore full state, to prevent subsequently
      * saving state belonging to another vCPU.
      */
+    xsave_area = VCPU_MAP_XSAVE_AREA(v);
     if ( v->arch.fully_eager_fpu || xstate_all(v) )
     {
         if ( cpu_has_xsave )
-            fpu_xrstor(v, XSTATE_ALL);
+            fpu_xrstor(v, xsave_area, XSTATE_ALL);
         else
-            fpu_fxrstor(v);
+            fpu_fxrstor(&xsave_area->fpu_sse);
 
         v->fpu_initialised = 1;
         v->fpu_dirtied = 1;
@@ -224,9 +226,10 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
     }
     else
     {
-        fpu_xrstor(v, XSTATE_NONLAZY);
+        fpu_xrstor(v, xsave_area, XSTATE_NONLAZY);
         need_stts = true;
     }
+    VCPU_UNMAP_XSAVE_AREA(v, xsave_area);
 
  maybe_stts:
     if ( need_stts )
@@ -238,6 +241,7 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
  */
 void vcpu_restore_fpu_lazy(struct vcpu *v)
 {
+    struct xsave_struct *xsave_area;
     ASSERT(!is_idle_vcpu(v));
 
     /* Avoid recursion. */
@@ -248,10 +252,12 @@ void vcpu_restore_fpu_lazy(struct vcpu *v)
 
     ASSERT(!v->arch.fully_eager_fpu);
 
+    xsave_area = VCPU_MAP_XSAVE_AREA(v);
     if ( cpu_has_xsave )
-        fpu_xrstor(v, XSTATE_LAZY);
+        fpu_xrstor(v, xsave_area, XSTATE_LAZY);
     else
-        fpu_fxrstor(v);
+        fpu_fxrstor(&xsave_area->fpu_sse);
+    VCPU_UNMAP_XSAVE_AREA(v, xsave_area);
 
     v->fpu_initialised = 1;
     v->fpu_dirtied = 1;
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index 87f05dbca6f4..7d160d2b54be 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -98,7 +98,7 @@ void set_msr_xss(u64 xss);
 uint64_t get_msr_xss(void);
 uint64_t read_bndcfgu(void);
 void xsave(const struct vcpu *v, struct xsave_struct *ptr, uint64_t mask);
-void xrstor(struct vcpu *v, uint64_t mask);
+void xrstor(const struct vcpu *v, struct xsave_struct *ptr, uint64_t mask);
 void xstate_set_init(uint64_t mask);
 bool xsave_enabled(const struct vcpu *v);
 int __must_check validate_xstate(const struct domain *d,
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 24053b394200..3d4fb7664c5f 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -376,11 +376,10 @@ void xsave(const struct vcpu *v, struct xsave_struct *ptr, uint64_t mask)
         ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = fip_width;
 }
 
-void xrstor(struct vcpu *v, uint64_t mask)
+void xrstor(const struct vcpu *v, struct xsave_struct *ptr, uint64_t mask)
 {
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
-    struct xsave_struct *ptr = v->arch.xsave_area;
     unsigned int faults, prev_faults;
 
     /*
@@ -994,6 +993,7 @@ int handle_xsetbv(u32 index, u64 new_bv)
     mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY;
     if ( mask )
     {
+        struct xsave_struct *xsave_area = VCPU_MAP_XSAVE_AREA(curr);
         unsigned long cr0 = read_cr0();
 
         clts();
@@ -1013,7 +1013,9 @@ int handle_xsetbv(u32 index, u64 new_bv)
             curr->fpu_dirtied = 1;
             cr0 &= ~X86_CR0_TS;
         }
-        xrstor(curr, mask);
+        xrstor(curr, xsave_area, mask);
+        VCPU_UNMAP_XSAVE_AREA(curr, xsave_area);
+
         if ( cr0 & X86_CR0_TS )
             write_cr0(cr0);
     }
@@ -1080,7 +1082,7 @@ void xstate_set_init(uint64_t mask)
 
     xstate = VCPU_MAP_XSAVE_AREA(v);
     memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
-    xrstor(v, mask);
+    xrstor(v, xstate, mask);
     VCPU_UNMAP_XSAVE_AREA(v, xstate);
 
     if ( cr0 & X86_CR0_TS )
-- 
2.47.1
Re: [PATCH v3 11/12] x86/fpu: Pass explicit xsave areas to fpu_(f)xrstor()
Posted by Jan Beulich 1 week ago
On 10.01.2025 14:28, Alejandro Vallejo wrote:
> No functional change.
> 
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>

Acked-by: Jan Beulich <jbeulich@suse.com>

> ---
> v2->v3:
>   * const-ified v in fpu_xrstor()
>   * Removed v in fpu_fxrstor()

On this basis the parameter could also be removed from fpu_fxsave(), by
passing in fip_width instead.

Jan
Re: [PATCH v3 11/12] x86/fpu: Pass explicit xsave areas to fpu_(f)xrstor()
Posted by Alejandro Vallejo 1 week ago
Hi,

On Mon Jan 27, 2025 at 11:05 AM GMT, Jan Beulich wrote:
> On 10.01.2025 14:28, Alejandro Vallejo wrote:
> > No functional change.
> > 
> > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
>
> Acked-by: Jan Beulich <jbeulich@suse.com>
>
> > ---
> > v2->v3:
> >   * const-ified v in fpu_xrstor()
> >   * Removed v in fpu_fxrstor()
>
> On this basis the parameter could also be removed from fpu_fxsave(), by
> passing in fip_width instead.
>
> Jan

Could be, but there's not a whole lot of gain to be had? The access must be
done either way before or after the fpu_fxsave() call, and a parameter must be
passed (be it fip_width or v). Passing the vCPU encapsulates the access of
fip_width where its actually used, which seems more desirable, I'd say.

Cheers,
Alejandro
Re: [PATCH v3 11/12] x86/fpu: Pass explicit xsave areas to fpu_(f)xrstor()
Posted by Jan Beulich 6 days, 23 hours ago
On 27.01.2025 16:48, Alejandro Vallejo wrote:
> On Mon Jan 27, 2025 at 11:05 AM GMT, Jan Beulich wrote:
>> On 10.01.2025 14:28, Alejandro Vallejo wrote:
>>> No functional change.
>>>
>>> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
>>
>> Acked-by: Jan Beulich <jbeulich@suse.com>
>>
>>> ---
>>> v2->v3:
>>>   * const-ified v in fpu_xrstor()
>>>   * Removed v in fpu_fxrstor()
>>
>> On this basis the parameter could also be removed from fpu_fxsave(), by
>> passing in fip_width instead.
> 
> Could be, but there's not a whole lot of gain to be had? The access must be
> done either way before or after the fpu_fxsave() call, and a parameter must be
> passed (be it fip_width or v). Passing the vCPU encapsulates the access of
> fip_width where its actually used, which seems more desirable, I'd say.

Not much of a gain indeed, largely for symmetry between the two sibling
functions.

Jan