[PATCH v3 2/2] x86/fpu: Split fpu_setup_fpu() in three

Alejandro Vallejo posted 2 patches 1 month ago
[PATCH v3 2/2] x86/fpu: Split fpu_setup_fpu() in three
Posted by Alejandro Vallejo 1 month ago
It was trying to do too many things at once and there was no clear way of
defining what it was meant to do. This commit splits the function in three.

  1. A function to return the FPU to power-on reset values.
  2. A function to return the FPU to default values.
  3. A x87/SSE state loader (equivalent to the old function when it took a data
     pointer).

While at it, make sure the abridged tag is consistent with the manuals and
start as 0xFF.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
v3:
  * Adjust commit message, as the split is now in 3.
  * Remove bulky comment, as the rationale for it turned out to be
    unsubstantiated. I can't find proof in xen-devel of the stream
    operating the way I claimed, and at that point having the comment
    at all is pointless

I suspect the rationale for xsave_vcpu(v) was merely to skip writing the XSAVE
header when it would be rewritten later on. Whatever it might be the current
logic does the right thing and is several orders of magnitude clearer about its
objective and its intent.

---
 xen/arch/x86/domain.c             |  7 ++--
 xen/arch/x86/hvm/hvm.c            | 12 +++----
 xen/arch/x86/i387.c               | 60 +++++++++++++++----------------
 xen/arch/x86/include/asm/i387.h   | 28 ++++++++++++---
 xen/arch/x86/include/asm/xstate.h |  1 +
 5 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index d977ec71ca20..5af9e3e7a8b4 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1186,9 +1186,10 @@ int arch_set_info_guest(
          is_pv_64bit_domain(d) )
         v->arch.flags &= ~TF_kernel_mode;
 
-    vcpu_setup_fpu(v, v->arch.xsave_area,
-                   flags & VGCF_I387_VALID ? &c.nat->fpu_ctxt : NULL,
-                   FCW_DEFAULT);
+    if ( flags & VGCF_I387_VALID )
+        vcpu_setup_fpu(v, &c.nat->fpu_ctxt);
+    else
+        vcpu_default_fpu(v);
 
     if ( !compat )
     {
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 76bbb645b77a..95d66e68a849 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1165,10 +1165,10 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
     seg.attr = ctxt.ldtr_arbytes;
     hvm_set_segment_register(v, x86_seg_ldtr, &seg);
 
-    /* Cover xsave-absent save file restoration on xsave-capable host. */
-    vcpu_setup_fpu(v, xsave_enabled(v) ? NULL : v->arch.xsave_area,
-                   ctxt.flags & XEN_X86_FPU_INITIALISED ? ctxt.fpu_regs : NULL,
-                   FCW_RESET);
+    if ( ctxt.flags & XEN_X86_FPU_INITIALISED )
+        vcpu_setup_fpu(v, &ctxt.fpu_regs);
+    else
+        vcpu_reset_fpu(v);
 
     v->arch.user_regs.rax = ctxt.rax;
     v->arch.user_regs.rbx = ctxt.rbx;
@@ -4008,9 +4008,7 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
         v->arch.guest_table = pagetable_null();
     }
 
-    if ( v->arch.xsave_area )
-        v->arch.xsave_area->xsave_hdr.xstate_bv = 0;
-    vcpu_setup_fpu(v, v->arch.xsave_area, NULL, FCW_RESET);
+    vcpu_reset_fpu(v);
 
     arch_vcpu_regs_init(v);
     v->arch.user_regs.rip = ip;
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index fbb9d3584a3d..f7a9dcd162ba 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -303,41 +303,37 @@ int vcpu_init_fpu(struct vcpu *v)
     return xstate_alloc_save_area(v);
 }
 
-void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
-                    const void *data, unsigned int fcw_default)
+void vcpu_reset_fpu(struct vcpu *v)
 {
-    fpusse_t *fpu_sse = &v->arch.xsave_area->fpu_sse;
-
-    ASSERT(!xsave_area || xsave_area == v->arch.xsave_area);
-
-    v->fpu_initialised = !!data;
+    v->fpu_initialised = false;
+    *v->arch.xsave_area = (struct xsave_struct) {
+        .fpu_sse = {
+            .mxcsr = MXCSR_DEFAULT,
+            .fcw = FCW_RESET,
+            .ftw = FTW_RESET,
+        },
+        .xsave_hdr.xstate_bv = X86_XCR0_X87,
+    };
+}
 
-    if ( data )
-    {
-        memcpy(fpu_sse, data, sizeof(*fpu_sse));
-        if ( xsave_area )
-            xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
-    }
-    else if ( xsave_area && fcw_default == FCW_DEFAULT )
-    {
-        xsave_area->xsave_hdr.xstate_bv = 0;
-        fpu_sse->mxcsr = MXCSR_DEFAULT;
-    }
-    else
-    {
-        memset(fpu_sse, 0, sizeof(*fpu_sse));
-        fpu_sse->fcw = fcw_default;
-        fpu_sse->mxcsr = MXCSR_DEFAULT;
-        if ( v->arch.xsave_area )
-        {
-            v->arch.xsave_area->xsave_hdr.xstate_bv &= ~XSTATE_FP_SSE;
-            if ( fcw_default != FCW_DEFAULT )
-                v->arch.xsave_area->xsave_hdr.xstate_bv |= X86_XCR0_X87;
-        }
-    }
+void vcpu_default_fpu(struct vcpu *v)
+{
+    v->fpu_initialised = false;
+    *v->arch.xsave_area = (struct xsave_struct) {
+        .fpu_sse = {
+            .mxcsr = MXCSR_DEFAULT,
+            .fcw = FCW_DEFAULT,
+        },
+    };
+}
 
-    if ( xsave_area )
-        xsave_area->xsave_hdr.xcomp_bv = 0;
+void vcpu_setup_fpu(struct vcpu *v, const void *data)
+{
+    v->fpu_initialised = true;
+    *v->arch.xsave_area = (struct xsave_struct) {
+        .fpu_sse = *(const fpusse_t*)data,
+        .xsave_hdr.xstate_bv = XSTATE_FP_SSE,
+    };
 }
 
 /* Free FPU's context save area */
diff --git a/xen/arch/x86/include/asm/i387.h b/xen/arch/x86/include/asm/i387.h
index a783549db991..7a69577de45b 100644
--- a/xen/arch/x86/include/asm/i387.h
+++ b/xen/arch/x86/include/asm/i387.h
@@ -31,10 +31,30 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts);
 void vcpu_restore_fpu_lazy(struct vcpu *v);
 void vcpu_save_fpu(struct vcpu *v);
 void save_fpu_enable(void);
-
 int vcpu_init_fpu(struct vcpu *v);
-struct xsave_struct;
-void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
-                    const void *data, unsigned int fcw_default);
 void vcpu_destroy_fpu(struct vcpu *v);
+
+/*
+ * Restore v's FPU to power-on reset values
+ *
+ * @param v vCPU containing the FPU
+ */
+void vcpu_reset_fpu(struct vcpu *v);
+
+/*
+ * Restore v's FPU to default values
+ *
+ * @param v vCPU containing the FPU
+ */
+void vcpu_default_fpu(struct vcpu *v);
+
+/*
+ * Load x87/SSE state into v's FPU
+ *
+ * Overrides the XSAVE header to set the state components to be x87 and SSE.
+ *
+ * @param v    vCPU containing the FPU
+ * @param data 512-octet blob for x87/SSE state
+ */
+void vcpu_setup_fpu(struct vcpu *v, const void *data);
 #endif /* __ASM_I386_I387_H */
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index ebeb2a3dcaf9..6144ed6f8551 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -14,6 +14,7 @@
 
 #define FCW_DEFAULT               0x037f
 #define FCW_RESET                 0x0040
+#define FTW_RESET                 0xFF
 #define MXCSR_DEFAULT             0x1f80
 
 extern uint32_t mxcsr_mask;
-- 
2.45.2
Re: [PATCH v3 2/2] x86/fpu: Split fpu_setup_fpu() in three
Posted by Jan Beulich 1 month ago
On 13.08.2024 16:21, Alejandro Vallejo wrote:
> It was trying to do too many things at once and there was no clear way of
> defining what it was meant to do. This commit splits the function in three.
> 
>   1. A function to return the FPU to power-on reset values.
>   2. A function to return the FPU to default values.
>   3. A x87/SSE state loader (equivalent to the old function when it took a data
>      pointer).
> 
> While at it, make sure the abridged tag is consistent with the manuals and
> start as 0xFF.
> 
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

> ---
> v3:
>   * Adjust commit message, as the split is now in 3.
>   * Remove bulky comment, as the rationale for it turned out to be
>     unsubstantiated. I can't find proof in xen-devel of the stream
>     operating the way I claimed, and at that point having the comment
>     at all is pointless

So you deliberately removed the comment altogether, not just point 3 of it?

Jan
Re: [PATCH v3 2/2] x86/fpu: Split fpu_setup_fpu() in three
Posted by Alejandro Vallejo 1 month ago
On Tue Aug 13, 2024 at 3:32 PM BST, Jan Beulich wrote:
> On 13.08.2024 16:21, Alejandro Vallejo wrote:
> > It was trying to do too many things at once and there was no clear way of
> > defining what it was meant to do. This commit splits the function in three.
> > 
> >   1. A function to return the FPU to power-on reset values.
> >   2. A function to return the FPU to default values.
> >   3. A x87/SSE state loader (equivalent to the old function when it took a data
> >      pointer).
> > 
> > While at it, make sure the abridged tag is consistent with the manuals and
> > start as 0xFF.
> > 
> > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> > ---
> > v3:
> >   * Adjust commit message, as the split is now in 3.
> >   * Remove bulky comment, as the rationale for it turned out to be
> >     unsubstantiated. I can't find proof in xen-devel of the stream
> >     operating the way I claimed, and at that point having the comment
> >     at all is pointless
>
> So you deliberately removed the comment altogether, not just point 3 of it?
>
> Jan

Yes. The other two cases can be deduced pretty trivially from the conditional,
I reckon. I commented them more heavily in order to properly introduce (3), but
seeing how it was all a midsummer dream might as well reduce clutter.

I got as far as the original implementation of XSAVE in Xen and it seems to
have been tested against many combinations of src and dst, none of which was
that ficticious "xsave enabled + xsave context missing". I suspect the
xsave_enabled(v) was merely avoiding writing to the XSAVE buffer just for
efficiency (however minor effect it might have had). I just reverse engineering
it wrong.

Which reminds me. Thanks for mentioning that, because it was really just
guesswork on my part.

Cheers,
Alejandro