[PATCH v1 8/8] x86: Cleanup cr0.TS flag handling

Ross Lagerwall posted 8 patches 4 days, 6 hours ago
[PATCH v1 8/8] x86: Cleanup cr0.TS flag handling
Posted by Ross Lagerwall 4 days, 6 hours ago
With lazy FPU removed, Xen does not need to touch the cr0.TS flag on context
switch except when saving/restoring the FPU for a PV guest.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
---
 xen/arch/x86/cpu/common.c       |  3 ---
 xen/arch/x86/hvm/emulate.c      | 14 ++------------
 xen/arch/x86/i387.c             | 22 +++-------------------
 xen/arch/x86/include/asm/i387.h |  1 -
 xen/common/efi/runtime.c        |  2 +-
 5 files changed, 6 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 5d0523a78b52..04a049f01c07 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -883,9 +883,6 @@ void cpu_init(void)
 	/* Install correct page table. */
 	write_ptbase(current);
 
-	/* Ensure FPU gets initialised for each domain. */
-	stts();
-
 	/* Reset debug registers: */
 	write_debugreg(0, 0);
 	write_debugreg(1, 0);
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 48c7320360c7..f3aae158e9f8 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2527,14 +2527,8 @@ static int cf_check hvmemul_get_fpu(
          * Latch current register state so that we can back out changes
          * if needed (namely when a memory write fails after register state
          * has already been updated).
-         * NB: We don't really need the "enable" part of the called function
-         * (->fpu_dirtied set implies CR0.TS clear), but the additional
-         * overhead should be low enough to not warrant introduction of yet
-         * another slightly different function. However, we need to undo the
-         * ->fpu_dirtied clearing the function does as well as the possible
-         * masking of all exceptions by FNSTENV.)
          */
-        save_fpu_enable();
+        vcpu_save_fpu(curr);
         if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
         {
             uint16_t fcw;
@@ -2572,12 +2566,8 @@ static void cf_check hvmemul_put_fpu(
          * Latch current register state so that we can replace FIP/FDP/FOP
          * (which have values resulting from our own invocation of the FPU
          * instruction during emulation).
-         * NB: See also the comment in hvmemul_get_fpu(); we don't need to
-         * set ->fpu_dirtied here as it is going to be cleared below, and
-         * we also don't need to reload FCW as we're forcing full state to
-         * be reloaded anyway.
          */
-        save_fpu_enable();
+        vcpu_save_fpu(curr);
 
         if ( boot_cpu_has(X86_FEATURE_FDP_EXCP_ONLY) &&
              !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) )
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 9acaaf4673df..336bc83b6e13 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -176,9 +176,6 @@ void vcpu_restore_fpu(struct vcpu *v)
 {
     ASSERT(!is_idle_vcpu(v));
 
-    /* Avoid recursion */
-    clts();
-
     if ( cpu_has_xsave )
         fpu_xrstor(v, XSTATE_ALL);
     else
@@ -193,31 +190,18 @@ void vcpu_restore_fpu(struct vcpu *v)
  * On each context switch, save the necessary FPU info of VCPU being switch 
  * out. It dispatches saving operation based on CPU's capability.
  */
-static bool _vcpu_save_fpu(struct vcpu *v)
+void vcpu_save_fpu(struct vcpu *v)
 {
     ASSERT(!is_idle_vcpu(v));
 
     /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
-    clts();
+    if ( is_pv_vcpu(v) )
+        clts();
 
     if ( cpu_has_xsave )
         fpu_xsave(v);
     else
         fpu_fxsave(v);
-
-    return true;
-}
-
-void vcpu_save_fpu(struct vcpu *v)
-{
-    _vcpu_save_fpu(v);
-    stts();
-}
-
-void save_fpu_enable(void)
-{
-    if ( !_vcpu_save_fpu(current) )
-        clts();
 }
 
 /* Initialize FPU's context save area */
diff --git a/xen/arch/x86/include/asm/i387.h b/xen/arch/x86/include/asm/i387.h
index fe5e4419b6f4..0717005d31f0 100644
--- a/xen/arch/x86/include/asm/i387.h
+++ b/xen/arch/x86/include/asm/i387.h
@@ -29,7 +29,6 @@ struct ix87_env {
 
 void vcpu_restore_fpu(struct vcpu *v);
 void vcpu_save_fpu(struct vcpu *v);
-void save_fpu_enable(void);
 int vcpu_init_fpu(struct vcpu *v);
 void vcpu_destroy_fpu(struct vcpu *v);
 
diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c
index 982e42e8f341..0f1cc765ec5e 100644
--- a/xen/common/efi/runtime.c
+++ b/xen/common/efi/runtime.c
@@ -94,7 +94,7 @@ struct efi_rs_state efi_rs_enter(void)
         return state;
 
     state.cr3 = read_cr3();
-    save_fpu_enable();
+    vcpu_save_fpu(current);
     asm volatile ( "fnclex; fldcw %0" :: "m" (fcw) );
     asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) );
 
-- 
2.53.0
Re: [PATCH v1 8/8] x86: Cleanup cr0.TS flag handling
Posted by Jan Beulich 7 hours ago
On 19.03.2026 14:29, Ross Lagerwall wrote:
> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -883,9 +883,6 @@ void cpu_init(void)
>  	/* Install correct page table. */
>  	write_ptbase(current);
>  
> -	/* Ensure FPU gets initialised for each domain. */
> -	stts();

I'm a little concerned by the removal of this and ...

> @@ -193,31 +190,18 @@ void vcpu_restore_fpu(struct vcpu *v)
>   * On each context switch, save the necessary FPU info of VCPU being switch 
>   * out. It dispatches saving operation based on CPU's capability.
>   */
> -static bool _vcpu_save_fpu(struct vcpu *v)
> +void vcpu_save_fpu(struct vcpu *v)
>  {
>      ASSERT(!is_idle_vcpu(v));
>  
>      /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
> -    clts();
> +    if ( is_pv_vcpu(v) )
> +        clts();
>  
>      if ( cpu_has_xsave )
>          fpu_xsave(v);
>      else
>          fpu_fxsave(v);
> -
> -    return true;
> -}
> -
> -void vcpu_save_fpu(struct vcpu *v)
> -{
> -    _vcpu_save_fpu(v);
> -    stts();

... this. At present it guards us against e.g. an idle CPU or context
switch code mistakenly using in particular XMM registers (but of course
also other extended state).

Jan
Re: [PATCH v1 8/8] x86: Cleanup cr0.TS flag handling
Posted by Ross Lagerwall 5 hours ago
On 3/23/26 12:30 PM, Jan Beulich wrote:
> On 19.03.2026 14:29, Ross Lagerwall wrote:
>> --- a/xen/arch/x86/cpu/common.c
>> +++ b/xen/arch/x86/cpu/common.c
>> @@ -883,9 +883,6 @@ void cpu_init(void)
>>   	/* Install correct page table. */
>>   	write_ptbase(current);
>>   
>> -	/* Ensure FPU gets initialised for each domain. */
>> -	stts();
> 
> I'm a little concerned by the removal of this and ...
> 
>> @@ -193,31 +190,18 @@ void vcpu_restore_fpu(struct vcpu *v)
>>    * On each context switch, save the necessary FPU info of VCPU being switch
>>    * out. It dispatches saving operation based on CPU's capability.
>>    */
>> -static bool _vcpu_save_fpu(struct vcpu *v)
>> +void vcpu_save_fpu(struct vcpu *v)
>>   {
>>       ASSERT(!is_idle_vcpu(v));
>>   
>>       /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
>> -    clts();
>> +    if ( is_pv_vcpu(v) )
>> +        clts();
>>   
>>       if ( cpu_has_xsave )
>>           fpu_xsave(v);
>>       else
>>           fpu_fxsave(v);
>> -
>> -    return true;
>> -}
>> -
>> -void vcpu_save_fpu(struct vcpu *v)
>> -{
>> -    _vcpu_save_fpu(v);
>> -    stts();
> 
> ... this. At present it guards us against e.g. an idle CPU or context
> switch code mistakenly using in particular XMM registers (but of course
> also other extended state).
> 

Given this concern and Andrew's comment, I could drop this patch for now.
It can be revisited in future if needed.

Ross
Re: [PATCH v1 8/8] x86: Cleanup cr0.TS flag handling
Posted by Jan Beulich 5 hours ago
On 23.03.2026 15:14, Ross Lagerwall wrote:
> On 3/23/26 12:30 PM, Jan Beulich wrote:
>> On 19.03.2026 14:29, Ross Lagerwall wrote:
>>> --- a/xen/arch/x86/cpu/common.c
>>> +++ b/xen/arch/x86/cpu/common.c
>>> @@ -883,9 +883,6 @@ void cpu_init(void)
>>>   	/* Install correct page table. */
>>>   	write_ptbase(current);
>>>   
>>> -	/* Ensure FPU gets initialised for each domain. */
>>> -	stts();
>>
>> I'm a little concerned by the removal of this and ...
>>
>>> @@ -193,31 +190,18 @@ void vcpu_restore_fpu(struct vcpu *v)
>>>    * On each context switch, save the necessary FPU info of VCPU being switch
>>>    * out. It dispatches saving operation based on CPU's capability.
>>>    */
>>> -static bool _vcpu_save_fpu(struct vcpu *v)
>>> +void vcpu_save_fpu(struct vcpu *v)
>>>   {
>>>       ASSERT(!is_idle_vcpu(v));
>>>   
>>>       /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
>>> -    clts();
>>> +    if ( is_pv_vcpu(v) )
>>> +        clts();
>>>   
>>>       if ( cpu_has_xsave )
>>>           fpu_xsave(v);
>>>       else
>>>           fpu_fxsave(v);
>>> -
>>> -    return true;
>>> -}
>>> -
>>> -void vcpu_save_fpu(struct vcpu *v)
>>> -{
>>> -    _vcpu_save_fpu(v);
>>> -    stts();
>>
>> ... this. At present it guards us against e.g. an idle CPU or context
>> switch code mistakenly using in particular XMM registers (but of course
>> also other extended state).
> 
> Given this concern and Andrew's comment, I could drop this patch for now.
> It can be revisited in future if needed.

We discussed this some on the x86 maintainers call earlier today. Andrew
(who wants to put together a more extensive reply) indicates that getting
rid of the stts() instances is a relevant goal of this work. If this can
be clearly stated for this patch, and if the implications are clear, then
I think this could still be okay to go in.

Jan
Re: [PATCH v1 8/8] x86: Cleanup cr0.TS flag handling
Posted by Andrew Cooper 3 days, 23 hours ago
On 19/03/2026 1:29 pm, Ross Lagerwall wrote:
> diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
> index 9acaaf4673df..336bc83b6e13 100644
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -193,31 +190,18 @@ void vcpu_restore_fpu(struct vcpu *v)
>   * On each context switch, save the necessary FPU info of VCPU being switch 
>   * out. It dispatches saving operation based on CPU's capability.
>   */
> -static bool _vcpu_save_fpu(struct vcpu *v)
> +void vcpu_save_fpu(struct vcpu *v)
>  {
>      ASSERT(!is_idle_vcpu(v));
>  
>      /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
> -    clts();
> +    if ( is_pv_vcpu(v) )
> +        clts();
>  

It's quite likely that it would be quicker to simply leave this clts() unconditional.

is_pv_vcpu() has evaluate_nospec() in it, so forces LFENCEs, and CLTS
has a fast nop path even in very early implementations.

~Andrew