The new toolchain baseline knows all the mnemonics, so a plain memory
operand can be used rather than hard-coding the ModRM byte for (%rdi).

For xrstor(), use asm goto rather than hiding the increment of the faults
variable inside the .fixup section, and replace the loop with a goto-retry
pattern. Put the domain_crash() into the default case of the fault
handling, and provide a concrete error message rather than leaving the
cause as an exercise in code diving.

No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
xen/arch/x86/xstate.c | 77 ++++++++++++++++++++-----------------------
1 file changed, 36 insertions(+), 41 deletions(-)
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 384f78bd5281..4215a83efefb 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
uint32_t hmask = mask >> 32;
uint32_t lmask = mask;
unsigned int fip_width = v->domain->arch.x87_fip_width;
-#define XSAVE(pfx) \
- if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
- asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
- : "=m" (*ptr) \
- : "a" (lmask), "d" (hmask), "D" (ptr) ); \
- else \
- alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
- ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
- X86_FEATURE_XSAVEOPT, \
- "=m" (*ptr), \
- "a" (lmask), "d" (hmask), "D" (ptr))
+
+#define XSAVE(pfx) \
+ if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
+ asm volatile ( "xsaves %0" \
+ : "=m" (*ptr) \
+ : "a" (lmask), "d" (hmask) ); \
+ else \
+ alternative_io("xsave %0", \
+ "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
+ "=m" (*ptr), \
+ "a" (lmask), "d" (hmask))
if ( fip_width == 8 || !(mask & X86_XCR0_X87) )
{
- XSAVE("0x48,");
+ XSAVE("rex64 ");
}
else if ( fip_width == 4 )
{
@@ -349,7 +349,7 @@ void xsave(struct vcpu *v, uint64_t mask)
ptr->fpu_sse.fip.addr = bad_fip;
- XSAVE("0x48,");
+ XSAVE("rex64 ");
/* FIP/FDP not updated? Restore the old FIP value. */
if ( ptr->fpu_sse.fip.addr == bad_fip )
@@ -384,7 +384,7 @@ void xrstor(struct vcpu *v, uint64_t mask)
uint32_t hmask = mask >> 32;
uint32_t lmask = mask;
struct xsave_struct *ptr = v->arch.xsave_area;
- unsigned int faults, prev_faults;
+ unsigned int faults = 0;
/*
* Some CPUs don't save/restore FDP/FIP/FOP unless an exception
@@ -405,22 +405,15 @@ void xrstor(struct vcpu *v, uint64_t mask)
* possibility, which may occur if the block was passed to us by control
* tools or through VCPUOP_initialise, by silently adjusting state.
*/
- for ( prev_faults = faults = 0; ; prev_faults = faults )
- {
+ retry:
switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
{
- BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
-#define _xrstor(insn) \
- asm volatile ( "1: .byte " insn "\n" \
- "3:\n" \
- " .section .fixup,\"ax\"\n" \
- "2: incl %[faults]\n" \
- " jmp 3b\n" \
- " .previous\n" \
- _ASM_EXTABLE(1b, 2b) \
- : [mem] "+m" (*ptr), [faults] "+g" (faults) \
- : [lmask] "a" (lmask), [hmask] "d" (hmask), \
- [ptr] "D" (ptr) )
+#define _xrstor(insn) \
+ asm_inline volatile goto ( \
+ "1: " insn " %0\n" \
+ _ASM_EXTABLE(1b, %l[fault]) \
+ :: "m" (*ptr), "a" (lmask), "d" (hmask) \
+ :: fault )
#define XRSTOR(pfx) \
if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
@@ -432,13 +425,13 @@ void xrstor(struct vcpu *v, uint64_t mask)
ptr->xsave_hdr.xcomp_bv = ptr->xsave_hdr.xstate_bv | \
XSTATE_COMPACTION_ENABLED; \
} \
- _xrstor(pfx "0x0f,0xc7,0x1f"); /* xrstors */ \
+ _xrstor(pfx "xrstors"); \
} \
else \
- _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */
+ _xrstor(pfx "xrstor")
default:
- XRSTOR("0x48,");
+ XRSTOR("rex64 ");
break;
case 4: case 2:
@@ -449,8 +442,10 @@ void xrstor(struct vcpu *v, uint64_t mask)
#undef _xrstor
}
- if ( likely(faults == prev_faults) )
- break;
+ return;
+
+ fault:
+ faults++;
#ifndef NDEBUG
gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
@@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
ptr->xsave_hdr.xcomp_bv = 0;
}
memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
- continue;
+ goto retry;
case 2: /* Stage 2: Reset all state. */
ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
ptr->xsave_hdr.xstate_bv = 0;
ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
? XSTATE_COMPACTION_ENABLED : 0;
- continue;
- }
+ goto retry;
- domain_crash(current->domain);
+ default: /* Stage 3: Nothing else to do. */
+ domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
return;
}
}
@@ -1041,17 +1036,17 @@ uint64_t read_bndcfgu(void)
if ( cpu_has_xsavec )
{
- asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
+ asm ( "xsavec %0"
: "=m" (*xstate)
- : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );
+ : "a" (X86_XCR0_BNDCSR), "d" (0) );
bndcsr = (void *)(xstate + 1);
}
else
{
- asm ( ".byte 0x0f,0xae,0x27\n" /* xsave */
+ asm ( "xsave %0"
: "=m" (*xstate)
- : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );
+ : "a" (X86_XCR0_BNDCSR), "d" (0) );
bndcsr = (void *)xstate + xstate_offsets[ilog2(X86_XCR0_BNDCSR)];
}
--
2.39.5
On 30.12.2025 14:54, Andrew Cooper wrote:
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
> uint32_t hmask = mask >> 32;
> uint32_t lmask = mask;
> unsigned int fip_width = v->domain->arch.x87_fip_width;
> -#define XSAVE(pfx) \
> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
> - : "=m" (*ptr) \
> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
> - else \
> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
> - X86_FEATURE_XSAVEOPT, \
> - "=m" (*ptr), \
> - "a" (lmask), "d" (hmask), "D" (ptr))
> +
> +#define XSAVE(pfx) \
> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
> + asm volatile ( "xsaves %0" \
> + : "=m" (*ptr) \
> + : "a" (lmask), "d" (hmask) ); \
> + else \
> + alternative_io("xsave %0", \
> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
> + "=m" (*ptr), \
> + "a" (lmask), "d" (hmask))
While no doubt neater to read this way, there's a subtle latent issue here:
"m" doesn't exclude RIP-relative addressing, yet that addressing form can't
be used in replacement code (up and until we leverage your decode-lite to
actually be able to fix up the displacement). Sadly "o" as a constraint
doesn't look to be any different in this regard (I think it should be, as
adding a "small integer" may already bring the displacement beyond the
permitted range, but their definition of "offsettable" allows this).
This issue is latent until such time that (a) a caller appears passing in
the address of a Xen-internal variable and (b) we make LTO work again.
Since the breakage would be impossible to notice at build time, I think we
would be better off if we avoided it from the beginning. Which may mean
sacrificing on code gen, by using "r" and then "(%0)" as the insn operand.
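I.e. something along these lines (untested sketch, named operand purely
for clarity):

    alternative_io("xsave (%[p])",
                   "xsaveopt (%[p])", X86_FEATURE_XSAVEOPT,
                   "=m" (*ptr),
                   [p] "r" (ptr), "a" (lmask), "d" (hmask))

The "=m" output keeps the dependency visible to the compiler, while the
address is always supplied via a register, so no RIP-relative form can
end up in the replacement code.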
> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
> ptr->xsave_hdr.xcomp_bv = 0;
> }
> memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
> - continue;
> + goto retry;
>
> case 2: /* Stage 2: Reset all state. */
> ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
> ptr->xsave_hdr.xstate_bv = 0;
> ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
> ? XSTATE_COMPACTION_ENABLED : 0;
> - continue;
> - }
> + goto retry;
>
> - domain_crash(current->domain);
> + default: /* Stage 3: Nothing else to do. */
> + domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
> return;
There's an unexplained change here as to which domain is being crashed.
You switch to crashing the subject domain, yet if that's not also the
requester, it isn't "guilty" in causing the observed fault.
Jan
On 05/01/2026 3:46 pm, Jan Beulich wrote:
> On 30.12.2025 14:54, Andrew Cooper wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>> uint32_t hmask = mask >> 32;
>> uint32_t lmask = mask;
>> unsigned int fip_width = v->domain->arch.x87_fip_width;
>> -#define XSAVE(pfx) \
>> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>> - : "=m" (*ptr) \
>> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>> - else \
>> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>> - X86_FEATURE_XSAVEOPT, \
>> - "=m" (*ptr), \
>> - "a" (lmask), "d" (hmask), "D" (ptr))
>> +
>> +#define XSAVE(pfx) \
>> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>> + asm volatile ( "xsaves %0" \
>> + : "=m" (*ptr) \
>> + : "a" (lmask), "d" (hmask) ); \
>> + else \
>> + alternative_io("xsave %0", \
>> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
>> + "=m" (*ptr), \
>> + "a" (lmask), "d" (hmask))
> While no doubt neater to read this way, there's a subtle latent issue here:
> "m" doesn't exclude RIP-relative addressing, yet that addressing form can't
> be used in replacement code (up and until we leverage your decode-lite to
> actually be able to fix up the displacement).
I guess I'll fix that first.
I'm not interested in trying to keep playing games to work around
deficiencies in our alternatives infrastructure.
> Sadly "o" as a constraint
> doesn't look to be any different in this regard (I think it should be, as
> adding a "small integer" may already bring the displacement beyond the
> permitted range, but their definition of "offsettable" allows this).
>
> This issue is latent until such time that (a) a caller appears passing in
> the address of a Xen-internal variable and (b) we make LTO work again.
> Since the breakage would be impossible to notice at build time, I think we
> would be better off if we avoided it from the beginning. Which may mean
> sacrificing on code gen, by using "r" and then "(%0)" as the insn operand.
Even with LTO, I don't see any plausible case where we have build-time
struct vcpu's to pass in.
>
>> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>> ptr->xsave_hdr.xcomp_bv = 0;
>> }
>> memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
>> - continue;
>> + goto retry;
>>
>> case 2: /* Stage 2: Reset all state. */
>> ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>> ptr->xsave_hdr.xstate_bv = 0;
>> ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>> ? XSTATE_COMPACTION_ENABLED : 0;
>> - continue;
>> - }
>> + goto retry;
>>
>> - domain_crash(current->domain);
>> + default: /* Stage 3: Nothing else to do. */
>> + domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>> return;
> There's an unexplained change here as to which domain is being crashed.
> You switch to crashing the subject domain, yet if that's not also the
> requester, it isn't "guilty" in causing the observed fault.
So dom0 should be crashed because there's bad data in the migration stream?
v is always curr. XRSTOR can't be used correctly outside of the subject
context, and indeed the Stage 2 logic above is definitely buggy when v
!= curr.
~Andrew
On 05.01.2026 18:45, Andrew Cooper wrote:
> On 05/01/2026 3:46 pm, Jan Beulich wrote:
>> On 30.12.2025 14:54, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>> uint32_t hmask = mask >> 32;
>>> uint32_t lmask = mask;
>>> unsigned int fip_width = v->domain->arch.x87_fip_width;
>>> -#define XSAVE(pfx) \
>>> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>>> - : "=m" (*ptr) \
>>> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>>> - else \
>>> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>>> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>>> - X86_FEATURE_XSAVEOPT, \
>>> - "=m" (*ptr), \
>>> - "a" (lmask), "d" (hmask), "D" (ptr))
>>> +
>>> +#define XSAVE(pfx) \
>>> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>> + asm volatile ( "xsaves %0" \
>>> + : "=m" (*ptr) \
>>> + : "a" (lmask), "d" (hmask) ); \
>>> + else \
>>> + alternative_io("xsave %0", \
>>> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
>>> + "=m" (*ptr), \
>>> + "a" (lmask), "d" (hmask))
>> While no doubt neater to read this way, there's a subtle latent issue here:
>> "m" doesn't exclude RIP-relative addressing, yet that addressing form can't
>> be used in replacement code (up and until we leverage your decode-lite to
>> actually be able to fix up the displacement).
>
> I guess I'll fix that first.
>
> I'm not interested in trying to keep playing games to work around
> deficiencies in our alternatives infrastructure.
>
>> Sadly "o" as a constraint
>> doesn't look to be any different in this regard (I think it should be, as
>> adding a "small integer" may already bring the displacement beyond the
>> permitted range, but their definition of "offsettable" allows this).
>>
>> This issue is latent until such time that (a) a caller appears passing in
>> the address of a Xen-internal variable and (b) we make LTO work again.
>> Since the breakage would be impossible to notice at build time, I think we
>> would be better off if we avoided it from the beginning. Which may mean
>> sacrificing on code gen, by using "r" and then "(%0)" as the insn operand.
>
> Even with LTO, I don't see any plausible case where we have build-time
> struct vcpu's to pass in.
Sure, but struct vcpu * also isn't a great parameter for this kind of a
function.
>>> @@ -489,17 +484,17 @@ void xrstor(struct vcpu *v, uint64_t mask)
>>> ptr->xsave_hdr.xcomp_bv = 0;
>>> }
>>> memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
>>> - continue;
>>> + goto retry;
>>>
>>> case 2: /* Stage 2: Reset all state. */
>>> ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
>>> ptr->xsave_hdr.xstate_bv = 0;
>>> ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
>>> ? XSTATE_COMPACTION_ENABLED : 0;
>>> - continue;
>>> - }
>>> + goto retry;
>>>
>>> - domain_crash(current->domain);
>>> + default: /* Stage 3: Nothing else to do. */
>>> + domain_crash(v->domain, "Uncorrectable XRSTOR fault\n");
>>> return;
>> There's an unexplained change here as to which domain is being crashed.
>> You switch to crashing the subject domain, yet if that's not also the
>> requester, it isn't "guilty" in causing the observed fault.
>
> So dom0 should be crashed because there's bad data in the migration stream?
Well, I'm not saying the behavior needs to stay like this, or that it's
the best of all possible options. But in principle Dom0 could sanitize the
migration stream before passing it to Xen. So it is still first and foremost
Dom0 which is to blame.
> v is always curr.
Not quite - see xstate_set_init(). And for some of the callers of
hvm_update_guest_cr() I also don't think they always act on current. In
particular hvm_vcpu_reset_state() never does, I suppose (not the least
because of the vcpu_pause() in its sole caller).
> XRSTOR can't be used correctly outside of the subject context,
Then are you suggesting e.g. xstate_set_init() is buggy?
> and indeed the Stage 2 logic above is definitely buggy when v != curr.
Apparently I'm blind, as I don't see an issue there. It's all v's data
which is being fiddled with.
Jan
On 30/12/2025 1:54 pm, Andrew Cooper wrote:
> diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
> index 384f78bd5281..4215a83efefb 100644
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
> uint32_t hmask = mask >> 32;
> uint32_t lmask = mask;
> unsigned int fip_width = v->domain->arch.x87_fip_width;
> -#define XSAVE(pfx) \
> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
> - : "=m" (*ptr) \
> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
> - else \
> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
> - X86_FEATURE_XSAVEOPT, \
> - "=m" (*ptr), \
> - "a" (lmask), "d" (hmask), "D" (ptr))
> +
> +#define XSAVE(pfx) \
> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
> + asm volatile ( "xsaves %0" \
> + : "=m" (*ptr) \
> + : "a" (lmask), "d" (hmask) ); \
> + else \
> + alternative_io("xsave %0", \
> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
> + "=m" (*ptr), \
> + "a" (lmask), "d" (hmask))
This loses the pfx. I've fixed up locally and double checked the
disassembly.
~Andrew
On 02.01.2026 17:01, Andrew Cooper wrote:
> On 30/12/2025 1:54 pm, Andrew Cooper wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>> uint32_t hmask = mask >> 32;
>> uint32_t lmask = mask;
>> unsigned int fip_width = v->domain->arch.x87_fip_width;
>> -#define XSAVE(pfx) \
>> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>> - : "=m" (*ptr) \
>> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>> - else \
>> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>> - X86_FEATURE_XSAVEOPT, \
>> - "=m" (*ptr), \
>> - "a" (lmask), "d" (hmask), "D" (ptr))
>> +
>> +#define XSAVE(pfx) \
>> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>> + asm volatile ( "xsaves %0" \
>> + : "=m" (*ptr) \
>> + : "a" (lmask), "d" (hmask) ); \
>> + else \
>> + alternative_io("xsave %0", \
>> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
>> + "=m" (*ptr), \
>> + "a" (lmask), "d" (hmask))
>
> This loses the pfx. I've fixed up locally and double checked the
> disassembly.
Question being: Do we want to stick to using the prefix form, when gas
specifically has been offering a kind-of-suffix form instead from the
very beginning (xsaves and xsaves64)?
If we wanted to stick to the prefixes, I'd favor a form where the use
sites don't need to supply the separating blank (i.e. the macro itself
would insert it, as doing so with an empty prefix results in merely
an indentation "issue" in the generated assembly).
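E.g., trimmed to just the XSAVES arm (untested sketch):

    #define XSAVE(pfx) \
        asm volatile ( pfx " xsaves %0" \
                       : "=m" (*ptr) \
                       : "a" (lmask), "d" (hmask) )

    XSAVE("rex64"); /* rex64 xsaves ... */
    XSAVE("");      /*       xsaves ... */

Thoughts?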
Jan
On 05/01/2026 3:16 pm, Jan Beulich wrote:
> On 02.01.2026 17:01, Andrew Cooper wrote:
>> On 30/12/2025 1:54 pm, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/xstate.c
>>> +++ b/xen/arch/x86/xstate.c
>>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>> uint32_t hmask = mask >> 32;
>>> uint32_t lmask = mask;
>>> unsigned int fip_width = v->domain->arch.x87_fip_width;
>>> -#define XSAVE(pfx) \
>>> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>>> - : "=m" (*ptr) \
>>> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>>> - else \
>>> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>>> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>>> - X86_FEATURE_XSAVEOPT, \
>>> - "=m" (*ptr), \
>>> - "a" (lmask), "d" (hmask), "D" (ptr))
>>> +
>>> +#define XSAVE(pfx) \
>>> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>> + asm volatile ( "xsaves %0" \
>>> + : "=m" (*ptr) \
>>> + : "a" (lmask), "d" (hmask) ); \
>>> + else \
>>> + alternative_io("xsave %0", \
>>> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
>>> + "=m" (*ptr), \
>>> + "a" (lmask), "d" (hmask))
>> This loses the pfx. I've fixed up locally and double checked the
>> disassembly.
> Question being: Do we want to stick to using the prefix form, when gas
> specifically has been offering a kind-of-suffix form instead from the
> very beginning (xsaves and xsaves64)?
>
> If we wanted to stick to the prefixes, I'd favor a form where the use
> sites don't need to supply the separating blank (i.e. the macro itself
> would insert it, as doing so with an empty prefix results in merely
> an indentation "issue" in the generated assembly). Thoughts?
I don't expect this macro to survive the fixes to use the compressed
format. From that point of view, "closest to the original" is least churn.
One problem with using a suffix form is that you could feed in "opt"
instead of "64" and break things in rather more subtle ways.
I'll adjust the position of the space, but I think this can keep on
using prefixes in the short term.
~Andrew
On 05.01.2026 17:55, Andrew Cooper wrote:
> On 05/01/2026 3:16 pm, Jan Beulich wrote:
>> On 02.01.2026 17:01, Andrew Cooper wrote:
>>> On 30/12/2025 1:54 pm, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/xstate.c
>>>> +++ b/xen/arch/x86/xstate.c
>>>> @@ -310,21 +310,21 @@ void xsave(struct vcpu *v, uint64_t mask)
>>>> uint32_t hmask = mask >> 32;
>>>> uint32_t lmask = mask;
>>>> unsigned int fip_width = v->domain->arch.x87_fip_width;
>>>> -#define XSAVE(pfx) \
>>>> - if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>>> - asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
>>>> - : "=m" (*ptr) \
>>>> - : "a" (lmask), "d" (hmask), "D" (ptr) ); \
>>>> - else \
>>>> - alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
>>>> - ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
>>>> - X86_FEATURE_XSAVEOPT, \
>>>> - "=m" (*ptr), \
>>>> - "a" (lmask), "d" (hmask), "D" (ptr))
>>>> +
>>>> +#define XSAVE(pfx) \
>>>> + if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
>>>> + asm volatile ( "xsaves %0" \
>>>> + : "=m" (*ptr) \
>>>> + : "a" (lmask), "d" (hmask) ); \
>>>> + else \
>>>> + alternative_io("xsave %0", \
>>>> + "xsaveopt %0", X86_FEATURE_XSAVEOPT, \
>>>> + "=m" (*ptr), \
>>>> + "a" (lmask), "d" (hmask))
>>> This loses the pfx. I've fixed up locally and double checked the
>>> disassembly.
>> Question being: Do we want to stick to using the prefix form, when gas
>> specifically has been offering a kind-of-suffix form instead from the
>> very beginning (xsaves and xsaves64)?
>>
>> If we wanted to stick to the prefixes, I'd favor a form where the use
>> sites don't need to supply the separating blank (i.e. the macro itself
>> would insert it, as doing so with an empty prefix results in merely
>> an indentation "issue" in the generated assembly). Thoughts?
>
> I don't expect this macro to survive the fixes to use the compressed
> format. From that point of view, "closest to the original" is least churn.
>
> One problem with using a suffix form is that you could feed in "opt"
> instead of "64" and break things in rather more subtle ways.
Except that there's no XSAVESOPT nor XSAVEOPTOPT.
> I'll adjust the position of the space, but I think this can keep on
> using prefixes in the short term.
Okay, I wanted the alternative to at least be considered.
Jan