[PATCH v2] x86: Avoid using .byte for instructions where safe to do so

Andrew Cooper posted 1 patch 3 days, 9 hours ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20260409114151.179408-1-andrew.cooper3@citrix.com
xen/arch/x86/arch.mk                   |  4 +++
xen/arch/x86/include/asm/asm-defns.h   |  1 +
xen/arch/x86/include/asm/msr.h         |  2 ++
xen/arch/x86/include/asm/prot-key.h    |  4 +--
xen/arch/x86/include/asm/xstate.h      |  3 +--
xen/arch/x86/x86_emulate/0f01.c        |  2 +-
xen/arch/x86/x86_emulate/x86_emulate.c | 34 ++++++++++++--------------
7 files changed, 27 insertions(+), 23 deletions(-)
[PATCH v2] x86: Avoid using .byte for instructions where safe to do so
Posted by Andrew Cooper 3 days, 9 hours ago
The new toolchain baseline knows XGETBV, VPXOR and VPOR.

For the other cases using .byte, annotate the toolchain minima.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <jbeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Teddy Astie <teddy.astie@vates.tech>

v2
 * CLZERO, {WR,RD}PKRU can't be named yet.

Pull out of previous series as it's somewhat unrelated.  The XSAVE cleanup has
other prerequisites before it can move away from .byte.
---
 xen/arch/x86/arch.mk                   |  4 +++
 xen/arch/x86/include/asm/asm-defns.h   |  1 +
 xen/arch/x86/include/asm/msr.h         |  2 ++
 xen/arch/x86/include/asm/prot-key.h    |  4 +--
 xen/arch/x86/include/asm/xstate.h      |  3 +--
 xen/arch/x86/x86_emulate/0f01.c        |  2 +-
 xen/arch/x86/x86_emulate/x86_emulate.c | 34 ++++++++++++--------------
 7 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk
index 0b42e6312fac..cd0602a79aaf 100644
--- a/xen/arch/x86/arch.mk
+++ b/xen/arch/x86/arch.mk
@@ -17,7 +17,11 @@ CFLAGS-$(CONFIG_CC_IS_GCC) += -malign-data=abi
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-option-add,CFLAGS,CC,".equ \"x\"$(comma)1",-DHAVE_AS_QUOTED_SYM)
+
+# Binutils >= 2.31, Clang >= 7
 $(call as-option-add,CFLAGS,CC,"movdiri %rax$(comma)(%rax)",-DHAVE_AS_MOVDIR)
+
+# Binutils >= 2.33, Clang >= 9
 $(call as-option-add,CFLAGS,CC,"enqcmd (%rax)$(comma)%rax",-DHAVE_AS_ENQCMD)
 
 # Check to see whether the assembler supports the .nop directive.
diff --git a/xen/arch/x86/include/asm/asm-defns.h b/xen/arch/x86/include/asm/asm-defns.h
index 239dc3af096c..dc9b3ce272fd 100644
--- a/xen/arch/x86/include/asm/asm-defns.h
+++ b/xen/arch/x86/include/asm/asm-defns.h
@@ -1,5 +1,6 @@
 #include <asm/page-bits.h>
 
+/* binutils >= 2.26 or Clang >= 3.8 */
 .macro clzero
     .byte 0x0f, 0x01, 0xfc
 .endm
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index 941a7612f4ba..1377d156f4e1 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -63,6 +63,8 @@ static inline void wrmsrns(uint32_t msr, uint64_t val)
     /*
      * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant CS
      * prefix to avoid a trailing NOP.
+     *
+     * Binutils >= 2.40, Clang >= 16
      */
     alternative_input(".byte 0x2e; wrmsr",
                       ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
diff --git a/xen/arch/x86/include/asm/prot-key.h b/xen/arch/x86/include/asm/prot-key.h
index 8fb15b5c32e9..e8550e0c9203 100644
--- a/xen/arch/x86/include/asm/prot-key.h
+++ b/xen/arch/x86/include/asm/prot-key.h
@@ -19,7 +19,7 @@ static inline uint32_t rdpkru(void)
 {
     uint32_t pkru;
 
-    asm volatile ( ".byte 0x0f,0x01,0xee"
+    asm volatile ( ".byte 0x0f,0x01,0xee" /* binutils >= 2.26 or Clang >= 3.8 */
                    : "=a" (pkru) : "c" (0) : "dx" );
 
     return pkru;
@@ -27,7 +27,7 @@ static inline uint32_t rdpkru(void)
 
 static inline void wrpkru(uint32_t pkru)
 {
-    asm volatile ( ".byte 0x0f,0x01,0xef"
+    asm volatile ( ".byte 0x0f,0x01,0xef" /* binutils >= 2.26 or Clang >= 3.8 */
                    :: "a" (pkru), "d" (0), "c" (0) );
 }
 
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index c96d75e38b25..0519379edb57 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -118,8 +118,7 @@ static inline uint64_t xgetbv(unsigned int index)
     uint32_t lo, hi;
 
     ASSERT(index); /* get_xcr0() should be used instead. */
-    asm volatile ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
-                   : "=a" (lo), "=d" (hi) : "c" (index) );
+    asm volatile ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (index) );
 
     return lo | ((uint64_t)hi << 32);
 }
diff --git a/xen/arch/x86/x86_emulate/0f01.c b/xen/arch/x86/x86_emulate/0f01.c
index 4d36c7d289a5..87d338f0c74a 100644
--- a/xen/arch/x86/x86_emulate/0f01.c
+++ b/xen/arch/x86/x86_emulate/0f01.c
@@ -122,7 +122,7 @@ int x86emul_0f01(struct x86_emulate_state *s,
         {
         case vex_none: /* serialize */
             host_and_vcpu_must_have(serialize);
-            asm volatile ( ".byte 0x0f, 0x01, 0xe8" );
+            asm volatile ( ".byte 0x0f, 0x01, 0xe8" ); /* Binutils >= 2.34, Clang >= 11 */
             break;
         case vex_f2: /* xsusldtrk */
             vcpu_must_have(tsxldtrk);
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 11d145e17723..e58735ee9590 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4748,27 +4748,25 @@ x86_emulate(
                  */
                 if ( vex.l )
                 {
-                    /* vpxor %xmmN, %xmmN, %xmmN */
-                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
-                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
-                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
-                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
-                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
-                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
-                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
-                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
+                    asm volatile ( "vpxor %xmm0, %xmm0, %xmm0" );
+                    asm volatile ( "vpxor %xmm1, %xmm1, %xmm1" );
+                    asm volatile ( "vpxor %xmm2, %xmm2, %xmm2" );
+                    asm volatile ( "vpxor %xmm3, %xmm3, %xmm3" );
+                    asm volatile ( "vpxor %xmm4, %xmm4, %xmm4" );
+                    asm volatile ( "vpxor %xmm5, %xmm5, %xmm5" );
+                    asm volatile ( "vpxor %xmm6, %xmm6, %xmm6" );
+                    asm volatile ( "vpxor %xmm7, %xmm7, %xmm7" );
                 }
                 else
                 {
-                    /* vpor %xmmN, %xmmN, %xmmN */
-                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
-                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
-                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
-                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
-                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
-                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
-                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
-                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
+                    asm volatile ( "vpor %xmm0, %xmm0, %xmm0" );
+                    asm volatile ( "vpor %xmm1, %xmm1, %xmm1" );
+                    asm volatile ( "vpor %xmm2, %xmm2, %xmm2" );
+                    asm volatile ( "vpor %xmm3, %xmm3, %xmm3" );
+                    asm volatile ( "vpor %xmm4, %xmm4, %xmm4" );
+                    asm volatile ( "vpor %xmm5, %xmm5, %xmm5" );
+                    asm volatile ( "vpor %xmm6, %xmm6, %xmm6" );
+                    asm volatile ( "vpor %xmm7, %xmm7, %xmm7" );
                 }
 
                 ASSERT(!state->simd_size);
-- 
2.39.5


Re: [PATCH v2] x86: Avoid using .byte for instructions where safe to do so
Posted by Teddy Astie 3 days, 8 hours ago
Le 09/04/2026 à 13:43, Andrew Cooper a écrit :
> The new toolchain baseline knows XGETBV, VPXOR and VPOR.
> 
> For the other cases using .byte, annotate the toolchain minima.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <jbeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Teddy Astie <teddy.astie@vates.tech>
> 
> v2
>   * CLZERO, {WR,RD}PKRU can't be named yet.
> 
> Pull out of previous series as it's somewhat unrelated.  The XSAVE cleanup has
> other prerequisites before it can move away from .byte.
> ---
>   xen/arch/x86/arch.mk                   |  4 +++
>   xen/arch/x86/include/asm/asm-defns.h   |  1 +
>   xen/arch/x86/include/asm/msr.h         |  2 ++
>   xen/arch/x86/include/asm/prot-key.h    |  4 +--
>   xen/arch/x86/include/asm/xstate.h      |  3 +--
>   xen/arch/x86/x86_emulate/0f01.c        |  2 +-
>   xen/arch/x86/x86_emulate/x86_emulate.c | 34 ++++++++++++--------------
>   7 files changed, 27 insertions(+), 23 deletions(-)
> 
> diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk
> index 0b42e6312fac..cd0602a79aaf 100644
> --- a/xen/arch/x86/arch.mk
> +++ b/xen/arch/x86/arch.mk
> @@ -17,7 +17,11 @@ CFLAGS-$(CONFIG_CC_IS_GCC) += -malign-data=abi
>   $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
>   $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
>   $(call as-option-add,CFLAGS,CC,".equ \"x\"$(comma)1",-DHAVE_AS_QUOTED_SYM)
> +
> +# Binutils >= 2.31, Clang >= 7
>   $(call as-option-add,CFLAGS,CC,"movdiri %rax$(comma)(%rax)",-DHAVE_AS_MOVDIR)
> +
> +# Binutils >= 2.33, Clang >= 9
>   $(call as-option-add,CFLAGS,CC,"enqcmd (%rax)$(comma)%rax",-DHAVE_AS_ENQCMD)
>   
>   # Check to see whether the assembler supports the .nop directive.
> diff --git a/xen/arch/x86/include/asm/asm-defns.h b/xen/arch/x86/include/asm/asm-defns.h
> index 239dc3af096c..dc9b3ce272fd 100644
> --- a/xen/arch/x86/include/asm/asm-defns.h
> +++ b/xen/arch/x86/include/asm/asm-defns.h
> @@ -1,5 +1,6 @@
>   #include <asm/page-bits.h>
>   
> +/* binutils >= 2.26 or Clang >= 3.8 */
>   .macro clzero
>       .byte 0x0f, 0x01, 0xfc
>   .endm
> diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
> index 941a7612f4ba..1377d156f4e1 100644
> --- a/xen/arch/x86/include/asm/msr.h
> +++ b/xen/arch/x86/include/asm/msr.h
> @@ -63,6 +63,8 @@ static inline void wrmsrns(uint32_t msr, uint64_t val)
>       /*
>        * WRMSR is 2 bytes.  WRMSRNS is 3 bytes.  Pad WRMSR with a redundant CS
>        * prefix to avoid a trailing NOP.
> +     *
> +     * Binutils >= 2.40, Clang >= 16
>        */
>       alternative_input(".byte 0x2e; wrmsr",
>                         ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
> diff --git a/xen/arch/x86/include/asm/prot-key.h b/xen/arch/x86/include/asm/prot-key.h
> index 8fb15b5c32e9..e8550e0c9203 100644
> --- a/xen/arch/x86/include/asm/prot-key.h
> +++ b/xen/arch/x86/include/asm/prot-key.h
> @@ -19,7 +19,7 @@ static inline uint32_t rdpkru(void)
>   {
>       uint32_t pkru;
>   
> -    asm volatile ( ".byte 0x0f,0x01,0xee"
> +    asm volatile ( ".byte 0x0f,0x01,0xee" /* binutils >= 2.26 or Clang >= 3.8 */
>                      : "=a" (pkru) : "c" (0) : "dx" );
>   
>       return pkru;
> @@ -27,7 +27,7 @@ static inline uint32_t rdpkru(void)
>   
>   static inline void wrpkru(uint32_t pkru)
>   {
> -    asm volatile ( ".byte 0x0f,0x01,0xef"
> +    asm volatile ( ".byte 0x0f,0x01,0xef" /* binutils >= 2.26 or Clang >= 3.8 */
>                      :: "a" (pkru), "d" (0), "c" (0) );
>   }
>   
> diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
> index c96d75e38b25..0519379edb57 100644
> --- a/xen/arch/x86/include/asm/xstate.h
> +++ b/xen/arch/x86/include/asm/xstate.h
> @@ -118,8 +118,7 @@ static inline uint64_t xgetbv(unsigned int index)
>       uint32_t lo, hi;
>   
>       ASSERT(index); /* get_xcr0() should be used instead. */
> -    asm volatile ( ".byte 0x0f,0x01,0xd0" /* xgetbv */
> -                   : "=a" (lo), "=d" (hi) : "c" (index) );
> +    asm volatile ( "xgetbv" : "=a" (lo), "=d" (hi) : "c" (index) );
>   
>       return lo | ((uint64_t)hi << 32);
>   }
> diff --git a/xen/arch/x86/x86_emulate/0f01.c b/xen/arch/x86/x86_emulate/0f01.c
> index 4d36c7d289a5..87d338f0c74a 100644
> --- a/xen/arch/x86/x86_emulate/0f01.c
> +++ b/xen/arch/x86/x86_emulate/0f01.c
> @@ -122,7 +122,7 @@ int x86emul_0f01(struct x86_emulate_state *s,
>           {
>           case vex_none: /* serialize */
>               host_and_vcpu_must_have(serialize);
> -            asm volatile ( ".byte 0x0f, 0x01, 0xe8" );
> +            asm volatile ( ".byte 0x0f, 0x01, 0xe8" ); /* Binutils >= 2.34, Clang >= 11 */
>               break;
>           case vex_f2: /* xsusldtrk */
>               vcpu_must_have(tsxldtrk);
> diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
> index 11d145e17723..e58735ee9590 100644
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -4748,27 +4748,25 @@ x86_emulate(
>                    */
>                   if ( vex.l )
>                   {
> -                    /* vpxor %xmmN, %xmmN, %xmmN */
> -                    asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
> -                    asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
> -                    asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
> -                    asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
> -                    asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
> -                    asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
> -                    asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
> -                    asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
> +                    asm volatile ( "vpxor %xmm0, %xmm0, %xmm0" );
> +                    asm volatile ( "vpxor %xmm1, %xmm1, %xmm1" );
> +                    asm volatile ( "vpxor %xmm2, %xmm2, %xmm2" );
> +                    asm volatile ( "vpxor %xmm3, %xmm3, %xmm3" );
> +                    asm volatile ( "vpxor %xmm4, %xmm4, %xmm4" );
> +                    asm volatile ( "vpxor %xmm5, %xmm5, %xmm5" );
> +                    asm volatile ( "vpxor %xmm6, %xmm6, %xmm6" );
> +                    asm volatile ( "vpxor %xmm7, %xmm7, %xmm7" );
>                   }
>                   else
>                   {
> -                    /* vpor %xmmN, %xmmN, %xmmN */
> -                    asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
> -                    asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
> -                    asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
> -                    asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
> -                    asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
> -                    asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
> -                    asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
> -                    asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
> +                    asm volatile ( "vpor %xmm0, %xmm0, %xmm0" );
> +                    asm volatile ( "vpor %xmm1, %xmm1, %xmm1" );
> +                    asm volatile ( "vpor %xmm2, %xmm2, %xmm2" );
> +                    asm volatile ( "vpor %xmm3, %xmm3, %xmm3" );
> +                    asm volatile ( "vpor %xmm4, %xmm4, %xmm4" );
> +                    asm volatile ( "vpor %xmm5, %xmm5, %xmm5" );
> +                    asm volatile ( "vpor %xmm6, %xmm6, %xmm6" );
> +                    asm volatile ( "vpor %xmm7, %xmm7, %xmm7" );
>                   }
>   
>                   ASSERT(!state->simd_size);

Reviewed-by: Teddy Astie <teddy.astie@vates.tech>


--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH v2] x86: Avoid using .byte for instructions where safe to do so
Posted by Jan Beulich 3 days, 8 hours ago
On 09.04.2026 13:41, Andrew Cooper wrote:
> The new toolchain baseline knows XGETBV, VPXOR and VPOR.
> 
> For the other cases using .byte, annotate the toolchain minima.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Jan Beulich <jbeulich@suse.com>