[Xen-devel] [PATCH 0/5] x86emul: allow suppressing FPU/MMX/SIMD insn emulation

Posted by Jan Beulich 4 years, 4 months ago
This is particularly helpful for pure PV environments, e.g. the shim.

1: use CASE_SIMD_PACKED_INT() where possible
2: introduce CASE_SIMD_PACKED_INT_VEX()
3: drop CASE_SIMD_DOUBLE_FP()
4: introduce CASE_SIMD_..._FP_VEX()
5: disable FPU/MMX/SIMD insn emulation when !HVM

Jan

[Xen-devel] [PATCH 1/5] x86emul: use CASE_SIMD_PACKED_INT() where possible
Posted by Jan Beulich 4 years, 4 months ago
This (imo) improves readability (simply by the reduced number of lines)
and helps prepare for optionally disabling MMX and SIMD support in the
emulator.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
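
To illustrate the idea with a minimal, self-contained sketch (the
X86EMUL_OPC*() definitions below are simplified stand-ins so the
fragment compiles on its own; the real macros encode opcode extension
and prefix differently):

    /* Stand-in encodings -- the real ones differ. */
    #define X86EMUL_OPC(ext, o)    (((ext) << 9) | ((o) << 1)) /* no prefix */
    #define X86EMUL_OPC_66(ext, o) (X86EMUL_OPC(ext, o) | 1)   /* 66 prefix */

    #define CASE_SIMD_PACKED_INT(pfx, opc) \
        case X86EMUL_OPC(pfx, opc):        \
        case X86EMUL_OPC_66(pfx, opc)

    int is_pshufb(unsigned int key)
    {
        switch ( key )
        {
        CASE_SIMD_PACKED_INT(0x0f38, 0x00): /* pshufb {,x}mm/mem,{,x}mm */
            return 1; /* one line covers both the mm and the xmm form */
        default:
            return 0;
        }
    }

Each macro use hence stands for a pair of case labels, which is where
the two-lines-per-insn shrinkage in the hunks below comes from.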

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -8528,36 +8528,21 @@ x86_emulate(
         sfence = true;
         break;
 
-    case X86EMUL_OPC(0x0f38, 0x00):    /* pshufb mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x00): /* pshufb xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x01):    /* phaddw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x01): /* phaddw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x02):    /* phaddd mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x02): /* phaddd xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x03):    /* phaddsw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x03): /* phaddsw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x04):    /* pmaddubsw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x04): /* pmaddubsw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x05):    /* phsubw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x05): /* phsubw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x06):    /* phsubd mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x06): /* phsubd xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x07):    /* phsubsw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x07): /* phsubsw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x08):    /* psignb mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x08): /* psignb xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x09):    /* psignw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x09): /* psignw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x0a):    /* psignd mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x0a): /* psignd xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x0b):    /* pmulhrsw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x0b): /* pmulhrsw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x1c):    /* pabsb mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x1c): /* pabsb xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x1d):    /* pabsw mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x1d): /* pabsw xmm/m128,xmm */
-    case X86EMUL_OPC(0x0f38, 0x1e):    /* pabsd mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f38, 0x1e): /* pabsd xmm/m128,xmm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x00): /* pshufb {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x01): /* phaddw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x02): /* phaddd {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x03): /* phaddsw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x04): /* pmaddubsw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x05): /* phsubw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x06): /* phsubd {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x07): /* phsubsw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x08): /* psignb {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x09): /* psignw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x0a): /* psignd {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x0b): /* pmulhrsw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x1c): /* pabsb {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x1d): /* pabsw {,x}mm/mem,{,x}mm */
+    CASE_SIMD_PACKED_INT(0x0f38, 0x1e): /* pabsd {,x}mm/mem,{,x}mm */
         host_and_vcpu_must_have(ssse3);
         if ( vex.pfx )
         {
@@ -9982,8 +9967,7 @@ x86_emulate(
         avx512_vlen_check(b & 2);
         goto simd_imm8_zmm;
 
-    case X86EMUL_OPC(0x0f3a, 0x0f):    /* palignr $imm8,mm/m64,mm */
-    case X86EMUL_OPC_66(0x0f3a, 0x0f): /* palignr $imm8,xmm/m128,xmm */
+    CASE_SIMD_PACKED_INT(0x0f3a, 0x0f): /* palignr $imm8,{,x}mm/mem,{,x}mm */
         host_and_vcpu_must_have(ssse3);
         if ( vex.pfx )
         {


Re: [Xen-devel] [PATCH 1/5] x86emul: use CASE_SIMD_PACKED_INT() where possible
Posted by Andrew Cooper 4 years, 4 months ago
On 20/12/2019 13:39, Jan Beulich wrote:
> This (imo) improves readability (simply by the reduced number of lines)
> and helps prepare for optionally disabling MMX and SIMD support in the
> emulator.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

[Xen-devel] [PATCH 2/5] x86emul: introduce CASE_SIMD_PACKED_INT_VEX()
Posted by Jan Beulich 4 years, 4 months ago
Since many AVX{,2} insns have legacy MMX and SIMD counterparts, introduce
a macro covering all three in one go. This (imo) improves readability
(simply by the reduced number of lines) and helps prepare for optionally
disabling MMX and SIMD support in the emulator.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
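
Again as a minimal sketch with stand-in encodings (not the real ones):
the _VEX variant merely chains a third case label onto
CASE_SIMD_PACKED_INT(), so the MMX, SSE, and AVX forms of an insn share
a single line in the big switch.

    /* Stand-in encodings -- the real ones differ. */
    #define X86EMUL_OPC(ext, o)        (((ext) << 10) | ((o) << 2)) /* MMX */
    #define X86EMUL_OPC_66(ext, o)     (X86EMUL_OPC(ext, o) | 1)    /* SSE */
    #define X86EMUL_OPC_VEX_66(ext, o) (X86EMUL_OPC(ext, o) | 2)    /* AVX */

    #define CASE_SIMD_PACKED_INT(pfx, opc) \
        case X86EMUL_OPC(pfx, opc):        \
        case X86EMUL_OPC_66(pfx, opc)
    #define CASE_SIMD_PACKED_INT_VEX(pfx, opc) \
        CASE_SIMD_PACKED_INT(pfx, opc):        \
        case X86EMUL_OPC_VEX_66(pfx, opc)

    int is_paddb(unsigned int key)
    {
        switch ( key )
        {
        CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfc): /* paddb / vpaddb */
            return 1; /* MMX, SSE2, and AVX forms all land here */
        default:
            return 0;
        }
    }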

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -6006,6 +6006,10 @@ x86_emulate(
 #define CASE_SIMD_PACKED_INT(pfx, opc)       \
     case X86EMUL_OPC(pfx, opc):              \
     case X86EMUL_OPC_66(pfx, opc)
+#define CASE_SIMD_PACKED_INT_VEX(pfx, opc)   \
+    CASE_SIMD_PACKED_INT(pfx, opc):          \
+    case X86EMUL_OPC_VEX_66(pfx, opc)
+
 #define CASE_SIMD_SINGLE_FP(kind, pfx, opc)  \
     case X86EMUL_OPC##kind(pfx, opc):        \
     case X86EMUL_OPC##kind##_F3(pfx, opc)
@@ -6706,8 +6710,8 @@ x86_emulate(
 
     CASE_SIMD_PACKED_FP(, 0x0f, 0x50):     /* movmskp{s,d} xmm,reg */
     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd7):      /* pmovmskb {,x}mm,reg */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd7):   /* vpmovmskb {x,y}mm,reg */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd7):  /* pmovmskb {,x}mm,reg */
+                                           /* vpmovmskb {x,y}mm,reg */
         opc = init_prefixes(stub);
         opc[0] = b;
         /* Convert GPR destination to %rAX. */
@@ -6817,122 +6821,122 @@ x86_emulate(
         op_bytes = 16 << evex.lr;
         goto simd_zmm;
 
-    CASE_SIMD_PACKED_INT(0x0f, 0x60):    /* punpcklbw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x61):    /* punpcklwd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x61): /* vpunpcklwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x62):    /* punpckldq {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x62): /* vpunpckldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x68):    /* punpckhbw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x68): /* vpunpckhbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x69):    /* punpckhwd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x69): /* vpunpckhwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x6a):    /* punpckhdq {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x6a): /* vpunpckhdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x60): /* punpcklbw {,x}mm/mem,{,x}mm */
+                                          /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x61): /* punpcklwd {,x}mm/mem,{,x}mm */
+                                          /* vpunpcklwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x62): /* punpckldq {,x}mm/mem,{,x}mm */
+                                          /* vpunpckldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x68): /* punpckhbw {,x}mm/mem,{,x}mm */
+                                          /* vpunpckhbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x69): /* punpckhwd {,x}mm/mem,{,x}mm */
+                                          /* vpunpckhwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6a): /* punpckhdq {,x}mm/mem,{,x}mm */
+                                          /* vpunpckhdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
         op_bytes = vex.pfx ? 16 << vex.l : b & 8 ? 8 : 4;
         /* fall through */
-    CASE_SIMD_PACKED_INT(0x0f, 0x63):    /* packssbw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x63): /* vpackssbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x64):    /* pcmpgtb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x64): /* vpcmpgtb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x65):    /* pcmpgtw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x65): /* vpcmpgtw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x66):    /* pcmpgtd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x66): /* vpcmpgtd {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x67):    /* packusbw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x67): /* vpackusbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x6b):    /* packsswd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x6b): /* vpacksswd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x63): /* packssbw {,x}mm/mem,{,x}mm */
+                                          /* vpackssbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x64): /* pcmpgtb {,x}mm/mem,{,x}mm */
+                                          /* vpcmpgtb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x65): /* pcmpgtw {,x}mm/mem,{,x}mm */
+                                          /* vpcmpgtw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x66): /* pcmpgtd {,x}mm/mem,{,x}mm */
+                                          /* vpcmpgtd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x67): /* packusbw {,x}mm/mem,{,x}mm */
+                                          /* vpackusbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6b): /* packsswd {,x}mm/mem,{,x}mm */
+                                          /* vpacksswd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0x6c):     /* punpcklqdq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6c): /* vpunpcklqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0x6d):     /* punpckhqdq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6d): /* vpunpckhqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x74):    /* pcmpeqb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x74): /* vpcmpeqb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x75):    /* pcmpeqw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x75): /* vpcmpeqw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x76):    /* pcmpeqd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x76): /* vpcmpeqd {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd1):    /* psrlw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd2):    /* psrld {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd2): /* vpsrld xmm/m128,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd3):    /* psrlq {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd3): /* vpsrlq xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x74): /* pcmpeqb {,x}mm/mem,{,x}mm */
+                                          /* vpcmpeqb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x75): /* pcmpeqw {,x}mm/mem,{,x}mm */
+                                          /* vpcmpeqw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x76): /* pcmpeqd {,x}mm/mem,{,x}mm */
+                                          /* vpcmpeqd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd1): /* psrlw {,x}mm/mem,{,x}mm */
+                                          /* vpsrlw xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd2): /* psrld {,x}mm/mem,{,x}mm */
+                                          /* vpsrld xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd3): /* psrlq {,x}mm/mem,{,x}mm */
+                                          /* vpsrlq xmm/m128,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xd4):     /* paddq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xd4): /* vpaddq {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd5):    /* pmullw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd5): /* vpmullw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd8):    /* psubusb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd8): /* vpsubusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xd9):    /* psubusw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xd9): /* vpsubusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd5): /* pmullw {,x}mm/mem,{,x}mm */
+                                          /* vpmullw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd8): /* psubusb {,x}mm/mem,{,x}mm */
+                                          /* vpsubusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd9): /* psubusw {,x}mm/mem,{,x}mm */
+                                          /* vpsubusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xda):     /* pminub xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xda): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xdb):    /* pand {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xdb): /* vpand {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xdc):    /* paddusb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xdc): /* vpaddusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xdd):    /* paddusw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xdd): /* vpaddusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdb): /* pand {,x}mm/mem,{,x}mm */
+                                          /* vpand {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdc): /* paddusb {,x}mm/mem,{,x}mm */
+                                          /* vpaddusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdd): /* paddusw {,x}mm/mem,{,x}mm */
+                                          /* vpaddusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xde):     /* pmaxub xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xde): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xdf):    /* pandn {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xdf): /* vpandn {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xdf): /* pandn {,x}mm/mem,{,x}mm */
+                                          /* vpandn {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xe0):     /* pavgb xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xe0): /* vpavgb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xe1):    /* psraw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xe2):    /* psrad {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xe2): /* vpsrad xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe1): /* psraw {,x}mm/mem,{,x}mm */
+                                          /* vpsraw xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe2): /* psrad {,x}mm/mem,{,x}mm */
+                                          /* vpsrad xmm/m128,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xe3):     /* pavgw xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xe3): /* vpavgw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xe4):     /* pmulhuw xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xe4): /* vpmulhuw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xe5):    /* pmulhw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xe5): /* vpmulhw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xe8):    /* psubsb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xe8): /* vpsubsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xe9):    /* psubsw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xe9): /* vpsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe5): /* pmulhw {,x}mm/mem,{,x}mm */
+                                          /* vpmulhw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe8): /* psubsb {,x}mm/mem,{,x}mm */
+                                          /* vpsubsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xe9): /* psubsw {,x}mm/mem,{,x}mm */
+                                          /* vpsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xea):     /* pminsw xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xea): /* vpminsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xeb):    /* por {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xeb): /* vpor {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xec):    /* paddsb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xec): /* vpaddsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xed):    /* paddsw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xed): /* vpaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xeb): /* por {,x}mm/mem,{,x}mm */
+                                          /* vpor {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xec): /* paddsb {,x}mm/mem,{,x}mm */
+                                          /* vpaddsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xed): /* paddsw {,x}mm/mem,{,x}mm */
+                                          /* vpaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xee):     /* pmaxsw xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xee): /* vpmaxsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xef):    /* pxor {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xef): /* vpxor {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf1):    /* psllw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf2):    /* pslld {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf2): /* vpslld xmm/m128,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf3):    /* psllq {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf3): /* vpsllq xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xef): /* pxor {,x}mm/mem,{,x}mm */
+                                          /* vpxor {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf1): /* psllw {,x}mm/mem,{,x}mm */
+                                          /* vpsllw xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf2): /* pslld {,x}mm/mem,{,x}mm */
+                                          /* vpslld xmm/m128,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf3): /* psllq {,x}mm/mem,{,x}mm */
+                                          /* vpsllq xmm/m128,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xf4):     /* pmuludq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xf4): /* vpmuludq {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf5):    /* pmaddwd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf5): /* vpmaddwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf5): /* pmaddwd {,x}mm/mem,{,x}mm */
+                                          /* vpmaddwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xf6):     /* psadbw xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xf6): /* vpsadbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf8):    /* psubb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf8): /* vpsubb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf9):    /* psubw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf9): /* vpsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xfa):    /* psubd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xfa): /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf8): /* psubb {,x}mm/mem,{,x}mm */
+                                          /* vpsubb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf9): /* psubw {,x}mm/mem,{,x}mm */
+                                          /* vpsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfa): /* psubd {,x}mm/mem,{,x}mm */
+                                          /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xfb):     /* psubq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xfb): /* vpsubq {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xfc):    /* paddb {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xfc): /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xfd):    /* paddw {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xfd): /* vpaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_INT(0x0f, 0xfe):    /* paddd {,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xfe): /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfc): /* paddb {,x}mm/mem,{,x}mm */
+                                          /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfd): /* paddw {,x}mm/mem,{,x}mm */
+                                          /* vpaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfe): /* paddd {,x}mm/mem,{,x}mm */
+                                          /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     simd_0f_int:
         if ( vex.opcx != vex_none )
         {
@@ -7073,10 +7077,10 @@ x86_emulate(
         generate_exception_if(!evex.w, EXC_UD);
         goto avx512f_no_sae;
 
-    CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
-    CASE_SIMD_PACKED_INT(0x0f, 0x7e):    /* mov{d,q} {,x}mm,r/m */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */
+                                          /* vmov{d,q} r/m,xmm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */
+                                          /* vmov{d,q} xmm,r/m */
         if ( vex.opcx != vex_none )
         {
             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
@@ -7191,8 +7195,8 @@ x86_emulate(
         op_bytes = 8;
         goto simd_0f_int;
 
-    CASE_SIMD_PACKED_INT(0x0f, 0x70):    /* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0x70): /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0x70):/* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */
+                                         /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_F3(0x0f, 0x70):     /* pshufhw $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x70): /* vpshufhw $imm8,{x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_F2(0x0f, 0x70):     /* pshuflw $imm8,xmm/m128,xmm */
@@ -8152,8 +8156,8 @@ x86_emulate(
         sfence = true;
         break;
 
-    CASE_SIMD_PACKED_INT(0x0f, 0xc4):      /* pinsrw $imm8,r32/m16,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xc4):   /* vpinsrw $imm8,r32/m16,xmm,xmm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc4):  /* pinsrw $imm8,r32/m16,{,x}mm */
+                                           /* vpinsrw $imm8,r32/m16,xmm,xmm */
         generate_exception_if(vex.l, EXC_UD);
         memcpy(mmvalp, &src.val, 2);
         ea.type = OP_MEM;
@@ -8176,8 +8180,8 @@ x86_emulate(
         state->simd_size = simd_other;
         goto avx512f_imm8_no_sae;
 
-    CASE_SIMD_PACKED_INT(0x0f, 0xc5):      /* pextrw $imm8,{,x}mm,reg */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc5):  /* pextrw $imm8,{,x}mm,reg */
+                                           /* vpextrw $imm8,xmm,reg */
         generate_exception_if(vex.l, EXC_UD);
         opc = init_prefixes(stub);
         opc[0] = b;
@@ -8464,8 +8468,7 @@ x86_emulate(
         op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l);
         goto simd_0f_cvt;
 
-    CASE_SIMD_PACKED_INT(0x0f, 0xf7):    /* maskmov{q,dqu} {,x}mm,{,x}mm */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* vmaskmovdqu xmm,xmm */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf7): /* {,v}maskmov{q,dqu} {,x}mm,{,x}mm */
         generate_exception_if(ea.type != OP_REG, EXC_UD);
         if ( vex.opcx != vex_none )
         {
@@ -11333,8 +11336,8 @@ x86_insn_is_mem_access(const struct x86_
     case 0xa4 ... 0xa7: /* MOVS / CMPS */
     case 0xaa ... 0xaf: /* STOS / LODS / SCAS */
     case 0xd7:          /* XLAT */
-    CASE_SIMD_PACKED_INT(0x0f, 0xf7):    /* MASKMOV{Q,DQU} */
-    case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* VMASKMOVDQU */
+    CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf7): /* MASKMOV{Q,DQU} */
+                                          /* VMASKMOVDQU */
         return true;
 
     case X86EMUL_OPC(0x0f, 0x01):


Re: [Xen-devel] [PATCH 2/5] x86emul: introduce CASE_SIMD_PACKED_INT_VEX()
Posted by Andrew Cooper 4 years, 4 months ago
On 20/12/2019 13:39, Jan Beulich wrote:
> Since many AVX{,2} insns have legacy MMX and SIMD counterparts, introduce
> a macro covering all three in one go. This (imo) improves readability
> (simply by the reduced number of lines) and helps prepare for optionally
> disabling MMX and SIMD support in the emulator.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

[Xen-devel] [PATCH 3/5] x86emul: drop CASE_SIMD_DOUBLE_FP()
Posted by Jan Beulich 4 years, 4 months ago
It's used only by CASE_SIMD_ALL_FP(), which can equally well be
implemented in terms of CASE_SIMD_{PACKED,SCALAR}_FP().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
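
A minimal sketch with stand-in encodings (not the real ones) makes the
equivalence visible: the four prefix forms {none, 66, F3, F2} are
covered whether they are grouped as SINGLE+DOUBLE ({none,F3} + {66,F2})
or as PACKED+SCALAR ({none,66} + {F3,F2}); the union is the same four
case labels either way.

    /* Stand-in encodings -- the real ones differ. */
    #define OPC(o)    ((o) << 2)     /* no prefix */
    #define OPC_66(o) (OPC(o) | 1)
    #define OPC_F3(o) (OPC(o) | 2)
    #define OPC_F2(o) (OPC(o) | 3)

    #define CASE_SIMD_PACKED_FP(o) case OPC(o): case OPC_66(o)
    #define CASE_SIMD_SCALAR_FP(o) case OPC_F3(o): case OPC_F2(o)
    #define CASE_SIMD_ALL_FP(o)    CASE_SIMD_PACKED_FP(o): CASE_SIMD_SCALAR_FP(o)

    int is_fp_add(unsigned int key)
    {
        switch ( key )
        {
        CASE_SIMD_ALL_FP(0x58): /* add{p,s}{s,d} -- all four prefix forms */
            return 1;
        default:
            return 0;
        }
    }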

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -6010,21 +6010,18 @@ x86_emulate(
     CASE_SIMD_PACKED_INT(pfx, opc):          \
     case X86EMUL_OPC_VEX_66(pfx, opc)
 
-#define CASE_SIMD_SINGLE_FP(kind, pfx, opc)  \
-    case X86EMUL_OPC##kind(pfx, opc):        \
-    case X86EMUL_OPC##kind##_F3(pfx, opc)
-#define CASE_SIMD_DOUBLE_FP(kind, pfx, opc)  \
-    case X86EMUL_OPC##kind##_66(pfx, opc):   \
-    case X86EMUL_OPC##kind##_F2(pfx, opc)
 #define CASE_SIMD_ALL_FP(kind, pfx, opc)     \
-    CASE_SIMD_SINGLE_FP(kind, pfx, opc):     \
-    CASE_SIMD_DOUBLE_FP(kind, pfx, opc)
+    CASE_SIMD_PACKED_FP(kind, pfx, opc):     \
+    CASE_SIMD_SCALAR_FP(kind, pfx, opc)
 #define CASE_SIMD_PACKED_FP(kind, pfx, opc)  \
     case X86EMUL_OPC##kind(pfx, opc):        \
     case X86EMUL_OPC##kind##_66(pfx, opc)
 #define CASE_SIMD_SCALAR_FP(kind, pfx, opc)  \
     case X86EMUL_OPC##kind##_F3(pfx, opc):   \
     case X86EMUL_OPC##kind##_F2(pfx, opc)
+#define CASE_SIMD_SINGLE_FP(kind, pfx, opc)  \
+    case X86EMUL_OPC##kind(pfx, opc):        \
+    case X86EMUL_OPC##kind##_F3(pfx, opc)
 
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b):     /* movnts{s,d} xmm,mem */
         host_and_vcpu_must_have(sse4a);


Re: [Xen-devel] [PATCH 3/5] x86emul: drop CASE_SIMD_DOUBLE_FP()
Posted by Andrew Cooper 4 years, 4 months ago
On 20/12/2019 13:40, Jan Beulich wrote:
> It's used only by CASE_SIMD_ALL_FP(), which can equally well be
> implemented in terms of CASE_SIMD_{PACKED,SCALAR}_FP().
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

[Xen-devel] [PATCH 4/5] x86emul: introduce CASE_SIMD_..._FP_VEX()
Posted by Jan Beulich 4 years, 4 months ago
Since many AVX{,2} insns have legacy SIMD counterparts, introduce macros
covering both in one go. This (imo) improves readability and helps prepare
for optionally disabling SIMD support in the emulator.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
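
These wrappers rely on the "kind" parameter being allowed to be empty,
so that X86EMUL_OPC##kind##_F3 pastes to either the legacy or the VEX
name from a single macro body. A stand-alone sketch of just that
pasting detail (stand-in macros, not the real ones):

    #include <stdio.h>

    #define OPC_F3(o)     ((o) | 0x100) /* legacy F3-prefixed form */
    #define OPC_VEX_F3(o) ((o) | 0x200) /* VEX F3-prefixed form    */

    /* kind may be empty (legacy) or _VEX. */
    #define SCALAR_SINGLE(kind, o) OPC##kind##_F3(o)

    int main(void)
    {
        printf("%#x\n", SCALAR_SINGLE(, 0x10));     /* OPC_F3     -> 0x110 */
        printf("%#x\n", SCALAR_SINGLE(_VEX, 0x10)); /* OPC_VEX_F3 -> 0x210 */
        return 0;
    }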

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -6023,59 +6023,72 @@ x86_emulate(
     case X86EMUL_OPC##kind(pfx, opc):        \
     case X86EMUL_OPC##kind##_F3(pfx, opc)
 
+#define CASE_SIMD_ALL_FP_VEX(pfx, opc)       \
+    CASE_SIMD_ALL_FP(, pfx, opc):            \
+    CASE_SIMD_ALL_FP(_VEX, pfx, opc)
+#define CASE_SIMD_PACKED_FP_VEX(pfx, opc)    \
+    CASE_SIMD_PACKED_FP(, pfx, opc):         \
+    CASE_SIMD_PACKED_FP(_VEX, pfx, opc)
+#define CASE_SIMD_SCALAR_FP_VEX(pfx, opc)    \
+    CASE_SIMD_SCALAR_FP(, pfx, opc):         \
+    CASE_SIMD_SCALAR_FP(_VEX, pfx, opc)
+#define CASE_SIMD_SINGLE_FP_VEX(pfx, opc)    \
+    CASE_SIMD_SINGLE_FP(, pfx, opc):         \
+    CASE_SIMD_SINGLE_FP(_VEX, pfx, opc)
+
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b):     /* movnts{s,d} xmm,mem */
         host_and_vcpu_must_have(sse4a);
         /* fall through */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x2b):     /* movntp{s,d} xmm,m128 */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2b): /* vmovntp{s,d} {x,y}mm,mem */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2b):   /* movntp{s,d} xmm,m128 */
+                                           /* vmovntp{s,d} {x,y}mm,mem */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
         sfence = true;
         /* fall through */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x10):        /* mov{up,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x10): /* vmovup{s,d} {x,y}mm/mem,{x,y}mm */
-    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x10): /* vmovs{s,d} mem,xmm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x10):      /* mov{up,s}{s,d} xmm/mem,xmm */
+                                           /* vmovup{s,d} {x,y}mm/mem,{x,y}mm */
+                                           /* vmovs{s,d} mem,xmm */
                                            /* vmovs{s,d} xmm,xmm,xmm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x11):        /* mov{up,s}{s,d} xmm,xmm/mem */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x11): /* vmovup{s,d} {x,y}mm,{x,y}mm/mem */
-    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x11): /* vmovs{s,d} xmm,mem */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x11):      /* mov{up,s}{s,d} xmm,xmm/mem */
+                                           /* vmovup{s,d} {x,y}mm,{x,y}mm/mem */
+                                           /* vmovs{s,d} xmm,mem */
                                            /* vmovs{s,d} xmm,xmm,xmm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x14):     /* unpcklp{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x14): /* vunpcklp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x15):     /* unpckhp{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x15): /* vunpckhp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x28):     /* movap{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x28): /* vmovap{s,d} {x,y}mm/mem,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x29):     /* movap{s,d} xmm,xmm/m128 */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x29): /* vmovap{s,d} {x,y}mm,{x,y}mm/mem */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x51):        /* sqrt{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x51):    /* vsqrtp{s,d} {x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x14):   /* unpcklp{s,d} xmm/m128,xmm */
+                                           /* vunpcklp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x15):   /* unpckhp{s,d} xmm/m128,xmm */
+                                           /* vunpckhp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x28):   /* movap{s,d} xmm/m128,xmm */
+                                           /* vmovap{s,d} {x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x29):   /* movap{s,d} xmm,xmm/m128 */
+                                           /* vmovap{s,d} {x,y}mm,{x,y}mm/mem */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x51):      /* sqrt{p,s}{s,d} xmm/mem,xmm */
+                                           /* vsqrtp{s,d} {x,y}mm/mem,{x,y}mm */
                                            /* vsqrts{s,d} xmm/m32,xmm,xmm */
-    CASE_SIMD_SINGLE_FP(, 0x0f, 0x52):     /* rsqrt{p,s}s xmm/mem,xmm */
-    CASE_SIMD_SINGLE_FP(_VEX, 0x0f, 0x52): /* vrsqrtps {x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_SINGLE_FP_VEX(0x0f, 0x52):   /* rsqrt{p,s}s xmm/mem,xmm */
+                                           /* vrsqrtps {x,y}mm/mem,{x,y}mm */
                                            /* vrsqrtss xmm/m32,xmm,xmm */
-    CASE_SIMD_SINGLE_FP(, 0x0f, 0x53):     /* rcp{p,s}s xmm/mem,xmm */
-    CASE_SIMD_SINGLE_FP(_VEX, 0x0f, 0x53): /* vrcpps {x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_SINGLE_FP_VEX(0x0f, 0x53):   /* rcp{p,s}s xmm/mem,xmm */
+                                           /* vrcpps {x,y}mm/mem,{x,y}mm */
                                            /* vrcpss xmm/m32,xmm,xmm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x54):     /* andp{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x54): /* vandp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x55):     /* andnp{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x55): /* vandnp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x56):     /* orp{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x56): /* vorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x57):     /* xorp{s,d} xmm/m128,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x57): /* vxorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x58):        /* add{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x58):    /* vadd{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x59):        /* mul{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x59):    /* vmul{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x5c):        /* sub{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5c):    /* vsub{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x5d):        /* min{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5d):    /* vmin{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x5e):        /* div{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5e):    /* vdiv{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_ALL_FP(, 0x0f, 0x5f):        /* max{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5f):    /* vmax{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x54):   /* andp{s,d} xmm/m128,xmm */
+                                           /* vandp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x55):   /* andnp{s,d} xmm/m128,xmm */
+                                           /* vandnp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x56):   /* orp{s,d} xmm/m128,xmm */
+                                           /* vorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x57):   /* xorp{s,d} xmm/m128,xmm */
+                                           /* vxorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x58):      /* add{p,s}{s,d} xmm/mem,xmm */
+                                           /* vadd{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x59):      /* mul{p,s}{s,d} xmm/mem,xmm */
+                                           /* vmul{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5c):      /* sub{p,s}{s,d} xmm/mem,xmm */
+                                           /* vsub{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5d):      /* min{p,s}{s,d} xmm/mem,xmm */
+                                           /* vmin{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5e):      /* div{p,s}{s,d} xmm/mem,xmm */
+                                           /* vdiv{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5f):      /* max{p,s}{s,d} xmm/mem,xmm */
+                                           /* vmax{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
     simd_0f_fp:
         if ( vex.opcx == vex_none )
         {
@@ -6162,12 +6175,12 @@ x86_emulate(
 
     case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x12):   /* vmovlpd m64,xmm,xmm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x13):     /* movlp{s,d} xmm,m64 */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x13): /* vmovlp{s,d} xmm,m64 */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x13):   /* movlp{s,d} xmm,m64 */
+                                           /* vmovlp{s,d} xmm,m64 */
     case X86EMUL_OPC_66(0x0f, 0x16):       /* movhpd m64,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x16):   /* vmovhpd m64,xmm,xmm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x17):     /* movhp{s,d} xmm,m64 */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x17): /* vmovhp{s,d} xmm,m64 */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x17):   /* movhp{s,d} xmm,m64 */
+                                           /* vmovhp{s,d} xmm,m64 */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
         /* fall through */
     case X86EMUL_OPC(0x0f, 0x12):          /* movlps m64,xmm */
@@ -6303,8 +6316,7 @@ x86_emulate(
         op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8;
         goto simd_0f_fp;
 
-    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2a):     /* cvtsi2s{s,d} r/m,xmm */
-    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
+    CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2a):   /* {,v}cvtsi2s{s,d} r/m,xmm */
         if ( vex.opcx == vex_none )
         {
             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
@@ -6375,10 +6387,8 @@ x86_emulate(
         state->simd_size = simd_none;
         break;
 
-    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c):     /* cvtts{s,d}2si xmm/mem,reg */
-    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
-    CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d):     /* cvts{s,d}2si xmm/mem,reg */
-    CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
+    CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2c):   /* {,v}cvtts{s,d}2si xmm/mem,reg */
+    CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2d):   /* {,v}cvts{s,d}2si xmm/mem,reg */
         if ( vex.opcx == vex_none )
         {
             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
@@ -6456,10 +6466,8 @@ x86_emulate(
         opc = init_evex(stub);
         goto cvts_2si;
 
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x2e):     /* ucomis{s,d} xmm/mem,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x2f):     /* comis{s,d} xmm/mem,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2e):   /* {,v}ucomis{s,d} xmm/mem,xmm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x2f):   /* {,v}comis{s,d} xmm/mem,xmm */
         if ( vex.opcx == vex_none )
         {
             if ( vex.pfx )
@@ -6705,8 +6713,8 @@ x86_emulate(
         generate_exception_if(!vex.l || vex.w, EXC_UD);
         goto opmask_common;
 
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x50):     /* movmskp{s,d} xmm,reg */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x50):   /* movmskp{s,d} xmm,reg */
+                                           /* vmovmskp{s,d} {x,y}mm,reg */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd7):  /* pmovmskb {,x}mm,reg */
                                            /* vpmovmskb {x,y}mm,reg */
         opc = init_prefixes(stub);
@@ -6772,8 +6780,8 @@ x86_emulate(
         avx512_vlen_check(false);
         goto simd_zmm;
 
-    CASE_SIMD_ALL_FP(, 0x0f, 0x5a):        /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5a):    /* vcvtp{s,d}2p{s,d} {x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0x5a):      /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */
+                                           /* vcvtp{s,d}2p{s,d} {x,y}mm/mem,{x,y}mm */
                                            /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */
         op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) +
                          !!(vex.pfx & VEX_PREFIX_DOUBLE_MASK));
@@ -6788,8 +6796,8 @@ x86_emulate(
                          evex.w);
         goto avx512f_all_fp;
 
-    CASE_SIMD_PACKED_FP(, 0x0f, 0x5b):     /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x5b): /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0x5b):   /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
+                                           /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_F3(0x0f, 0x5b):       /* cvttps2dq xmm/mem,xmm */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x5b):   /* vcvttps2dq {x,y}mm/mem,{x,y}mm */
         d |= TwoOp;
@@ -8105,10 +8113,10 @@ x86_emulate(
         }
         goto add;
 
-    CASE_SIMD_ALL_FP(, 0x0f, 0xc2):        /* cmp{p,s}{s,d} $imm8,xmm/mem,xmm */
-    CASE_SIMD_ALL_FP(_VEX, 0x0f, 0xc2):    /* vcmp{p,s}{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
-    CASE_SIMD_PACKED_FP(, 0x0f, 0xc6):     /* shufp{s,d} $imm8,xmm/mem,xmm */
-    CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0xc6): /* vshufp{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_ALL_FP_VEX(0x0f, 0xc2):      /* cmp{p,s}{s,d} $imm8,xmm/mem,xmm */
+                                           /* vcmp{p,s}{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    CASE_SIMD_PACKED_FP_VEX(0x0f, 0xc6):   /* shufp{s,d} $imm8,xmm/mem,xmm */
+                                           /* vshufp{s,d} $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
         d = (d & ~SrcMask) | SrcMem;
         if ( vex.opcx == vex_none )
         {


Re: [Xen-devel] [PATCH 4/5] x86emul: introduce CASE_SIMD_..._FP_VEX()
Posted by Andrew Cooper 4 years, 4 months ago
On 20/12/2019 13:40, Jan Beulich wrote:
> Since many AVX{,2} insns have legacy SIMD counterparts, introduce macros
> covering both in one go. This (imo) improves readability and helps prepare
> for optionally disabling SIMD support in the emulator.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

[Xen-devel] [PATCH 5/5] x86emul: disable FPU/MMX/SIMD insn emulation when !HVM
Posted by Jan Beulich 4 years, 4 months ago
In a pure PV environment (the PV shim in particular) we don't really
need emulation of all these. To limit #ifdef-ary, utilize some of the
CASE_*() macros we have, by providing variants expanding to
(effectively) nothing (really a label, which in turn requires passing
-Wno-unused-label to the compiler when building such configurations).

Due to the mixture of macro and #ifdef use, the placement of some of
the #ifdef-s is a little arbitrary.

The resulting object file's .text is less than half its original size,
and the file also appears to compile a little more quickly.

This is meant as a first step; more parts can likely be disabled down
the road.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I'll be happy to take suggestions on how to avoid -Wno-unused-label.
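
A minimal sketch (stand-in macro, mirroring the disabled-case expansion
in the hunks below) of why -Wno-unused-label becomes necessary: with an
insn class compiled out, the CASE_*() macro must still expand to
something the following ':' can legally attach to inside the switch --
an ordinary, now unused, label.

    /* Stand-in for the disabled-case expansion from this patch. */
    #define CASE_SIMD_PACKED_INT(pfx, opc) C##pfx##_##opc

    int f(int key)
    {
        switch ( key )
        {
        CASE_SIMD_PACKED_INT(0x0f, 0xfc): /* expands to label C0x0f_0xfc: */
            return 1;                     /* unreachable without a goto   */
        default:
            return 0;
        }
    }

GCC then warns along the lines of "label 'C0x0f_0xfc' defined but not
used", which the Makefile change below silences for !HVM builds.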

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -79,6 +79,9 @@ obj-y += hpet.o
 obj-y += vm_event.o
 obj-y += xstate.o
 
+ifneq ($(CONFIG_HVM),y)
+x86_emulate.o: CFLAGS += -Wno-unused-label
+endif
 x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
 
 efi-y := $(shell if [ ! -r $(BASEDIR)/include/xen/compile.h -o \
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -42,6 +42,12 @@
     }                                                      \
 })
 
+#ifndef CONFIG_HVM
+# define X86EMUL_NO_FPU
+# define X86EMUL_NO_MMX
+# define X86EMUL_NO_SIMD
+#endif
+
 #include "x86_emulate/x86_emulate.c"
 
 int x86emul_read_xcr(unsigned int reg, uint64_t *val,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3476,6 +3476,7 @@ x86_decode(
             op_bytes = 4;
         break;
 
+#ifndef X86EMUL_NO_SIMD
     case simd_packed_int:
         switch ( vex.pfx )
         {
@@ -3541,6 +3542,7 @@ x86_decode(
     case simd_256:
         op_bytes = 32;
         break;
+#endif /* !X86EMUL_NO_SIMD */
 
     default:
         op_bytes = 0;
@@ -3695,6 +3697,7 @@ x86_emulate(
         break;
     }
 
+#ifndef X86EMUL_NO_SIMD
     /* With a memory operand, fetch the mask register in use (if any). */
     if ( ea.type == OP_MEM && evex.opmsk &&
          _get_fpu(fpu_type = X86EMUL_FPU_opmask, ctxt, ops) == X86EMUL_OKAY )
@@ -3725,6 +3728,7 @@ x86_emulate(
         put_fpu(X86EMUL_FPU_opmask, false, state, ctxt, ops);
         fpu_type = X86EMUL_FPU_none;
     }
+#endif /* !X86EMUL_NO_SIMD */
 
     /* Decode (but don't fetch) the destination operand: register or memory. */
     switch ( d & DstMask )
@@ -4372,11 +4376,13 @@ x86_emulate(
         singlestep = _regs.eflags & X86_EFLAGS_TF;
         break;
 
+#ifndef X86EMUL_NO_FPU
     case 0x9b:  /* wait/fwait */
         host_and_vcpu_must_have(fpu);
         get_fpu(X86EMUL_FPU_wait);
         emulate_fpu_insn_stub(b);
         break;
+#endif
 
     case 0x9c: /* pushf */
         if ( (_regs.eflags & X86_EFLAGS_VM) &&
@@ -4785,6 +4791,7 @@ x86_emulate(
         break;
     }
 
+#ifndef X86EMUL_NO_FPU
     case 0xd8: /* FPU 0xd8 */
         host_and_vcpu_must_have(fpu);
         get_fpu(X86EMUL_FPU_fpu);
@@ -5119,6 +5126,7 @@ x86_emulate(
             }
         }
         break;
+#endif /* !X86EMUL_NO_FPU */
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
         unsigned long count = get_loop_count(&_regs, ad_bytes);
@@ -5983,6 +5991,8 @@ x86_emulate(
     case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
         break;
 
+#ifndef X86EMUL_NO_MMX
+
     case X86EMUL_OPC(0x0f, 0x0e): /* femms */
         host_and_vcpu_must_have(3dnow);
         asm volatile ( "femms" );
@@ -6003,39 +6013,71 @@ x86_emulate(
         state->simd_size = simd_other;
         goto simd_0f_imm8;
 
-#define CASE_SIMD_PACKED_INT(pfx, opc)       \
+#endif /* !X86EMUL_NO_MMX */
+
+#if !defined(X86EMUL_NO_SIMD) && !defined(X86EMUL_NO_MMX)
+# define CASE_SIMD_PACKED_INT(pfx, opc)      \
     case X86EMUL_OPC(pfx, opc):              \
     case X86EMUL_OPC_66(pfx, opc)
-#define CASE_SIMD_PACKED_INT_VEX(pfx, opc)   \
+#elif !defined(X86EMUL_NO_SIMD)
+# define CASE_SIMD_PACKED_INT(pfx, opc)      \
+    case X86EMUL_OPC_66(pfx, opc)
+#elif !defined(X86EMUL_NO_MMX)
+# define CASE_SIMD_PACKED_INT(pfx, opc)      \
+    case X86EMUL_OPC(pfx, opc)
+#else
+# define CASE_SIMD_PACKED_INT(pfx, opc) C##pfx##_##opc
+#endif
+
+#ifndef X86EMUL_NO_SIMD
+
+# define CASE_SIMD_PACKED_INT_VEX(pfx, opc)  \
     CASE_SIMD_PACKED_INT(pfx, opc):          \
     case X86EMUL_OPC_VEX_66(pfx, opc)
 
-#define CASE_SIMD_ALL_FP(kind, pfx, opc)     \
+# define CASE_SIMD_ALL_FP(kind, pfx, opc)    \
     CASE_SIMD_PACKED_FP(kind, pfx, opc):     \
     CASE_SIMD_SCALAR_FP(kind, pfx, opc)
-#define CASE_SIMD_PACKED_FP(kind, pfx, opc)  \
+# define CASE_SIMD_PACKED_FP(kind, pfx, opc) \
     case X86EMUL_OPC##kind(pfx, opc):        \
     case X86EMUL_OPC##kind##_66(pfx, opc)
-#define CASE_SIMD_SCALAR_FP(kind, pfx, opc)  \
+# define CASE_SIMD_SCALAR_FP(kind, pfx, opc) \
     case X86EMUL_OPC##kind##_F3(pfx, opc):   \
     case X86EMUL_OPC##kind##_F2(pfx, opc)
-#define CASE_SIMD_SINGLE_FP(kind, pfx, opc)  \
+# define CASE_SIMD_SINGLE_FP(kind, pfx, opc) \
     case X86EMUL_OPC##kind(pfx, opc):        \
     case X86EMUL_OPC##kind##_F3(pfx, opc)
 
-#define CASE_SIMD_ALL_FP_VEX(pfx, opc)       \
+# define CASE_SIMD_ALL_FP_VEX(pfx, opc)      \
     CASE_SIMD_ALL_FP(, pfx, opc):            \
     CASE_SIMD_ALL_FP(_VEX, pfx, opc)
-#define CASE_SIMD_PACKED_FP_VEX(pfx, opc)    \
+# define CASE_SIMD_PACKED_FP_VEX(pfx, opc)   \
     CASE_SIMD_PACKED_FP(, pfx, opc):         \
     CASE_SIMD_PACKED_FP(_VEX, pfx, opc)
-#define CASE_SIMD_SCALAR_FP_VEX(pfx, opc)    \
+# define CASE_SIMD_SCALAR_FP_VEX(pfx, opc)   \
     CASE_SIMD_SCALAR_FP(, pfx, opc):         \
     CASE_SIMD_SCALAR_FP(_VEX, pfx, opc)
-#define CASE_SIMD_SINGLE_FP_VEX(pfx, opc)    \
+# define CASE_SIMD_SINGLE_FP_VEX(pfx, opc)   \
     CASE_SIMD_SINGLE_FP(, pfx, opc):         \
     CASE_SIMD_SINGLE_FP(_VEX, pfx, opc)
 
+#else
+
+# define CASE_SIMD_PACKED_INT_VEX(pfx, opc)  \
+    CASE_SIMD_PACKED_INT(pfx, opc)
+
+# define CASE_SIMD_ALL_FP(kind, pfx, opc)    C##kind##pfx##_##opc
+# define CASE_SIMD_PACKED_FP(kind, pfx, opc) Cp##kind##pfx##_##opc
+# define CASE_SIMD_SCALAR_FP(kind, pfx, opc) Cs##kind##pfx##_##opc
+# define CASE_SIMD_SINGLE_FP(kind, pfx, opc) C##kind##pfx##_##opc
+
+# define CASE_SIMD_ALL_FP_VEX(pfx, opc)    CASE_SIMD_ALL_FP(, pfx, opc)
+# define CASE_SIMD_PACKED_FP_VEX(pfx, opc) CASE_SIMD_PACKED_FP(, pfx, opc)
+# define CASE_SIMD_SCALAR_FP_VEX(pfx, opc) CASE_SIMD_SCALAR_FP(, pfx, opc)
+# define CASE_SIMD_SINGLE_FP_VEX(pfx, opc) CASE_SIMD_SINGLE_FP(, pfx, opc)
+
+#endif
+
     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b):     /* movnts{s,d} xmm,mem */
         host_and_vcpu_must_have(sse4a);
         /* fall through */
@@ -6173,6 +6215,8 @@ x86_emulate(
         insn_bytes = EVEX_PFX_BYTES + 2;
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x12):   /* vmovlpd m64,xmm,xmm */
     CASE_SIMD_PACKED_FP_VEX(0x0f, 0x13):   /* movlp{s,d} xmm,m64 */
@@ -6279,6 +6323,8 @@ x86_emulate(
         avx512_vlen_check(false);
         goto simd_zmm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC(0x0f, 0x20): /* mov cr,reg */
     case X86EMUL_OPC(0x0f, 0x21): /* mov dr,reg */
     case X86EMUL_OPC(0x0f, 0x22): /* mov reg,cr */
@@ -6305,6 +6351,8 @@ x86_emulate(
             goto done;
         break;
 
+#if !defined(X86EMUL_NO_MMX) && !defined(X86EMUL_NO_SIMD)
+
     case X86EMUL_OPC_66(0x0f, 0x2a):       /* cvtpi2pd mm/m64,xmm */
         if ( ea.type == OP_REG )
         {
@@ -6316,6 +6364,8 @@ x86_emulate(
         op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8;
         goto simd_0f_fp;
 
+#endif /* !X86EMUL_NO_MMX && !X86EMUL_NO_SIMD */
+
     CASE_SIMD_SCALAR_FP_VEX(0x0f, 0x2a):   /* {,v}cvtsi2s{s,d} r/m,xmm */
         if ( vex.opcx == vex_none )
         {
@@ -6659,6 +6709,8 @@ x86_emulate(
             dst.val = src.val;
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_VEX(0x0f, 0x4a):    /* kadd{w,q} k,k,k */
         if ( !vex.w )
             host_and_vcpu_must_have(avx512dq);
@@ -6713,6 +6765,8 @@ x86_emulate(
         generate_exception_if(!vex.l || vex.w, EXC_UD);
         goto opmask_common;
 
+#endif /* X86EMUL_NO_SIMD */
+
     CASE_SIMD_PACKED_FP_VEX(0x0f, 0x50):   /* movmskp{s,d} xmm,reg */
                                            /* vmovmskp{s,d} {x,y}mm,reg */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xd7):  /* pmovmskb {,x}mm,reg */
@@ -6796,6 +6850,8 @@ x86_emulate(
                          evex.w);
         goto avx512f_all_fp;
 
+#ifndef X86EMUL_NO_SIMD
+
     CASE_SIMD_PACKED_FP_VEX(0x0f, 0x5b):   /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
                                            /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_F3(0x0f, 0x5b):       /* cvttps2dq xmm/mem,xmm */
@@ -6826,6 +6882,8 @@ x86_emulate(
         op_bytes = 16 << evex.lr;
         goto simd_zmm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0x60): /* punpcklbw {,x}mm/mem,{,x}mm */
                                           /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0x61): /* punpcklwd {,x}mm/mem,{,x}mm */
@@ -6852,6 +6910,7 @@ x86_emulate(
                                           /* vpackusbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6b): /* packsswd {,x}mm/mem,{,x}mm */
                                           /* vpacksswd {x,y}mm/mem,{x,y}mm,{x,y}mm */
+#ifndef X86EMUL_NO_SIMD
     case X86EMUL_OPC_66(0x0f, 0x6c):     /* punpcklqdq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6c): /* vpunpcklqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0x6d):     /* punpckhqdq xmm/m128,xmm */
@@ -6936,6 +6995,7 @@ x86_emulate(
                                           /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_66(0x0f, 0xfb):     /* psubq xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f, 0xfb): /* vpsubq {x,y}mm/mem,{x,y}mm,{x,y}mm */
+#endif /* !X86EMUL_NO_SIMD */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfc): /* paddb {,x}mm/mem,{,x}mm */
                                           /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfd): /* paddw {,x}mm/mem,{,x}mm */
@@ -6943,6 +7003,7 @@ x86_emulate(
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xfe): /* paddd {,x}mm/mem,{,x}mm */
                                           /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
     simd_0f_int:
+#ifndef X86EMUL_NO_SIMD
         if ( vex.opcx != vex_none )
         {
     case X86EMUL_OPC_VEX_66(0x0f38, 0x00): /* vpshufb {x,y}mm/mem,{x,y}mm,{x,y}mm */
@@ -6984,11 +7045,14 @@ x86_emulate(
         }
         if ( vex.pfx )
             goto simd_0f_sse2;
+#endif /* !X86EMUL_NO_SIMD */
     simd_0f_mmx:
         host_and_vcpu_must_have(mmx);
         get_fpu(X86EMUL_FPU_mmx);
         goto simd_0f_common;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm */
         generate_exception_if(evex.opmsk, EXC_UD);
         /* fall through */
@@ -7082,6 +7146,8 @@ x86_emulate(
         generate_exception_if(!evex.w, EXC_UD);
         goto avx512f_no_sae;
 
+#endif /* X86EMUL_NO_SIMD */
+
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */
                                           /* vmov{d,q} r/m,xmm */
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */
@@ -7123,6 +7189,8 @@ x86_emulate(
         ASSERT(!state->simd_size);
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
     case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
         generate_exception_if((evex.lr || evex.opmsk || evex.brs ||
@@ -7195,11 +7263,15 @@ x86_emulate(
         d |= TwoOp;
         /* fall through */
     case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
+#endif /* !X86EMUL_NO_SIMD */
+#ifndef X86EMUL_NO_MMX
     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
     case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
+#endif
         op_bytes = 8;
         goto simd_0f_int;
 
+#ifndef X86EMUL_NO_SIMD
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0x70):/* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */
                                          /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_F3(0x0f, 0x70):     /* pshufhw $imm8,xmm/m128,xmm */
@@ -7208,12 +7280,15 @@ x86_emulate(
     case X86EMUL_OPC_VEX_F2(0x0f, 0x70): /* vpshuflw $imm8,{x,y}mm/mem,{x,y}mm */
         d = (d & ~SrcMask) | SrcMem | TwoOp;
         op_bytes = vex.pfx ? 16 << vex.l : 8;
+#endif
     simd_0f_int_imm8:
         if ( vex.opcx != vex_none )
         {
+#ifndef X86EMUL_NO_SIMD
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0e): /* vpblendw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+#endif
             if ( vex.l )
             {
     simd_0f_imm8_avx2:
@@ -7221,6 +7296,7 @@ x86_emulate(
             }
             else
             {
+#ifndef X86EMUL_NO_SIMD
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps $imm8,{x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x09): /* vroundpd $imm8,{x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0a): /* vroundss $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
@@ -7228,6 +7304,7 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0c): /* vblendps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0d): /* vblendpd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x40): /* vdpps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+#endif
     simd_0f_imm8_avx:
                 host_and_vcpu_must_have(avx);
             }
@@ -7261,6 +7338,8 @@ x86_emulate(
         insn_bytes = PFX_BYTES + 3;
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0x70): /* vpshufd $imm8,[xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_F3(0x0f, 0x70): /* vpshufhw $imm8,[xyz]mm/mem,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_F2(0x0f, 0x70): /* vpshuflw $imm8,[xyz]mm/mem,[xyz]mm{k} */
@@ -7319,6 +7398,9 @@ x86_emulate(
         opc[1] = modrm;
         opc[2] = imm1;
         insn_bytes = PFX_BYTES + 3;
+
+#endif /* !X86EMUL_NO_SIMD */
+
     simd_0f_reg_only:
         opc[insn_bytes - PFX_BYTES] = 0xc3;
 
@@ -7329,6 +7411,8 @@ x86_emulate(
         ASSERT(!state->simd_size);
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0x71): /* Grp12 */
         switch ( modrm_reg & 7 )
         {
@@ -7360,6 +7444,9 @@ x86_emulate(
         }
         goto unrecognized_insn;
 
+#endif /* !X86EMUL_NO_SIMD */
+#ifndef X86EMUL_NO_MMX
+
     case X86EMUL_OPC(0x0f, 0x73):        /* Grp14 */
         switch ( modrm_reg & 7 )
         {
@@ -7369,6 +7456,9 @@ x86_emulate(
         }
         goto unrecognized_insn;
 
+#endif /* !X86EMUL_NO_MMX */
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_66(0x0f, 0x73):
     case X86EMUL_OPC_VEX_66(0x0f, 0x73):
         switch ( modrm_reg & 7 )
@@ -7399,7 +7489,12 @@ x86_emulate(
         }
         goto unrecognized_insn;
 
+#endif /* !X86EMUL_NO_SIMD */
+
+#ifndef X86EMUL_NO_MMX
     case X86EMUL_OPC(0x0f, 0x77):        /* emms */
+#endif
+#ifndef X86EMUL_NO_SIMD
     case X86EMUL_OPC_VEX(0x0f, 0x77):    /* vzero{all,upper} */
         if ( vex.opcx != vex_none )
         {
@@ -7445,6 +7540,7 @@ x86_emulate(
 #endif
         }
         else
+#endif /* !X86EMUL_NO_SIMD */
         {
             host_and_vcpu_must_have(mmx);
             get_fpu(X86EMUL_FPU_mmx);
@@ -7458,6 +7554,8 @@ x86_emulate(
         insn_bytes = PFX_BYTES + 1;
         goto simd_0f_reg_only;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
         switch ( modrm_reg & 7 )
         {
@@ -7555,6 +7653,8 @@ x86_emulate(
         op_bytes = 8;
         goto simd_zmm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */
         if ( test_cc(b, _regs.eflags) )
             jmp_rel((int32_t)src.val);
@@ -7565,6 +7665,8 @@ x86_emulate(
         dst.val = test_cc(b, _regs.eflags);
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_VEX(0x0f, 0x91):    /* kmov{w,q} k,mem */
     case X86EMUL_OPC_VEX_66(0x0f, 0x91): /* kmov{b,d} k,mem */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
@@ -7713,6 +7815,8 @@ x86_emulate(
         dst.type = OP_NONE;
         break;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
         msr_val = 0;
         fail_if(ops->cpuid == NULL);
@@ -7809,6 +7913,7 @@ x86_emulate(
     case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */
         switch ( modrm_reg & 7 )
         {
+#ifndef X86EMUL_NO_SIMD
         case 2: /* ldmxcsr */
             generate_exception_if(vex.pfx, EXC_UD);
             vcpu_must_have(sse);
@@ -7827,6 +7932,7 @@ x86_emulate(
             get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm);
             asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
             break;
+#endif /* !X86EMUL_NO_SIMD */
 
         case 5: /* lfence */
             fail_if(modrm_mod != 3);
@@ -7875,6 +7981,8 @@ x86_emulate(
         }
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_VEX(0x0f, 0xae): /* Grp15 */
         switch ( modrm_reg & 7 )
         {
@@ -7889,6 +7997,8 @@ x86_emulate(
         }
         goto unrecognized_insn;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC_F3(0x0f, 0xae): /* Grp15 */
         fail_if(modrm_mod != 3);
         generate_exception_if((modrm_reg & 4) || !mode_64bit(), EXC_UD);
@@ -8128,6 +8238,8 @@ x86_emulate(
         }
         goto simd_0f_imm8_avx;
 
+#ifndef X86EMUL_NO_SIMD
+
     CASE_SIMD_ALL_FP(_EVEX, 0x0f, 0xc2): /* vcmp{p,s}{s,d} $imm8,[xyz]mm/mem,[xyz]mm,k{k} */
         generate_exception_if((evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK) ||
                                (ea.type != OP_REG && evex.brs &&
@@ -8154,6 +8266,8 @@ x86_emulate(
         insn_bytes = EVEX_PFX_BYTES + 3;
         break;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC(0x0f, 0xc3): /* movnti */
         /* Ignore the non-temporal hint for now. */
         vcpu_must_have(sse2);
@@ -8168,6 +8282,8 @@ x86_emulate(
         ea.type = OP_MEM;
         goto simd_0f_int_imm8;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0xc4):   /* vpinsrw $imm8,r32/m16,xmm,xmm */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */
@@ -8185,6 +8301,8 @@ x86_emulate(
         state->simd_size = simd_other;
         goto avx512f_imm8_no_sae;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xc5):  /* pextrw $imm8,{,x}mm,reg */
                                            /* vpextrw $imm8,xmm,reg */
         generate_exception_if(vex.l, EXC_UD);
@@ -8200,6 +8318,8 @@ x86_emulate(
         insn_bytes = PFX_BYTES + 3;
         goto simd_0f_to_gpr;
 
+#ifndef X86EMUL_NO_SIMD
+
     CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0xc6): /* vshufp{s,d} $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         generate_exception_if(evex.w != (evex.pfx & VEX_PREFIX_DOUBLE_MASK),
                               EXC_UD);
@@ -8214,6 +8334,8 @@ x86_emulate(
         avx512_vlen_check(false);
         goto simd_imm8_zmm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
     {
         union {
@@ -8404,6 +8526,8 @@ x86_emulate(
         }
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0xd2): /* vpsrld xmm/m128,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xd3): /* vpsrlq xmm/m128,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xe2): /* vpsra{d,q} xmm/m128,[xyz]mm,[xyz]mm{k} */
@@ -8425,12 +8549,18 @@ x86_emulate(
         generate_exception_if(evex.w != (b & 1), EXC_UD);
         goto avx512f_no_sae;
 
+#endif /* !X86EMUL_NO_SIMD */
+#ifndef X86EMUL_NO_MMX
+
     case X86EMUL_OPC(0x0f, 0xd4):        /* paddq mm/m64,mm */
     case X86EMUL_OPC(0x0f, 0xf4):        /* pmuludq mm/m64,mm */
     case X86EMUL_OPC(0x0f, 0xfb):        /* psubq mm/m64,mm */
         vcpu_must_have(sse2);
         goto simd_0f_mmx;
 
+#endif /* !X86EMUL_NO_MMX */
+#if !defined(X86EMUL_NO_MMX) && !defined(X86EMUL_NO_SIMD)
+
     case X86EMUL_OPC_F3(0x0f, 0xd6):     /* movq2dq mm,xmm */
     case X86EMUL_OPC_F2(0x0f, 0xd6):     /* movdq2q xmm,mm */
         generate_exception_if(ea.type != OP_REG, EXC_UD);
@@ -8438,6 +8568,9 @@ x86_emulate(
         host_and_vcpu_must_have(mmx);
         goto simd_0f_int;
 
+#endif /* !X86EMUL_NO_MMX && !X86EMUL_NO_SIMD */
+#ifndef X86EMUL_NO_MMX
+
     case X86EMUL_OPC(0x0f, 0xe7):        /* movntq mm,m64 */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
         sfence = true;
@@ -8453,6 +8586,9 @@ x86_emulate(
         vcpu_must_have(mmxext);
         goto simd_0f_mmx;
 
+#endif /* !X86EMUL_NO_MMX */
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
     case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
@@ -8473,6 +8609,8 @@ x86_emulate(
         op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l);
         goto simd_0f_cvt;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     CASE_SIMD_PACKED_INT_VEX(0x0f, 0xf7): /* {,v}maskmov{q,dqu} {,x}mm,{,x}mm */
         generate_exception_if(ea.type != OP_REG, EXC_UD);
         if ( vex.opcx != vex_none )
@@ -8576,6 +8714,8 @@ x86_emulate(
         insn_bytes = PFX_BYTES + 3;
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */
         generate_exception_if(!vex.l, EXC_UD);
@@ -9145,6 +9285,8 @@ x86_emulate(
         ASSERT(!state->simd_size);
         break;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC_66(0x0f38, 0x82): /* invpcid reg,m128 */
         vcpu_must_have(invpcid);
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
@@ -9187,6 +9329,8 @@ x86_emulate(
         state->simd_size = simd_none;
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f38, 0x83): /* vpmultishiftqb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         generate_exception_if(!evex.w, EXC_UD);
         host_and_vcpu_must_have(avx512_vbmi);
@@ -9750,6 +9894,8 @@ x86_emulate(
         generate_exception_if(evex.brs || evex.opmsk, EXC_UD);
         goto avx512f_no_sae;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
         vcpu_must_have(movbe);
@@ -9915,6 +10061,8 @@ x86_emulate(
                             : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x01): /* vpermpd $imm8,ymm/m256,ymm */
         generate_exception_if(!vex.l || !vex.w, EXC_UD);
@@ -9975,6 +10123,8 @@ x86_emulate(
         avx512_vlen_check(b & 2);
         goto simd_imm8_zmm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     CASE_SIMD_PACKED_INT(0x0f3a, 0x0f): /* palignr $imm8,{,x}mm/mem,{,x}mm */
         host_and_vcpu_must_have(ssse3);
         if ( vex.pfx )
@@ -10002,6 +10152,8 @@ x86_emulate(
         insn_bytes = PFX_BYTES + 4;
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_EVEX_66(0x0f3a, 0x42): /* vdbpsadbw $imm8,[xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
         generate_exception_if(evex.w, EXC_UD);
         /* fall through */
@@ -10500,6 +10652,8 @@ x86_emulate(
         generate_exception_if(vex.l, EXC_UD);
         goto simd_0f_imm8_avx;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
         vcpu_must_have(bmi2);
         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
@@ -10514,6 +10668,8 @@ x86_emulate(
             asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
@@ -10549,6 +10705,8 @@ x86_emulate(
         host_and_vcpu_must_have(xop);
         goto simd_0f_imm8_ymm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
         switch ( modrm_reg & 7 )
         {
@@ -10608,6 +10766,8 @@ x86_emulate(
         }
         goto unrecognized_insn;
 
+#ifndef X86EMUL_NO_SIMD
+
     case X86EMUL_OPC_XOP(09, 0x82): /* vfrczss xmm/m128,xmm */
     case X86EMUL_OPC_XOP(09, 0x83): /* vfrczsd xmm/m128,xmm */
         generate_exception_if(vex.l, EXC_UD);
@@ -10663,6 +10823,8 @@ x86_emulate(
         host_and_vcpu_must_have(xop);
         goto simd_0f_ymm;
 
+#endif /* !X86EMUL_NO_SIMD */
+
     case X86EMUL_OPC_XOP(0a, 0x10): /* bextr imm,r/m,r */
     {
         uint8_t *buf = get_stub(stub);


Re: [Xen-devel] [PATCH 5/5] x86emul: disable FPU/MMX/SIMD insn emulation when !HVM
Posted by Andrew Cooper 4 years, 4 months ago
On 20/12/2019 13:41, Jan Beulich wrote:
> In a pure PV environment (the PV shim in particular) we don't really
> need emulation of all these. To limit #ifdef-ary utilize some of the
> CASE_*() macros we have, by providing variants expanding to
> (effectively) nothing (really a label, which in turn requires passing
> -Wno-unused-label to the compiler when building such configurations).
>
> Due to the mixture of macro and #ifdef use, the placement of some of
> the #ifdef-s is a little arbitrary.
>
> The resulting object file's .text is less than half the size of the
> original, and also looks to compile a little more quickly.
>
> This is meant as a first step; more parts can likely be disabled down
> the road.

Presumably we can drop everything but the onebyte and twobyte tables,
and VEX/EVEX prefix parsing logic?

>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> I'll be happy to take suggestions allowing to avoid -Wno-unused-label.

I think I'm going to need a little while to figure out how this works. 
However...

>
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -79,6 +79,9 @@ obj-y += hpet.o
>  obj-y += vm_event.o
>  obj-y += xstate.o
>  
> +ifneq ($(CONFIG_HVM),y)
> +x86_emulate.o: CFLAGS += -Wno-unused-label
> +endif
>  x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
>  
>  efi-y := $(shell if [ ! -r $(BASEDIR)/include/xen/compile.h -o \
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -42,6 +42,12 @@
>      }                                                      \
>  })
>  
> +#ifndef CONFIG_HVM
> +# define X86EMUL_NO_FPU
> +# define X86EMUL_NO_MMX
> +# define X86EMUL_NO_SIMD
> +#endif

... isn't this going to cause problems for the emulator/fuzzer builds?

On that subject, it would be very helpful to at least be able to
configure reduced builds from these utilities.

~Andrew
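
A minimal sketch of the trick in question, with made-up names
(CASE_DEMO and DEMO_NO_MMX are illustrative, not the emulator's
actual macros): when a feature is compiled out, the case macro
degenerates to a bare label, so the case vanishes from the switch
and the code under it becomes unreachable dead code the compiler
can discard.  The then-unused label is what -Wno-unused-label
papers over.

#include <stdio.h>

#ifdef DEMO_NO_MMX
# define CASE_DEMO(opc) C_##opc      /* just a label, no case */
#else
# define CASE_DEMO(opc) case (opc)   /* a real case label */
#endif

static int handle(int op)
{
    switch ( op )
    {
    CASE_DEMO(0x77):   /* with -DDEMO_NO_MMX this body is unreachable */
        return 1;
    default:
        return 0;
    }
}

int main(void)
{
    printf("%d\n", handle(0x77));    /* 1 normally, 0 when suppressed */
    return 0;
}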

Re: [Xen-devel] [PATCH 5/5] x86emul: disable FPU/MMX/SIMD insn emulation when !HVM
Posted by Jan Beulich 4 years, 4 months ago
On 20.12.2019 17:01, Andrew Cooper wrote:
> On 20/12/2019 13:41, Jan Beulich wrote:
>> In a pure PV environment (the PV shim in particular) we don't really
>> need emulation of all these. To limit #ifdef-ary utilize some of the
>> CASE_*() macros we have, by providing variants expanding to
>> (effectively) nothing (really a label, which in turn requires passing
>> -Wno-unused-label to the compiler when building such configurations).
>>
>> Due to the mixture of macro and #ifdef use, the placement of some of
>> the #ifdef-s is a little arbitrary.
>>
>> The resulting object file's .text is less than half the size of the
>> original, and also looks to compile a little more quickly.
>>
>> This is meant as a first step; more parts can likely be disabled down
>> the road.
> 
> Presumably we can drop everything but the onebyte and twobyte tables,
> and VEX/EVEX prefix parsing logic?

Depends on what we expect VEX-encoded GPR insns to be used on.
I wouldn't want to forbid their use on MMIO, for example. I did
actually take a few initial steps to hide the EVEX parsing
behind an #ifdef, but something then made me undo this at least
for now (I don't recall what exactly it was).

>> --- a/xen/arch/x86/Makefile
>> +++ b/xen/arch/x86/Makefile
>> @@ -79,6 +79,9 @@ obj-y += hpet.o
>>  obj-y += vm_event.o
>>  obj-y += xstate.o
>>  
>> +ifneq ($(CONFIG_HVM),y)
>> +x86_emulate.o: CFLAGS += -Wno-unused-label
>> +endif
>>  x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
>>  
>>  efi-y := $(shell if [ ! -r $(BASEDIR)/include/xen/compile.h -o \
>> --- a/xen/arch/x86/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate.c
>> @@ -42,6 +42,12 @@
>>      }                                                      \
>>  })
>>  
>> +#ifndef CONFIG_HVM
>> +# define X86EMUL_NO_FPU
>> +# define X86EMUL_NO_MMX
>> +# define X86EMUL_NO_SIMD
>> +#endif
> 
> ... isn't this going to cause problems for the emulator/fuzzer builds?

No, this file doesn't get used by them. That's why I placed these
here, rather than e.g. in x86_emulate/x86_emulate.h or in
x86_emulate/x86_emulate.c itself.

> On that subject, it would be very helpful to at least be able to
> configure reduced builds from these utilities.

Yes, I too have been thinking this way. I may get there eventually.

Jan
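
A sketch of what such opting in could look like, assuming a harness
keeps pulling in x86_emulate/x86_emulate.c directly: the X86EMUL_NO_*
symbols are ordinary preprocessor defines, so a reduced build would
only need a wrapper mirroring the hypervisor-side one (hypothetical
code, not part of this patch):

/* Hypothetical harness-side wrapper for a reduced emulator build,
 * mirroring xen/arch/x86/x86_emulate.c; compile with
 * -Wno-unused-label, as the Makefile hunk does for !HVM. */
#define X86EMUL_NO_FPU
#define X86EMUL_NO_MMX
#define X86EMUL_NO_SIMD

#include "x86_emulate/x86_emulate.c"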

Re: [Xen-devel] [PATCH 5/5] x86emul: disable FPU/MMX/SIMD insn emulation when !HVM
Posted by Andrew Cooper 4 years ago
On 20/12/2019 16:01, Andrew Cooper wrote:
>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> I'll be happy to take suggestions allowing to avoid -Wno-unused-label.
> I think I'm going to need a little while to figure out how this works.

So, after having spent an evening playing with this, things get massively
simpler when NO_MMX is folded with NO_SIMD.

MMX is a SIMD technology, and I can't see a compelling reason to control
their inclusion separately.  We're either going to want everything or
nothing.

The attached incremental works for me without a single out-of-place
label.  There is some further cleanup which can be done, such as not
making the CASE_ macros conditional.  (OTOH, the compile error from
might be helpful to keep in some form).

Thoughts?

~Andrew
Re: [PATCH 5/5] x86emul: disable FPU/MMX/SIMD insn emulation when !HVM
Posted by Jan Beulich 4 years ago
On 03.04.2020 00:18, Andrew Cooper wrote:
> On 20/12/2019 16:01, Andrew Cooper wrote:
>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> I'll be happy to take suggestions allowing to avoid -Wno-unused-label.
>> I think I'm going to need a little while to figure out how this works.
> 
> So, after having had an evening playing with this, things get massively
> simpler when NO_MMX is folded with NO_SIMD.
> 
> MMX is a SIMD technology, and I can't see a compelling reason to control
> their inclusion separately.  We're either going to want everything or
> nothing.

I disagree: while MMX is a form of SIMD, what SIMD here means is
anything using the XMM register file and its extensions. IIRC
AMD once considered dropping MMX, and if I'm not mistaken early
Phis supported neither MMX nor the FPU. Hence I view a mode not
allowing MMX but allowing SIMD as a viable one to support.
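
That independence is visible directly in the patch's macro fallback
chain, reproduced here in simplified form from the earlier hunk
(comments added); each MMX/SIMD on/off combination keeps exactly the
case labels it wants:

#if !defined(X86EMUL_NO_SIMD) && !defined(X86EMUL_NO_MMX)
# define CASE_SIMD_PACKED_INT(pfx, opc)      \
    case X86EMUL_OPC(pfx, opc):              \
    case X86EMUL_OPC_66(pfx, opc)            /* MMX and SSE encodings */
#elif !defined(X86EMUL_NO_SIMD)
# define CASE_SIMD_PACKED_INT(pfx, opc)      \
    case X86EMUL_OPC_66(pfx, opc)            /* SSE encoding only */
#elif !defined(X86EMUL_NO_MMX)
# define CASE_SIMD_PACKED_INT(pfx, opc)      \
    case X86EMUL_OPC(pfx, opc)               /* MMX encoding only */
#else
# define CASE_SIMD_PACKED_INT(pfx, opc) C##pfx##_##opc /* label only */
#endif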

> The attached incremental works for me without a single out-of-place
> label.  There is some further cleanup which can be done such as not
> making the CASE_ macros conditional.

Well, if we were to follow your alternative model, perhaps.
What I dislike, though, is something like the last hunk (an #ifdef
around a construct which can already abstract things away, and
which is specifically intended to avoid some #ifdef-ary).

>  (OTOH, the compile error from
> might be helpful to keep in some form).

There looks to be a word missing here, which leaves me struggling
to understand what you mean.

Jan

[Xen-devel] Ping: [PATCH 5/5] x86emul: disable FPU/MMX/SIMD insn emulation when !HVM
Posted by Jan Beulich 4 years ago
On 20.12.2019 14:41, Jan Beulich wrote:
> In a pure PV environment (the PV shim in particular) we don't really
> need emulation of all these. To limit #ifdef-ary utilize some of the
> CASE_*() macros we have, by providing variants expanding to
> (effectively) nothing (really a label, which in turn requires passing
> -Wno-unused-label to the compiler when building such configurations).
> 
> Due to the mixture of macro and #ifdef use, the placement of some of
> the #ifdef-s is a little arbitrary.
> 
> The resulting object file's .text is less than half the size of the
> original, and also looks to compile a little more quickly.
> 
> This is meant as a first step; more parts can likely be disabled down
> the road.
> 
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Ping?
