Handle the EVEX formats for APX (including the extension of BMI and CMPccXADD
instructions that already supported VEX) and add their validation to
validate_vex(). Exit disas_insn() early, before the condition code update, if the NF field is set.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 10 ++
target/i386/tcg/translate.c | 11 +-
target/i386/tcg/decode-new.c.inc | 168 ++++++++++++++++++++++++++++++-
3 files changed, 181 insertions(+), 8 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 8650b5ae7a0..1c7ed73c437 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -187,6 +187,9 @@ typedef enum X86InsnCheck {
/* No 0x67 prefix allowed */
X86_CHECK_no_adr = 16384,
+
+ /* EVEX.NF bit not allowed */
+ X86_CHECK_nf0 = 32768,
} X86InsnCheck;
typedef enum X86InsnSpecial {
@@ -274,6 +277,13 @@ typedef enum X86VEXSpecial {
X86_VEX_AVX2_256,
} X86VEXSpecial;
+typedef enum X86EVEXClass {
+ /* Non-vector instructions that can use APX EGPRs; stored in vex_class next to the numeric VEX classes. */
+ X86_EVEX_APX = 128, /* evex4 may carry ND, V4 and NF */
+ X86_EVEX_APX_cmp = 129, /* evex4 may only use its low four bits; ND is rejected */
+ X86_EVEX_APX_pp2 = 130, /* ND is required and NF is rejected */
+ X86_EVEX_APX_zu = 131, /* the ND bit selects X86_SPECIAL_Op0_ZU; vvvv must be zero */
+} X86EVEXClass;
typedef struct X86OpEntry X86OpEntry;
typedef struct X86DecodedInsn X86DecodedInsn;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a74d9b0436e..75eeed81fbd 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -107,18 +107,17 @@ typedef struct DisasContext {
uint8_t vex_l; /* vex vector length */
uint8_t vex_v; /* vex vvvv register, without 1's complement. */
bool vex_ndd; /* is this a 3-operand instruction? */
+ bool vex_w; /* used by AVX even on 32-bit processors */
uint8_t popl_esp_hack; /* for correct popl with esp base handling */
uint8_t rip_offset; /* only used in x86_64, but left for simplicity */
-#ifdef TARGET_X86_64
- uint8_t rex_r;
+ uint8_t rex_r; /* 0 for i386, but left for simplicity */
uint8_t rex_x;
uint8_t rex_b;
-#endif
+
uint8_t evex2;
uint8_t evex3;
uint8_t evex4;
- bool vex_w; /* used by AVX even on 32-bit processors */
bool jmp_opt; /* use direct block chaining for direct jumps */
bool cc_op_dirty;
@@ -219,12 +218,16 @@ typedef struct DisasContext {
#define REX_R(S) ((S)->rex_r + 0)
#define REX_X(S) ((S)->rex_x + 0)
#define REX_B(S) ((S)->rex_b + 0)
+#define EVEX_APX_ND(S) (((S)->evex4 & 0x10) != 0)
+#define EVEX_APX_NF(S) (((S)->evex4 & 0x04) != 0)
#else
#define REX_PREFIX(S) false
#define REX_W(S) false
#define REX_R(S) 0
#define REX_X(S) 0
#define REX_B(S) 0
+#define EVEX_APX_ND(S) 0
+#define EVEX_APX_NF(S) 0
#endif
/*
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index adb5595ddf3..c80c61befea 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -253,6 +253,10 @@
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,
+#define evex_apx .vex_class = X86_EVEX_APX,
+#define evex_apx_cmp .vex_class = X86_EVEX_APX_cmp,
+#define evex_apx_pp2 .vex_class = X86_EVEX_APX_pp2,
+#define evex_apx_zu .vex_class = X86_EVEX_APX_zu,
#define chk(a) .check = X86_CHECK_##a,
#define chk2(a, b) .check = X86_CHECK_##a | X86_CHECK_##b,
@@ -2528,9 +2532,23 @@ static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
{
uint16_t sse_prefixes;
- if (!e->valid_prefix) {
- return true;
+ switch (e->vex_class) {
+ case X86_EVEX_APX:
+ case X86_EVEX_APX_cmp:
+ case X86_EVEX_APX_pp2:
+ case X86_EVEX_APX_zu:
+ /* For APX-extended instructions, only EVEX versions check the prefix. */
+ if (!(s->prefix & PREFIX_EVEX)) {
+ return true;
+ }
+
+ /* fallthrough */
+ default:
+ if (!e->valid_prefix) {
+ return true;
+ }
}
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
/* In SSE instructions, 0xF3 and 0xF2 cancel 0x66. */
s->prefix &= ~PREFIX_DATA;
@@ -2602,7 +2620,35 @@ static bool extract_evex_params(DisasContext *s, X86DecodedInsn *decode)
* Here, the position of RXB and (for AVX512) displacement multiplier
* should be known.
*/
- g_assert_not_reached();
+ X86OpEntry *e = &decode->e;
+ uint8_t rex_r, rex_x, rex_b;
+
+ switch (e->vex_class) {
+ case 13:
+ case X86_EVEX_APX:
+ case X86_EVEX_APX_cmp:
+ case X86_EVEX_APX_pp2:
+ case X86_EVEX_APX_zu:
+ /* Only reached through EVEX map 4. */
+ rex_r = (~s->evex2 & 0x10) | ((~s->evex2 >> 4) & 8);
+ rex_x = ((~s->evex3 << 2) & 0x10) | ((~s->evex2 >> 3) & 8);
+ rex_b = ((s->evex2 << 1) & 0x10) | ((~s->evex2 >> 2) & 8);
+ s->vex_v = ((~s->evex4 << 1) & 0x10) | ((~s->evex3 >> 3) & 15);
+ break;
+ default:
+ return false;
+ }
+
+#ifdef TARGET_X86_64
+ s->rex_r = rex_r;
+ s->rex_x = rex_x;
+ s->rex_b = rex_b;
+#else
+ (void)rex_r;
+ (void)rex_x;
+ (void)rex_b;
+#endif
+ return true;
}
static bool decode_ops(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
@@ -2732,6 +2778,87 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
g_assert_not_reached();
}
+static bool validate_evex_apx(DisasContext *s, X86DecodedInsn *decode)
+{
+ X86OpEntry *e = &decode->e;
+ uint8_t evex4_mask = 0; /* evex4 bits that the instruction class allows to be set */
+
+ assert(s->has_modrm);
+ switch (e->vex_class) {
+ case 13:
+ /*
+ * APX-EVEX-BMI and APX-EVEX-CMPCCXADD do not define EVEX.ND, so evex4 may
+ * only carry V4 (0x08) and NF (0x04). Their NF checks differ and happen elsewhere.
+ */
+ if (!CODE64(s) || !(s->flags & HF_APX_EN_MASK)) {
+ goto illegal;
+ }
+ evex4_mask = 0x0C;
+ break;
+
+ case X86_EVEX_APX:
+ /* APX-EVEX-INT: ND is only legal on entries marked as NDD. Checks on NF happen elsewhere. */
+ assert(e->s0 == e->s1);
+ if (!CODE64(s) || !(s->flags & HF_APX_EN_MASK)) {
+ goto illegal;
+ }
+ if (!EVEX_APX_ND(s)) {
+ s->vex_ndd = false;
+ } else if (!s->vex_ndd) {
+ goto illegal;
+ }
+ evex4_mask = 0x1C; /* ND, V4 and NF */
+ break;
+
+ case X86_EVEX_APX_zu:
+ /* APX-EVEX-INT where the ND bit position means ZU and vvvv must be zero. Checks on NF happen elsewhere. */
+ assert(decode->e.special == 0 && !s->vex_ndd);
+ if (!CODE64(s) || !(s->flags & HF_APX_EN_MASK)) {
+ goto illegal;
+ }
+ if (s->vex_v != 0) {
+ goto illegal;
+ }
+ if (EVEX_APX_ND(s)) {
+ decode->e.special = X86_SPECIAL_Op0_ZU;
+ }
+ evex4_mask = 0x1C; /* ZU (the ND position), V4 and NF */
+ break;
+
+ case X86_EVEX_APX_pp2: /* ND is mandatory; register number 4 is rejected for either operand, as are equal operands when op0 exists */
+ if (!CODE64(s) || !(s->flags & HF_APX_EN_MASK) ||
+ !EVEX_APX_ND(s) ||
+ (e->op0 != X86_TYPE_None && s->vex_v == ((decode->b & 7) | REX_B(s))) ||
+ ((decode->b & 7) | REX_B(s)) == 4 ||
+ s->vex_v == 4) {
+ goto illegal;
+ }
+ evex4_mask = 0x18; /* ND and V4; NF is not allowed */
+ break;
+
+ case X86_EVEX_APX_cmp:
+ if (!CODE64(s) || !(s->flags & HF_APX_EN_MASK)) {
+ goto illegal;
+ }
+ evex4_mask = 0x0F; /* low four bits only; ND (0x10) must be clear */
+ break;
+ }
+
+ if (s->evex4 & ~evex4_mask) {
+ goto illegal;
+ }
+
+ /* mod == 11b (register operand) requires X4=0 (that is, EVEX.U is 1) */
+ if ((s->modrm >> 6) == 3 && (s->rex_x & 16)) {
+ goto illegal;
+ }
+ return true;
+
+illegal:
+ gen_illegal_opcode(s);
+ return false;
+}
+
static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
{
X86OpEntry *e = &decode->e;
@@ -2824,14 +2951,32 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
}
break;
case 13:
- if (!(s->prefix & PREFIX_VEX)) {
+ if (!(s->prefix & (PREFIX_VEX | PREFIX_EVEX))) {
goto illegal;
}
if (s->vex_l) {
goto illegal;
}
+ if (s->prefix & PREFIX_EVEX) {
+ if (!validate_evex_apx(s, decode)) {
+ return false;
+ }
+ }
/* All integer instructions use VEX.vvvv, so exit. */
return true;
+ case X86_EVEX_APX:
+ case X86_EVEX_APX_cmp:
+ case X86_EVEX_APX_pp2:
+ case X86_EVEX_APX_zu:
+ /* Only reached through EVEX map 4. */
+ assert(!(s->prefix & PREFIX_VEX));
+ if (s->vex_l) {
+ goto illegal;
+ }
+ if ((s->prefix & PREFIX_EVEX) && !validate_evex_apx(s, decode)) {
+ return false;
+ }
+ return true;
}
if (s->vex_v != 0 && !s->vex_ndd) {
@@ -3193,6 +3338,9 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if ((decode.e.check & X86_CHECK_W1) && !s->vex_w) {
goto illegal_op;
}
+ if ((decode.e.check & X86_CHECK_nf0) && EVEX_APX_NF(s)) {
+ goto illegal_op;
+ }
}
if (op_has_modrm(decode.e.op0) || op_has_modrm(decode.e.op1) ||
@@ -3338,6 +3486,18 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
* cause incorrect tracking of CC_OP for instructions that write to both memory
* and flags.
*/
+ switch (decode.e.vex_class) {
+ case 13:
+ case X86_EVEX_APX:
+ case X86_EVEX_APX_zu:
+ if (EVEX_APX_NF(s)) {
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
if (decode.cc_op != -1) {
if (decode.cc_dst) {
tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
--
2.52.0