The REX2 prefix has two main complications: it does not apply
to vector registers, and it disables or mutates some opcodes
(thus needing separate decoding functions instead of decode_root
and decode_0F). Otherwise, all it does is extend s->rex_r,
s->rex_x and s->rex_b to two bits.
Since REX2 provides the ability to access r16...r31, extend
cpu_regs[] to CPU_NB_EREGS elements.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 22 +++++-
target/i386/tcg/decode-new.c.inc | 114 +++++++++++++++++++++++++++++--
2 files changed, 126 insertions(+), 10 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 061adcb7221..47eef81ba05 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -78,7 +78,7 @@
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv cpu_eip;
static TCGv_i32 cpu_cc_op;
-static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv cpu_regs[CPU_NB_EREGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];
@@ -3349,7 +3349,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
void tcg_x86_init(void)
{
- static const char reg_names[CPU_NB_REGS][4] = {
+ static const char reg_names[CPU_NB_EREGS][4] = {
#ifdef TARGET_X86_64
[R_EAX] = "rax",
[R_EBX] = "rbx",
@@ -3367,6 +3367,22 @@ void tcg_x86_init(void)
[13] = "r13",
[14] = "r14",
[15] = "r15",
+ [16] = "r16",
+ [17] = "r17",
+ [18] = "r18",
+ [19] = "r19",
+ [20] = "r20",
+ [21] = "r21",
+ [22] = "r22",
+ [23] = "r23",
+ [24] = "r24",
+ [25] = "r25",
+ [26] = "r26",
+ [27] = "r27",
+ [28] = "r28",
+ [29] = "r29",
+ [30] = "r30",
+ [31] = "r31",
#else
[R_EAX] = "eax",
[R_EBX] = "ebx",
@@ -3411,7 +3427,7 @@ void tcg_x86_init(void)
"cc_src2");
cpu_eip = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, eip), eip_name);
- for (i = 0; i < CPU_NB_REGS; ++i) {
+ for (i = 0; i < CPU_NB_EREGS; ++i) {
cpu_regs[i] = tcg_global_mem_new(tcg_env,
offsetof(CPUX86State, regs[i]),
reg_names[i]);
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index b968db2b8ad..9ee69564ab1 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1988,6 +1988,65 @@ static const X86OpEntry opcodes_root[256] = {
[0xFF] = X86_OP_GROUP1(group4_5, E,v),
};
+#ifdef TARGET_X86_64
+static const X86OpEntry opcodes_rex2_map0_A0toAF[16] = { /* row used for opcodes 0xA0..0xAF after a REX2 prefix; no initializers, so every entry is zero-filled */
+};
+
+static void decode_REX2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b,
+ const X86OpEntry **map) /* MAP: 16 row pointers indexed by the high nibble of the opcode; a NULL row has no valid opcodes */
+{
+ *b = x86_ldub_code(env, s); /* fetch the next code byte and store it in *b as the opcode */
+ const X86OpEntry *group = map[*b >> 4]; /* high nibble selects the 16-entry row */
+ *entry = group ? group[*b & 15] : UNKNOWN_OPCODE; /* low nibble selects the entry; a NULL row yields UNKNOWN_OPCODE */
+}
+
+static void decode_REX2_map0(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) /* decode callback installed by disas_insn when bit 7 of the REX2 payload is clear; looks the opcode up in rows taken mostly from opcodes_root */
+{
+ static const X86OpEntry *opcode_rex2_map0[16] = { /* one row pointer per high nibble of the opcode */
+ &opcodes_root[0x00],
+ &opcodes_root[0x10],
+ &opcodes_root[0x20],
+ &opcodes_root[0x30],
+ NULL, /* 0x40..0x4F: no row, so decode_REX2 yields UNKNOWN_OPCODE */
+ &opcodes_root[0x50],
+ &opcodes_root[0x60],
+ NULL, /* 0x70..0x7F: no row, UNKNOWN_OPCODE */
+ &opcodes_root[0x80],
+ &opcodes_root[0x90],
+ opcodes_rex2_map0_A0toAF, /* 0xA0..0xAF: dedicated REX2 table instead of the opcodes_root row */
+ &opcodes_root[0xB0],
+ &opcodes_root[0xC0],
+ &opcodes_root[0xD0],
+ NULL, /* 0xE0..0xEF: no row, UNKNOWN_OPCODE */
+ &opcodes_root[0xF0],
+ };
+ decode_REX2(s, env, entry, b, opcode_rex2_map0); /* shared helper fetches the opcode byte and does the lookup */
+}
+
+static void decode_REX2_map1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) /* decode callback installed by disas_insn when bit 7 of the REX2 payload is set; looks the opcode up in rows taken from opcodes_0F */
+{
+ static const X86OpEntry *opcode_rex2_map1[16] = { /* one row pointer per high nibble of the opcode */
+ &opcodes_0F[0x00],
+ &opcodes_0F[0x10],
+ &opcodes_0F[0x20],
+ NULL, /* 0x30..0x3F: no row, so decode_REX2 yields UNKNOWN_OPCODE */
+ &opcodes_0F[0x40],
+ &opcodes_0F[0x50],
+ &opcodes_0F[0x60],
+ &opcodes_0F[0x70],
+ NULL, /* 0x80..0x8F: no row, UNKNOWN_OPCODE */
+ &opcodes_0F[0x90],
+ &opcodes_0F[0xA0],
+ &opcodes_0F[0xB0],
+ &opcodes_0F[0xC0],
+ &opcodes_0F[0xD0],
+ &opcodes_0F[0xE0],
+ &opcodes_0F[0xF0],
+ };
+ decode_REX2(s, env, entry, b, opcode_rex2_map1); /* shared helper fetches the opcode byte and does the lookup */
+}
+#endif
+
#undef mmx
#undef vex1
#undef vex2
@@ -2007,6 +2066,20 @@ static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
*entry = opcodes_root[*b];
}
+static int reg_nb_mask(DisasContext *s, int unit) /* returns the mask of register-number bits that are kept for an operand of unit UNIT; S is currently unused */
+{
+ switch (unit) {
+ case X86_OP_MMX:
+ return 7; /* 3 bits: drops every REX/REX2 extension bit (values 8 and 16) */
+ case X86_OP_SSE:
+ return 15; /* 4 bits: keeps bit 3 but drops bit 4, which only REX2 can set */
+ break; /* not reached */
+ default:
+ return 31; /* all 5 register-number bits are kept */
+ break; /* not reached */
+ }
+}
+
/* Decode the MODRM and SIB bytes into a register or memory operand. */
static void decode_modrm(DisasContext *s, CPUX86State *env,
X86DecodedInsn *decode, X86DecodedOp *op)
@@ -2018,10 +2091,7 @@ static void decode_modrm(DisasContext *s, CPUX86State *env,
int sib = -1;
if (mod == 3) {
- op->n = rm;
- if (op->unit != X86_OP_MMX) {
- op->n |= REX_B(s);
- }
+ op->n = (rm | REX_B(s)) & reg_nb_mask(s, op->unit);
return;
}
@@ -2300,9 +2370,7 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
}
get_reg:
op->n = ((get_modrm(s, env) >> 3) & 7);
- if (op->unit != X86_OP_MMX) {
- op->n |= REX_R(s);
- }
+ op->n |= REX_R(s) & reg_nb_mask(s, op->unit);
break;
case X86_TYPE_E: /* ALU modrm operand */
@@ -2749,6 +2817,24 @@ static void dump_unknown_opcode(CPUX86State *env, DisasContext *s)
}
}
+/* MASK must have exactly two bits set. Bring the highest next to the lowest;
+ * for example if MASK == 0x11, bit 4 of value is moved to bit 1. Clear
+ * every other bit in VALUE, so the result is a two-bit field that starts
+ * at the position of the lowest bit of MASK.
+ * Generally mask will be a constant, so that all of the first three
+ * lines disappear. Likewise, if the bits in mask are already adjacent
+ * this becomes just "return value & mask".
+ */
+static inline uint8_t collapse_two_bits(uint8_t value, uint8_t mask)
+{
+ uint8_t high = mask & (mask - 1); /* clearing the lowest set bit leaves the higher mask bit */
+ uint8_t low = mask & ~high; /* the lower mask bit */
+ uint8_t tweak = (low << 1) - high; /* modulo 256: adding this to a value with the high bit set moves that bit to just above LOW */
+
+ value &= mask; /* now one of 0, low, high, high | low */
+ return value + (value > low ? tweak : 0); /* value > low exactly when the high bit is set; the sum wraps in uint8_t */
+}
+
/*
* Convert one instruction. s->base.is_jmp is set if the translation must
* be stopped.
@@ -2833,6 +2919,20 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
goto next_byte_rex;
}
break;
+ case 0xd5: /* REX2 */
+ if (CODE64(s) && (s->flags & HF_APX_EN_MASK)) {
+ int rex2 = x86_ldub_code(env, s);
+ if (rex != -1) {
+ goto illegal_op;
+ }
+ s->prefix |= PREFIX_REX2;
+ s->rex_b = collapse_two_bits(rex2, 0x11) << 3;
+ s->rex_x = collapse_two_bits(rex2, 0x22) << 2;
+ s->rex_r = collapse_two_bits(rex2, 0x44) << 1;
+ s->vex_w = (rex2 >> 3) & 1;
+ decode_func = rex2 & 0x80 ? decode_REX2_map1 : decode_REX2_map0;
+ }
+ break;
#endif
case 0xc5: /* 2-byte VEX */
case 0xc4: /* 3-byte VEX */
--
2.52.0