[PATCH 20/25] target/i386: convert CMPXCHG to new decoder

Paolo Bonzini posted 25 patches 5 months, 2 weeks ago
There is a newer version of this series
[PATCH 20/25] target/i386: convert CMPXCHG to new decoder
Posted by Paolo Bonzini 5 months, 2 weeks ago
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c      | 79 --------------------------------
 target/i386/tcg/decode-new.c.inc |  3 +-
 target/i386/tcg/emit.c.inc       | 51 +++++++++++++++++++++
 3 files changed, 53 insertions(+), 80 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 7a63c927c1f..1f76339130a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -434,13 +434,6 @@ static inline MemOp mo_stacksize(DisasContext *s)
     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
 }
 
-/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
-   byte vs word opcodes.  */
-static inline MemOp mo_b_d(int b, MemOp ot)
-{
-    return b & 1 ? ot : MO_8;
-}
-
 /* Compute the result of writing t0 to the OT-sized register REG.
  *
  * If DEST is NULL, store the result into the register and return the
@@ -715,11 +708,6 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
     return dst;
 }
 
-static void gen_extu(MemOp ot, TCGv reg)
-{
-    gen_ext_tl(reg, reg, ot, false);
-}
-
 static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1)
 {
     TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false);
@@ -2998,73 +2986,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
 
     /* now check op code */
     switch (b) {
-        /**************************/
-        /* arith & logic */
-    case 0x1b0:
-    case 0x1b1: /* cmpxchg Ev, Gv */
-        {
-            TCGv oldv, newv, cmpv, dest;
-
-            ot = mo_b_d(b, dflag);
-            modrm = x86_ldub_code(env, s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            mod = (modrm >> 6) & 3;
-            oldv = tcg_temp_new();
-            newv = tcg_temp_new();
-            cmpv = tcg_temp_new();
-            gen_op_mov_v_reg(s, ot, newv, reg);
-            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
-            gen_extu(ot, cmpv);
-            if (s->prefix & PREFIX_LOCK) {
-                if (mod == 3) {
-                    goto illegal_op;
-                }
-                gen_lea_modrm(env, s, modrm);
-                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
-                                          s->mem_index, ot | MO_LE);
-            } else {
-                if (mod == 3) {
-                    rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(s, ot, oldv, rm);
-                    gen_extu(ot, oldv);
-
-                    /*
-                     * Unlike the memory case, where "the destination operand receives
-                     * a write cycle without regard to the result of the comparison",
-                     * rm must not be touched altogether if the write fails, including
-                     * not zero-extending it on 64-bit processors.  So, precompute
-                     * the result of a successful writeback and perform the movcond
-                     * directly on cpu_regs.  Also need to write accumulator first, in
-                     * case rm is part of RAX too.
-                     */
-                    dest = gen_op_deposit_reg_v(s, ot, rm, newv, newv);
-                    tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
-                } else {
-                    gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, s->A0);
-
-                    /*
-                     * Perform an unconditional store cycle like physical cpu;
-                     * must be before changing accumulator to ensure
-                     * idempotency if the store faults and the instruction
-                     * is restarted
-                     */
-                    tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
-                    gen_op_st_v(s, ot, newv, s->A0);
-                }
-            }
-	    /*
-	     * Write EAX only if the cmpxchg fails; reuse newv as the destination,
-	     * since it's dead here.
-	     */
-            dest = gen_op_deposit_reg_v(s, ot, R_EAX, newv, oldv);
-            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, dest, newv);
-            tcg_gen_mov_tl(cpu_cc_src, oldv);
-            tcg_gen_mov_tl(s->cc_srcT, cmpv);
-            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
-            set_cc_op(s, CC_OP_SUBB + ot);
-        }
-        break;
     case 0x1c7: /* cmpxchg8b */
         modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 11ecd1c6c1d..00ffaeb0763 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1181,6 +1181,8 @@ static const X86OpEntry opcodes_0F[256] = {
     [0xa4] = X86_OP_ENTRY4(SHLD,  E,v, 2op,v, G,v),
     [0xa5] = X86_OP_ENTRY3(SHLD,  E,v, 2op,v, G,v),
 
+    [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock),
+    [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock),
     [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
     [0xb3] = X86_OP_ENTRY2(BTR,    E,v, G,v,             btEvGv),
     [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
@@ -2612,7 +2614,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
             switch (b) {
             case 0x00 ... 0x01: /* mostly privileged instructions */
             case 0x1a ... 0x1b: /* MPX */
-            case 0xb0 ... 0xb1: /* cmpxchg */
             case 0xc7:          /* grp9 */
                 disas_insn_old(s, cpu, b + 0x100);
                 return;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 42e41a7a87c..857d270d247 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1708,6 +1708,57 @@ static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
     }
 }
 
+static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    TCGv cmpv = tcg_temp_new();
+    TCGv oldv = tcg_temp_new();
+    TCGv newv = tcg_temp_new();
+    TCGv dest;
+
+    tcg_gen_ext_tl(cmpv, cpu_regs[R_EAX], ot);
+    tcg_gen_ext_tl(newv, s->T1, ot);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
+                                  s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_ext_tl(oldv, s->T0, ot);
+        if (decode->op[0].has_ea) {
+            /*
+             * Perform an unconditional store cycle like physical cpu;
+             * must be before changing accumulator to ensure
+             * idempotency if the store faults and the instruction
+             * is restarted
+             */
+            tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+            gen_op_st_v(s, ot, newv, s->A0);
+        } else {
+            /*
+             * Unlike the memory case, where "the destination operand receives
+             * a write cycle without regard to the result of the comparison",
+             * rm must not be touched altogether if the write fails, including
+             * not zero-extending it on 64-bit processors.  So, precompute
+             * the result of a successful writeback and perform the movcond
+             * directly on cpu_regs.  In case rm is part of RAX, note that this
+             * movcond and the one below are mutually exclusive.
+             */
+            dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, newv, newv);
+            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
+        }
+        decode->op[0].unit = X86_OP_SKIP;
+    }
+
+    /* Write RAX only if the cmpxchg fails.  */
+    dest = gen_op_deposit_reg_v(s, ot, R_EAX, s->T0, oldv);
+    tcg_gen_movcond_tl(TCG_COND_NE, dest, oldv, cmpv, s->T0, dest);
+
+    tcg_gen_mov_tl(s->cc_srcT, cmpv);
+    tcg_gen_sub_tl(cmpv, cmpv, oldv);
+    decode->cc_dst = cmpv;
+    decode->cc_src = oldv;
+    decode->cc_op = CC_OP_SUBB + ot;
+}
+
 static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
-- 
2.45.1
Re: [PATCH 20/25] target/i386: convert CMPXCHG to new decoder
Posted by Richard Henderson 5 months, 2 weeks ago
On 6/8/24 01:41, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
>   target/i386/tcg/translate.c      | 79 --------------------------------
>   target/i386/tcg/decode-new.c.inc |  3 +-
>   target/i386/tcg/emit.c.inc       | 51 +++++++++++++++++++++
>   3 files changed, 53 insertions(+), 80 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~