[PATCH v3 12/35] tcg/wasm: Add setcond/negsetcond/movcond instructions

Kohei Tokunaga posted 35 patches 2 weeks ago
Maintainers: "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Thomas Huth <thuth@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, WANG Xuerui <git@xen0n.name>, Aurelien Jarno <aurelien@aurel32.net>, Huacai Chen <chenhuacai@kernel.org>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Aleksandar Rikalo <arikalo@gmail.com>, Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <Alistair.Francis@wdc.com>, Stefan Weil <sw@weilnetz.de>, Kohei Tokunaga <ktokunaga.mail@gmail.com>
[PATCH v3 12/35] tcg/wasm: Add setcond/negsetcond/movcond instructions
Posted by Kohei Tokunaga 2 weeks ago
These TCG instructions are implemented by using Wasm's if and else
instructions. TCI instructions are also generated in the same way as the
original TCI backend. Since support for TCG_COND_TSTEQ and TCG_COND_TSTNE is
not yet implemented, TCG_TARGET_HAS_tst is set to 0.

Signed-off-by: Kohei Tokunaga <ktokunaga.mail@gmail.com>
---
 tcg/wasm.c                    | 136 +++++++++++++++++++-
 tcg/wasm/tcg-target-has.h     |   7 ++
 tcg/wasm/tcg-target-opc.h.inc |   8 ++
 tcg/wasm/tcg-target.c.inc     | 230 ++++++++++++++++++++++++++++++++++
 4 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 tcg/wasm/tcg-target-has.h
 create mode 100644 tcg/wasm/tcg-target-opc.h.inc

diff --git a/tcg/wasm.c b/tcg/wasm.c
index b63b88e011..183dad10a2 100644
--- a/tcg/wasm.c
+++ b/tcg/wasm.c
@@ -21,6 +21,12 @@
 #include "qemu/osdep.h"
 #include "tcg/tcg.h"
 
+static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1)
+{
+    *r0 = extract32(insn, 8, 4);
+    *r1 = extract32(insn, 12, 4);
+}
+
 static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
 {
     *r0 = extract32(insn, 8, 4);
@@ -37,6 +43,110 @@ static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
     *i3 = extract32(insn, 22, 6);
 }
 
+static void tci_args_rrrc(uint32_t insn,
+                          TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3)
+{
+    *r0 = extract32(insn, 8, 4);
+    *r1 = extract32(insn, 12, 4);
+    *r2 = extract32(insn, 16, 4);
+    *c3 = extract32(insn, 20, 4);
+}
+
+static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
+                            TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
+{
+    *r0 = extract32(insn, 8, 4);
+    *r1 = extract32(insn, 12, 4);
+    *r2 = extract32(insn, 16, 4);
+    *r3 = extract32(insn, 20, 4);
+    *r4 = extract32(insn, 24, 4);
+    *c5 = extract32(insn, 28, 4);
+}
+
+static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
+{
+    bool result = false;
+    int32_t i0 = u0;
+    int32_t i1 = u1;
+    switch (condition) {
+    case TCG_COND_EQ:
+        result = (u0 == u1);
+        break;
+    case TCG_COND_NE:
+        result = (u0 != u1);
+        break;
+    case TCG_COND_LT:
+        result = (i0 < i1);
+        break;
+    case TCG_COND_GE:
+        result = (i0 >= i1);
+        break;
+    case TCG_COND_LE:
+        result = (i0 <= i1);
+        break;
+    case TCG_COND_GT:
+        result = (i0 > i1);
+        break;
+    case TCG_COND_LTU:
+        result = (u0 < u1);
+        break;
+    case TCG_COND_GEU:
+        result = (u0 >= u1);
+        break;
+    case TCG_COND_LEU:
+        result = (u0 <= u1);
+        break;
+    case TCG_COND_GTU:
+        result = (u0 > u1);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return result;
+}
+
+static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
+{
+    bool result = false;
+    int64_t i0 = u0;
+    int64_t i1 = u1;
+    switch (condition) {
+    case TCG_COND_EQ:
+        result = (u0 == u1);
+        break;
+    case TCG_COND_NE:
+        result = (u0 != u1);
+        break;
+    case TCG_COND_LT:
+        result = (i0 < i1);
+        break;
+    case TCG_COND_GE:
+        result = (i0 >= i1);
+        break;
+    case TCG_COND_LE:
+        result = (i0 <= i1);
+        break;
+    case TCG_COND_GT:
+        result = (i0 > i1);
+        break;
+    case TCG_COND_LTU:
+        result = (u0 < u1);
+        break;
+    case TCG_COND_GEU:
+        result = (u0 >= u1);
+        break;
+    case TCG_COND_LEU:
+        result = (u0 <= u1);
+        break;
+    case TCG_COND_GTU:
+        result = (u0 > u1);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return result;
+}
+
 static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr)
 {
     const uint32_t *tb_ptr = v_tb_ptr;
@@ -50,8 +160,10 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr)
     for (;;) {
         uint32_t insn;
         TCGOpcode opc;
-        TCGReg r0, r1, r2;
+        TCGReg r0, r1, r2, r3, r4;
         uint8_t pos, len;
+        TCGCond condition;
+        uint32_t tmp32;
 
         insn = *tb_ptr++;
         opc = extract32(insn, 0, 8);
@@ -102,6 +214,28 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr)
             regs[r0] = ((tcg_target_long)regs[r1]
                         >> (regs[r2] % TCG_TARGET_REG_BITS));
             break;
+        case INDEX_op_neg:
+            tci_args_rr(insn, &r0, &r1);
+            regs[r0] = -regs[r1];
+            break;
+        case INDEX_op_setcond:
+            tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
+            regs[r0] = tci_compare64(regs[r1], regs[r2], condition);
+            break;
+        case INDEX_op_movcond:
+            tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
+            tmp32 = tci_compare64(regs[r1], regs[r2], condition);
+            regs[r0] = regs[tmp32 ? r3 : r4];
+            break;
+        case INDEX_op_tci_setcond32:
+            tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
+            regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
+            break;
+        case INDEX_op_tci_movcond32:
+            tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
+            tmp32 = tci_compare32(regs[r1], regs[r2], condition);
+            regs[r0] = regs[tmp32 ? r3 : r4];
+            break;
         default:
             g_assert_not_reached();
         }
diff --git a/tcg/wasm/tcg-target-has.h b/tcg/wasm/tcg-target-has.h
new file mode 100644
index 0000000000..7e3caf8790
--- /dev/null
+++ b/tcg/wasm/tcg-target-has.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef TCG_TARGET_HAS_H
+#define TCG_TARGET_HAS_H
+
+#define TCG_TARGET_HAS_tst 0
+
+#endif
diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc
new file mode 100644
index 0000000000..57274d4569
--- /dev/null
+++ b/tcg/wasm/tcg-target-opc.h.inc
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Based on tci/tcg-target-opc.h.inc
+ *
+ * These opcodes for use between the tci generator and interpreter.
+ */
+DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT)
+DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT)
diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc
index 3a2a707619..70de3bbf83 100644
--- a/tcg/wasm/tcg-target.c.inc
+++ b/tcg/wasm/tcg-target.c.inc
@@ -137,9 +137,37 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = {
 #define REG_IDX(r) tcg_target_reg_index[r]
 
 typedef enum {
+    OPC_IF = 0x04,
+    OPC_ELSE = 0x05,
+    OPC_END = 0x0b,
     OPC_GLOBAL_GET = 0x23,
     OPC_GLOBAL_SET = 0x24,
 
+    OPC_I32_CONST = 0x41,
+    OPC_I64_CONST = 0x42,
+
+    OPC_I32_EQ = 0x46,
+    OPC_I32_NE = 0x47,
+    OPC_I32_LT_S = 0x48,
+    OPC_I32_LT_U = 0x49,
+    OPC_I32_GT_S = 0x4a,
+    OPC_I32_GT_U = 0x4b,
+    OPC_I32_LE_S = 0x4c,
+    OPC_I32_LE_U = 0x4d,
+    OPC_I32_GE_S = 0x4e,
+    OPC_I32_GE_U = 0x4f,
+
+    OPC_I64_EQ = 0x51,
+    OPC_I64_NE = 0x52,
+    OPC_I64_LT_S = 0x53,
+    OPC_I64_LT_U = 0x54,
+    OPC_I64_GT_S = 0x55,
+    OPC_I64_GT_U = 0x56,
+    OPC_I64_LE_S = 0x57,
+    OPC_I64_LE_U = 0x58,
+    OPC_I64_GE_S = 0x59,
+    OPC_I64_GE_U = 0x5a,
+
     OPC_I32_SHR_S = 0x75,
     OPC_I32_SHR_U = 0x76,
 
@@ -157,6 +185,10 @@ typedef enum {
     OPC_I64_EXTEND_I32_U = 0xad,
 } WasmInsn;
 
+typedef enum {
+    BLOCK_I64 = 0x7e,
+} WasmBlockType;
+
 #define BUF_SIZE 1024
 typedef struct LinkedBufEntry {
     uint8_t data[BUF_SIZE];
@@ -191,6 +223,23 @@ static void linked_buf_out_leb128(LinkedBuf *p, uint64_t v)
     } while (v != 0);
 }
 
+static void linked_buf_out_sleb128(LinkedBuf *p, int64_t v)
+{
+    bool more = true;
+    uint8_t b;
+    while (more) {
+        b = v & 0x7f;
+        v >>= 7;
+        if (((v == 0) && ((b & 0x40) == 0)) ||
+            ((v == -1) && ((b & 0x40) != 0))) {
+            more = false;
+        } else {
+            b |= 0x80;
+        }
+        linked_buf_out8(p, b);
+    }
+}
+
 /*
  * wasm code is generataed in the dynamically allocated buffer which
  * are managed as a linked list.
@@ -209,6 +258,10 @@ static void tcg_wasm_out_leb128(TCGContext *s, uint64_t v)
 {
     linked_buf_out_leb128(&sub_buf, v);
 }
+static void tcg_wasm_out_sleb128(TCGContext *s, int64_t v)
+{
+    linked_buf_out_sleb128(&sub_buf, v);
+}
 
 static void tcg_wasm_out_op(TCGContext *s, WasmInsn opc)
 {
@@ -219,6 +272,25 @@ static void tcg_wasm_out_op_idx(TCGContext *s, WasmInsn opc, uint32_t idx)
     tcg_wasm_out8(s, opc);
     tcg_wasm_out_leb128(s, idx);
 }
+static void tcg_wasm_out_op_block(TCGContext *s, WasmInsn opc, WasmBlockType t)
+{
+    tcg_wasm_out8(s, opc);
+    tcg_wasm_out8(s, t);
+}
+static void tcg_wasm_out_op_const(TCGContext *s, WasmInsn opc, int64_t v)
+{
+    tcg_wasm_out8(s, opc);
+    switch (opc) {
+    case OPC_I32_CONST:
+        tcg_wasm_out_sleb128(s, (int32_t)v);
+        break;
+    case OPC_I64_CONST:
+        tcg_wasm_out_sleb128(s, v);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
 
 static void tcg_wasm_out_o1_i2(
     TCGContext *s, WasmInsn opc, TCGReg ret, TCGReg arg1, TCGReg arg2)
@@ -250,6 +322,85 @@ static void tcg_wasm_out_o1_i2_type(
     }
 }
 
+static const struct {
+    WasmInsn i32;
+    WasmInsn i64;
+} tcg_cond_to_inst[] = {
+    [TCG_COND_EQ] =  { OPC_I32_EQ,   OPC_I64_EQ },
+    [TCG_COND_NE] =  { OPC_I32_NE,   OPC_I64_NE },
+    [TCG_COND_LT] =  { OPC_I32_LT_S, OPC_I64_LT_S },
+    [TCG_COND_GE] =  { OPC_I32_GE_S, OPC_I64_GE_S },
+    [TCG_COND_LE] =  { OPC_I32_LE_S, OPC_I64_LE_S },
+    [TCG_COND_GT] =  { OPC_I32_GT_S, OPC_I64_GT_S },
+    [TCG_COND_LTU] = { OPC_I32_LT_U, OPC_I64_LT_U },
+    [TCG_COND_GEU] = { OPC_I32_GE_U, OPC_I64_GE_U },
+    [TCG_COND_LEU] = { OPC_I32_LE_U, OPC_I64_LE_U },
+    [TCG_COND_GTU] = { OPC_I32_GT_U, OPC_I64_GT_U }
+};
+
+static void tcg_wasm_out_cond(
+    TCGContext *s, TCGType type, TCGCond cond, TCGReg arg1, TCGReg arg2)
+{
+    switch (type) {
+    case TCG_TYPE_I32:
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1));
+        tcg_wasm_out_op(s, OPC_I32_WRAP_I64);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2));
+        tcg_wasm_out_op(s, OPC_I32_WRAP_I64);
+        tcg_wasm_out_op(s, tcg_cond_to_inst[cond].i32);
+        break;
+    case TCG_TYPE_I64:
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1));
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2));
+        tcg_wasm_out_op(s, tcg_cond_to_inst[cond].i64);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_wasm_out_setcond(TCGContext *s, TCGType type, TCGReg ret,
+                                 TCGReg arg1, TCGReg arg2, TCGCond cond)
+{
+    tcg_wasm_out_cond(s, type, cond, arg1, arg2);
+    tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret));
+}
+
+static void tcg_wasm_out_negsetcond(TCGContext *s, TCGType type, TCGReg ret,
+                                    TCGReg arg1, TCGReg arg2, TCGCond cond)
+{
+    tcg_wasm_out_op_const(s, OPC_I64_CONST, 0);
+    tcg_wasm_out_cond(s, type, cond, arg1, arg2);
+    tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U);
+    tcg_wasm_out_op(s, OPC_I64_SUB);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret));
+}
+
+static void tcg_wasm_out_movcond(TCGContext *s, TCGType type, TCGReg ret,
+                                 TCGReg c1, TCGReg c2,
+                                 TCGReg v1, TCGReg v2,
+                                 TCGCond cond)
+{
+    tcg_wasm_out_cond(s, type, cond, c1, c2);
+    tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I64);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(v1));
+    tcg_wasm_out_op(s, OPC_ELSE);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(v2));
+    tcg_wasm_out_op(s, OPC_END);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret));
+}
+
+static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1)
+{
+    tcg_insn_unit_tci insn = 0;
+
+    insn = deposit32(insn, 0, 8, op);
+    insn = deposit32(insn, 8, 4, r0);
+    insn = deposit32(insn, 12, 4, r1);
+    tcg_out32(s, insn);
+}
+
 static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op,
                            TCGReg r0, TCGReg r1, TCGReg r2)
 {
@@ -277,6 +428,35 @@ static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
     tcg_out32(s, insn);
 }
 
+static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
+                            TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
+{
+    tcg_insn_unit_tci insn = 0;
+
+    insn = deposit32(insn, 0, 8, op);
+    insn = deposit32(insn, 8, 4, r0);
+    insn = deposit32(insn, 12, 4, r1);
+    insn = deposit32(insn, 16, 4, r2);
+    insn = deposit32(insn, 20, 4, c3);
+    tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
+                              TCGReg r0, TCGReg r1, TCGReg r2,
+                              TCGReg r3, TCGReg r4, TCGCond c5)
+{
+    tcg_insn_unit_tci insn = 0;
+
+    insn = deposit32(insn, 0, 8, op);
+    insn = deposit32(insn, 8, 4, r0);
+    insn = deposit32(insn, 12, 4, r1);
+    insn = deposit32(insn, 16, 4, r2);
+    insn = deposit32(insn, 20, 4, r3);
+    insn = deposit32(insn, 24, 4, r4);
+    insn = deposit32(insn, 28, 4, c5);
+    tcg_out32(s, insn);
+}
+
 static void tgen_and(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
 {
@@ -397,6 +577,56 @@ static const TCGOutOpBinary outop_sar = {
     .out_rrr = tgen_sar,
 };
 
+static void tgen_setcond_tci(TCGContext *s, TCGType type, TCGCond cond,
+                             TCGReg dest, TCGReg arg1, TCGReg arg2)
+{
+    TCGOpcode opc = (type == TCG_TYPE_I32
+                     ? INDEX_op_tci_setcond32
+                     : INDEX_op_setcond);
+    tcg_out_op_rrrc(s, opc, dest, arg1, arg2, cond);
+}
+
+static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
+                         TCGReg dest, TCGReg arg1, TCGReg arg2)
+{
+    tgen_setcond_tci(s, type, cond, dest, arg1, arg2);
+    tcg_wasm_out_setcond(s, type, dest, arg1, arg2, cond);
+}
+
+static const TCGOutOpSetcond outop_setcond = {
+    .base.static_constraint = C_O1_I2(r, r, r),
+    .out_rrr = tgen_setcond,
+};
+
+static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
+                            TCGReg dest, TCGReg arg1, TCGReg arg2)
+{
+    tgen_setcond_tci(s, type, cond, dest, arg1, arg2);
+    tcg_out_op_rr(s, INDEX_op_neg, dest, dest);
+    tcg_wasm_out_negsetcond(s, type, dest, arg1, arg2, cond);
+}
+
+static const TCGOutOpSetcond outop_negsetcond = {
+    .base.static_constraint = C_O1_I2(r, r, r),
+    .out_rrr = tgen_negsetcond,
+};
+
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
+                         TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
+                         TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf)
+{
+    TCGOpcode opc = (type == TCG_TYPE_I32
+                     ? INDEX_op_tci_movcond32
+                     : INDEX_op_movcond);
+    tcg_out_op_rrrrrc(s, opc, ret, c1, c2, vt, vf, cond);
+    tcg_wasm_out_movcond(s, type, ret, c1, c2, vt, vf, cond);
+}
+
+static const TCGOutOpMovcond outop_movcond = {
+    .base.static_constraint = C_O1_I4(r, r, r, r, r),
+    .out = tgen_movcond,
+};
+
 static void tcg_out_tb_start(TCGContext *s)
 {
     init_sub_buf();
-- 
2.43.0