From: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>
Signed-off-by: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>
Reviewed-by: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241007025700.47259-5-zhiwei_liu@linux.alibaba.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/riscv/tcg-target.c.inc | 76 +++++++++++++++++++++++++++++++++++++-
1 file changed, 74 insertions(+), 2 deletions(-)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 38d71111c9..17fcc21b0e 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -309,6 +309,12 @@ typedef enum {
OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
+
+ OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
+ OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
+ OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
+
+ OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
} RISCVInsn;
/*
@@ -401,6 +407,16 @@ static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
}
+
+/* Type-OPIVI */
+
+static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
+ TCGReg vs2, bool vm)
+{
+ return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
+ (vs2 & 0x1f) << 20 | (vm << 25);
+}
+
/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */
static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
@@ -546,6 +562,24 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
* RISC-V vector instruction emitters
*/
+/*
+ * Vector registers use the same 5 lower bits as GPR registers,
+ * and vm=0 (vm = false) means vector masking ENABLED.
+ * With RVV 1.0, vs2 is the first operand, while rs1/imm is the
+ * second operand.
+ */
+static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
+ TCGReg vd, TCGReg vs2, TCGReg rs1)
+{
+ tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
+}
+
+static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
+ TCGReg vd, TCGReg vs2, int32_t imm)
+{
+ tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
+}
+
typedef struct VsetCache {
uint32_t movi_insn;
uint32_t vset_insn;
@@ -574,6 +608,13 @@ static MemOp set_vtype_len(TCGContext *s, TCGType type)
return s->riscv_cur_vsew;
}
+static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
+{
+ if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
+ set_vtype(s, type, vsew);
+ }
+}
+
/*
* TCG intrinsics
*/
@@ -588,6 +629,15 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
case TCG_TYPE_I64:
tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ {
+ int lmul = type - riscv_lg2_vlenb;
+ int nf = 1 << MAX(lmul, 0);
+ tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
+ }
+ break;
default:
g_assert_not_reached();
}
@@ -951,18 +1001,35 @@ static void tcg_out_addsub2(TCGContext *s,
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src)
{
- return false;
+ set_vtype_len_sew(s, type, vece);
+ tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
+ return true;
}
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset)
{
- return false;
+ tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
+ return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, int64_t arg)
{
+ /* Arg is replicated by VECE; extract the highest element. */
+ arg >>= (-8 << vece) & 63;
+
+ if (arg >= -16 && arg < 16) {
+ if (arg == 0 || arg == -1) {
+ set_vtype_len(s, type);
+ } else {
+ set_vtype_len_sew(s, type, vece);
+ }
+ tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
+ return;
+ }
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
+ tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}
static const struct {
@@ -2104,6 +2171,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
a2 = args[2];
switch (opc) {
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
case INDEX_op_ld_vec:
tcg_out_ld(s, type, a0, a1, a2);
break;
@@ -2272,6 +2342,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_st_vec:
return C_O0_I2(v, r);
+ case INDEX_op_dup_vec:
+ case INDEX_op_dupm_vec:
case INDEX_op_ld_vec:
return C_O1_I1(v, r);
default:
--
2.43.0