From: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>
Signed-off-by: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>
Reviewed-by: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
---
tcg/riscv/tcg-target-con-set.h | 2 +
tcg/riscv/tcg-target.c.inc | 202 +++++++++++++++++++++++++++++++--
2 files changed, 196 insertions(+), 8 deletions(-)
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index aac5ceee2b..d73a62b0f2 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -21,3 +21,5 @@ C_O1_I2(r, rZ, rZ)
C_N1_I2(r, r, rM)
C_O1_I4(r, r, rI, rM, rM)
C_O2_I4(r, r, rZ, rZ, rM, rM)
+C_O0_I2(v, r)
+C_O1_I1(v, r)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index df96d350a3..4b1079fc6f 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -174,6 +174,11 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define V_OPMVX (0x6 << 12)
#define V_OPCFG (0x7 << 12)
+/* NF <= 7 && NF >= 0 */
+#define V_NF(x) (x << 29)
+#define V_UNIT_STRIDE (0x0 << 20)
+#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
+
typedef enum {
VLMUL_M1 = 0, /* LMUL=1 */
VLMUL_M2, /* LMUL=2 */
@@ -285,6 +290,25 @@ typedef enum {
OPC_VSETVLI = 0x57 | V_OPCFG,
OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
OPC_VSETVL = 0x80000057 | V_OPCFG,
+
+ OPC_VLE8_V = 0x7 | V_UNIT_STRIDE,
+ OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
+ OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
+ OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
+ OPC_VSE8_V = 0x27 | V_UNIT_STRIDE,
+ OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
+ OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
+ OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,
+
+ OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
+ OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
+ OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
+ OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
+
+ OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
+ OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
+ OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
+ OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
} RISCVInsn;
/*
@@ -646,6 +670,20 @@ static void tcg_target_set_vec_config(TCGContext *s, TCGType type,
}
}
+static int riscv_set_vec_config_vl(TCGContext *s, TCGType type)
+{
+ int prev_vsew = s->riscv_host_vtype < 0 ? MO_8 :
+ ((s->riscv_host_vtype >> 3) & 0x7);
+ tcg_target_set_vec_config(s, type, prev_vsew);
+ return prev_vsew;
+}
+
+static void riscv_set_vec_config_vl_vece(TCGContext *s, TCGType type,
+ unsigned vece)
+{
+ tcg_target_set_vec_config(s, type, vece);
+}
+
/*
* TCG intrinsics
*/
@@ -811,31 +849,52 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
tcg_out_ext32s(s, ret, arg);
}
-static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
- TCGReg addr, intptr_t offset)
+static intptr_t split_offset_scalar(TCGContext *s, TCGReg *addr,
+ intptr_t offset)
{
intptr_t imm12 = sextreg(offset, 0, 12);
if (offset != imm12) {
intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
- if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+ if (*addr == TCG_REG_ZERO && diff == (int32_t)diff) {
imm12 = sextreg(diff, 0, 12);
tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
} else {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
- if (addr != TCG_REG_ZERO) {
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+ if (*addr != TCG_REG_ZERO) {
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, *addr);
}
}
- addr = TCG_REG_TMP2;
+ *addr = TCG_REG_TMP2;
+ }
+ return imm12;
+}
+
+static void split_offset_vector(TCGContext *s, TCGReg *addr, intptr_t offset)
+{
+ if (offset != 0) {
+ if (offset == sextreg(offset, 0, 12)) {
+ tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, *addr, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, *addr);
+ }
+ *addr = TCG_REG_TMP0;
}
+}
+
+static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+ TCGReg addr, intptr_t offset)
+{
+ intptr_t imm12;
switch (opc) {
case OPC_SB:
case OPC_SH:
case OPC_SW:
case OPC_SD:
+ imm12 = split_offset_scalar(s, &addr, offset);
tcg_out_opc_store(s, opc, addr, data, imm12);
break;
case OPC_LB:
@@ -845,8 +904,31 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
case OPC_LW:
case OPC_LWU:
case OPC_LD:
+ imm12 = split_offset_scalar(s, &addr, offset);
tcg_out_opc_imm(s, opc, data, addr, imm12);
break;
+ case OPC_VSE8_V:
+ case OPC_VSE16_V:
+ case OPC_VSE32_V:
+ case OPC_VSE64_V:
+ case OPC_VS1R_V:
+ case OPC_VS2R_V:
+ case OPC_VS4R_V:
+ case OPC_VS8R_V:
+ split_offset_vector(s, &addr, offset);
+ tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+ break;
+ case OPC_VLE8_V:
+ case OPC_VLE16_V:
+ case OPC_VLE32_V:
+ case OPC_VLE64_V:
+ case OPC_VL1RE64_V:
+ case OPC_VL2RE64_V:
+ case OPC_VL4RE64_V:
+ case OPC_VL8RE64_V:
+ split_offset_vector(s, &addr, offset);
+ tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+ break;
default:
g_assert_not_reached();
}
@@ -855,14 +937,101 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
+ RISCVInsn insn;
+
+ if (type < TCG_TYPE_V64) {
+ insn = (type == TCG_TYPE_I32) ? OPC_LW : OPC_LD;
+ } else {
+ int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+ switch (nf) {
+ case 1:
+ insn = OPC_VL1RE64_V;
+ break;
+ case 2:
+ insn = OPC_VL2RE64_V;
+ break;
+ case 4:
+ insn = OPC_VL4RE64_V;
+ break;
+ case 8:
+ insn = OPC_VL8RE64_V;
+ break;
+ default:
+ {
+ int prev_vsew = riscv_set_vec_config_vl(s, type);
+
+ switch (prev_vsew) {
+ case MO_8:
+ insn = OPC_VLE8_V;
+ break;
+ case MO_16:
+ insn = OPC_VLE16_V;
+ break;
+ case MO_32:
+ insn = OPC_VLE32_V;
+ break;
+ case MO_64:
+ insn = OPC_VLE64_V;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ break;
+ }
+ }
tcg_out_ldst(s, insn, arg, arg1, arg2);
}
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, intptr_t arg2)
{
- RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SW : OPC_SD;
+ RISCVInsn insn;
+
+ if (type < TCG_TYPE_V64) {
+ insn = (type == TCG_TYPE_I32) ? OPC_SW : OPC_SD;
+ } else {
+ int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+ switch (nf) {
+ case 1:
+ insn = OPC_VS1R_V;
+ break;
+ case 2:
+ insn = OPC_VS2R_V;
+ break;
+ case 4:
+ insn = OPC_VS4R_V;
+ break;
+ case 8:
+ insn = OPC_VS8R_V;
+ break;
+ default:
+ {
+ int prev_vsew = riscv_set_vec_config_vl(s, type);
+
+ switch (prev_vsew) {
+ case MO_8:
+ insn = OPC_VSE8_V;
+ break;
+ case MO_16:
+ insn = OPC_VSE16_V;
+ break;
+ case MO_32:
+ insn = OPC_VSE32_V;
+ break;
+ case MO_64:
+ insn = OPC_VSE64_V;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ break;
+ }
+ }
tcg_out_ldst(s, insn, arg, arg1, arg2);
}
@@ -2057,7 +2226,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
{
+ TCGType type = vecl + TCG_TYPE_V64;
+ TCGArg a0, a1, a2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
@@ -2221,6 +2403,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a64_i64:
return C_O0_I2(rZ, r);
+ case INDEX_op_st_vec:
+ return C_O0_I2(v, r);
+ case INDEX_op_ld_vec:
+ return C_O1_I1(v, r);
default:
g_assert_not_reached();
}
--
2.43.0
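A worked example for the nf switch in tcg_out_ld/tcg_out_st above: with
VLEN = 128 (riscv_vlenb = 16), TCG_TYPE_V256 is 32 bytes, so nf = 2 and the
whole-register OPC_VL2RE64_V form applies; TCG_TYPE_V64 is only 8 bytes,
nf = 0, and the code falls back to a unit-stride VLE*_V after setting
vtype. The same arithmetic as a standalone sketch, with the byte sizes
hard-coded for illustration:

#include <assert.h>

int main(void)
{
    int riscv_vlenb = 16;               /* host VLEN = 128 bits */
    int v64 = 8, v128 = 16, v256 = 32;  /* vector type sizes in bytes */

    assert(v256 / riscv_vlenb == 2);    /* -> OPC_VL2RE64_V */
    assert(v128 / riscv_vlenb == 1);    /* -> OPC_VL1RE64_V */
    assert(v64  / riscv_vlenb == 0);    /* -> default: unit-stride VLE*_V */
    return 0;
}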
On 9/4/24 07:27, LIU Zhiwei wrote:
> @@ -811,31 +849,52 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
> tcg_out_ext32s(s, ret, arg);
> }
>
> -static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
> - TCGReg addr, intptr_t offset)
> +static intptr_t split_offset_scalar(TCGContext *s, TCGReg *addr,
> + intptr_t offset)
> {
> intptr_t imm12 = sextreg(offset, 0, 12);
>
> if (offset != imm12) {
> intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
>
> - if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
> + if (*addr == TCG_REG_ZERO && diff == (int32_t)diff) {
> imm12 = sextreg(diff, 0, 12);
> tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
> } else {
> tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
> - if (addr != TCG_REG_ZERO) {
> - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
> + if (*addr != TCG_REG_ZERO) {
> + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, *addr);
> }
> }
> - addr = TCG_REG_TMP2;
> + *addr = TCG_REG_TMP2;
> + }
> + return imm12;
> +}
> +
> +static void split_offset_vector(TCGContext *s, TCGReg *addr, intptr_t offset)
> +{
> + if (offset != 0) {
> + if (offset == sextreg(offset, 0, 12)) {
> + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, *addr, offset);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
> + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, *addr);
> + }
> + *addr = TCG_REG_TMP0;
> }
> +}
> +
> +static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
> + TCGReg addr, intptr_t offset)
> +{
> + intptr_t imm12;
>
> switch (opc) {
> case OPC_SB:
> case OPC_SH:
> case OPC_SW:
> case OPC_SD:
> + imm12 = split_offset_scalar(s, &addr, offset);
> tcg_out_opc_store(s, opc, addr, data, imm12);
> break;
> case OPC_LB:
> @@ -845,8 +904,31 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
> case OPC_LW:
> case OPC_LWU:
> case OPC_LD:
> + imm12 = split_offset_scalar(s, &addr, offset);
> tcg_out_opc_imm(s, opc, data, addr, imm12);
> break;
> + case OPC_VSE8_V:
> + case OPC_VSE16_V:
> + case OPC_VSE32_V:
> + case OPC_VSE64_V:
> + case OPC_VS1R_V:
> + case OPC_VS2R_V:
> + case OPC_VS4R_V:
> + case OPC_VS8R_V:
> + split_offset_vector(s, &addr, offset);
> + tcg_out_opc_ldst_vec(s, opc, data, addr, true);
> + break;
> + case OPC_VLE8_V:
> + case OPC_VLE16_V:
> + case OPC_VLE32_V:
> + case OPC_VLE64_V:
> + case OPC_VL1RE64_V:
> + case OPC_VL2RE64_V:
> + case OPC_VL4RE64_V:
> + case OPC_VL8RE64_V:
> + split_offset_vector(s, &addr, offset);
> + tcg_out_opc_ldst_vec(s, opc, data, addr, true);
> + break;
> default:
> g_assert_not_reached();
> }
This is more complicated than it needs to be: it calls a combined
function, then uses a switch to separate the cases, then calls separate
functions again. Calling separate functions in the first place is
simpler. E.g.
static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                             TCGReg addr, intptr_t offset)
{
    tcg_debug_assert(data >= TCG_REG_V0);
    tcg_debug_assert(addr < TCG_REG_V0);

    if (offset) {
        tcg_debug_assert(addr != TCG_REG_ZERO);
        if (offset == sextreg(offset, 0, 12)) {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
        }
        addr = TCG_REG_TMP0;
    }

    tcg_out32(s, opc | ((data & 0x1f) << 7) | (addr << 15) | (1 << 25));
}
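As a cross-check on the constants involved: in that final tcg_out32, vd
lands in bits 7..11, rs1 in bits 15..19, and bit 25 is vm=1 (unmasked),
per the RVV memory-instruction layout. A standalone sketch (not part of
the patch) verifying one encoding built from the patch's opcode values:

#include <assert.h>
#include <stdint.h>

/* RVV unit-stride load/store fields, per the V-spec encoding. */
#define ENC_VD(x)   ((uint32_t)(x) << 7)    /* vd: vector register */
#define ENC_RS1(x)  ((uint32_t)(x) << 15)   /* rs1: scalar base register */
#define ENC_VM      (1u << 25)              /* vm = 1: unmasked */

int main(void)
{
    uint32_t opc_vle32_v = 0x6007;  /* OPC_VLE32_V from the patch */

    /* vle32.v v2, (a0): vd = v2, rs1 = x10, nf = 0, lumop = 0. */
    uint32_t insn = opc_vle32_v | ENC_VD(2) | ENC_RS1(10) | ENC_VM;
    assert(insn == 0x02056107);
    return 0;
}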
> static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
> TCGReg arg1, intptr_t arg2)
> {
> - RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
> + RISCVInsn insn;
> +
> + if (type < TCG_TYPE_V64) {
> + insn = (type == TCG_TYPE_I32) ? OPC_LW : OPC_LD;
> + } else {
> + int nf = get_vec_type_bytes(type) / riscv_vlenb;
> +
> + switch (nf) {
> + case 1:
> + insn = OPC_VL1RE64_V;
> + break;
> + case 2:
> + insn = OPC_VL2RE64_V;
> + break;
> + case 4:
> + insn = OPC_VL4RE64_V;
> + break;
> + case 8:
> + insn = OPC_VL8RE64_V;
> + break;
> + default:
> + {
> + int prev_vsew = riscv_set_vec_config_vl(s, type);
> +
> + switch (prev_vsew) {
> + case MO_8:
> + insn = OPC_VLE8_V;
> + break;
> + case MO_16:
> + insn = OPC_VLE16_V;
> + break;
> + case MO_32:
> + insn = OPC_VLE32_V;
> + break;
> + case MO_64:
> + insn = OPC_VLE64_V;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> + }
> + break;
This can be simplified:
    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_LW, data, base, offset);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_LD, data, base, offset);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_ld[] = {
                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
            };
            unsigned idx = type - riscv_lg2_vlenb;
            insn = whole_reg_ld[idx];
        } else {
            static const RISCVInsn unit_stride_ld[] = {
                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);
            insn = unit_stride_ld[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, data, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
and similar for store.
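Spelled out, the store side would mirror this (a sketch under the same
assumed helpers, riscv_lg2_vlenb and set_vtype_len, not tested):

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_SW, data, base, offset);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_SD, data, base, offset);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_st[] = {
                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
            };
            insn = whole_reg_st[type - riscv_lg2_vlenb];
        } else {
            static const RISCVInsn unit_stride_st[] = {
                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);
            insn = unit_stride_st[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, data, base, offset);
        break;
    default:
        g_assert_not_reached();
    }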
r~
On 2024/9/5 14:39, Richard Henderson wrote:
> This is more complicated than it needs to be: it calls a combined
> function, then uses a switch to separate the cases, then calls separate
> functions again. Calling separate functions in the first place is
> simpler.
> [...]
> and similar for store.
Great. We will take this way.
Zhiwei