According to version 20250508 of the unprivileged specification:
- vtype: bits 0..7 used, bit XLEN-1 is the vill (illegal) flag, rest reserved
=> fix to 64 bits.
- vxsat: bit 0 used; vxrm, which occupies bits 1..2 of vcsr, is stored
separately, and the remaining bits are set to 0
=> fix to 8 bits.
- vxrm: the 2 lowest bits are used for the rounding mode, rest set to 0
=> fix to 8 bits.
- vstart: maximum value of VLMAX-1, where VLMAX is at most 2^16
=> fix to 32 bits, as vstart is mapped to a TCG global.
- vl: maximum value of VLEN, which is at most 2^16
=> fix to 32 bits, as vl is mapped to a TCG global.
Fields are shuffled for reduced padding.
Signed-off-by: Anton Johansson <anjo@rev.ng>
---
target/riscv/cpu.h | 12 ++++++------
target/riscv/machine.c | 10 +++++-----
target/riscv/translate.c | 12 ++++++++----
target/riscv/vector_helper.c | 22 ++++++++++++++++++----
target/riscv/insn_trans/trans_rvv.c.inc | 22 +++++++++++-----------
5 files changed, 48 insertions(+), 30 deletions(-)
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 2cd69fa150..8f844405bd 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -191,7 +191,7 @@ FIELD(VTYPE, VSEW, 3, 3)
FIELD(VTYPE, VTA, 6, 1)
FIELD(VTYPE, VMA, 7, 1)
FIELD(VTYPE, VEDIV, 8, 2)
-FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11)
+FIELD(VTYPE, RESERVED, 10, sizeof(uint64_t) * 8 - 11)
typedef struct PMUCTRState {
/* Current value of a counter */
@@ -217,11 +217,11 @@ struct CPUArchState {
/* vector coprocessor state. */
uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
- target_ulong vxrm;
- target_ulong vxsat;
- target_ulong vl;
- target_ulong vstart;
- target_ulong vtype;
+ uint64_t vtype;
+ uint32_t vl;
+ uint32_t vstart;
+ uint8_t vxrm;
+ uint8_t vxsat;
bool vill;
target_ulong pc;
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index 9a14a805ef..8e3062aabb 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -141,11 +141,11 @@ static const VMStateDescription vmstate_vector = {
.needed = vector_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64),
- VMSTATE_UINTTL(env.vxrm, RISCVCPU),
- VMSTATE_UINTTL(env.vxsat, RISCVCPU),
- VMSTATE_UINTTL(env.vl, RISCVCPU),
- VMSTATE_UINTTL(env.vstart, RISCVCPU),
- VMSTATE_UINTTL(env.vtype, RISCVCPU),
+ VMSTATE_UINT8(env.vxrm, RISCVCPU),
+ VMSTATE_UINT8(env.vxsat, RISCVCPU),
+ VMSTATE_UINT32(env.vl, RISCVCPU),
+ VMSTATE_UINT32(env.vstart, RISCVCPU),
+ VMSTATE_UINT64(env.vtype, RISCVCPU),
VMSTATE_BOOL(env.vill, RISCVCPU),
VMSTATE_END_OF_LIST()
}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 2f8c7a6465..5e8fc3e543 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -38,8 +38,9 @@
#include "tcg/tcg-cpu.h"
/* global register indices */
-static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart;
+static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc;
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
+static TCGv_i32 cpu_vl, cpu_vstart;
static TCGv load_res;
static TCGv load_val;
@@ -1439,6 +1440,10 @@ void riscv_translate_init(void)
size_t field_offset = 0;
#endif
+ /* 32 bits in size, no offset needed */
+ size_t vl_offset = offsetof(CPURISCVState, vl);
+ size_t vstart_offset = offsetof(CPURISCVState, vstart);
+
for (i = 1; i < 32; i++) {
cpu_gpr[i] = tcg_global_mem_new(tcg_env,
offsetof(CPURISCVState, gpr[i]) + field_offset,
@@ -1454,9 +1459,8 @@ void riscv_translate_init(void)
}
cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, pc), "pc");
- cpu_vl = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vl), "vl");
- cpu_vstart = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vstart),
- "vstart");
+ cpu_vl = tcg_global_mem_new_i32(tcg_env, vl_offset, "vl");
+ cpu_vstart = tcg_global_mem_new_i32(tcg_env, vstart_offset, "vstart");
load_res = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_res),
"load_res");
load_val = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_val),
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 7c67d67a13..2fc5348044 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -360,6 +360,12 @@ vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
uint32_t evl = env->vstart + elems;
MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
+ /*
+ * Maximum vector length is VLMAX == 2^16 == LMUL * VLEN / SEW, and
+ * occurs for LMUL == 8, SEW == 8, VLEN == 2^16.
+ */
+ g_assert(env->vstart < UINT16_MAX && UINT16_MAX - env->vstart >= elems);
+
/* Check page permission/pmp/watchpoint/etc. */
probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags,
true);
@@ -2594,19 +2600,27 @@ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
d1 = extract64(v, shift - 1, 1);
D1 = extract64(v, 0, shift);
- if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
+ switch (vxrm) {
+ case 0:
+ /* round-to-nearest-up (add +0.5 LSB) */
return d1;
- } else if (vxrm == 1) { /* round-to-nearest-even */
+ case 1:
+ /* round-to-nearest-even */
if (shift > 1) {
D2 = extract64(v, 0, shift - 1);
return d1 & ((D2 != 0) | d);
} else {
return d1 & d;
}
- } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
+ case 2:
+ /* round-down (truncate) */
+ return 0;
+ case 3:
+ /* round-to-odd (OR bits into LSB, aka "jam") */
return !d & (D1 != 0);
+ default:
+ g_assert_not_reached();
}
- return 0; /* round-down (truncate) */
}
static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 71f98fb350..f1b624922a 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -194,7 +194,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
if (rd == 0 && rs1 == 0) {
s1 = tcg_temp_new();
- tcg_gen_mov_tl(s1, cpu_vl);
+ tcg_gen_ext_i32_tl(s1, cpu_vl);
} else if (rs1 == 0) {
/* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
s1 = tcg_constant_tl(RV_VLEN_MAX);
@@ -1213,9 +1213,9 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
MO_LE | MO_64 | atomicity);
}
if (i == size - 8) {
- tcg_gen_movi_tl(cpu_vstart, 0);
+ tcg_gen_movi_i32(cpu_vstart, 0);
} else {
- tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz);
+ tcg_gen_addi_i32(cpu_vstart, cpu_vstart, 8 >> log2_esz);
}
}
} else {
@@ -1231,9 +1231,9 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
MO_LE | MO_32 | atomicity);
}
if (i == size - 4) {
- tcg_gen_movi_tl(cpu_vstart, 0);
+ tcg_gen_movi_i32(cpu_vstart, 0);
} else {
- tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz);
+ tcg_gen_addi_i32(cpu_vstart, cpu_vstart, 4 >> log2_esz);
}
}
}
@@ -3459,7 +3459,7 @@ static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
vec_element_loadi(s, t1, a->rs2, 0, true);
tcg_gen_trunc_i64_tl(dest, t1);
gen_set_gpr(s, a->rd, dest);
- tcg_gen_movi_tl(cpu_vstart, 0);
+ tcg_gen_movi_i32(cpu_vstart, 0);
finalize_rvv_inst(s);
return true;
}
@@ -3476,7 +3476,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
TCGv s1;
TCGLabel *over = gen_new_label();
- tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+ tcg_gen_brcond_i32(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
t1 = tcg_temp_new_i64();
@@ -3488,7 +3488,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
tcg_gen_ext_tl_i64(t1, s1);
vec_element_storei(s, a->rd, 0, t1);
gen_set_label(over);
- tcg_gen_movi_tl(cpu_vstart, 0);
+ tcg_gen_movi_i32(cpu_vstart, 0);
finalize_rvv_inst(s);
return true;
}
@@ -3516,7 +3516,7 @@ static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
}
mark_fs_dirty(s);
- tcg_gen_movi_tl(cpu_vstart, 0);
+ tcg_gen_movi_i32(cpu_vstart, 0);
finalize_rvv_inst(s);
return true;
}
@@ -3536,7 +3536,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
TCGLabel *over = gen_new_label();
/* if vstart >= vl, skip vector register write back */
- tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+ tcg_gen_brcond_i32(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
/* NaN-box f[rs1] */
t1 = tcg_temp_new_i64();
@@ -3545,7 +3545,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
vec_element_storei(s, a->rd, 0, t1);
gen_set_label(over);
- tcg_gen_movi_tl(cpu_vstart, 0);
+ tcg_gen_movi_i32(cpu_vstart, 0);
finalize_rvv_inst(s);
return true;
}
--
2.51.0