target/riscv/helper.h | 4 +++ target/riscv/insn_trans/trans_rvv.c.inc | 44 ++++++++----------------- target/riscv/vector_helper.c | 18 ++++++++++ 3 files changed, 36 insertions(+), 30 deletions(-)
The RISC-V vector spec defines vmv.s.x and vfmv.s.f as writing
element 0 of the destination register while the remaining destination
elements follow the current tail policy. When QEMU runs with
rvv_ta_all_1s enabled and the tail-agnostic policy is selected, those
elements must therefore become all 1s.
Previously, QEMU handled both instructions as translation-time special
cases that directly wrote vd[0] and skipped the usual tail processing. As
a result, vmv.s.x and vfmv.s.f left the remaining destination elements
unchanged instead of applying the configured tail policy.
Fix this by routing both instructions through a helper that writes
vd[0] and then treats the rest of the destination register as tail,
reusing the existing agnostic-element fill logic.
Signed-off-by: Zhongyao Chen <chen.zhongyao@zte.com.cn>
---
target/riscv/helper.h | 4 +++
target/riscv/insn_trans/trans_rvv.c.inc | 44 ++++++++-----------------
target/riscv/vector_helper.c | 18 ++++++++++
3 files changed, 36 insertions(+), 30 deletions(-)
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index b785456ee0..5a7f043edb 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -660,6 +660,10 @@ DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32)
DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32)
DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32)
DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32)
+DEF_HELPER_4(vset_velem0_b, void, ptr, i64, env, i32)
+DEF_HELPER_4(vset_velem0_h, void, ptr, i64, env, i32)
+DEF_HELPER_4(vset_velem0_w, void, ptr, i64, env, i32)
+DEF_HELPER_4(vset_velem0_d, void, ptr, i64, env, i32)
DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 4df9a40b44..6a966c35c3 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3381,37 +3381,21 @@ static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
load_element(dest, tcg_env, endian_ofs(s, vreg, idx), s->sew, sign);
}
-/* Integer Scalar Move Instruction */
+typedef void gen_helper_vset_velem0(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);
-static void store_element(TCGv_i64 val, TCGv_ptr base,
- int ofs, int sew)
+static void vec_element_storei_tail(DisasContext *s, int vreg, TCGv_i64 val)
{
- switch (sew) {
- case MO_8:
- tcg_gen_st8_i64(val, base, ofs);
- break;
- case MO_16:
- tcg_gen_st16_i64(val, base, ofs);
- break;
- case MO_32:
- tcg_gen_st32_i64(val, base, ofs);
- break;
- case MO_64:
- tcg_gen_st_i64(val, base, ofs);
- break;
- default:
- g_assert_not_reached();
- }
-}
+ static gen_helper_vset_velem0 * const fns[4] = {
+ gen_helper_vset_velem0_b, gen_helper_vset_velem0_h,
+ gen_helper_vset_velem0_w, gen_helper_vset_velem0_d,
+ };
+ TCGv_ptr dest = tcg_temp_new_ptr();
+ uint32_t data = FIELD_DP32(0, VDATA, VTA, s->vta);
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
+ s->cfg_ptr->vlenb, data));
-/*
- * Store vreg[idx] = val.
- * The index must be in range of VLMAX.
- */
-static void vec_element_storei(DisasContext *s, int vreg,
- int idx, TCGv_i64 val)
-{
- store_element(val, tcg_env, endian_ofs(s, vreg, idx), s->sew);
+ tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vreg));
+ fns[s->sew](dest, val, tcg_env, desc);
}
/* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
@@ -3458,7 +3442,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
*/
s1 = get_gpr(s, a->rs1, EXT_NONE);
tcg_gen_ext_tl_i64(t1, s1);
- vec_element_storei(s, a->rd, 0, t1);
+ vec_element_storei_tail(s, a->rd, t1);
gen_set_label(over);
tcg_gen_movi_tl(cpu_vstart, 0);
finalize_rvv_inst(s);
@@ -3514,7 +3498,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
t1 = tcg_temp_new_i64();
do_nanbox(s, t1, cpu_fpr[a->rs1]);
- vec_element_storei(s, a->rd, 0, t1);
+ vec_element_storei_tail(s, a->rd, t1);
gen_set_label(over);
tcg_gen_movi_tl(cpu_vstart, 0);
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 83dd26314d..6c7af25d82 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2112,6 +2112,24 @@ GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
+#define GEN_VEXT_SET_VELEM0(NAME, ETYPE, H) \
+void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
+ uint32_t desc) \
+{ \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; \
+ uint32_t vta = vext_vta(desc); \
+ \
+ *((ETYPE *)vd + H(0)) = (ETYPE)s1; \
+ /* Treat every element past vd[0] as tail for scalar-to-vector moves. */ \
+ vext_set_elems_1s(vd, vta, esz, vlenb); \
+}
+
+GEN_VEXT_SET_VELEM0(vset_velem0_b, int8_t, H1)
+GEN_VEXT_SET_VELEM0(vset_velem0_h, int16_t, H2)
+GEN_VEXT_SET_VELEM0(vset_velem0_w, int32_t, H4)
+GEN_VEXT_SET_VELEM0(vset_velem0_d, int64_t, H8)
+
#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
--
2.43.0
On Wed, May 6, 2026 at 10:48 PM <chen.zhongyao@zte.com.cn> wrote:
>
> The risc-v vector spec defines vmv.s.x and vfmv.s.f as writing
> element 0 of the destination register while the remaining destination
> elements follow the current tail policy. When QEMU runs with
> rvv_ta_all_1s enabled, those elements must therefore become all 1s.
>
> QEMU handled both instructions as translation-time special cases that
> directly wrote vd[0] and skipped the usual tail processing. As a result,
> vmv.s.x and vfmv.s.f left the remaining destination elements unchanged
> instead of applying the configured tail policy.
>
> Fix this by routing both instructions through a helper that writes
> vd[0] and then treats the rest of the destination register as tail,
> reusing the existing agnostic-element fill logic.
>
> Signed-off-by: Zhongyao Chen <chen.zhongyao@zte.com.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Alistair
> ---
> target/riscv/helper.h | 4 +++
> target/riscv/insn_trans/trans_rvv.c.inc | 44 ++++++++-----------------
> target/riscv/vector_helper.c | 18 ++++++++++
> 3 files changed, 36 insertions(+), 30 deletions(-)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index b785456ee0..5a7f043edb 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -660,6 +660,10 @@ DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32)
> DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32)
> DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32)
> DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_b, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_h, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_w, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_d, void, ptr, i64, env, i32)
>
> DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 4df9a40b44..6a966c35c3 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -3381,37 +3381,21 @@ static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
> load_element(dest, tcg_env, endian_ofs(s, vreg, idx), s->sew, sign);
> }
>
> -/* Integer Scalar Move Instruction */
> +typedef void gen_helper_vset_velem0(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);
>
> -static void store_element(TCGv_i64 val, TCGv_ptr base,
> - int ofs, int sew)
> +static void vec_element_storei_tail(DisasContext *s, int vreg, TCGv_i64 val)
> {
> - switch (sew) {
> - case MO_8:
> - tcg_gen_st8_i64(val, base, ofs);
> - break;
> - case MO_16:
> - tcg_gen_st16_i64(val, base, ofs);
> - break;
> - case MO_32:
> - tcg_gen_st32_i64(val, base, ofs);
> - break;
> - case MO_64:
> - tcg_gen_st_i64(val, base, ofs);
> - break;
> - default:
> - g_assert_not_reached();
> - }
> -}
> + static gen_helper_vset_velem0 * const fns[4] = {
> + gen_helper_vset_velem0_b, gen_helper_vset_velem0_h,
> + gen_helper_vset_velem0_w, gen_helper_vset_velem0_d,
> + };
> + TCGv_ptr dest = tcg_temp_new_ptr();
> + uint32_t data = FIELD_DP32(0, VDATA, VTA, s->vta);
> + TCGv_i32 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
> + s->cfg_ptr->vlenb, data));
>
> -/*
> - * Store vreg[idx] = val.
> - * The index must be in range of VLMAX.
> - */
> -static void vec_element_storei(DisasContext *s, int vreg,
> - int idx, TCGv_i64 val)
> -{
> - store_element(val, tcg_env, endian_ofs(s, vreg, idx), s->sew);
> + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vreg));
> + fns[s->sew](dest, val, tcg_env, desc);
> }
>
> /* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
> @@ -3458,7 +3442,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
> */
> s1 = get_gpr(s, a->rs1, EXT_NONE);
> tcg_gen_ext_tl_i64(t1, s1);
> - vec_element_storei(s, a->rd, 0, t1);
> + vec_element_storei_tail(s, a->rd, t1);
> gen_set_label(over);
> tcg_gen_movi_tl(cpu_vstart, 0);
> finalize_rvv_inst(s);
> @@ -3514,7 +3498,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
> t1 = tcg_temp_new_i64();
> do_nanbox(s, t1, cpu_fpr[a->rs1]);
>
> - vec_element_storei(s, a->rd, 0, t1);
> + vec_element_storei_tail(s, a->rd, t1);
>
> gen_set_label(over);
> tcg_gen_movi_tl(cpu_vstart, 0);
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 83dd26314d..6c7af25d82 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -2112,6 +2112,24 @@ GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
> GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
> GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
>
> +#define GEN_VEXT_SET_VELEM0(NAME, ETYPE, H) \
> +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
> + uint32_t desc) \
> +{ \
> + uint32_t esz = sizeof(ETYPE); \
> + uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; \
> + uint32_t vta = vext_vta(desc); \
> + \
> + *((ETYPE *)vd + H(0)) = (ETYPE)s1; \
> + /* Treat every element past vd[0] as tail for scalar-to-vector moves. */ \
> + vext_set_elems_1s(vd, vta, esz, vlenb); \
> +}
> +
> +GEN_VEXT_SET_VELEM0(vset_velem0_b, int8_t, H1)
> +GEN_VEXT_SET_VELEM0(vset_velem0_h, int16_t, H2)
> +GEN_VEXT_SET_VELEM0(vset_velem0_w, int32_t, H4)
> +GEN_VEXT_SET_VELEM0(vset_velem0_d, int64_t, H8)
> +
> #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
> void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
> CPURISCVState *env, uint32_t desc) \
> --
> 2.43.0
>
On Wed, May 6, 2026 at 10:48 PM <chen.zhongyao@zte.com.cn> wrote:
>
> The risc-v vector spec defines vmv.s.x and vfmv.s.f as writing
> element 0 of the destination register while the remaining destination
> elements follow the current tail policy. When QEMU runs with
> rvv_ta_all_1s enabled, those elements must therefore become all 1s.
>
> QEMU handled both instructions as translation-time special cases that
> directly wrote vd[0] and skipped the usual tail processing. As a result,
> vmv.s.x and vfmv.s.f left the remaining destination elements unchanged
> instead of applying the configured tail policy.
>
> Fix this by routing both instructions through a helper that writes
> vd[0] and then treats the rest of the destination register as tail,
> reusing the existing agnostic-element fill logic.
>
> Signed-off-by: Zhongyao Chen <chen.zhongyao@zte.com.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Alistair
> ---
> target/riscv/helper.h | 4 +++
> target/riscv/insn_trans/trans_rvv.c.inc | 44 ++++++++-----------------
> target/riscv/vector_helper.c | 18 ++++++++++
> 3 files changed, 36 insertions(+), 30 deletions(-)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index b785456ee0..5a7f043edb 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -660,6 +660,10 @@ DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32)
> DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32)
> DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32)
> DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_b, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_h, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_w, void, ptr, i64, env, i32)
> +DEF_HELPER_4(vset_velem0_d, void, ptr, i64, env, i32)
>
> DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 4df9a40b44..6a966c35c3 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -3381,37 +3381,21 @@ static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
> load_element(dest, tcg_env, endian_ofs(s, vreg, idx), s->sew, sign);
> }
>
> -/* Integer Scalar Move Instruction */
> +typedef void gen_helper_vset_velem0(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32);
>
> -static void store_element(TCGv_i64 val, TCGv_ptr base,
> - int ofs, int sew)
> +static void vec_element_storei_tail(DisasContext *s, int vreg, TCGv_i64 val)
> {
> - switch (sew) {
> - case MO_8:
> - tcg_gen_st8_i64(val, base, ofs);
> - break;
> - case MO_16:
> - tcg_gen_st16_i64(val, base, ofs);
> - break;
> - case MO_32:
> - tcg_gen_st32_i64(val, base, ofs);
> - break;
> - case MO_64:
> - tcg_gen_st_i64(val, base, ofs);
> - break;
> - default:
> - g_assert_not_reached();
> - }
> -}
> + static gen_helper_vset_velem0 * const fns[4] = {
> + gen_helper_vset_velem0_b, gen_helper_vset_velem0_h,
> + gen_helper_vset_velem0_w, gen_helper_vset_velem0_d,
> + };
> + TCGv_ptr dest = tcg_temp_new_ptr();
> + uint32_t data = FIELD_DP32(0, VDATA, VTA, s->vta);
> + TCGv_i32 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
> + s->cfg_ptr->vlenb, data));
>
> -/*
> - * Store vreg[idx] = val.
> - * The index must be in range of VLMAX.
> - */
> -static void vec_element_storei(DisasContext *s, int vreg,
> - int idx, TCGv_i64 val)
> -{
> - store_element(val, tcg_env, endian_ofs(s, vreg, idx), s->sew);
> + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vreg));
> + fns[s->sew](dest, val, tcg_env, desc);
> }
>
> /* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
> @@ -3458,7 +3442,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
> */
> s1 = get_gpr(s, a->rs1, EXT_NONE);
> tcg_gen_ext_tl_i64(t1, s1);
> - vec_element_storei(s, a->rd, 0, t1);
> + vec_element_storei_tail(s, a->rd, t1);
> gen_set_label(over);
> tcg_gen_movi_tl(cpu_vstart, 0);
> finalize_rvv_inst(s);
> @@ -3514,7 +3498,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
> t1 = tcg_temp_new_i64();
> do_nanbox(s, t1, cpu_fpr[a->rs1]);
>
> - vec_element_storei(s, a->rd, 0, t1);
> + vec_element_storei_tail(s, a->rd, t1);
>
> gen_set_label(over);
> tcg_gen_movi_tl(cpu_vstart, 0);
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 83dd26314d..6c7af25d82 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -2112,6 +2112,24 @@ GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
> GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
> GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
>
> +#define GEN_VEXT_SET_VELEM0(NAME, ETYPE, H) \
> +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
> + uint32_t desc) \
> +{ \
> + uint32_t esz = sizeof(ETYPE); \
> + uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; \
> + uint32_t vta = vext_vta(desc); \
> + \
> + *((ETYPE *)vd + H(0)) = (ETYPE)s1; \
> + /* Treat every element past vd[0] as tail for scalar-to-vector moves. */ \
> + vext_set_elems_1s(vd, vta, esz, vlenb); \
> +}
> +
> +GEN_VEXT_SET_VELEM0(vset_velem0_b, int8_t, H1)
> +GEN_VEXT_SET_VELEM0(vset_velem0_h, int16_t, H2)
> +GEN_VEXT_SET_VELEM0(vset_velem0_w, int32_t, H4)
> +GEN_VEXT_SET_VELEM0(vset_velem0_d, int64_t, H8)
> +
> #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
> void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
> CPURISCVState *env, uint32_t desc) \
> --
> 2.43.0
>
© 2016 - 2026 Red Hat, Inc.