The vfncvt.f.f.q instruction converts a vector of FP32 elements to a
vector of OFP8 elements. The vfncvt.sat.f.f.q instruction performs the
same conversion with saturation.
The VTYPE.altfmt field selects the destination OFP8 format:
* altfmt = 0: FP32 to OFP8.e4m3
* altfmt = 1: FP32 to OFP8.e5m2
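For reference, the two destination encodings differ in range and special
values. Below is a minimal standalone C sketch of the format parameters;
the type and constant names are illustrative only and not part of this
patch, with values as given by the OCP 8-bit floating point specification:

    /* Illustrative OFP8 format parameters (OCP FP8 spec). */
    typedef struct {
        int exp_bits;     /* exponent field width */
        int frac_bits;    /* fraction field width */
        int bias;         /* exponent bias */
        float max_finite; /* largest finite magnitude */
    } OFP8Format;

    /* E4M3: no infinities, single NaN encoding (S.1111.111). */
    static const OFP8Format ofp8_e4m3 = { 4, 3, 7, 448.0f };
    /* E5M2: IEEE-754-like, with infinities and NaNs. */
    static const OFP8Format ofp8_e5m2 = { 5, 2, 15, 57344.0f };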
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/insn32.decode | 2 +
target/riscv/insn_trans/trans_rvofp8.c.inc | 63 ++++++++++++++++++++++
target/riscv/insn_trans/trans_rvv.c.inc | 39 ++++++++++++++
3 files changed, 104 insertions(+)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 49201c0c20..f2b413c7d4 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -974,6 +974,8 @@ vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm
vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm
# *** Zvfofp8min Extension ***
+vfncvt_f_f_q 010010 . ..... 11001 001 ..... 1010111 @r2_vm
+vfncvt_sat_f_f_q 010010 . ..... 11011 001 ..... 1010111 @r2_vm
vfncvtbf16_sat_f_f_w 010010 . ..... 11111 001 ..... 1010111 @r2_vm
# *** Zvbc vector crypto extension ***
diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
index d28f92e050..619ee4d773 100644
--- a/target/riscv/insn_trans/trans_rvofp8.c.inc
+++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
@@ -12,6 +12,13 @@
} \
} while (0)
+static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
+{
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_sq(s, a->rd, a->rs2, a->vm) &&
+ (s->sew == MO_8);
+}
static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
{
@@ -40,3 +47,59 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
}
return false;
}
+
+static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZVFOFP8MIN(ctx);
+
+ if (zvfofp8min_narrow_quad_check(ctx, a)) {
+ gen_helper_gvec_3_ptr *fn;
+ uint32_t data = 0;
+
+ fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
+ gen_helper_vfncvt_f_f_q_ofp8e4m3;
+
+ gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+ data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+ tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+ vreg_ofs(ctx, a->rs2), tcg_env,
+ ctx->cfg_ptr->vlenb,
+ ctx->cfg_ptr->vlenb, data, fn);
+ finalize_rvv_inst(ctx);
+ return true;
+ }
+ return false;
+}
+
+static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_ZVFOFP8MIN(ctx);
+
+ if (zvfofp8min_narrow_quad_check(ctx, a)) {
+ gen_helper_gvec_3_ptr *fn;
+ uint32_t data = 0;
+
+ fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
+ gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
+
+ gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+ data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+ data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+ tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+ vreg_ofs(ctx, a->rs2), tcg_env,
+ ctx->cfg_ptr->vlenb,
+ ctx->cfg_ptr->vlenb, data, fn);
+ finalize_rvv_inst(ctx);
+ return true;
+ }
+ return false;
+}
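The gen_helper_vfncvt_*_ofp8* helpers referenced above are provided
elsewhere in this series. As a rough standalone sketch of the per-element
saturating semantics such a helper is expected to implement for E4M3
(value level only; the function name is hypothetical, and the real code
operates on encodings through softfloat and raises exception flags):

    #include <math.h>

    /* Hypothetical sketch: FP32 -> OFP8 E4M3 with saturation, returning
     * the rounded value as a float rather than the 8-bit encoding.
     * Assumes the default rounding mode (round-to-nearest-even). */
    static float float_to_e4m3_sat(float x)
    {
        int e;

        if (isnan(x)) {
            return NAN;                  /* E4M3 keeps one NaN encoding */
        }
        if (x > 448.0f) {
            return 448.0f;               /* clamp to max finite, */
        }
        if (x < -448.0f) {
            return -448.0f;              /* including +/-Inf inputs */
        }
        (void)frexpf(x, &e);             /* x = m * 2^e, 0.5 <= |m| < 1 */
        if (e < -5) {
            e = -5;                      /* subnormals: fixed 2^-9 step */
        }
        /* Representable E4M3 values near x are multiples of 2^(e-4). */
        return ldexpf(nearbyintf(ldexpf(x, 4 - e)), e - 4);
    }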
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index bcd45b0aa3..9053b9fb57 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
require_align(vs1, s->lmul);
}
+/*
+ * Common check function for vector narrowing instructions
+ * with a single-width result (SEW) and a quad-width source (4*SEW).
+ *
+ * Rules to be checked here:
+ * 1. The largest vector register group used by an instruction
+ *    cannot be greater than 8 vector registers (Section 31.5.2).
+ * 2. The quad-width SEW cannot be greater than ELEN
+ *    (Section 31.2).
+ * 3. The source vector register number is a multiple of
+ *    4 * LMUL (Section 31.3.4.2).
+ * 4. The destination vector register number is a multiple of
+ *    LMUL (Section 31.3.4.2).
+ * 5. The destination vector register group of a masked vector
+ *    instruction cannot overlap the source mask register (v0)
+ *    (Section 31.5.3).
+ *
+ * Section numbers refer to the RISC-V unprivileged spec.
+ */
+static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
+ int vm)
+{
+ return (s->lmul <= 1) &&
+ (s->sew < MO_32) &&
+ ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
+ require_align(vs2, s->lmul + 2) &&
+ require_align(vd, s->lmul) &&
+ require_vm(vm, vd);
+}
+
+static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
+{
+ bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
+ if (vd != vs) {
+ ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
+ }
+ return ret;
+}
+
/*
* Check function for vector reduction instructions.
*
--
2.52.0
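As a worked example of the alignment rules enforced by
vext_quad_narrow_check_common above: with SEW=8 and LMUL=1, the FP32
source group has EMUL = 4 * LMUL = 4, so vs2 must be a multiple of 4,
while vd only needs LMUL alignment. A hypothetical standalone sketch of
that arithmetic (aligned() mimics the alignment test; it is not the QEMU
require_align() function):

    #include <stdbool.h>
    #include <stdio.h>

    /* vreg must be a multiple of 2^log2_emul to hold the group. */
    static bool aligned(int vreg, int log2_emul)
    {
        return log2_emul <= 0 || (vreg % (1 << log2_emul)) == 0;
    }

    int main(void)
    {
        int lmul = 0;                         /* log2(LMUL) for LMUL=1 */

        printf("%d\n", aligned(8, lmul + 2)); /* vs2 = v8: 1, accepted */
        printf("%d\n", aligned(6, lmul + 2)); /* vs2 = v6: 0, rejected */
        printf("%d\n", aligned(6, lmul));     /* vd = v6: 1, accepted  */
        return 0;
    }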