[PATCH v2 15/17] target/riscv: rvv: Add vfext.vf2 instruction for Zvfofp4min extension

Max Chou posted 17 patches 1 week, 3 days ago
There is a newer version of this series
[PATCH v2 15/17] target/riscv: rvv: Add vfext.vf2 instruction for Zvfofp4min extension
Posted by Max Chou 1 week, 3 days ago
The vfext.vf2 instruction converts a vector of OCP FP4 E2M1
floating-point numbers to a vector of OCP FP8 E4M3 floating-point
numbers.

Signed-off-by: Max Chou <max.chou@sifive.com>
---
 target/riscv/helper.h                      |  3 ++
 target/riscv/insn32.decode                 |  3 ++
 target/riscv/insn_trans/trans_rvofp4.c.inc | 44 ++++++++++++++++++++++
 target/riscv/translate.c                   |  1 +
 target/riscv/vector_helper.c               | 33 ++++++++++++++++
 5 files changed, 84 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvofp4.c.inc

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 356c24d9fb..162303fb6c 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1259,6 +1259,9 @@ DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
 
+/* OFP4 function */
+DEF_HELPER_5(vfext_vf2, void, ptr, ptr, ptr, env, i32)
+
 /* Vector crypto functions */
 DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f2b413c7d4..c58223ebd8 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -754,6 +754,9 @@ vsext_vf2       010010 . ..... 00111 010 ..... 1010111 @r2_vm
 vsext_vf4       010010 . ..... 00101 010 ..... 1010111 @r2_vm
 vsext_vf8       010010 . ..... 00011 010 ..... 1010111 @r2_vm
 
+# Zvfofp4min Extension
+vfext_vf2       010010 . ..... 10110 010 ..... 1010111 @r2_vm
+
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm11
 vsetivli        11 .......... ..... 111 ..... 1010111  @r2_zimm10
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvofp4.c.inc b/target/riscv/insn_trans/trans_rvofp4.c.inc
new file mode 100644
index 0000000000..91bf50834f
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvofp4.c.inc
@@ -0,0 +1,44 @@
+/*
+ * RISC-V translation routines for the OFP4 Standard Extensions.
+ *
+ * Copyright (C) 2025 SiFive, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/* Operand and configuration check for vfext.vf2 (Zvfofp4min). */
+static bool vext_zvfofp4min_check(DisasContext *s, arg_rmr *a)
+{
+    if (!s->cfg_ptr->ext_zvfofp4min || s->sew != MO_8 ||
+        !vext_check_altfmt(s, -1) || s->lmul < -2 ||
+        !require_rvv(s) || !vext_check_isa_ill(s)) {
+        return false;
+    }
+
+    /* vd must differ from vs2; vs2 is validated at s->lmul - 1. */
+    return a->rd != a->rs2 && require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul - 1) && require_vm(a->vm, a->rd) &&
+           require_noover(a->rd, s->lmul, a->rs2, s->lmul - 1);
+}
+
+/* Translate vfext.vf2: emit the helper call if the legality check passes. */
+static bool trans_vfext_vf2(DisasContext *s, arg_rmr *a)
+{
+    uint32_t desc_data = 0;
+
+    if (!vext_zvfofp4min_check(s, a)) {
+        return false;
+    }
+
+    desc_data = FIELD_DP32(desc_data, VDATA, VM, a->vm);
+    desc_data = FIELD_DP32(desc_data, VDATA, LMUL, s->lmul);
+    desc_data = FIELD_DP32(desc_data, VDATA, VTA, s->vta);
+    desc_data = FIELD_DP32(desc_data, VDATA, VMA, s->vma);
+    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
+                       vreg_ofs(s, a->rs2), tcg_env,
+                       s->cfg_ptr->vlenb, s->cfg_ptr->vlenb, desc_data,
+                       gen_helper_vfext_vf2);
+    tcg_gen_movi_tl(cpu_vstart, 0);
+    finalize_rvv_inst(s);
+    return true;
+}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 137022d7fb..bf403785b5 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1220,6 +1220,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
 #include "insn_trans/trans_svinval.c.inc"
 #include "insn_trans/trans_rvbf16.c.inc"
 #include "insn_trans/trans_rvofp8.c.inc"
+#include "insn_trans/trans_rvofp4.c.inc"
 #include "decode-xthead.c.inc"
 #include "decode-xmips.c.inc"
 #include "insn_trans/trans_xthead.c.inc"
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 418212973d..a87728f130 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5121,6 +5121,7 @@ RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
 GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
 GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
 
+/* Zvfofp4min: vfext.vf2 - OFP4 E2M1 to OFP8 E4M3 conversion */
 /*
  * Vector Reduction Operations
  */
@@ -5920,3 +5921,35 @@ GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
+
+/* vfext.vf2: convert packed OFP4 E2M1 elements of vs2 to OFP8 E4M3 in vd. */
+void HELPER(vfext_vf2)(void *vd, void *v0, void *vs2, CPURISCVState *env,
+                       uint32_t desc)
+{
+    float_status fp_status = env->fp_status;
+    uint32_t vl = env->vl;
+    uint32_t vm = vext_vm(desc);
+    uint32_t esz = sizeof(uint8_t);
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
+    uint32_t i;
+    uint8_t input;
+
+    VSTART_CHECK_EARLY_EXIT(env, vl);
+
+    for (i = env->vstart; i < vl; ++i) {
+        if (!vm && !vext_elem_mask(v0, i)) {
+            /* set masked-off elements to 1s */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+            continue;
+        }
+        /* Two 4-bit elements per source byte; odd i is the high nibble. */
+        input = (*((uint8_t *)vs2 + H1(i / 2)) >> ((i % 2) ? 4 : 0)) & 0xf;
+        *((uint8_t *)vd + H1(i)) = float4_e2m1_to_float8_e4m3(input,
+                                                              &fp_status);
+    }
+    env->vstart = 0;
+    /* set tail elements to 1s */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
-- 
2.52.0