[v3] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support

[PATCH v3 17/19] target/riscv: rvv: Add vfext.vf2 instruction for Zvfofp4min extension

Posted by Max Chou 3 days ago

The vfext.vf2 instruction converts a vector of OCP FP4 E2M1
floating-point numbers to a vector of OCP FP8 E4M3 floating-point
numbers.

Signed-off-by: Max Chou <max.chou@sifive.com>
---
 target/riscv/helper.h                      |  3 ++
 target/riscv/insn32.decode                 |  3 ++
 target/riscv/insn_trans/trans_rvofp4.c.inc | 43 ++++++++++++++++++++++
 target/riscv/translate.c                   |  1 +
 target/riscv/vector_helper.c               | 33 +++++++++++++++++
 5 files changed, 83 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvofp4.c.inc

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 356c24d9fb..162303fb6c 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1259,6 +1259,9 @@ DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
 
+/* OFP4 function */
+DEF_HELPER_5(vfext_vf2, void, ptr, ptr, ptr, env, i32)
+
 /* Vector crypto functions */
 DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f2b413c7d4..c58223ebd8 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -754,6 +754,9 @@ vsext_vf2       010010 . ..... 00111 010 ..... 1010111 @r2_vm
 vsext_vf4       010010 . ..... 00101 010 ..... 1010111 @r2_vm
 vsext_vf8       010010 . ..... 00011 010 ..... 1010111 @r2_vm
 
+# Zvfofp4min Extension
+vfext_vf2       010010 . ..... 10110 010 ..... 1010111 @r2_vm
+
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm11
 vsetivli        11 .......... ..... 111 ..... 1010111  @r2_zimm10
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvofp4.c.inc b/target/riscv/insn_trans/trans_rvofp4.c.inc
new file mode 100644
index 0000000000..0fb5d7d534
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvofp4.c.inc
@@ -0,0 +1,43 @@
+/*
+ * RISC-V translation routines for the OFP4 Standard Extensions.
+ *
+ * Copyright (C) 2025 SiFive, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+static bool vext_zvfofp4min_check(DisasContext *s, arg_rmr *a)
+{
+    return s->cfg_ptr->ext_zvfofp4min && /* all terms must hold to translate */
+           (s->sew == MO_8) && /* only accepted when SEW is MO_8 */
+           vext_check_altfmt(s, -1) &&
+           (s->lmul >= -2) && /* bounds the lmul - 1 used for vs2 below */
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           (a->rd != a->rs2) && /* vd and vs2 must be different registers */
+           require_align(a->rd, s->lmul) &&
+           require_align(a->rs2, s->lmul - 1) && /* vs2 checked at lmul - 1 */
+           require_vm(a->vm, a->rd) &&
+           require_noover(a->rd, s->lmul, a->rs2, s->lmul - 1);
+}
+
+static bool trans_vfext_vf2(DisasContext *s, arg_rmr *a)
+{
+    if (vext_zvfofp4min_check(s, a)) { /* legal form: emit the helper call */
+        uint32_t data = 0; /* VDATA fields handed to the helper */
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, s->vta);
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), /* vd, v0 */
+                           vreg_ofs(s, a->rs2), tcg_env, /* vs2, env */
+                           s->cfg_ptr->vlenb, s->cfg_ptr->vlenb, data,
+                           gen_helper_vfext_vf2);
+        tcg_gen_movi_tl(cpu_vstart, 0); /* store 0 to vstart after the call */
+        finalize_rvv_inst(s);
+
+        return true;
+    }
+    return false; /* check failed: nothing was emitted */
+}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 137022d7fb..bf403785b5 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1220,6 +1220,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
 #include "insn_trans/trans_svinval.c.inc"
 #include "insn_trans/trans_rvbf16.c.inc"
 #include "insn_trans/trans_rvofp8.c.inc"
+#include "insn_trans/trans_rvofp4.c.inc"
 #include "decode-xthead.c.inc"
 #include "decode-xmips.c.inc"
 #include "insn_trans/trans_xthead.c.inc"
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 418212973d..a87728f130 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5121,6 +5121,7 @@ RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
 GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
 GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
 
+/* Zvfofp4min: vfext.vf2 - OFP4 E2M1 to OFP8 E4M3 conversion */
 /*
  * Vector Reduction Operations
  */
@@ -5920,3 +5921,35 @@ GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
+
+/* vfext.vf2 helper: expand packed 4-bit elements of vs2 into bytes of vd. */
+void HELPER(vfext_vf2)(void *vd, void *v0, void *vs2, CPURISCVState *env,
+                       uint32_t desc)
+{
+    float_status fp_status = env->fp_status; /* local copy, not written back */
+    uint32_t vl = env->vl;
+    uint32_t vm = vext_vm(desc);
+    uint32_t esz = sizeof(uint8_t); /* destination elements are one byte */
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vma = vext_vma(desc);
+    uint32_t i;
+
+    VSTART_CHECK_EARLY_EXIT(env, vl);
+
+    for (i = env->vstart; i < vl; ++i) {
+        if (!vm && !vext_elem_mask(v0, i)) {
+            /* masked-off: apply vma via vext_set_elems_1s(), no conversion */
+            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
+            continue;
+        }
+        /* source byte i / 2: even i -> low nibble, odd i -> high nibble */
+        uint8_t input = *((uint8_t *)vs2 + H1((i % 2 ? i - 1 : i) / 2));
+        input = (i % 2) ? ((input >> 4) & 0xf) : (input & 0xf);
+        *((uint8_t *)vd + H1(i)) = float4_e2m1_to_float8_e4m3(input,
+                                                              &fp_status);
+    }
+    env->vstart = 0;
+    /* tail [vl, total_elems): apply vta via vext_set_elems_1s() */
+    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
+}
-- 
2.52.0

Re: [PATCH v3 17/19] target/riscv: rvv: Add vfext.vf2 instruction for Zvfofp4min extension

Posted by Chao Liu 2 days, 1 hour ago

On Wed, Feb 04, 2026 at 01:17:53PM +0800, Max Chou wrote:
> The vfext.vf2 instruction converts a vector of OCP FP4 E2M1
> floating-point numbers to a vector of OFP FP8 E4M3 floating-points
> numbers.
> 
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
>  target/riscv/helper.h                      |  3 ++
>  target/riscv/insn32.decode                 |  3 ++
>  target/riscv/insn_trans/trans_rvofp4.c.inc | 43 ++++++++++++++++++++++
>  target/riscv/translate.c                   |  1 +
>  target/riscv/vector_helper.c               | 33 +++++++++++++++++
>  5 files changed, 83 insertions(+)
>  create mode 100644 target/riscv/insn_trans/trans_rvofp4.c.inc
> 
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index 356c24d9fb..162303fb6c 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -1259,6 +1259,9 @@ DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
>  DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
>  DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
>  
> +/* OFP4 function */
> +DEF_HELPER_5(vfext_vf2, void, ptr, ptr, ptr, env, i32)
> +
>  /* Vector crypto functions */
>  DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
>  DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index f2b413c7d4..c58223ebd8 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -754,6 +754,9 @@ vsext_vf2       010010 . ..... 00111 010 ..... 1010111 @r2_vm
>  vsext_vf4       010010 . ..... 00101 010 ..... 1010111 @r2_vm
>  vsext_vf8       010010 . ..... 00011 010 ..... 1010111 @r2_vm
>  
> +# Zvfofp4min Extension
> +vfext_vf2       010010 . ..... 10110 010 ..... 1010111 @r2_vm
> +
>  vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm11
>  vsetivli        11 .......... ..... 111 ..... 1010111  @r2_zimm10
>  vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
> diff --git a/target/riscv/insn_trans/trans_rvofp4.c.inc b/target/riscv/insn_trans/trans_rvofp4.c.inc
> new file mode 100644
> index 0000000000..0fb5d7d534
> --- /dev/null
> +++ b/target/riscv/insn_trans/trans_rvofp4.c.inc
> @@ -0,0 +1,43 @@
> +/*
> + * RISC-V translation routines for the OFP4 Standard Extensions.
> + *
> + * Copyright (C) 2025 SiFive, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +static bool vext_zvfofp4min_check(DisasContext *s, arg_rmr *a)
> +{
> +    return s->cfg_ptr->ext_zvfofp4min &&
> +           (s->sew == MO_8) &&
> +           vext_check_altfmt(s, -1) &&
> +           (s->lmul >= -2) &&
> +           require_rvv(s) &&
> +           vext_check_isa_ill(s) &&
> +           (a->rd != a->rs2) &&
> +           require_align(a->rd, s->lmul) &&
> +           require_align(a->rs2, s->lmul - 1) &&
> +           require_vm(a->vm, a->rd) &&
> +           require_noover(a->rd, s->lmul, a->rs2, s->lmul - 1);
> +}
> +
> +static bool trans_vfext_vf2(DisasContext *s, arg_rmr *a)
> +{
> +    if (vext_zvfofp4min_check(s, a)) {
> +        uint32_t data = 0;
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, s->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, s->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
> +                           vreg_ofs(s, a->rs2), tcg_env,
> +                           s->cfg_ptr->vlenb, s->cfg_ptr->vlenb, data,
> +                           gen_helper_vfext_vf2);
> +        tcg_gen_movi_tl(cpu_vstart, 0);
> +        finalize_rvv_inst(s);
> +
> +        return true;
> +    }
> +    return false;
> +}
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 137022d7fb..bf403785b5 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -1220,6 +1220,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
>  #include "insn_trans/trans_svinval.c.inc"
>  #include "insn_trans/trans_rvbf16.c.inc"
>  #include "insn_trans/trans_rvofp8.c.inc"
> +#include "insn_trans/trans_rvofp4.c.inc"
>  #include "decode-xthead.c.inc"
>  #include "decode-xmips.c.inc"
>  #include "insn_trans/trans_xthead.c.inc"
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 418212973d..a87728f130 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -5121,6 +5121,7 @@ RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
>  GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
>  GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
>  
> +/* Zvfofp4min: vfext.vf2 - OFP4 E2M1 to OFP8 E4M3 conversion */
>  /*
>   * Vector Reduction Operations
>   */
> @@ -5920,3 +5921,35 @@ GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
>  GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
>  GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
>  GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
> +
> +
> +void HELPER(vfext_vf2)(void *vd, void *v0, void *vs2, CPURISCVState *env,
> +                       uint32_t desc)
> +{
> +    float_status fp_status = env->fp_status;
> +    uint32_t vl = env->vl;
> +    uint32_t vm = vext_vm(desc);
> +    uint32_t esz = sizeof(uint8_t);
> +    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
> +    uint32_t vta = vext_vta(desc);
> +    uint32_t vma = vext_vma(desc);
> +    uint32_t i;
> +
> +    VSTART_CHECK_EARLY_EXIT(env, vl);
> +
> +    for (i = env->vstart; i < vl; ++i) {
> +        if (!vm && !vext_elem_mask(v0, i)) {
> +            /* set masked-off elements to 1s */
> +            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
> +            continue;
> +        }
> +
> +        uint8_t input = *((uint8_t *)vs2 + H1((i % 2 ? i - 1 : i) / 2));
> +        input = (i % 2) ? ((input >> 4) & 0xf) : (input & 0xf);
> +        *((uint8_t *)vd + H1(i)) = float4_e2m1_to_float8_e4m3(input,
> +                                                              &fp_status);
> +    }
> +    env->vstart = 0;
> +    /* set tail elements to 1s */
> +    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
> +}
> -- 
> 2.52.0
> 
> 

Reviewed-by: Chao Liu <chao.liu.zevorn@gmail.com>

Thanks,
Chao