[v1] Add OCP FP8/FP4 and RISC-V Zvfofp8min/Zvfofp4min extension support

[PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension

Posted by Max Chou 1 month ago

The vfncvt.f.f.q and vfncvt.sat.f.f.q instructions convert a vector of
FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.f.q instruction
converts a vector of FP32 elements to a vector of OFP8 elements with saturation.
The VTYPE.altfmt field is used to select the OFP8 format.
* altfmt = 0: FP32 to OFP8.e4m3
* altfmt = 1: FP32 to OFP8.e5m2

Signed-off-by: Max Chou <max.chou@sifive.com>
---
 target/riscv/insn32.decode                 |  2 +
 target/riscv/insn_trans/trans_rvofp8.c.inc | 62 ++++++++++++++++++++++
 target/riscv/insn_trans/trans_rvv.c.inc    | 39 ++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 49201c0c20..f2b413c7d4 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -974,6 +974,8 @@ vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 @r_vm
 vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
 
 # *** Zvfofp8min Extension ***
+vfncvt_f_f_q          010010 . ..... 11001 001 ..... 1010111 @r2_vm
+vfncvt_sat_f_f_q      010010 . ..... 11011 001 ..... 1010111 @r2_vm
 vfncvtbf16_sat_f_f_w  010010 . ..... 11111 001 ..... 1010111 @r2_vm
 
 # *** Zvbc vector crypto extension ***
diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
index 70a22616dc..4b44417c47 100644
--- a/target/riscv/insn_trans/trans_rvofp8.c.inc
+++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
@@ -22,6 +22,13 @@
     } \
 } while (0)
 
+static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
+{
+    return require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_sq(s, a->rd, a->rs2, a->vm) &&
+           (s->sew == MO_8);
+}
 
 static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
 {
@@ -51,3 +58,58 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
     return false;
 }
 
+static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFOFP8MIN(ctx);
+
+    if (zvfofp8min_narrow_quad_check(ctx, a)) {
+        gen_helper_gvec_3_ptr *fn;
+        uint32_t data = 0;
+
+        fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
+                           gen_helper_vfncvt_f_f_q_ofp8e4m3;
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs2), tcg_env,
+                           ctx->cfg_ptr->vlenb,
+                           ctx->cfg_ptr->vlenb, data, fn);
+        finalize_rvv_inst(ctx);
+        return true;
+    }
+    return false;
+}
+
+static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFOFP8MIN(ctx);
+
+    if (zvfofp8min_narrow_quad_check(ctx, a)) {
+        gen_helper_gvec_3_ptr *fn;
+        uint32_t data = 0;
+
+        fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
+                           gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs2), tcg_env,
+                           ctx->cfg_ptr->vlenb,
+                           ctx->cfg_ptr->vlenb, data, fn);
+        finalize_rvv_inst(ctx);
+        return true;
+    }
+    return false;
+}
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index bcd45b0aa3..93c0761171 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
            require_align(vs1, s->lmul);
 }
 
+/*
+ * Common check function for vector narrowing instructions
+ * of single-width result (SEW) and quad-width source (4*SEW).
+ *
+ * Rules to be checked here:
+ *   1. The largest vector register group used by an instruction
+ *      can not be greater than 8 vector registers
+ *      (Section 31.5.2)
+ *   2. Quad-width SEW cannot be greater than ELEN.
+ *      (Section 31.2)
+ *   3. Source vector register number is multiples of 4 * LMUL.
+ *      (Section 31.3.4.2)
+ *   4. Destination vector register number is multiples of LMUL.
+ *      (Section 31.3.4.2)
+ *   5. Destination vector register group for a masked vector
+ *      instruction cannot overlap the source mask register (v0).
+ *      (Section 31.5.3)
+ * Section numbers refer to the RISC-V unprivileged spec.
+ */
+static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
+                                          int vm)
+{
+    return (s->lmul <= 1) &&
+           (s->sew < MO_32) &&
+           ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
+           require_align(vs2, s->lmul + 2) &&
+           require_align(vd, s->lmul) &&
+           require_vm(vm, vd);
+}
+
+static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
+{
+    bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
+    if (vd != vs) {
+        ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
+    }
+    return ret;
+}
+
 /*
  * Check function for vector reduction instructions.
  *
-- 
2.43.7

Re: [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension

Posted by Chao Liu 1 month ago

Hi, Max:

On 1/8/2026 11:16 PM, Max Chou wrote:
> The vfncvt.f.f.q and vfncvt.sat.f.f.q instructions convert a vector of
> FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.f.q instruction
> converts a vector of FP32 elements to a vector of OFP8 elements with saturation.
> The VTYPE.altfmt field is used to select the OFP8 format.
> * altfmt = 0: FP32 to OFP8.e4m3
> * altfmt = 1: FP32 to OFP8.e5m2
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
>  target/riscv/insn32.decode                 |  2 +
>  target/riscv/insn_trans/trans_rvofp8.c.inc | 62 ++++++++++++++++++++++
>  target/riscv/insn_trans/trans_rvv.c.inc    | 39 ++++++++++++++
>  3 files changed, 103 insertions(+)
>
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 49201c0c20..f2b413c7d4 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -974,6 +974,8 @@ vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 @r_vm
>  vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
>
>  # *** Zvfofp8min Extension ***
> +vfncvt_f_f_q          010010 . ..... 11001 001 ..... 1010111 @r2_vm
> +vfncvt_sat_f_f_q      010010 . ..... 11011 001 ..... 1010111 @r2_vm
>  vfncvtbf16_sat_f_f_w  010010 . ..... 11111 001 ..... 1010111 @r2_vm
>
>  # *** Zvbc vector crypto extension ***
> diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
> index 70a22616dc..4b44417c47 100644
> --- a/target/riscv/insn_trans/trans_rvofp8.c.inc
> +++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
> @@ -22,6 +22,13 @@
>      } \
>  } while (0)
>
> +static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
> +{
> +    return require_rvv(s) &&
> +           vext_check_isa_ill(s) &&
> +           vext_check_sq(s, a->rd, a->rs2, a->vm) &&
> +           (s->sew == MO_8);
> +}
>
>  static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
>  {
> @@ -51,3 +58,58 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
>      return false;
>  }
>
> +static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_ZVFOFP8MIN(ctx);
> +
> +    if (zvfofp8min_narrow_quad_check(ctx, a)) {
> +        gen_helper_gvec_3_ptr *fn;
> +        uint32_t data = 0;
> +
> +        fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
> +                           gen_helper_vfncvt_f_f_q_ofp8e4m3;
> +
> +        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> +                           vreg_ofs(ctx, a->rs2), tcg_env,
> +                           ctx->cfg_ptr->vlenb,
> +                           ctx->cfg_ptr->vlenb, data, fn);
> +        finalize_rvv_inst(ctx);
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_ZVFOFP8MIN(ctx);
> +
> +    if (zvfofp8min_narrow_quad_check(ctx, a)) {
> +        gen_helper_gvec_3_ptr *fn;
> +        uint32_t data = 0;
> +
> +        fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
> +                           gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
> +
> +        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> +                           vreg_ofs(ctx, a->rs2), tcg_env,
> +                           ctx->cfg_ptr->vlenb,
> +                           ctx->cfg_ptr->vlenb, data, fn);
> +        finalize_rvv_inst(ctx);
> +        return true;
> +    }
> +    return false;
> +}
Missing the trailing blank line.

Thanks,
Chao


> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index bcd45b0aa3..93c0761171 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
>             require_align(vs1, s->lmul);
>  }
>
> +/*
> + * Common check function for vector narrowing instructions
> + * of single-width result (SEW) and quad-width source (4*SEW).
> + *
> + * Rules to be checked here:
> + *   1. The largest vector register group used by an instruction
> + *      can not be greater than 8 vector registers
> + *      (Section 31.5.2)
> + *   2. Quad-width SEW cannot be greater than ELEN.
> + *      (Section 31.2)
> + *   3. Source vector register number is multiples of 4 * LMUL.
> + *      (Section 31.3.4.2)
> + *   4. Destination vector register number is multiples of LMUL.
> + *      (Section 31.3.4.2)
> + *   5. Destination vector register group for a masked vector
> + *      instruction cannot overlap the source mask register (v0).
> + *      (Section 31.5.3)
> + * risc-v unprivileged spec
> + */
> +static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
> +                                          int vm)
> +{
> +    return (s->lmul <= 1) &&
> +           (s->sew < MO_32) &&
> +           ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
> +           require_align(vs2, s->lmul + 2) &&
> +           require_align(vd, s->lmul) &&
> +           require_vm(vm, vd);
> +}
> +
> +static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
> +{
> +    bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
> +    if (vd != vs) {
> +        ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
> +    }
> +    return ret;
> +}
> +
>  /*
>   * Check function for vector reduction instructions.
>   *

Re: [PATCH 11/18] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension

Posted by Max Chou 3 weeks, 4 days ago

On 2026-01-09 14:37, Chao Liu wrote:
> > +    return false;
> > +}
> Missing the trailing blank line.
>

Will fix it at v2.

Thanks,
rnax

> Thanks,
> Chao