[PATCH v4 07/14] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension

Max Chou posted 14 patches 1 month, 1 week ago
[PATCH v4 07/14] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension
Posted by Max Chou 1 month, 1 week ago
The vfncvt.f.f.q and vfncvt.sat.f.f.q instructions convert a vector of
FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.f.q instruction
converts a vector of FP32 elements to a vector of OFP8 elements with saturation.
The VTYPE.altfmt field is used to select the OFP8 format.
* altfmt = 0: FP32 to OFP8.e4m3
* altfmt = 1: FP32 to OFP8.e5m2

Reviewed-by: Chao Liu <chao.liu.zevorn@gmail.com>
Signed-off-by: Max Chou <max.chou@sifive.com>
---
 target/riscv/insn32.decode                 |  2 +
 target/riscv/insn_trans/trans_rvofp8.c.inc | 63 ++++++++++++++++++++++
 target/riscv/insn_trans/trans_rvv.c.inc    | 39 ++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 49201c0c20..f2b413c7d4 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -974,6 +974,8 @@ vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 @r_vm
 vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
 
 # *** Zvfofp8min Extension ***
+vfncvt_f_f_q          010010 . ..... 11001 001 ..... 1010111 @r2_vm
+vfncvt_sat_f_f_q      010010 . ..... 11011 001 ..... 1010111 @r2_vm
 vfncvtbf16_sat_f_f_w  010010 . ..... 11111 001 ..... 1010111 @r2_vm
 
 # *** Zvbc vector crypto extension ***
diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
index d28f92e050..619ee4d773 100644
--- a/target/riscv/insn_trans/trans_rvofp8.c.inc
+++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
@@ -12,6 +12,13 @@
     }                                       \
 } while (0)
 
+static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
+{
+    return require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_sq(s, a->rd, a->rs2, a->vm) &&
+           (s->sew == MO_8);
+}
 
 static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
 {
@@ -40,3 +47,59 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
     }
     return false;
 }
+
+static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFOFP8MIN(ctx);
+
+    if (zvfofp8min_narrow_quad_check(ctx, a)) {
+        gen_helper_gvec_3_ptr *fn;
+        uint32_t data = 0;
+
+        fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
+                           gen_helper_vfncvt_f_f_q_ofp8e4m3;
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs2), tcg_env,
+                           ctx->cfg_ptr->vlenb,
+                           ctx->cfg_ptr->vlenb, data, fn);
+        finalize_rvv_inst(ctx);
+        return true;
+    }
+    return false;
+}
+
+static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_ZVFOFP8MIN(ctx);
+
+    if (zvfofp8min_narrow_quad_check(ctx, a)) {
+        gen_helper_gvec_3_ptr *fn;
+        uint32_t data = 0;
+
+        fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
+                           gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
+
+        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
+
+        data = FIELD_DP32(data, VDATA, VM, a->vm);
+        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
+        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
+        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
+        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
+                           vreg_ofs(ctx, a->rs2), tcg_env,
+                           ctx->cfg_ptr->vlenb,
+                           ctx->cfg_ptr->vlenb, data, fn);
+        finalize_rvv_inst(ctx);
+        return true;
+    }
+    return false;
+}
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 161bf94a07..bbe864dd7c 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
            require_align(vs1, s->lmul);
 }
 
+/*
+ * Common check function for vector narrowing instructions
+ * of single-width result (SEW) and quad-width source (4*SEW).
+ *
+ * Rules to be checked here:
+ *   1. The largest vector register group used by an instruction
+ *      can not be greater than 8 vector registers
+ *      (Section 31.5.2)
+ *   2. Quad-width SEW cannot greater than ELEN.
+ *      (Section 31.2)
+ *   3. Source vector register number is multiples of 4 * LMUL.
+ *      (Section 31.3.4.2)
+ *   4. Destination vector register number is multiples of LMUL.
+ *      (Section 31.3.4.2)
+ *   5. Destination vector register group for a masked vector
+ *      instruction cannot overlap the source mask register (v0).
+ *      (Section 31.5.3)
+ * risc-v unprivileged spec
+ */
+static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
+                                          int vm)
+{
+    return (s->lmul <= 1) &&
+           (s->sew < MO_32) &&
+           ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
+           require_align(vs2, s->lmul + 2) &&
+           require_align(vd, s->lmul) &&
+           require_vm(vm, vd);
+}
+
+static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
+{
+    bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
+    if (vd != vs) {
+        ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
+    }
+    return ret;
+}
+
 /*
  * Check function for vector reduction instructions.
  *
-- 
2.52.0
Re: [PATCH v4 07/14] target/riscv: rvv: Add vfncvt.f.f.q and vfncvt.sat.f.f.q instructions for Zvfofp8min extension
Posted by Chao Liu 1 month, 1 week ago
On Wed, Mar 04, 2026 at 09:39:59PM +0800, Max Chou wrote:
> The vfncvt.f.f.q and vfncvt.sat.f.f.q instructions convert a vector of
> FP32 elements to a vector of OFP8 elements. The vfncvt.sat.f.fq instruction
> converts a vector of FP32 elements to a vector of OFP8 elements with saturation.
> The VTYPE.altfmt field is used to select the OFP8 format.
> * altfmt = 0: FP32 to OFP8.e4m3
> * altfmt = 1: FP32 to OFP8.e5m2
> 
> Reviewed-by: Chao Liu <chao.liu.zevorn@gmail.com>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
>  target/riscv/insn32.decode                 |  2 +
>  target/riscv/insn_trans/trans_rvofp8.c.inc | 63 ++++++++++++++++++++++
>  target/riscv/insn_trans/trans_rvv.c.inc    | 39 ++++++++++++++
>  3 files changed, 104 insertions(+)
> 
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 49201c0c20..f2b413c7d4 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -974,6 +974,8 @@ vfwmaccbf16_vv    111011 . ..... ..... 001 ..... 1010111 @r_vm
>  vfwmaccbf16_vf    111011 . ..... ..... 101 ..... 1010111 @r_vm
>  
>  # *** Zvfofp8min Extension ***
> +vfncvt_f_f_q          010010 . ..... 11001 001 ..... 1010111 @r2_vm
> +vfncvt_sat_f_f_q      010010 . ..... 11011 001 ..... 1010111 @r2_vm
>  vfncvtbf16_sat_f_f_w  010010 . ..... 11111 001 ..... 1010111 @r2_vm
>  
>  # *** Zvbc vector crypto extension ***
> diff --git a/target/riscv/insn_trans/trans_rvofp8.c.inc b/target/riscv/insn_trans/trans_rvofp8.c.inc
> index d28f92e050..619ee4d773 100644
> --- a/target/riscv/insn_trans/trans_rvofp8.c.inc
> +++ b/target/riscv/insn_trans/trans_rvofp8.c.inc
> @@ -12,6 +12,13 @@
>      }                                       \
>  } while (0)
>  
> +static bool zvfofp8min_narrow_quad_check(DisasContext *s, arg_rmr *a)
> +{
> +    return require_rvv(s) &&
> +           vext_check_isa_ill(s) &&
> +           vext_check_sq(s, a->rd, a->rs2, a->vm) &&
> +           (s->sew == MO_8);
> +}
>  
>  static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
>  {
> @@ -40,3 +47,59 @@ static bool trans_vfncvtbf16_sat_f_f_w(DisasContext *ctx, arg_rmr *a)
>      }
>      return false;
>  }
> +
> +static bool trans_vfncvt_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_ZVFOFP8MIN(ctx);
> +
> +    if (zvfofp8min_narrow_quad_check(ctx, a)) {
> +        gen_helper_gvec_3_ptr *fn;
> +        uint32_t data = 0;
> +
> +        fn = ctx->altfmt ? gen_helper_vfncvt_f_f_q_ofp8e5m2 :
> +                           gen_helper_vfncvt_f_f_q_ofp8e4m3;
> +
> +        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> +                           vreg_ofs(ctx, a->rs2), tcg_env,
> +                           ctx->cfg_ptr->vlenb,
> +                           ctx->cfg_ptr->vlenb, data, fn);
> +        finalize_rvv_inst(ctx);
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static bool trans_vfncvt_sat_f_f_q(DisasContext *ctx, arg_rmr *a)
> +{
> +    REQUIRE_FPU;
> +    REQUIRE_ZVFOFP8MIN(ctx);
> +
> +    if (zvfofp8min_narrow_quad_check(ctx, a)) {
> +        gen_helper_gvec_3_ptr *fn;
> +        uint32_t data = 0;
> +
> +        fn = ctx->altfmt ? gen_helper_vfncvt_sat_f_f_q_ofp8e5m2 :
> +                           gen_helper_vfncvt_sat_f_f_q_ofp8e4m3;
> +
> +        gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
> +
> +        data = FIELD_DP32(data, VDATA, VM, a->vm);
> +        data = FIELD_DP32(data, VDATA, LMUL, ctx->lmul);
> +        data = FIELD_DP32(data, VDATA, VTA, ctx->vta);
> +        data = FIELD_DP32(data, VDATA, VMA, ctx->vma);
> +        tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> +                           vreg_ofs(ctx, a->rs2), tcg_env,
> +                           ctx->cfg_ptr->vlenb,
> +                           ctx->cfg_ptr->vlenb, data, fn);
> +        finalize_rvv_inst(ctx);
> +        return true;
> +    }
> +    return false;
> +}
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
> index 161bf94a07..bbe864dd7c 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -621,6 +621,45 @@ static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
>             require_align(vs1, s->lmul);
>  }
>  
> +/*
> + * Common check function for vector narrowing instructions
> + * of single-width result (SEW) and quad-width source (4*SEW).
> + *
> + * Rules to be checked here:
> + *   1. The largest vector register group used by an instruction
> + *      can not be greater than 8 vector registers
> + *      (Section 31.5.2)
> + *   2. Quad-width SEW cannot greater than ELEN.
> + *      (Section 31.2)
> + *   3. Source vector register number is multiples of 4 * LMUL.
> + *      (Section 31.3.4.2)
> + *   4. Destination vector register number is multiples of LMUL.
> + *      (Section 31.3.4.2)
> + *   5. Destination vector register group for a masked vector
> + *      instruction cannot overlap the source mask register (v0).
> + *      (Section 31.5.3)
> + * risc-v unprivileged spec
> + */
> +static bool vext_quad_narrow_check_common(DisasContext *s, int vd, int vs2,
> +                                          int vm)
> +{
> +    return (s->lmul <= 1) &&
> +           (s->sew < MO_32) &&
> +           ((s->sew + 2) <= (s->cfg_ptr->elen >> 4)) &&
> +           require_align(vs2, s->lmul + 2) &&
> +           require_align(vd, s->lmul) &&
> +           require_vm(vm, vd);
> +}
The quad-narrow check logic is correct:
- lmul <= 1 caps source at 8 regs (4 * LMUL = 4 * 2 = 8)
- sew < MO_32 prevents quad SEW exceeding 128 bits
- elen check verified for both ELEN = 32 and 64

Thanks,
Chao
> +
> +static bool vext_check_sq(DisasContext *s, int vd, int vs, int vm)
> +{
> +    bool ret = vext_quad_narrow_check_common(s, vd, vs, vm);
> +    if (vd != vs) {
> +        ret &= require_noover(vd, s->lmul, vs, s->lmul + 2);
> +    }
> +    return ret;
> +}
> +
>  /*
>   * Check function for vector reduction instructions.
>   *
> -- 
> 2.52.0
>