According to the Zvfofp8min extension, the vfwcvtbf16.f.f.v instruction
supports OFP8 to BF16 conversion when SEW is 8.
The VTYPE.altfmt field selects the OFP8 source format:
* altfmt = 0: OFP8.e4m3 to BF16
* altfmt = 1: OFP8.e5m2 to BF16
Signed-off-by: Max Chou <max.chou@sifive.com>
---
target/riscv/helper.h | 12 +++
target/riscv/insn_trans/trans_rvbf16.c.inc | 16 +++-
target/riscv/vector_helper.c | 97 ++++++++++++++++++++++
3 files changed, 121 insertions(+), 4 deletions(-)
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index eb0a488ba8..356c24d9fb 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1247,6 +1247,18 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
+/* OFP8 functions */
+DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
+
/* Vector crypto functions */
DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc
index 6cfda03d2e..9aafd4d2ef 100644
--- a/target/riscv/insn_trans/trans_rvbf16.c.inc
+++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
@@ -92,11 +92,20 @@ static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
{
REQUIRE_FPU;
- REQUIRE_ZVFBFMIN(ctx);
- if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) {
+ if (opfv_widen_check(ctx, a) &&
+ ((ctx->sew == MO_16 && ctx->cfg_ptr->ext_zvfbfmin) ||
+ (ctx->sew == MO_8 && ctx->cfg_ptr->ext_zvfofp8min))) {
+ gen_helper_gvec_3_ptr *fn;
uint32_t data = 0;
+ if (ctx->sew == MO_16) {
+ fn = gen_helper_vfwcvtbf16_f_f_v;
+ } else {
+ fn = ctx->altfmt ? gen_helper_vfwcvtbf16_f_f_v_ofp8e5m2 :
+ gen_helper_vfwcvtbf16_f_f_v_ofp8e4m3;
+ }
+
gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
data = FIELD_DP32(data, VDATA, VM, a->vm);
@@ -106,8 +115,7 @@ static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
vreg_ofs(ctx, a->rs2), tcg_env,
ctx->cfg_ptr->vlenb,
- ctx->cfg_ptr->vlenb, data,
- gen_helper_vfwcvtbf16_f_f_v);
+ ctx->cfg_ptr->vlenb, data, fn);
finalize_rvv_inst(ctx);
return true;
}
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index ee5a1e595b..418212973d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5024,6 +5024,103 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
+/*
+ * OCP FP8 Narrowing Conversions (BF16/F32 -> FP8)
+ * 1. Initialize a local float_status with RISC-V specific NaN handling
+ * 2. Call the softfloat conversion function with saturation parameter
+ * 3. Merge exception flags back to the original status
+ */
+#define GEN_OCP_FP8_NARROW(NAME, CONVERT_FN, SATURATE, IN_TYPE) \
+static uint8_t NAME(IN_TYPE a, float_status *s) \
+{ \
+ float_status local = *s; \
+ local.default_nan_pattern = 0x70; \
+ local.default_nan_mode = true; \
+ uint8_t result = CONVERT_FN(a, SATURATE, &local); \
+ s->float_exception_flags |= local.float_exception_flags; \
+ return result; \
+}
+
+/* BF16 -> E4M3/E5M2 conversions */
+GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3, bfloat16_to_float8_e4m3, false,
+ uint16_t)
+GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2, bfloat16_to_float8_e5m2, false,
+ uint16_t)
+GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3_sat, bfloat16_to_float8_e4m3, true,
+ uint16_t)
+GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2_sat, bfloat16_to_float8_e5m2, true,
+ uint16_t)
+
+/* F32 -> E4M3/E5M2 conversions */
+GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3, float32_to_float8_e4m3, false, uint32_t)
+GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2, float32_to_float8_e5m2, false, uint32_t)
+GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3_sat, float32_to_float8_e4m3, true,
+ uint32_t)
+GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2_sat, float32_to_float8_e5m2, true,
+ uint32_t)
+
+/*
+ * OCP FP8 Widening Conversions (FP8 -> BF16)
+ * According to the Zvfofp8min ISA specification: "No rounding occurs, and no
+ * floating-point exception flags are set."
+ * 1. Initialize a local float_status with no_signaling_nans=true
+ * 2. Call the softfloat conversion function
+ * 3. Intentionally DISCARD exception flags (not merged back)
+ */
+#define GEN_OCP_FP8_WIDEN(NAME, CONVERT_FN) \
+static uint16_t NAME(uint8_t a, float_status *s) \
+{ \
+ float_status local = *s; \
+ local.no_signaling_nans = true; \
+ return CONVERT_FN(a, &local); \
+}
+
+GEN_OCP_FP8_WIDEN(vfwcvt_e4m3_to_bf16, float8_e4m3_to_bfloat16)
+GEN_OCP_FP8_WIDEN(vfwcvt_e5m2_to_bf16, float8_e5m2_to_bfloat16)
+
+/* vfwcvtbf16.f.f.v vd, vs2, vm # Convert OFP8 to BF16. */
+RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e4m3, WOP_UU_B, H2, H1,
+ vfwcvt_e4m3_to_bf16)
+RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e5m2, WOP_UU_B, H2, H1,
+ vfwcvt_e5m2_to_bf16)
+GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e4m3, 2)
+GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e5m2, 2)
+
+/* vfncvtbf16.f.f.w vd, vs2, vm # Convert BF16 to OFP8 without saturation. */
+RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e4m3)
+RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e5m2)
+GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e5m2, 1)
+
+/* vfncvtbf16.sat.f.f.w vd, vs2, vm # Convert BF16 to OFP8 with saturation. */
+RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e4m3_sat)
+RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
+ vfncvt_bf16_to_e5m2_sat)
+GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e5m2, 1)
+
+/* Quad-width narrowing type for FP32 to OFP8 */
+#define QOP_UU_B uint8_t, uint32_t, uint32_t
+
+/* vfncvt.f.f.q vd, vs2, vm # Convert FP32 to OFP8. */
+RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e4m3)
+RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e5m2)
+GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e5m2, 1)
+
+/* vfncvt.sat.f.f.q vd, vs2, vm # Convert FP32 to OFP8 with saturation. */
+RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e4m3_sat)
+RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
+ vfncvt_f32_to_e5m2_sat)
+GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
+GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
+
/*
* Vector Reduction Operations
*/
--
2.52.0
On Wed, Feb 04, 2026 at 01:17:45PM +0800, Max Chou wrote:
> According to the Zvfofp8min extension, the vfwcvtbf16.f.f.v instruction
> supports OFP8 to BF16 conversion when SEW is 8.
> And the VTYPE.altfmt field is used to select the OFP8 format.
> * altfmt = 0: OFP8.e4m3 to BF16
> * altfmt = 1: OFP8.e5m2 to BF16
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> target/riscv/helper.h | 12 +++
> target/riscv/insn_trans/trans_rvbf16.c.inc | 16 +++-
> target/riscv/vector_helper.c | 97 ++++++++++++++++++++++
> 3 files changed, 121 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index eb0a488ba8..356c24d9fb 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -1247,6 +1247,18 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
>
> +/* OFP8 functions */
> +DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +
> /* Vector crypto functions */
> DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
> diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc
> index 6cfda03d2e..9aafd4d2ef 100644
> --- a/target/riscv/insn_trans/trans_rvbf16.c.inc
> +++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
> @@ -92,11 +92,20 @@ static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
> static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
> {
> REQUIRE_FPU;
> - REQUIRE_ZVFBFMIN(ctx);
>
> - if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) {
> + if (opfv_widen_check(ctx, a) &&
> + ((ctx->sew == MO_16 && ctx->cfg_ptr->ext_zvfbfmin) ||
> + (ctx->sew == MO_8 && ctx->cfg_ptr->ext_zvfofp8min))) {
> + gen_helper_gvec_3_ptr *fn;
> uint32_t data = 0;
>
> + if (ctx->sew == MO_16) {
> + fn = gen_helper_vfwcvtbf16_f_f_v;
> + } else {
> + fn = ctx->altfmt ? gen_helper_vfwcvtbf16_f_f_v_ofp8e5m2 :
> + gen_helper_vfwcvtbf16_f_f_v_ofp8e4m3;
> + }
> +
> gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
>
> data = FIELD_DP32(data, VDATA, VM, a->vm);
> @@ -106,8 +115,7 @@ static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
> tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> vreg_ofs(ctx, a->rs2), tcg_env,
> ctx->cfg_ptr->vlenb,
> - ctx->cfg_ptr->vlenb, data,
> - gen_helper_vfwcvtbf16_f_f_v);
> + ctx->cfg_ptr->vlenb, data, fn);
> finalize_rvv_inst(ctx);
> return true;
> }
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index ee5a1e595b..418212973d 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -5024,6 +5024,103 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
> RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
> GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
>
> +/*
> + * OCP FP8 Narrowing Conversions (BF16/F32 -> FP8)
> + * 1. Initialize a local float_status with RISC-V specific NaN handling
> + * 2. Call the softfloat conversion function with saturation parameter
> + * 3. Merge exception flags back to the original status
> + */
> +#define GEN_OCP_FP8_NARROW(NAME, CONVERT_FN, SATURATE, IN_TYPE) \
> +static uint8_t NAME(IN_TYPE a, float_status *s) \
> +{ \
> + float_status local = *s; \
> + local.default_nan_pattern = 0x70; \
I suggest adding a comment in GEN_OCP_FP8_NARROW() to explain the
choice of default_nan_pattern:

    /*
     * 0x70 produces canonical NaN 0x7f for both E4M3 and E5M2 per
     * Zvfofp8min spec
     */

Thanks,
Chao
> + local.default_nan_mode = true; \
> + uint8_t result = CONVERT_FN(a, SATURATE, &local); \
> + s->float_exception_flags |= local.float_exception_flags; \
> + return result; \
> +}
> +
> +/* BF16 -> E4M3/E5M2 conversions */
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3, bfloat16_to_float8_e4m3, false,
> + uint16_t)
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2, bfloat16_to_float8_e5m2, false,
> + uint16_t)
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3_sat, bfloat16_to_float8_e4m3, true,
> + uint16_t)
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2_sat, bfloat16_to_float8_e5m2, true,
> + uint16_t)
> +
> +/* F32 -> E4M3/E5M2 conversions */
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3, float32_to_float8_e4m3, false, uint32_t)
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2, float32_to_float8_e5m2, false, uint32_t)
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3_sat, float32_to_float8_e4m3, true,
> + uint32_t)
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2_sat, float32_to_float8_e5m2, true,
> + uint32_t)
> +
> +/*
> + * OCP FP8 Widening Conversions (FP8 -> BF16)
> + * According to Zvfofp8min isa specification: "No rounding occurs, and no
> + * floating-point exception flags are set."
> + * 1. Initialize a local float_status with no_signaling_nans=true
> + * 2. Call the softfloat conversion function
> + * 3. Intentionally DISCARD exception flags (not merged back)
> + */
> +#define GEN_OCP_FP8_WIDEN(NAME, CONVERT_FN) \
> +static uint16_t NAME(uint8_t a, float_status *s) \
> +{ \
> + float_status local = *s; \
> + local.no_signaling_nans = true; \
> + return CONVERT_FN(a, &local); \
> +}
> +
> +GEN_OCP_FP8_WIDEN(vfwcvt_e4m3_to_bf16, float8_e4m3_to_bfloat16)
> +GEN_OCP_FP8_WIDEN(vfwcvt_e5m2_to_bf16, float8_e5m2_to_bfloat16)
> +
> +/* vfwcvtbf16.f.f.w vd, vs2, vm # Convert OFP8 to BF16. */
> +RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e4m3, WOP_UU_B, H2, H1,
> + vfwcvt_e4m3_to_bf16)
> +RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e5m2, WOP_UU_B, H2, H1,
> + vfwcvt_e5m2_to_bf16)
> +GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e4m3, 2)
> +GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e5m2, 2)
> +
> +/* vfncvtbf16.f.f.w vd, vs2, vm # Convert BF16 to OFP8 without saturation. */
> +RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e4m3)
> +RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e5m2)
> +GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e5m2, 1)
> +
> +/* vfncvtbf16.sat.f.f.w vd, vs2, vm # Convert BF16 to OFP8 with saturation. */
> +RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e4m3_sat)
> +RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e5m2_sat)
> +GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e5m2, 1)
> +
> +/* Quad-width narrowing type for FP32 to OFP8 */
> +#define QOP_UU_B uint8_t, uint32_t, uint32_t
> +
> +/* vfncvt.f.f.q vd, vs2, vm # Convert FP32 to OFP8. */
> +RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e4m3)
> +RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e5m2)
> +GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e5m2, 1)
> +
> +/* vfncvt.sat.f.f.q vd, vs2, vm # Convert FP32 to OFP8 with saturation. */
> +RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e4m3_sat)
> +RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e5m2_sat)
> +GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
> +
> /*
> * Vector Reduction Operations
> */
> --
> 2.52.0
>
>
On Wed, Feb 04, 2026 at 01:17:45PM +0800, Max Chou wrote:
> According to the Zvfofp8min extension, the vfwcvtbf16.f.f.v instruction
> supports OFP8 to BF16 conversion when SEW is 8.
> And the VTYPE.altfmt field is used to select the OFP8 format.
> * altfmt = 0: OFP8.e4m3 to BF16
> * altfmt = 1: OFP8.e5m2 to BF16
>
> Signed-off-by: Max Chou <max.chou@sifive.com>
> ---
> target/riscv/helper.h | 12 +++
> target/riscv/insn_trans/trans_rvbf16.c.inc | 16 +++-
> target/riscv/vector_helper.c | 97 ++++++++++++++++++++++
> 3 files changed, 121 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index eb0a488ba8..356c24d9fb 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -1247,6 +1247,18 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32)
>
> +/* OFP8 functions */
> +DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfwcvtbf16_f_f_v_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvtbf16_sat_f_f_w_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e4m3, void, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_5(vfncvt_sat_f_f_q_ofp8e5m2, void, ptr, ptr, ptr, env, i32)
> +
> /* Vector crypto functions */
> DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32)
> DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32)
> diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc
> index 6cfda03d2e..9aafd4d2ef 100644
> --- a/target/riscv/insn_trans/trans_rvbf16.c.inc
> +++ b/target/riscv/insn_trans/trans_rvbf16.c.inc
> @@ -92,11 +92,20 @@ static bool trans_vfncvtbf16_f_f_w(DisasContext *ctx, arg_vfncvtbf16_f_f_w *a)
> static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
> {
> REQUIRE_FPU;
> - REQUIRE_ZVFBFMIN(ctx);
>
> - if (opfv_widen_check(ctx, a) && (ctx->sew == MO_16)) {
> + if (opfv_widen_check(ctx, a) &&
> + ((ctx->sew == MO_16 && ctx->cfg_ptr->ext_zvfbfmin) ||
> + (ctx->sew == MO_8 && ctx->cfg_ptr->ext_zvfofp8min))) {
> + gen_helper_gvec_3_ptr *fn;
> uint32_t data = 0;
>
> + if (ctx->sew == MO_16) {
> + fn = gen_helper_vfwcvtbf16_f_f_v;
> + } else {
> + fn = ctx->altfmt ? gen_helper_vfwcvtbf16_f_f_v_ofp8e5m2 :
> + gen_helper_vfwcvtbf16_f_f_v_ofp8e4m3;
> + }
> +
> gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN);
>
> data = FIELD_DP32(data, VDATA, VM, a->vm);
> @@ -106,8 +115,7 @@ static bool trans_vfwcvtbf16_f_f_v(DisasContext *ctx, arg_vfwcvtbf16_f_f_v *a)
> tcg_gen_gvec_3_ptr(vreg_ofs(ctx, a->rd), vreg_ofs(ctx, 0),
> vreg_ofs(ctx, a->rs2), tcg_env,
> ctx->cfg_ptr->vlenb,
> - ctx->cfg_ptr->vlenb, data,
> - gen_helper_vfwcvtbf16_f_f_v);
> + ctx->cfg_ptr->vlenb, data, fn);
> finalize_rvv_inst(ctx);
> return true;
> }
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index ee5a1e595b..418212973d 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -5024,6 +5024,103 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
> RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
> GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
>
> +/*
> + * OCP FP8 Narrowing Conversions (BF16/F32 -> FP8)
> + * 1. Initialize a local float_status with RISC-V specific NaN handling
> + * 2. Call the softfloat conversion function with saturation parameter
> + * 3. Merge exception flags back to the original status
> + */
> +#define GEN_OCP_FP8_NARROW(NAME, CONVERT_FN, SATURATE, IN_TYPE) \
> +static uint8_t NAME(IN_TYPE a, float_status *s) \
checkpatch reports:
ERROR: spaces required around that '*' (ctx:WxV)
Please fix the spacing around the '*' in the macro definition.
Thanks,
Chao
> +{ \
> + float_status local = *s; \
> + local.default_nan_pattern = 0x70; \
> + local.default_nan_mode = true; \
> + uint8_t result = CONVERT_FN(a, SATURATE, &local); \
> + s->float_exception_flags |= local.float_exception_flags; \
> + return result; \
> +}
> +
> +/* BF16 -> E4M3/E5M2 conversions */
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3, bfloat16_to_float8_e4m3, false,
> + uint16_t)
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2, bfloat16_to_float8_e5m2, false,
> + uint16_t)
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e4m3_sat, bfloat16_to_float8_e4m3, true,
> + uint16_t)
> +GEN_OCP_FP8_NARROW(vfncvt_bf16_to_e5m2_sat, bfloat16_to_float8_e5m2, true,
> + uint16_t)
> +
> +/* F32 -> E4M3/E5M2 conversions */
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3, float32_to_float8_e4m3, false, uint32_t)
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2, float32_to_float8_e5m2, false, uint32_t)
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e4m3_sat, float32_to_float8_e4m3, true,
> + uint32_t)
> +GEN_OCP_FP8_NARROW(vfncvt_f32_to_e5m2_sat, float32_to_float8_e5m2, true,
> + uint32_t)
> +
> +/*
> + * OCP FP8 Widening Conversions (FP8 -> BF16)
> + * According to Zvfofp8min isa specification: "No rounding occurs, and no
> + * floating-point exception flags are set."
> + * 1. Initialize a local float_status with no_signaling_nans=true
> + * 2. Call the softfloat conversion function
> + * 3. Intentionally DISCARD exception flags (not merged back)
> + */
> +#define GEN_OCP_FP8_WIDEN(NAME, CONVERT_FN) \
> +static uint16_t NAME(uint8_t a, float_status *s) \
> +{ \
> + float_status local = *s; \
> + local.no_signaling_nans = true; \
> + return CONVERT_FN(a, &local); \
> +}
> +
> +GEN_OCP_FP8_WIDEN(vfwcvt_e4m3_to_bf16, float8_e4m3_to_bfloat16)
> +GEN_OCP_FP8_WIDEN(vfwcvt_e5m2_to_bf16, float8_e5m2_to_bfloat16)
> +
> +/* vfwcvtbf16.f.f.w vd, vs2, vm # Convert OFP8 to BF16. */
> +RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e4m3, WOP_UU_B, H2, H1,
> + vfwcvt_e4m3_to_bf16)
> +RVVCALL(OPFVV1, vfwcvtbf16_f_f_v_ofp8e5m2, WOP_UU_B, H2, H1,
> + vfwcvt_e5m2_to_bf16)
> +GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e4m3, 2)
> +GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v_ofp8e5m2, 2)
> +
> +/* vfncvtbf16.f.f.w vd, vs2, vm # Convert BF16 to OFP8 without saturation. */
> +RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e4m3)
> +RVVCALL(OPFVV1, vfncvtbf16_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e5m2)
> +GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvtbf16_f_f_w_ofp8e5m2, 1)
> +
> +/* vfncvtbf16.sat.f.f.w vd, vs2, vm # Convert BF16 to OFP8 with saturation. */
> +RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e4m3, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e4m3_sat)
> +RVVCALL(OPFVV1, vfncvtbf16_sat_f_f_w_ofp8e5m2, NOP_UU_B, H1, H2,
> + vfncvt_bf16_to_e5m2_sat)
> +GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvtbf16_sat_f_f_w_ofp8e5m2, 1)
> +
> +/* Quad-width narrowing type for FP32 to OFP8 */
> +#define QOP_UU_B uint8_t, uint32_t, uint32_t
> +
> +/* vfncvt.f.f.q vd, vs2, vm # Convert FP32 to OFP8. */
> +RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e4m3)
> +RVVCALL(OPFVV1, vfncvt_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e5m2)
> +GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvt_f_f_q_ofp8e5m2, 1)
> +
> +/* vfncvt.sat.f.f.q vd, vs2, vm # Convert FP32 to OFP8 with saturation. */
> +RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e4m3, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e4m3_sat)
> +RVVCALL(OPFVV1, vfncvt_sat_f_f_q_ofp8e5m2, QOP_UU_B, H1, H4,
> + vfncvt_f32_to_e5m2_sat)
> +GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e4m3, 1)
> +GEN_VEXT_V_ENV(vfncvt_sat_f_f_q_ofp8e5m2, 1)
> +
> /*
> * Vector Reduction Operations
> */
> --
> 2.52.0
>
>
© 2016 - 2026 Red Hat, Inc.