[v1] hexagon: add missing HVX float instructions

[PATCH 07/13] target/hexagon: add v68 HVX IEEE float conversion insns

Posted by Matheus Tavares Bernardino 1 week, 4 days ago

Add HVX IEEE floating-point conversion instructions:
- vconv_hf_h, vconv_h_hf, vconv_sf_w, vconv_w_sf: vconv operations
- vcvt_hf_sf, vcvt_sf_hf: float <-> half float conversions
- vcvt_hf_b, vcvt_hf_h, vcvt_hf_ub, vcvt_hf_uh: int to half float
- vcvt_b_hf, vcvt_h_hf, vcvt_ub_hf, vcvt_uh_hf: half float to int

Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
---
 target/hexagon/mmvec/kvx_ieee.h              | 21 +++++
 target/hexagon/mmvec/kvx_ieee.c              | 98 ++++++++++++++++++++
 target/hexagon/imported/mmvec/encode_ext.def | 18 ++++
 target/hexagon/imported/mmvec/ext.idef       | 97 +++++++++++++++++++
 4 files changed, 234 insertions(+)

diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h
index 263feb7e94..8a6816f6b3 100644
--- a/target/hexagon/mmvec/kvx_ieee.h
+++ b/target/hexagon/mmvec/kvx_ieee.h
@@ -59,4 +59,25 @@ uint32_t qf_min_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
 uint16_t qf_max_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
 uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
 
+/*
+ * IEEE - FP Convert instructions
+ */
+uint16_t f32_to_f16(uint32_t a, float_status *fp_status);
+uint32_t f16_to_f32(uint16_t a, float_status *fp_status);
+
+uint16_t f16_to_uh(uint16_t op1, float_status *fp_status);
+int16_t  f16_to_h(uint16_t op1, float_status *fp_status);
+uint8_t  f16_to_ub(uint16_t op1, float_status *fp_status);
+int8_t   f16_to_b(uint16_t op1, float_status *fp_status);
+
+uint16_t uh_to_f16(uint16_t op1);
+uint16_t h_to_f16(int16_t op1);
+uint16_t ub_to_f16(uint8_t op1);
+uint16_t b_to_f16(int8_t op1);
+
+int32_t conv_sf_w(int32_t a, float_status *fp_status);
+int16_t conv_hf_h(int16_t a, float_status *fp_status);
+int32_t conv_w_sf(uint32_t a, float_status *fp_status);
+int16_t conv_h_hf(uint16_t a, float_status *fp_status);
+
 #endif
diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
index 33621a15f3..bbeec09707 100644
--- a/target/hexagon/mmvec/kvx_ieee.c
+++ b/target/hexagon/mmvec/kvx_ieee.c
@@ -131,3 +131,101 @@ uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
     if (float16_is_pos_nan(f2) || float16_is_neg_nan(f1)) return a1;
     return fp_min_hf(a1, a2, fp_status);
 }
+
+uint16_t f32_to_f16(uint32_t a, float_status *fp_status)
+{
+    return float16_val(float32_to_float16(make_float32(a), true, fp_status));
+}
+
+uint32_t f16_to_f32(uint16_t a, float_status *fp_status)
+{
+    return float32_val(float16_to_float32(make_float16(a), true, fp_status));
+}
+
+uint16_t f16_to_uh(uint16_t op1, float_status *fp_status)
+{
+    return float16_to_uint16_scalbn(make_float16(op1),
+                                    float_round_nearest_even,
+                                    0, fp_status);
+}
+
+int16_t f16_to_h(uint16_t op1, float_status *fp_status)
+{
+    return float16_to_int16_scalbn(make_float16(op1),
+                                   float_round_nearest_even,
+                                   0, fp_status);
+}
+
+uint8_t f16_to_ub(uint16_t op1, float_status *fp_status)
+{
+    return float16_to_uint8_scalbn(make_float16(op1),
+                                   float_round_nearest_even,
+                                   0, fp_status);
+}
+
+int8_t f16_to_b(uint16_t op1, float_status *fp_status)
+{
+    return float16_to_int8_scalbn(make_float16(op1),
+                                   float_round_nearest_even,
+                                   0, fp_status);
+}
+
+uint16_t uh_to_f16(uint16_t op1)
+{
+    return uint64_to_float16_scalbn(op1, float_round_nearest_even, 0);
+}
+
+uint16_t h_to_f16(int16_t op1)
+{
+    return int64_to_float16_scalbn(op1, float_round_nearest_even, 0);
+}
+
+uint16_t ub_to_f16(uint8_t op1)
+{
+    return uint64_to_float16_scalbn(op1, float_round_nearest_even, 0);
+}
+
+uint16_t b_to_f16(int8_t op1)
+{
+    return int64_to_float16_scalbn(op1, float_round_nearest_even, 0);
+}
+
+int32_t conv_sf_w(int32_t a, float_status *fp_status)
+{
+    return float32_val(int32_to_float32(a, fp_status));
+}
+
+int16_t conv_hf_h(int16_t a, float_status *fp_status)
+{
+    return float16_val(int16_to_float16(a, fp_status));
+}
+
+int32_t conv_w_sf(uint32_t a, float_status *fp_status)
+{
+    float_status scratch_fpst = {};
+    const float32 W_MAX = int32_to_float32(INT32_MAX, &scratch_fpst);
+    const float32 W_MIN = int32_to_float32(INT32_MIN, &scratch_fpst);
+    float32 f1 = make_float32(a);
+
+    if (float32_is_any_nan(f1) || float32_is_infinity(f1) ||
+        float32_le_quiet(W_MAX, f1, fp_status) ||
+        float32_le_quiet(f1, W_MIN, fp_status)) {
+        return float32_is_neg(f1) ? INT32_MIN : INT32_MAX;
+    }
+    return float32_to_int32_round_to_zero(f1, fp_status);
+}
+
+int16_t conv_h_hf(uint16_t a, float_status *fp_status)
+{
+    float_status scratch_fpst = {};
+    const float16 H_MAX = int16_to_float16(INT16_MAX, &scratch_fpst);
+    const float16 H_MIN = int16_to_float16(INT16_MIN, &scratch_fpst);
+    float16 f1 = make_float16(a);
+
+    if (float16_is_any_nan(f1) || float16_is_infinity(f1) ||
+        float16_le_quiet(H_MAX, f1, fp_status) ||
+        float16_le_quiet(f1, H_MIN, fp_status)) {
+        return float16_is_neg(f1) ? INT16_MIN : INT16_MAX;
+    }
+    return float16_to_int16_round_to_zero(f1, fp_status);
+}
diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def
index 7138e593dd..5325bbd704 100644
--- a/target/hexagon/imported/mmvec/encode_ext.def
+++ b/target/hexagon/imported/mmvec/encode_ext.def
@@ -841,4 +841,22 @@ DEF_ENC(V6_vfneg_sf,"00011110--0-0110PP1uuuuu011ddddd")
 DEF_ENC(V6_vabs_hf,"00011110--0-0110PP1uuuuu100ddddd")
 DEF_ENC(V6_vabs_sf,"00011110--0-0110PP1uuuuu101ddddd")
 
+/* IEEE FP vcvt instructions */
+DEF_ENC(V6_vcvt_sf_hf,"00011110--0-0100PP1uuuuu100ddddd")
+DEF_ENC(V6_vcvt_hf_sf,"00011111011vvvvvPP1uuuuu001ddddd")
+DEF_ENC(V6_vcvt_hf_ub,"00011110--0-0100PP1uuuuu001ddddd")
+DEF_ENC(V6_vcvt_hf_b,"00011110--0-0100PP1uuuuu010ddddd")
+DEF_ENC(V6_vcvt_hf_uh,"00011110--0-0100PP1uuuuu101ddddd")
+DEF_ENC(V6_vcvt_hf_h,"00011110--0-0100PP1uuuuu111ddddd")
+DEF_ENC(V6_vcvt_uh_hf,"00011110--0--101PP1uuuuu000ddddd")
+DEF_ENC(V6_vcvt_h_hf,"00011110--0-0110PP1uuuuu000ddddd")
+DEF_ENC(V6_vcvt_ub_hf,"00011111110vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vcvt_b_hf,"00011111110vvvvvPP1uuuuu110ddddd")
+
+/* IEEE FP vconv instructions */
+DEF_ENC(V6_vconv_sf_w,"00011110--0--101PP1uuuuu011ddddd")
+DEF_ENC(V6_vconv_w_sf,"00011110--0--101PP1uuuuu001ddddd")
+DEF_ENC(V6_vconv_hf_h,"00011110--0--101PP1uuuuu100ddddd")
+DEF_ENC(V6_vconv_h_hf,"00011110--0--101PP1uuuuu010ddddd")
+
 #endif /* NO MMVEC */
diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
index 5ef5baa404..8b832166e0 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -63,6 +63,9 @@ ITERATOR_INSN_ANY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX2,DESCR,CODE)
 EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS),  \
 DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
 
+#define ITERATOR_INSN_SHIFT_SLOT_FLT(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS,A_HVX_FLT),  \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
 
 #define ITERATOR_INSN_SHIFT3_SLOT(WIDTH,TAG,SYNTAX,DESCR,CODE) \
 EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS,A_CVI_VS_3SRC,A_NOTE_SHIFT_RESOURCE,A_NOTE_NOVP,A_NOTE_VA_UNARY),  \
@@ -3032,6 +3035,100 @@ ITERATOR_INSN_IEEE_FP_16_32_LATE(32, vabs_sf,  "Vd32.sf=vabs(Vu32.sf)", \
     "Vector IEEE abs: sf", \
     VdV.sf[i] = ((signF32UI(VuV.sf[i])) ? (VuV.sf[i] ^ 0x80000000) : VuV.sf[i]))
 
+/* Two pipes: P2 & P3, two outputs, 16-bit */
+#define ITERATOR_INSN_IEEE_FP_DOUBLE_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_16), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/* Two pipes: P2 & P3, two outputs, 32-bit output */
+#define ITERATOR_INSN_IEEE_FP_DOUBLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+    ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32), \
+    DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/* Single pipe, 16-bit output */
+#define ITERATOR_INSN_IEEE_FP_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+    ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_16), \
+    DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/* single pipe, output can feed 16- or 32-bit accumulate */
+#define ITERATOR_INSN_IEEE_FP_16_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+    ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_16,A_HVX_IEEE_FP_OUT_32), \
+    DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/******************************************************************************
+ * IEEE FP convert instructions
+ ******************************************************************************/
+
+ITERATOR_INSN_IEEE_FP_DOUBLE_16(32,  vcvt_hf_ub, "Vdd32.hf=vcvt(Vu32.ub)",
+    "Vector IEEE cvt from int: ub widen to hf",
+    VddV.v[0].hf[2*i]   = ub_to_f16(VuV.ub[4*i]);
+    VddV.v[0].hf[2*i+1] = ub_to_f16(VuV.ub[4*i+1]);
+    VddV.v[1].hf[2*i]   = ub_to_f16(VuV.ub[4*i+2]);
+    VddV.v[1].hf[2*i+1] = ub_to_f16(VuV.ub[4*i+3]))
+
+ITERATOR_INSN_IEEE_FP_DOUBLE_16(32,  vcvt_hf_b,  "Vdd32.hf=vcvt(Vu32.b)",
+    "Vector IEEE cvt from int: b widen to hf",
+    VddV.v[0].hf[2*i]   = b_to_f16(VuV.b[4*i]);
+    VddV.v[0].hf[2*i+1] = b_to_f16(VuV.b[4*i+1]);
+    VddV.v[1].hf[2*i]   = b_to_f16(VuV.b[4*i+2]);
+    VddV.v[1].hf[2*i+1] = b_to_f16(VuV.b[4*i+3]))
+
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vcvt_sf_hf, "Vdd32.sf=vcvt(Vu32.hf)",
+    "Vector IEEE cvt: hf widen to sf",
+    VddV.v[0].sf[i] = f16_to_f32(VuV.hf[2*i], &env->fp_status);
+    VddV.v[1].sf[i] = f16_to_f32(VuV.hf[2*i+1], &env->fp_status))
+
+ITERATOR_INSN_IEEE_FP_16(16,    vcvt_hf_uh, "Vd32.hf=vcvt(Vu32.uh)",
+    "Vector IEEE cvt from int: uh to hf",
+    VdV.hf[i] = uh_to_f16(VuV.uh[i]))
+ITERATOR_INSN_IEEE_FP_16(16,    vcvt_hf_h,  "Vd32.hf=vcvt(Vu32.h)",
+    "Vector IEEE cvt from int: h to hf",
+    VdV.hf[i] = h_to_f16(VuV.h[i]))
+ITERATOR_INSN_IEEE_FP_16_32(16, vcvt_uh_hf, "Vd32.uh=vcvt(Vu32.hf)",
+    "Vector IEEE cvt to int: hf to uh",
+    VdV.uh[i] = f16_to_uh(VuV.hf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_16_32(16, vcvt_h_hf,  "Vd32.h=vcvt(Vu32.hf)",
+    "Vector IEEE cvt to int: hf to h",
+    VdV.h[i]  = f16_to_h(VuV.hf[i], &env->fp_status))
+
+ITERATOR_INSN_IEEE_FP_16(32, vcvt_hf_sf, "Vd32.hf=vcvt(Vu32.sf,Vv32.sf)",
+    "Vector IEEE cvt: sf to hf",
+    VdV.hf[2*i]   = f32_to_f16(VuV.sf[i], &env->fp_status);
+    VdV.hf[2*i+1] = f32_to_f16(VvV.sf[i], &env->fp_status))
+
+ITERATOR_INSN_IEEE_FP_16_32(32, vcvt_ub_hf, "Vd32.ub=vcvt(Vu32.hf,Vv32.hf)", "Vector cvt to int: hf narrow to ub",
+    VdV.ub[4*i]   = f16_to_ub(VuV.hf[2*i], &env->fp_status);
+    VdV.ub[4*i+1] = f16_to_ub(VuV.hf[2*i+1], &env->fp_status);
+    VdV.ub[4*i+2] = f16_to_ub(VvV.hf[2*i], &env->fp_status);
+    VdV.ub[4*i+3] = f16_to_ub(VvV.hf[2*i+1], &env->fp_status))
+
+ITERATOR_INSN_IEEE_FP_16_32(32, vcvt_b_hf,  "Vd32.b=vcvt(Vu32.hf,Vv32.hf)",
+    "Vector cvt to int: hf narrow to b",
+    VdV.b[4*i]   = f16_to_b(VuV.hf[2*i], &env->fp_status);
+    VdV.b[4*i+1] = f16_to_b(VuV.hf[2*i+1], &env->fp_status);
+    VdV.b[4*i+2] = f16_to_b(VvV.hf[2*i], &env->fp_status);
+    VdV.b[4*i+3] = f16_to_b(VvV.hf[2*i+1], &env->fp_status))
+
+ITERATOR_INSN_SHIFT_SLOT_FLT(32, vconv_w_sf,"Vd32.w=Vu32.sf",
+    "Vector conversion of sf32 format to int w",
+    VdV.w[i] = conv_w_sf(VuV.sf[i], &env->fp_status))
+
+ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_h_hf,"Vd32.h=Vu32.hf",
+    "Vector conversion of hf16 format to int hw",
+    VdV.h[i] = conv_h_hf(VuV.hf[i], &env->fp_status))
+
+ITERATOR_INSN_SHIFT_SLOT_FLT(32, vconv_sf_w,"Vd32.sf=Vu32.w",
+    "Vector conversion of int w format to sf32",
+    VdV.sf[i] = conv_sf_w(VuV.w[i], &env->fp_status))
+
+ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_hf_h,"Vd32.hf=Vu32.h",
+    "Vector conversion of int hw format to hf16",
+    VdV.hf[i] = conv_hf_h(VuV.h[i], &env->fp_status))
+
 /******************************************************************************
  DEBUG Vector/Register Printing
  ******************************************************************************/
-- 
2.37.2

Re: [PATCH 07/13] target/hexagon: add v68 HVX IEEE float conversion insns

Posted by Taylor Simpson 1 week, 3 days ago

On Mon, Mar 23, 2026 at 7:16 AM Matheus Tavares Bernardino <
matheus.bernardino@oss.qualcomm.com> wrote:

> Add HVX IEEE floating-point conversion instructions:
> - vconv_hf_h, vconv_h_hf, vconv_sf_w, vconv_w_sf: vconv operations
> - vcvt_hf_sf, vcvt_sf_hf: float <-> half float conversions
> - vcvt_hf_b, vcvt_hf_h, vcvt_hf_ub, vcvt_hf_uh: int to half float
> - vcvt_b_hf, vcvt_h_hf, vcvt_ub_hf, vcvt_uh_hf: half float to int
>
> Signed-off-by: Matheus Tavares Bernardino <
> matheus.bernardino@oss.qualcomm.com>
> ---
>  target/hexagon/mmvec/kvx_ieee.h              | 21 +++++
>  target/hexagon/mmvec/kvx_ieee.c              | 98 ++++++++++++++++++++
>  target/hexagon/imported/mmvec/encode_ext.def | 18 ++++
>  target/hexagon/imported/mmvec/ext.idef       | 97 +++++++++++++++++++
>  4 files changed, 234 insertions(+)
>
> diff --git a/target/hexagon/mmvec/kvx_ieee.c
> b/target/hexagon/mmvec/kvx_ieee.c
> index 33621a15f3..bbeec09707 100644
> --- a/target/hexagon/mmvec/kvx_ieee.c
> +++ b/target/hexagon/mmvec/kvx_ieee.c
> @@ -131,3 +131,101 @@ uint16_t qf_min_hf(uint16_t a1, uint16_t a2,
> float_status *fp_status)
>      if (float16_is_pos_nan(f2) || float16_is_neg_nan(f1)) return a1;
>      return fp_min_hf(a1, a2, fp_status);
>  }
> +
> +uint16_t f32_to_f16(uint32_t a, float_status *fp_status)
> +{
> +    return float16_val(float32_to_float16(make_float32(a), true,
> fp_status));
> +}
> +
> +uint32_t f16_to_f32(uint16_t a, float_status *fp_status)
> +{
> +    return float32_val(float16_to_float32(make_float16(a), true,
> fp_status));
> +}
> +
> +uint16_t f16_to_uh(uint16_t op1, float_status *fp_status)
> +{
> +    return float16_to_uint16_scalbn(make_float16(op1),
> +                                    float_round_nearest_even,
>

Does HVX always use this rounding mode?  The scalar core uses the rounding
mode in USR.

There are several more instances below.


> +                                    0, fp_status);
> +}
> +
> +int16_t f16_to_h(uint16_t op1, float_status *fp_status)
> +{
> +    return float16_to_int16_scalbn(make_float16(op1),
> +                                   float_round_nearest_even,

> +                                   0, fp_status);
> +}
> +
> +uint8_t f16_to_ub(uint16_t op1, float_status *fp_status)
> +{
> +    return float16_to_uint8_scalbn(make_float16(op1),
> +                                   float_round_nearest_even,

> +                                   0, fp_status);
> +}
> +
> +int8_t f16_to_b(uint16_t op1, float_status *fp_status)
> +{
> +    return float16_to_int8_scalbn(make_float16(op1),
> +                                   float_round_nearest_even,

> +                                   0, fp_status);
> +}
> +
> +uint16_t uh_to_f16(uint16_t op1)
> +{
> +    return uint64_to_float16_scalbn(op1, float_round_nearest_even, 0);
> +}
> +
> +uint16_t h_to_f16(int16_t op1)
> +{
> +    return int64_to_float16_scalbn(op1, float_round_nearest_even, 0);
> +}
> +
> +uint16_t ub_to_f16(uint8_t op1)
> +{
> +    return uint64_to_float16_scalbn(op1, float_round_nearest_even, 0);
> +}
> +
> +uint16_t b_to_f16(int8_t op1)
> +{
> +    return int64_to_float16_scalbn(op1, float_round_nearest_even, 0);
> +}
> +
> +int32_t conv_sf_w(int32_t a, float_status *fp_status)
> +{
> +    return float32_val(int32_to_float32(a, fp_status));
> +}
> +
> +int16_t conv_hf_h(int16_t a, float_status *fp_status)
> +{
> +    return float16_val(int16_to_float16(a, fp_status));
> +}
> +
> +int32_t conv_w_sf(uint32_t a, float_status *fp_status)
> +{
> +    float_status scratch_fpst = {};
> +    const float32 W_MAX = int32_to_float32(INT32_MAX, &scratch_fpst);
> +    const float32 W_MIN = int32_to_float32(INT32_MIN, &scratch_fpst);
> +    float32 f1 = make_float32(a);
> +
> +    if (float32_is_any_nan(f1) || float32_is_infinity(f1) ||
> +        float32_le_quiet(W_MAX, f1, fp_status) ||
> +        float32_le_quiet(f1, W_MIN, fp_status)) {
> +        return float32_is_neg(f1) ? INT32_MIN : INT32_MAX;
> +    }
>

Does float32_to_int32 handle these checks?


> +    return float32_to_int32_round_to_zero(f1, fp_status);
>

Rounding mode?


> +}
> +
> +int16_t conv_h_hf(uint16_t a, float_status *fp_status)
> +{
> +    float_status scratch_fpst = {};
> +    const float16 H_MAX = int16_to_float16(INT16_MAX, &scratch_fpst);
> +    const float16 H_MIN = int16_to_float16(INT16_MIN, &scratch_fpst);
> +    float16 f1 = make_float16(a);
> +
> +    if (float16_is_any_nan(f1) || float16_is_infinity(f1) ||
> +        float16_le_quiet(H_MAX, f1, fp_status) ||
> +        float16_le_quiet(f1, H_MIN, fp_status)) {
> +        return float16_is_neg(f1) ? INT16_MIN : INT16_MAX;
> +    }
> +    return float16_to_int16_round_to_zero(f1, fp_status);
> +}
>

Ditto

Thanks,
Taylor

Re: [PATCH 07/13] target/hexagon: add v68 HVX IEEE float conversion insns

Posted by Matheus Bernardino 1 week, 2 days ago

On Mon, Mar 23, 2026 at 6:26 PM Taylor Simpson <ltaylorsimpson@gmail.com> wrote:
>
>
>
> On Mon, Mar 23, 2026 at 7:16 AM Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com> wrote:
>>
>>
>> diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
>> index 33621a15f3..bbeec09707 100644
>> --- a/target/hexagon/mmvec/kvx_ieee.c
>> +++ b/target/hexagon/mmvec/kvx_ieee.c
>> @@ -131,3 +131,101 @@ uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
>>      if (float16_is_pos_nan(f2) || float16_is_neg_nan(f1)) return a1;
>>      return fp_min_hf(a1, a2, fp_status);
>>  }
>> +
>> +uint16_t f16_to_uh(uint16_t op1, float_status *fp_status)
>> +{
>> +    return float16_to_uint16_scalbn(make_float16(op1),
>> +                                    float_round_nearest_even,
>
>
> Does HVX always use this rounding mode?  The scalar core uses the rounding mode in USR.

Yeah, almost always this mode, with the exception of some
instructions. It's not configurable via USR (or anything else).

>> +
>> +int32_t conv_w_sf(uint32_t a, float_status *fp_status)
>> +{
>> +    float_status scratch_fpst = {};
>> +    const float32 W_MAX = int32_to_float32(INT32_MAX, &scratch_fpst);
>> +    const float32 W_MIN = int32_to_float32(INT32_MIN, &scratch_fpst);
>> +    float32 f1 = make_float32(a);
>> +
>> +    if (float32_is_any_nan(f1) || float32_is_infinity(f1) ||
>> +        float32_le_quiet(W_MAX, f1, fp_status) ||
>> +        float32_le_quiet(f1, W_MIN, fp_status)) {
>> +        return float32_is_neg(f1) ? INT32_MIN : INT32_MAX;
>> +    }
>
>
> Does float32_to_int32 handle these checks?

Hmm, I don't think they are exactly the same. For example,
float32_to_int32 will return INT32_MAX for any NAN. But the hexagon
implementation here returns INT32_MIN for negative NAN.

>>
>> +    return float32_to_int32_round_to_zero(f1, fp_status);
>
>
> Rounding mode?

This is one of those exceptions I mentioned earlier.

Re: [PATCH 07/13] target/hexagon: add v68 HVX IEEE float conversion insns

Posted by Taylor Simpson 1 week, 2 days ago

On Tue, Mar 24, 2026 at 3:04 PM Matheus Bernardino <
matheus.bernardino@oss.qualcomm.com> wrote:

> On Mon, Mar 23, 2026 at 6:26 PM Taylor Simpson <ltaylorsimpson@gmail.com>
> wrote:
> >
> >
> >
> > On Mon, Mar 23, 2026 at 7:16 AM Matheus Tavares Bernardino <
> matheus.bernardino@oss.qualcomm.com> wrote:
> >>
> >>
> >> diff --git a/target/hexagon/mmvec/kvx_ieee.c
> b/target/hexagon/mmvec/kvx_ieee.c
> >> index 33621a15f3..bbeec09707 100644
> >> --- a/target/hexagon/mmvec/kvx_ieee.c
> >> +++ b/target/hexagon/mmvec/kvx_ieee.c
> >> @@ -131,3 +131,101 @@ uint16_t qf_min_hf(uint16_t a1, uint16_t a2,
> float_status *fp_status)
> >>      if (float16_is_pos_nan(f2) || float16_is_neg_nan(f1)) return a1;
> >>      return fp_min_hf(a1, a2, fp_status);
> >>  }
> >> +
> >> +uint16_t f16_to_uh(uint16_t op1, float_status *fp_status)
> >> +{
> >> +    return float16_to_uint16_scalbn(make_float16(op1),
> >> +                                    float_round_nearest_even,
> >
> >
> > Does HVX always use this rounding mode?  The scalar core uses the
> rounding mode in USR.
>
> Yeah, almost always this mode, with the exception of some
> instructions. It's not configurable via USR (or anything else).
>

You can set that in the hvx_fp_status, and the softfloat lib will handle it
from there.


>
> >> +
> >> +int32_t conv_w_sf(uint32_t a, float_status *fp_status)
> >> +{
> >> +    float_status scratch_fpst = {};
> >> +    const float32 W_MAX = int32_to_float32(INT32_MAX, &scratch_fpst);
> >> +    const float32 W_MIN = int32_to_float32(INT32_MIN, &scratch_fpst);
> >> +    float32 f1 = make_float32(a);
> >> +
> >> +    if (float32_is_any_nan(f1) || float32_is_infinity(f1) ||
> >> +        float32_le_quiet(W_MAX, f1, fp_status) ||
> >> +        float32_le_quiet(f1, W_MIN, fp_status)) {
> >> +        return float32_is_neg(f1) ? INT32_MIN : INT32_MAX;
> >> +    }
> >
> >
> > Does float32_to_int32 handle these checks?
>
> Hmm, I don't think they are exactly the same. For example,
> float32_to_int32 will return INT32_MAX for any NAN. But the hexagon
> implementation here returns INT32_MIN for negative NAN.
>

Look around in the softfloat code - especially fields in float_status.  The
scalar core has a few exceptions, but not a lot.