[PATCH v3 00/16] hexagon: add missing HVX float instructions

Matheus Tavares Bernardino posted 16 patches 2 days, 22 hours ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/cover.1775665981.git.matheus.bernardino@oss.qualcomm.com
Maintainers: Brian Cain <brian.cain@oss.qualcomm.com>, "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>
There is a newer version of this series
target/hexagon/cpu.h                          |  10 +-
target/hexagon/cpu_bits.h                     |  10 +-
target/hexagon/mmvec/hvx_ieee_fp.h            |  69 ++++
target/hexagon/mmvec/macros.h                 |   8 +
target/hexagon/mmvec/mmvec.h                  |   3 +
target/hexagon/printinsn.h                    |   2 +-
target/hexagon/translate.h                    |   1 +
tests/tcg/hexagon/hex_test.h                  |  32 ++
tests/tcg/hexagon/hvx_misc.h                  |  73 ++++
target/hexagon/attribs_def.h.inc              |   9 +
disas/hexagon.c                               |   3 +-
target/hexagon/arch.c                         |   8 +
target/hexagon/cpu.c                          |  18 +-
target/hexagon/decode.c                       |   4 +-
target/hexagon/mmvec/hvx_ieee_fp.c            | 136 +++++++
target/hexagon/printinsn.c                    |   7 +-
target/hexagon/translate.c                    |   5 +-
tests/tcg/hexagon/fp_hvx.c                    | 226 +++++++++++
tests/tcg/hexagon/fp_hvx_cmp.c                | 279 +++++++++++++
tests/tcg/hexagon/fp_hvx_cvt.c                | 219 +++++++++++
tests/tcg/hexagon/fp_hvx_disabled.c           |  57 +++
target/hexagon/gen_tcg_funcs.py               |  11 +
target/hexagon/hex_common.py                  |  37 ++
target/hexagon/imported/mmvec/encode_ext.def  | 126 ++++--
target/hexagon/imported/mmvec/ext.idef        | 369 +++++++++++++++++-
target/hexagon/meson.build                    |   1 +
.../dockerfiles/debian-hexagon-cross.docker   |  10 +-
tests/tcg/hexagon/Makefile.target             |  14 +
28 files changed, 1699 insertions(+), 48 deletions(-)
create mode 100644 target/hexagon/mmvec/hvx_ieee_fp.h
create mode 100644 target/hexagon/mmvec/hvx_ieee_fp.c
create mode 100644 tests/tcg/hexagon/fp_hvx.c
create mode 100644 tests/tcg/hexagon/fp_hvx_cmp.c
create mode 100644 tests/tcg/hexagon/fp_hvx_cvt.c
create mode 100644 tests/tcg/hexagon/fp_hvx_disabled.c
[PATCH v3 00/16] hexagon: add missing HVX float instructions
Posted by Matheus Tavares Bernardino 2 days, 22 hours ago
This patchset adds 59 HVX floating-point instructions from Hexagon
revisions v68 and v73 that were missing in QEMU. Tests for the new
instructions are added at the end of the series.

v2: https://lore.kernel.org/qemu-devel/cover.1775122853.git.matheus.bernardino@oss.qualcomm.com/
v1: https://lore.kernel.org/qemu-devel/cover.1774271525.git.matheus.bernardino@oss.qualcomm.com/

Changes in v3:
- Replaced uint32_t/uint16_t in MMVector with float32/float16, making the
  code clearer and greatly reducing its size.
- Inlined many functions: now that make_float is no longer needed, most of
  them were one-liners.

Brian Cain (1):
  tests/docker: Update hexagon cross toolchain to 22.1.0

Matheus Tavares Bernardino (15):
  target/hexagon: fix incorrect/too-permissive HVX encodings
  target/hexagon/cpu: add HVX IEEE FP extension
  hexagon: group cpu configurations in their own struct
  hexagon: print info on "-d in_asm" for disabled IEEE FP instructions
  target/hexagon: add v68 HVX IEEE float arithmetic insns
  target/hexagon: add v68 HVX IEEE float min/max insns
  target/hexagon: add v68 HVX IEEE float misc insns
  target/hexagon: add v68 HVX IEEE float conversion insns
  target/hexagon: add v68 HVX IEEE float compare insns
  target/hexagon: add v73 HVX IEEE bfloat16 insns
  tests/hexagon: add tests for v68 HVX IEEE float arithmetics
  tests/hexagon: add tests for v68 HVX IEEE float min/max
  tests/hexagon: add tests for v68 HVX IEEE float conversions
  tests/hexagon: add tests for v68 HVX IEEE float comparisons
  tests/hexagon: add tests for HVX bfloat

 target/hexagon/cpu.h                          |  10 +-
 target/hexagon/cpu_bits.h                     |  10 +-
 target/hexagon/mmvec/hvx_ieee_fp.h            |  69 ++++
 target/hexagon/mmvec/macros.h                 |   8 +
 target/hexagon/mmvec/mmvec.h                  |   3 +
 target/hexagon/printinsn.h                    |   2 +-
 target/hexagon/translate.h                    |   1 +
 tests/tcg/hexagon/hex_test.h                  |  32 ++
 tests/tcg/hexagon/hvx_misc.h                  |  73 ++++
 target/hexagon/attribs_def.h.inc              |   9 +
 disas/hexagon.c                               |   3 +-
 target/hexagon/arch.c                         |   8 +
 target/hexagon/cpu.c                          |  18 +-
 target/hexagon/decode.c                       |   4 +-
 target/hexagon/mmvec/hvx_ieee_fp.c            | 136 +++++++
 target/hexagon/printinsn.c                    |   7 +-
 target/hexagon/translate.c                    |   5 +-
 tests/tcg/hexagon/fp_hvx.c                    | 226 +++++++++++
 tests/tcg/hexagon/fp_hvx_cmp.c                | 279 +++++++++++++
 tests/tcg/hexagon/fp_hvx_cvt.c                | 219 +++++++++++
 tests/tcg/hexagon/fp_hvx_disabled.c           |  57 +++
 target/hexagon/gen_tcg_funcs.py               |  11 +
 target/hexagon/hex_common.py                  |  37 ++
 target/hexagon/imported/mmvec/encode_ext.def  | 126 ++++--
 target/hexagon/imported/mmvec/ext.idef        | 369 +++++++++++++++++-
 target/hexagon/meson.build                    |   1 +
 .../dockerfiles/debian-hexagon-cross.docker   |  10 +-
 tests/tcg/hexagon/Makefile.target             |  14 +
 28 files changed, 1699 insertions(+), 48 deletions(-)
 create mode 100644 target/hexagon/mmvec/hvx_ieee_fp.h
 create mode 100644 target/hexagon/mmvec/hvx_ieee_fp.c
 create mode 100644 tests/tcg/hexagon/fp_hvx.c
 create mode 100644 tests/tcg/hexagon/fp_hvx_cmp.c
 create mode 100644 tests/tcg/hexagon/fp_hvx_cvt.c
 create mode 100644 tests/tcg/hexagon/fp_hvx_disabled.c

Range-diff against v2:
 -:  ---------- >  1:  a04c3c5feb tests/docker: Update hexagon cross toolchain to 22.1.0
 -:  ---------- >  2:  c63e568f6c target/hexagon: fix incorrect/too-permissive HVX encodings
 -:  ---------- >  3:  bd05d9aa88 target/hexagon/cpu: add HVX IEEE FP extension
 -:  ---------- >  4:  d7cc954b23 hexagon: group cpu configurations in their own struct
 -:  ---------- >  5:  192fd1ca5c hexagon: print info on "-d in_asm" for disabled IEEE FP instructions
 1:  fd24bfcb36 !  6:  42b4b2d1c6 target/hexagon: add v68 HVX IEEE float arithmetic insns
    @@ target/hexagon/mmvec/hvx_ieee_fp.h (new)
     +
     +#include "fpu/softfloat.h"
     +
    -+/* Hexagon canonical NaN */
    -+#define FP32_DEF_NAN      0x7FFFFFFF
    -+#define FP16_DEF_NAN      0x7FFF
    ++#define f16_to_f32(A) float16_to_float32((A), true, &env->hvx_fp_status)
     +
    -+/*
    -+ * IEEE - FP ADD/SUB/MPY instructions
    -+ */
    -+uint32_t fp_mult_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    -+uint32_t fp_add_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    -+uint32_t fp_sub_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    -+
    -+uint16_t fp_mult_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+uint16_t fp_add_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+uint16_t fp_sub_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+
    -+uint32_t fp_mult_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+uint32_t fp_add_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+uint32_t fp_sub_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+
    -+/*
    -+ * IEEE - FP Accumulate instructions
    -+ */
    -+uint16_t fp_mult_hf_hf_acc(uint16_t a1, uint16_t a2, uint16_t acc,
    -+                           float_status *fp_status);
    -+uint32_t fp_mult_sf_hf_acc(uint16_t a1, uint16_t a2, uint32_t acc,
    -+                           float_status *fp_status);
    -+
    -+/*
    -+ * IEEE - FP Reduce instructions
    -+ */
    -+uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
    -+                  float_status *fp_status);
    -+uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2, uint16_t a3,
    -+                      uint16_t a4, float_status *fp_status);
    ++float32 fp_mult_sf_hf(float16 a1, float16 a2, float_status *fp_status);
    ++float32 fp_vdmpy(float16 a1, float16 a2, float16 a3, float16 a4,
    ++                 float_status *fp_status);
     +
     +#endif
     
    @@ target/hexagon/mmvec/mmvec.h: typedef union {
          int16_t   h[MAX_VEC_SIZE_BYTES / 2];
          uint8_t  ub[MAX_VEC_SIZE_BYTES / 1];
          int8_t    b[MAX_VEC_SIZE_BYTES / 1];
    -+    int32_t  sf[MAX_VEC_SIZE_BYTES / 4];   /* single float (32-bit) */
    -+    int16_t  hf[MAX_VEC_SIZE_BYTES / 2];   /* half float (16-bit) */
    ++    float32  sf[MAX_VEC_SIZE_BYTES / 4];
    ++    float16  hf[MAX_VEC_SIZE_BYTES / 2];
      } MMVector;
      
      typedef union {
    @@ target/hexagon/mmvec/hvx_ieee_fp.c (new)
     +#include "qemu/osdep.h"
     +#include "hvx_ieee_fp.h"
     +
    -+#define DEF_FP_INSN_2(name, rt, a1t, a2t, op) \
    -+    uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
    -+                           float_status *fp_status) { \
    -+        float##a1t f1 = make_float##a1t(a1); \
    -+        float##a2t f2 = make_float##a2t(a2); \
    -+        return (op); \
    -+    }
    -+
    -+#define DEF_FP_INSN_3(name, rt, a1t, a2t, a3t, op) \
    -+    uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
    -+                           uint##a3t##_t a3, float_status *fp_status) { \
    -+        float##a1t f1 = make_float##a1t(a1); \
    -+        float##a2t f2 = make_float##a2t(a2); \
    -+        float##a3t f3 = make_float##a3t(a3); \
    -+        return (op); \
    -+    }
    -+
    -+DEF_FP_INSN_2(mult_sf_sf, 32, 32, 32, float32_mul(f1, f2, fp_status))
    -+DEF_FP_INSN_2(add_sf_sf, 32, 32, 32, float32_add(f1, f2, fp_status))
    -+DEF_FP_INSN_2(sub_sf_sf, 32, 32, 32, float32_sub(f1, f2, fp_status))
    -+
    -+DEF_FP_INSN_2(mult_hf_hf, 16, 16, 16, float16_mul(f1, f2, fp_status))
    -+DEF_FP_INSN_2(add_hf_hf, 16, 16, 16, float16_add(f1, f2, fp_status))
    -+DEF_FP_INSN_2(sub_hf_hf, 16, 16, 16, float16_sub(f1, f2, fp_status))
    -+
    -+DEF_FP_INSN_2(mult_sf_hf, 32, 16, 16,
    -+              float32_mul(float16_to_float32(f1, true, fp_status),
    -+                          float16_to_float32(f2, true, fp_status),
    -+                          fp_status))
    -+DEF_FP_INSN_2(add_sf_hf, 32, 16, 16,
    -+              float32_add(float16_to_float32(f1, true, fp_status),
    -+                          float16_to_float32(f2, true, fp_status),
    -+                          fp_status))
    -+DEF_FP_INSN_2(sub_sf_hf, 32, 16, 16,
    -+              float32_sub(float16_to_float32(f1, true, fp_status),
    -+                          float16_to_float32(f2, true, fp_status),
    -+                          fp_status))
    -+
    -+DEF_FP_INSN_3(mult_hf_hf_acc, 16, 16, 16, 16,
    -+              float16_muladd(f1, f2, f3, 0, fp_status))
    -+DEF_FP_INSN_3(mult_sf_hf_acc, 32, 16, 16, 32,
    -+              float32_muladd(float16_to_float32(f1, true, fp_status),
    -+                             float16_to_float32(f2, true, fp_status),
    -+                             f3, 0, fp_status))
    -+
    -+uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
    -+                 float_status *fp_status)
    ++float32 fp_mult_sf_hf(float16 a1, float16 a2, float_status *fp_status)
     +{
    -+    float32 prod1 = fp_mult_sf_hf(a1, a3, fp_status);
    -+    float32 prod2 = fp_mult_sf_hf(a2, a4, fp_status);
    -+    return fp_add_sf_sf(float32_val(prod1), float32_val(prod2), fp_status);
    ++    return float32_mul(float16_to_float32(a1, true, fp_status),
    ++                       float16_to_float32(a2, true, fp_status), fp_status);
     +}
     +
    -+uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2,
    -+                      uint16_t a3, uint16_t a4,
    -+                      float_status *fp_status)
    ++float32 fp_vdmpy(float16 a1, float16 a2, float16 a3, float16 a4,
    ++                 float_status *fp_status)
     +{
    -+    float32 red = fp_vdmpy(a1, a2, a3, a4, fp_status);
    -+    return fp_add_sf_sf(float32_val(red), acc, fp_status);
    ++    return float32_add(fp_mult_sf_hf(a1, a3, fp_status),
    ++                       fp_mult_sf_hf(a2, a4, fp_status), fp_status);
     +}
     
      ## target/hexagon/hex_common.py ##
    @@ target/hexagon/imported/mmvec/ext.idef: EXTINSN(V6_vprefixqw,"Vd32.w=prefixsum(Q
     +/* IEEE FP multiply instructions */
     +ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(32, vmpy_sf_sf,
     +    "Vd32.sf=vmpy(Vu32.sf,Vv32.sf)", "Vector IEEE mul: sf",
    -+    VdV.sf[i] = fp_mult_sf_sf(VuV.sf[i], VvV.sf[i], &env->hvx_fp_status))
    ++    VdV.sf[i] = float32_mul(VuV.sf[i], VvV.sf[i], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_hf,
     +    "Vdd32.sf=vmpy(Vu32.hf,Vv32.hf)", "Vector IEEE mul: hf widen to sf",
     +    VddV.v[0].sf[i] = fp_mult_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->hvx_fp_status);
     +    VddV.v[1].sf[i] = fp_mult_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16(16, vmpy_hf_hf,     "Vd32.hf=vmpy(Vu32.hf,Vv32.hf)",
     +    "Vector IEEE mul: hf",
    -+    VdV.hf[i] = fp_mult_hf_hf(VuV.hf[i], VvV.hf[i], &env->hvx_fp_status))
    ++    VdV.hf[i] = float16_mul(VuV.hf[i], VvV.hf[i], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_32(32, vdmpy_sf_hf,     "Vd32.sf=vdmpy(Vu32.hf,Vv32.hf)",
     +    "Vector IEEE mul reduction: hf widen to sf",
     +    VdV.sf[i] = fp_vdmpy(VuV.hf[2*i+1], VuV.hf[2*i], VvV.hf[2*i+1],
    @@ target/hexagon/imported/mmvec/ext.idef: EXTINSN(V6_vprefixqw,"Vd32.w=prefixsum(Q
     +/* IEEE FP multiply-accumulate instructions */
     +ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_hf_acc,
     +    "Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf)", "Vector IEEE fma: hf widen to sf",
    -+    VxxV.v[0].sf[i] = fp_mult_sf_hf_acc(VuV.hf[2*i], VvV.hf[2*i],
    -+        VxxV.v[0].sf[i], &env->hvx_fp_status);
    -+    VxxV.v[1].sf[i] = fp_mult_sf_hf_acc(VuV.hf[2*i+1], VvV.hf[2*i+1],
    -+        VxxV.v[1].sf[i], &env->hvx_fp_status))
    ++    VxxV.v[0].sf[i] = float32_muladd(f16_to_f32(VuV.hf[2*i]),
    ++                                     f16_to_f32(VvV.hf[2*i]),
    ++                                     VxxV.v[0].sf[i], 0, &env->hvx_fp_status);
    ++    VxxV.v[1].sf[i] = float32_muladd(f16_to_f32(VuV.hf[2*i+1]),
    ++                                     f16_to_f32(VvV.hf[2*i+1]),
    ++                                     VxxV.v[1].sf[i], 0, &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_ACC_16(16, vmpy_hf_hf_acc,
     +    "Vx32.hf+=vmpy(Vu32.hf,Vv32.hf)", "Vector IEEE fma: hf",
    -+    VxV.hf[i] = fp_mult_hf_hf_acc(VuV.hf[i], VvV.hf[i], VxV.hf[i], &env->hvx_fp_status))
    ++    VxV.hf[i] = float16_muladd(VuV.hf[i], VvV.hf[i], VxV.hf[i], 0, &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_ACC_32(32, vdmpy_sf_hf_acc,
     +    "Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf)", "Vector IEEE fma reduce: hf widen to sf",
    -+    VxV.sf[i] = fp_vdmpy_acc(VxV.sf[i], VuV.hf[2*i+1], VuV.hf[2*i], VvV.hf[2*i+1],
    -+        VvV.hf[2*i], &env->hvx_fp_status))
    ++    VxV.sf[i] = float32_add(fp_vdmpy(VuV.hf[2*i+1], VuV.hf[2*i],
    ++                                     VvV.hf[2*i+1], VvV.hf[2*i],
    ++                                     &env->hvx_fp_status),
    ++                            VxV.sf[i], &env->hvx_fp_status))
     +
     +/* IEEE FP add/sub instructions */
     +ITERATOR_INSN_IEEE_FP_32(32, vadd_sf_sf, "Vd32.sf=vadd(Vu32.sf,Vv32.sf)",
     +    "Vector IEEE add: sf",
    -+    VdV.sf[i] = fp_add_sf_sf(VuV.sf[i], VvV.sf[i], &env->hvx_fp_status))
    ++    VdV.sf[i] = float32_add(VuV.sf[i], VvV.sf[i], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_32(32, vsub_sf_sf, "Vd32.sf=vsub(Vu32.sf,Vv32.sf)",
     +    "Vector IEEE sub: sf",
    -+    VdV.sf[i] = fp_sub_sf_sf(VuV.sf[i], VvV.sf[i], &env->hvx_fp_status))
    ++    VdV.sf[i] = float32_sub(VuV.sf[i], VvV.sf[i], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16(16, vadd_hf_hf, "Vd32.hf=vadd(Vu32.hf,Vv32.hf)",
     +    "Vector IEEE add: hf",
    -+    VdV.hf[i] = fp_add_hf_hf(VuV.hf[i], VvV.hf[i], &env->hvx_fp_status))
    ++    VdV.hf[i] = float16_add(VuV.hf[i], VvV.hf[i], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16(16, vsub_hf_hf, "Vd32.hf=vsub(Vu32.hf,Vv32.hf)",
     +    "Vector IEEE sub: hf",
    -+    VdV.hf[i] = fp_sub_hf_hf(VuV.hf[i], VvV.hf[i], &env->hvx_fp_status))
    ++    VdV.hf[i] = float16_sub(VuV.hf[i], VvV.hf[i], &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vadd_sf_hf,
     +    "Vdd32.sf=vadd(Vu32.hf,Vv32.hf)",  "Vector IEEE add: hf widen to sf",
    -+    VddV.v[0].sf[i] = fp_add_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->hvx_fp_status);
    -+    VddV.v[1].sf[i] = fp_add_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->hvx_fp_status))
    ++    VddV.v[0].sf[i] = float32_add(f16_to_f32(VuV.hf[2*i]),
    ++                                  f16_to_f32(VvV.hf[2*i]), &env->hvx_fp_status);
    ++    VddV.v[1].sf[i] = float32_add(f16_to_f32(VuV.hf[2*i+1]),
    ++                                  f16_to_f32(VvV.hf[2*i+1]), &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vsub_sf_hf,
     +    "Vdd32.sf=vsub(Vu32.hf,Vv32.hf)",  "Vector IEEE sub: hf widen to sf",
    -+    VddV.v[0].sf[i] = fp_sub_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->hvx_fp_status);
    -+    VddV.v[1].sf[i] = fp_sub_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->hvx_fp_status))
    ++    VddV.v[0].sf[i] = float32_sub(f16_to_f32(VuV.hf[2*i]),
    ++                                  f16_to_f32(VvV.hf[2*i]), &env->hvx_fp_status);
    ++    VddV.v[1].sf[i] = float32_sub(f16_to_f32(VuV.hf[2*i+1]),
    ++                                  f16_to_f32(VvV.hf[2*i+1]), &env->hvx_fp_status))
      
      /******************************************************************************
       DEBUG Vector/Register Printing
 2:  30254b5750 !  7:  0104072468 target/hexagon: add v68 HVX IEEE float min/max insns
    @@ Commit message
         Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
     
      ## target/hexagon/mmvec/hvx_ieee_fp.h ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.h: uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
    - uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2, uint16_t a3,
    -                       uint16_t a4, float_status *fp_status);
    +@@ target/hexagon/mmvec/hvx_ieee_fp.h: float32 fp_mult_sf_hf(float16 a1, float16 a2, float_status *fp_status);
    + float32 fp_vdmpy(float16 a1, float16 a2, float16 a3, float16 a4,
    +                  float_status *fp_status);
      
    -+/* IEEE - FP min/max instructions */
    -+uint32_t fp_min_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    -+uint32_t fp_max_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    -+uint16_t fp_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+uint16_t fp_max_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    -+
     +/* Qfloat min/max treat +NaN as greater than +INF and -NaN as smaller than -INF */
     +uint32_t qf_max_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
     +uint32_t qf_min_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    @@ target/hexagon/mmvec/hvx_ieee_fp.h: uint32_t fp_vdmpy(uint16_t a1, uint16_t a2,
     +
      #endif
     
    + ## target/hexagon/attribs_def.h.inc ##
    +@@ target/hexagon/attribs_def.h.inc: DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "")
    + DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "")
    + DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "")
    + DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "")
    ++DEF_ATTRIB(CVI_VA_2SRC, "Execs on multimedia vector engine; requires two srcs", "", "")
    + 
    + DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "")
    + 
    +@@ target/hexagon/attribs_def.h.inc: DEF_ATTRIB(HVX_IEEE_FP_ACC, "HVX IEEE FP accumulate instruction", "", "")
    + DEF_ATTRIB(HVX_IEEE_FP_OUT_16, "HVX IEEE FP 16-bit output", "", "")
    + DEF_ATTRIB(HVX_IEEE_FP_OUT_32, "HVX IEEE FP 32-bit output", "", "")
    + DEF_ATTRIB(CVI_VX_NO_TMP_LD, "HVX multiply without tmp load", "", "")
    ++DEF_ATTRIB(HVX_FLT, "This a floating point HVX instruction.", "", "")
    + 
    + /* Keep this as the last attribute: */
    + DEF_ATTRIB(ZZ_LASTATTRIB, "Last attribute in the file", "", "")
    +
      ## target/hexagon/mmvec/hvx_ieee_fp.c ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.c: uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2,
    -     float32 red = fp_vdmpy(a1, a2, a3, a4, fp_status);
    -     return fp_add_sf_sf(float32_val(red), acc, fp_status);
    +@@ target/hexagon/mmvec/hvx_ieee_fp.c: float32 fp_vdmpy(float16 a1, float16 a2, float16 a3, float16 a4,
    +     return float32_add(fp_mult_sf_hf(a1, a3, fp_status),
    +                        fp_mult_sf_hf(a2, a4, fp_status), fp_status);
      }
     +
    -+DEF_FP_INSN_2(min_sf, 32, 32, 32, float32_min(f1, f2, fp_status))
    -+DEF_FP_INSN_2(max_sf, 32, 32, 32, float32_max(f1, f2, fp_status))
    -+DEF_FP_INSN_2(min_hf, 16, 16, 16, float16_min(f1, f2, fp_status))
    -+DEF_FP_INSN_2(max_hf, 16, 16, 16, float16_max(f1, f2, fp_status))
    -+
     +#define float32_is_pos_nan(X) (float32_is_any_nan(X) && !float32_is_neg(X))
     +#define float32_is_neg_nan(X) (float32_is_any_nan(X) && float32_is_neg(X))
     +#define float16_is_pos_nan(X) (float16_is_any_nan(X) && !float16_is_neg(X))
     +#define float16_is_neg_nan(X) (float16_is_any_nan(X) && float16_is_neg(X))
     +
    -+uint32_t qf_max_sf(uint32_t a1, uint32_t a2, float_status *fp_status)
    ++float32 qf_max_sf(float32 a1, float32 a2, float_status *fp_status)
     +{
    -+    float32 f1 = make_float32(a1);
    -+    float32 f2 = make_float32(a2);
    -+    if (float32_is_pos_nan(f1) || float32_is_neg_nan(f2)) {
    ++    if (float32_is_pos_nan(a1) || float32_is_neg_nan(a2)) {
     +        return a1;
     +    }
    -+    if (float32_is_pos_nan(f2) || float32_is_neg_nan(f1)) {
    ++    if (float32_is_pos_nan(a2) || float32_is_neg_nan(a1)) {
     +        return a2;
     +    }
    -+    return fp_max_sf(a1, a2, fp_status);
    ++    return float32_max(a1, a2, fp_status);
     +}
     +
    -+uint32_t qf_min_sf(uint32_t a1, uint32_t a2, float_status *fp_status)
    ++float32 qf_min_sf(float32 a1, float32 a2, float_status *fp_status)
     +{
    -+    float32 f1 = make_float32(a1);
    -+    float32 f2 = make_float32(a2);
    -+    if (float32_is_pos_nan(f1) || float32_is_neg_nan(f2)) {
    ++    if (float32_is_pos_nan(a1) || float32_is_neg_nan(a2)) {
     +        return a2;
     +    }
    -+    if (float32_is_pos_nan(f2) || float32_is_neg_nan(f1)) {
    ++    if (float32_is_pos_nan(a2) || float32_is_neg_nan(a1)) {
     +        return a1;
     +    }
    -+    return fp_min_sf(a1, a2, fp_status);
    ++    return float32_min(a1, a2, fp_status);
     +}
     +
    -+uint16_t qf_max_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
    ++float16 qf_max_hf(float16 a1, float16 a2, float_status *fp_status)
     +{
    -+    float16 f1 = make_float16(a1);
    -+    float16 f2 = make_float16(a2);
    -+    if (float16_is_pos_nan(f1) || float16_is_neg_nan(f2)) {
    ++    if (float16_is_pos_nan(a1) || float16_is_neg_nan(a2)) {
     +        return a1;
     +    }
    -+    if (float16_is_pos_nan(f2) || float16_is_neg_nan(f1)) {
    ++    if (float16_is_pos_nan(a2) || float16_is_neg_nan(a1)) {
     +        return a2;
     +    }
    -+    return fp_max_hf(a1, a2, fp_status);
    ++    return float16_max(a1, a2, fp_status);
     +}
     +
    -+uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
    ++float16 qf_min_hf(float16 a1, float16 a2, float_status *fp_status)
     +{
    -+    float16 f1 = make_float16(a1);
    -+    float16 f2 = make_float16(a2);
    -+    if (float16_is_pos_nan(f1) || float16_is_neg_nan(f2)) {
    ++    if (float16_is_pos_nan(a1) || float16_is_neg_nan(a2)) {
     +        return a2;
     +    }
    -+    if (float16_is_pos_nan(f2) || float16_is_neg_nan(f1)) {
    ++    if (float16_is_pos_nan(a2) || float16_is_neg_nan(a1)) {
     +        return a1;
     +    }
    -+    return fp_min_hf(a1, a2, fp_status);
    ++    return float16_min(a1, a2, fp_status);
     +}
     
    + ## target/hexagon/hex_common.py ##
    +@@ target/hexagon/hex_common.py: def need_env(tag):
    +             "A_CVI_GATHER" in attribdict[tag] or
    +             "A_CVI_SCATTER" in attribdict[tag] or
    +             "A_HVX_IEEE_FP" in attribdict[tag] or
    ++            "A_HVX_FLT" in attribdict[tag] or
    +             "A_IMPLICIT_WRITES_USR" in attribdict[tag])
    + 
    + 
    +
      ## target/hexagon/imported/mmvec/encode_ext.def ##
     @@ target/hexagon/imported/mmvec/encode_ext.def: DEF_ENC(V6_vsub_sf_hf,"00011111100vvvvvPP1uuuuu101ddddd")
      DEF_ENC(V6_vadd_hf_hf,"00011111101vvvvvPP1uuuuu111ddddd")
    @@ target/hexagon/imported/mmvec/ext.idef
      #define ITERATOR_INSN2_ANY_SLOT(WIDTH,TAG,SYNTAX,SYNTAX2,DESCR,CODE) \
      ITERATOR_INSN_ANY_SLOT(WIDTH,TAG,SYNTAX2,DESCR,CODE)
     @@ target/hexagon/imported/mmvec/ext.idef: ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vsub_sf_hf,
    -     VddV.v[0].sf[i] = fp_sub_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->hvx_fp_status);
    -     VddV.v[1].sf[i] = fp_sub_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->hvx_fp_status))
    +     VddV.v[1].sf[i] = float32_sub(f16_to_f32(VuV.hf[2*i+1]),
    +                                   f16_to_f32(VvV.hf[2*i+1]), &env->hvx_fp_status))
      
     +#define ITERATOR_INSN_IEEE_FP_16_32_LATE(WIDTH,TAG,SYNTAX,DESCR,CODE) \
     +EXTINSN(V6_##TAG, SYNTAX, \
    @@ target/hexagon/imported/mmvec/ext.idef: ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vsub
     +
     +/* IEEE FP min/max instructions */
     +ITERATOR_INSN_IEEE_FP_16_32_LATE(16, vfmin_hf, "Vd32.hf=vfmin(Vu32.hf,Vv32.hf)", \
    -+    "Vector IEEE min: hf",  VdV.hf[i] = fp_min_hf(VuV.hf[i], VvV.hf[i], \
    ++    "Vector IEEE min: hf",  VdV.hf[i] = float16_min(VuV.hf[i], VvV.hf[i], \
     +	&env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16_32_LATE(32, vfmin_sf, "Vd32.sf=vfmin(Vu32.sf,Vv32.sf)", \
    -+    "Vector IEEE min: sf",  VdV.sf[i] = fp_min_sf(VuV.sf[i], VvV.sf[i], \
    ++    "Vector IEEE min: sf",  VdV.sf[i] = float32_min(VuV.sf[i], VvV.sf[i], \
     +	&env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16_32_LATE(16, vfmax_hf,  "Vd32.hf=vfmax(Vu32.hf,Vv32.hf)", \
    -+    "Vector IEEE max: hf", VdV.hf[i] = fp_max_hf(VuV.hf[i], VvV.hf[i], \
    ++    "Vector IEEE max: hf", VdV.hf[i] = float16_max(VuV.hf[i], VvV.hf[i], \
     +	&env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16_32_LATE(32, vfmax_sf,  "Vd32.sf=vfmax(Vu32.sf,Vv32.sf)", \
    -+    "Vector IEEE max: sf", VdV.sf[i] = fp_max_sf(VuV.sf[i], VvV.sf[i], \
    ++    "Vector IEEE max: sf", VdV.sf[i] = float32_max(VuV.sf[i], VvV.sf[i], \
     +	&env->hvx_fp_status))
     +
     +ITERATOR_INSN_ANY_SLOT_2SRC(32,vmax_sf,"Vd32.sf=vmax(Vu32.sf,Vv32.sf)", \
 3:  c6fe780abf =  8:  2aa7f10503 target/hexagon: add v68 HVX IEEE float misc insns
 4:  85dccc1913 !  9:  99bac24648 target/hexagon: add v68 HVX IEEE float conversion insns
    @@ Commit message
         Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
     
      ## target/hexagon/mmvec/hvx_ieee_fp.h ##
    +@@
    + #include "fpu/softfloat.h"
    + 
    + #define f16_to_f32(A) float16_to_float32((A), true, &env->hvx_fp_status)
    ++#define f32_to_f16(A) float32_to_float16((A), true, &env->hvx_fp_status)
    + 
    + float32 fp_mult_sf_hf(float16 a1, float16 a2, float_status *fp_status);
    + float32 fp_vdmpy(float16 a1, float16 a2, float16 a3, float16 a4,
     @@ target/hexagon/mmvec/hvx_ieee_fp.h: uint32_t qf_min_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
      uint16_t qf_max_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
      uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
      
    -+/*
    -+ * IEEE - FP Convert instructions
    -+ */
    -+uint16_t f32_to_f16(uint32_t a, float_status *fp_status);
    -+uint32_t f16_to_f32(uint16_t a, float_status *fp_status);
    -+
    -+uint16_t f16_to_uh(uint16_t op1, float_status *fp_status);
    -+int16_t  f16_to_h(uint16_t op1, float_status *fp_status);
    -+uint8_t  f16_to_ub(uint16_t op1, float_status *fp_status);
    -+int8_t   f16_to_b(uint16_t op1, float_status *fp_status);
    -+
    -+uint16_t uh_to_f16(uint16_t op1);
    -+uint16_t h_to_f16(int16_t op1);
    -+uint16_t ub_to_f16(uint8_t op1);
    -+uint16_t b_to_f16(int8_t op1);
    -+
    -+int32_t conv_sf_w(int32_t a, float_status *fp_status);
    -+int16_t conv_hf_h(int16_t a, float_status *fp_status);
    -+int32_t conv_w_sf(uint32_t a, float_status *fp_status);
    -+int16_t conv_h_hf(uint16_t a, float_status *fp_status);
    ++int32_t conv_w_sf(float32 a, float_status *fp_status);
    ++int16_t conv_h_hf(float16 a, float_status *fp_status);
     +
      #endif
     
      ## target/hexagon/mmvec/hvx_ieee_fp.c ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.c: uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
    +@@ target/hexagon/mmvec/hvx_ieee_fp.c: float16 qf_min_hf(float16 a1, float16 a2, float_status *fp_status)
          }
    -     return fp_min_hf(a1, a2, fp_status);
    +     return float16_min(a1, a2, fp_status);
      }
     +
    -+uint16_t f32_to_f16(uint32_t a, float_status *fp_status)
    ++int32_t conv_w_sf(float32 a, float_status *fp_status)
     +{
    -+    return float16_val(float32_to_float16(make_float32(a), true, fp_status));
    -+}
    -+
    -+uint32_t f16_to_f32(uint16_t a, float_status *fp_status)
    -+{
    -+    return float32_val(float16_to_float32(make_float16(a), true, fp_status));
    -+}
    -+
    -+uint16_t f16_to_uh(uint16_t op1, float_status *fp_status)
    -+{
    -+    return float16_to_uint16_scalbn(make_float16(op1),
    -+                                    float_round_nearest_even,
    -+                                    0, fp_status);
    -+}
    -+
    -+int16_t f16_to_h(uint16_t op1, float_status *fp_status)
    -+{
    -+    return float16_to_int16_scalbn(make_float16(op1),
    -+                                   float_round_nearest_even,
    -+                                   0, fp_status);
    -+}
    -+
    -+uint8_t f16_to_ub(uint16_t op1, float_status *fp_status)
    -+{
    -+    return float16_to_uint8_scalbn(make_float16(op1),
    -+                                   float_round_nearest_even,
    -+                                   0, fp_status);
    -+}
    -+
    -+int8_t f16_to_b(uint16_t op1, float_status *fp_status)
    -+{
    -+    return float16_to_int8_scalbn(make_float16(op1),
    -+                                   float_round_nearest_even,
    -+                                   0, fp_status);
    -+}
    -+
    -+uint16_t uh_to_f16(uint16_t op1)
    -+{
    -+    return uint64_to_float16_scalbn(op1, float_round_nearest_even, 0);
    -+}
    -+
    -+uint16_t h_to_f16(int16_t op1)
    -+{
    -+    return int64_to_float16_scalbn(op1, float_round_nearest_even, 0);
    -+}
    -+
    -+uint16_t ub_to_f16(uint8_t op1)
    -+{
    -+    return uint64_to_float16_scalbn(op1, float_round_nearest_even, 0);
    -+}
    -+
    -+uint16_t b_to_f16(int8_t op1)
    -+{
    -+    return int64_to_float16_scalbn(op1, float_round_nearest_even, 0);
    -+}
    -+
    -+int32_t conv_sf_w(int32_t a, float_status *fp_status)
    -+{
    -+    return float32_val(int32_to_float32(a, fp_status));
    -+}
    -+
    -+int16_t conv_hf_h(int16_t a, float_status *fp_status)
    -+{
    -+    return float16_val(int16_to_float16(a, fp_status));
    -+}
    -+
    -+int32_t conv_w_sf(uint32_t a, float_status *fp_status)
    -+{
    -+    float32 f1 = make_float32(a);
     +    /* float32_to_int32 converts any NaN to MAX, hexagon looks at the sign. */
    -+    if (float32_is_any_nan(f1)) {
    -+        return float32_is_neg(f1) ? INT32_MIN : INT32_MAX;
    ++    if (float32_is_any_nan(a)) {
    ++        return float32_is_neg(a) ? INT32_MIN : INT32_MAX;
     +    }
    -+    return float32_to_int32_round_to_zero(f1, fp_status);
    ++    return float32_to_int32_round_to_zero(a, fp_status);
     +}
     +
    -+int16_t conv_h_hf(uint16_t a, float_status *fp_status)
    ++int16_t conv_h_hf(float16 a, float_status *fp_status)
     +{
    -+    float16 f1 = make_float16(a);
     +    /* float16_to_int16 converts any NaN to MAX, hexagon looks at the sign. */
    -+    if (float16_is_any_nan(f1)) {
    -+        return float16_is_neg(f1) ? INT16_MIN : INT16_MAX;
    ++    if (float16_is_any_nan(a)) {
    ++        return float16_is_neg(a) ? INT16_MIN : INT16_MAX;
     +    }
    -+    return float16_to_int16_round_to_zero(f1, fp_status);
    ++    return float16_to_int16_round_to_zero(a, fp_status);
     +}
     
      ## target/hexagon/imported/mmvec/encode_ext.def ##
    @@ target/hexagon/imported/mmvec/ext.idef: ITERATOR_INSN_IEEE_FP_16_32_LATE(16, vab
     +
     +ITERATOR_INSN_IEEE_FP_DOUBLE_16(32,  vcvt_hf_ub, "Vdd32.hf=vcvt(Vu32.ub)",
     +    "Vector IEEE cvt from int: ub widen to hf",
    -+    VddV.v[0].hf[2*i]   = ub_to_f16(VuV.ub[4*i]);
    -+    VddV.v[0].hf[2*i+1] = ub_to_f16(VuV.ub[4*i+1]);
    -+    VddV.v[1].hf[2*i]   = ub_to_f16(VuV.ub[4*i+2]);
    -+    VddV.v[1].hf[2*i+1] = ub_to_f16(VuV.ub[4*i+3]))
    ++    VddV.v[0].hf[2*i]   = uint64_to_float16_scalbn(VuV.ub[4*i], float_round_nearest_even, 0);
    ++    VddV.v[0].hf[2*i+1] = uint64_to_float16_scalbn(VuV.ub[4*i+1], float_round_nearest_even, 0);
    ++    VddV.v[1].hf[2*i]   = uint64_to_float16_scalbn(VuV.ub[4*i+2], float_round_nearest_even, 0);
    ++    VddV.v[1].hf[2*i+1] = uint64_to_float16_scalbn(VuV.ub[4*i+3], float_round_nearest_even, 0))
     +
     +ITERATOR_INSN_IEEE_FP_DOUBLE_16(32,  vcvt_hf_b,  "Vdd32.hf=vcvt(Vu32.b)",
     +    "Vector IEEE cvt from int: b widen to hf",
    -+    VddV.v[0].hf[2*i]   = b_to_f16(VuV.b[4*i]);
    -+    VddV.v[0].hf[2*i+1] = b_to_f16(VuV.b[4*i+1]);
    -+    VddV.v[1].hf[2*i]   = b_to_f16(VuV.b[4*i+2]);
    -+    VddV.v[1].hf[2*i+1] = b_to_f16(VuV.b[4*i+3]))
    ++    VddV.v[0].hf[2*i]   = int64_to_float16_scalbn(VuV.b[4*i], float_round_nearest_even, 0);
    ++    VddV.v[0].hf[2*i+1] = int64_to_float16_scalbn(VuV.b[4*i+1], float_round_nearest_even, 0);
    ++    VddV.v[1].hf[2*i]   = int64_to_float16_scalbn(VuV.b[4*i+2], float_round_nearest_even, 0);
    ++    VddV.v[1].hf[2*i+1] = int64_to_float16_scalbn(VuV.b[4*i+3], float_round_nearest_even, 0))
     +
     +ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vcvt_sf_hf, "Vdd32.sf=vcvt(Vu32.hf)",
     +    "Vector IEEE cvt: hf widen to sf",
    -+    VddV.v[0].sf[i] = f16_to_f32(VuV.hf[2*i], &env->hvx_fp_status);
    -+    VddV.v[1].sf[i] = f16_to_f32(VuV.hf[2*i+1], &env->hvx_fp_status))
    ++    VddV.v[0].sf[i] = f16_to_f32(VuV.hf[2*i]);
    ++    VddV.v[1].sf[i] = f16_to_f32(VuV.hf[2*i+1]))
     +
     +ITERATOR_INSN_IEEE_FP_16(16,    vcvt_hf_uh, "Vd32.hf=vcvt(Vu32.uh)",
     +    "Vector IEEE cvt from int: uh to hf",
    -+    VdV.hf[i] = uh_to_f16(VuV.uh[i]))
    ++    VdV.hf[i] = uint64_to_float16_scalbn(VuV.uh[i], float_round_nearest_even, 0))
     +ITERATOR_INSN_IEEE_FP_16(16,    vcvt_hf_h,  "Vd32.hf=vcvt(Vu32.h)",
     +    "Vector IEEE cvt from int: h to hf",
    -+    VdV.hf[i] = h_to_f16(VuV.h[i]))
    ++    VdV.hf[i] = int64_to_float16_scalbn(VuV.h[i], float_round_nearest_even, 0))
     +ITERATOR_INSN_IEEE_FP_16_32(16, vcvt_uh_hf, "Vd32.uh=vcvt(Vu32.hf)",
     +    "Vector IEEE cvt to int: hf to uh",
    -+    VdV.uh[i] = f16_to_uh(VuV.hf[i], &env->hvx_fp_status))
    ++    VdV.uh[i] = float16_to_uint16_scalbn(VuV.hf[i], float_round_nearest_even, 0, &env->hvx_fp_status))
     +ITERATOR_INSN_IEEE_FP_16_32(16, vcvt_h_hf,  "Vd32.h=vcvt(Vu32.hf)",
     +    "Vector IEEE cvt to int: hf to h",
    -+    VdV.h[i]  = f16_to_h(VuV.hf[i], &env->hvx_fp_status))
    ++    VdV.h[i]  = float16_to_int16_scalbn(VuV.hf[i], float_round_nearest_even, 0, &env->hvx_fp_status))
     +
     +ITERATOR_INSN_IEEE_FP_16(32, vcvt_hf_sf, "Vd32.hf=vcvt(Vu32.sf,Vv32.sf)",
     +    "Vector IEEE cvt: sf to hf",
    -+    VdV.hf[2*i]   = f32_to_f16(VuV.sf[i], &env->hvx_fp_status);
    -+    VdV.hf[2*i+1] = f32_to_f16(VvV.sf[i], &env->hvx_fp_status))
    ++    VdV.hf[2*i]   = f32_to_f16(VuV.sf[i]);
    ++    VdV.hf[2*i+1] = f32_to_f16(VvV.sf[i]))
     +
     +ITERATOR_INSN_IEEE_FP_16_32(32, vcvt_ub_hf, "Vd32.ub=vcvt(Vu32.hf,Vv32.hf)", "Vector cvt to int: hf narrow to ub",
    -+    VdV.ub[4*i]   = f16_to_ub(VuV.hf[2*i], &env->hvx_fp_status);
    -+    VdV.ub[4*i+1] = f16_to_ub(VuV.hf[2*i+1], &env->hvx_fp_status);
    -+    VdV.ub[4*i+2] = f16_to_ub(VvV.hf[2*i], &env->hvx_fp_status);
    -+    VdV.ub[4*i+3] = f16_to_ub(VvV.hf[2*i+1], &env->hvx_fp_status))
    ++    VdV.ub[4*i]   = float16_to_uint8_scalbn(VuV.hf[2*i], float_round_nearest_even, 0, &env->hvx_fp_status);
    ++    VdV.ub[4*i+1] = float16_to_uint8_scalbn(VuV.hf[2*i+1], float_round_nearest_even, 0, &env->hvx_fp_status);
    ++    VdV.ub[4*i+2] = float16_to_uint8_scalbn(VvV.hf[2*i], float_round_nearest_even, 0, &env->hvx_fp_status);
    ++    VdV.ub[4*i+3] = float16_to_uint8_scalbn(VvV.hf[2*i+1], float_round_nearest_even, 0, &env->hvx_fp_status))
     +
     +ITERATOR_INSN_IEEE_FP_16_32(32, vcvt_b_hf,  "Vd32.b=vcvt(Vu32.hf,Vv32.hf)",
     +    "Vector cvt to int: hf narrow to b",
    -+    VdV.b[4*i]   = f16_to_b(VuV.hf[2*i], &env->hvx_fp_status);
    -+    VdV.b[4*i+1] = f16_to_b(VuV.hf[2*i+1], &env->hvx_fp_status);
    -+    VdV.b[4*i+2] = f16_to_b(VvV.hf[2*i], &env->hvx_fp_status);
    -+    VdV.b[4*i+3] = f16_to_b(VvV.hf[2*i+1], &env->hvx_fp_status))
    ++    VdV.b[4*i]   = float16_to_int8_scalbn(VuV.hf[2*i], float_round_nearest_even, 0, &env->hvx_fp_status);
    ++    VdV.b[4*i+1] = float16_to_int8_scalbn(VuV.hf[2*i+1], float_round_nearest_even, 0, &env->hvx_fp_status);
    ++    VdV.b[4*i+2] = float16_to_int8_scalbn(VvV.hf[2*i], float_round_nearest_even, 0, &env->hvx_fp_status);
    ++    VdV.b[4*i+3] = float16_to_int8_scalbn(VvV.hf[2*i+1], float_round_nearest_even, 0, &env->hvx_fp_status))
     +
     +ITERATOR_INSN_SHIFT_SLOT_FLT(32, vconv_w_sf,"Vd32.w=Vu32.sf",
     +    "Vector conversion of sf32 format to int w",
    @@ target/hexagon/imported/mmvec/ext.idef: ITERATOR_INSN_IEEE_FP_16_32_LATE(16, vab
     +
     +ITERATOR_INSN_SHIFT_SLOT_FLT(32, vconv_sf_w,"Vd32.sf=Vu32.w",
     +    "Vector conversion of int w format to sf32",
    -+    VdV.sf[i] = conv_sf_w(VuV.w[i], &env->hvx_fp_status))
    ++    VdV.sf[i] = int32_to_float32(VuV.w[i], &env->hvx_fp_status))
     +
     +ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_hf_h,"Vd32.hf=Vu32.h",
     +    "Vector conversion of int hw format to hf16",
    -+    VdV.hf[i] = conv_hf_h(VuV.h[i], &env->hvx_fp_status))
    ++    VdV.hf[i] = float16_val(int16_to_float16(VuV.h[i], &env->hvx_fp_status)))
     +
      /******************************************************************************
       DEBUG Vector/Register Printing
 5:  9ac626fa17 ! 10:  9518dd95bd target/hexagon: add v68 HVX IEEE float compare insns
    @@ Commit message
         Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
     
      ## target/hexagon/mmvec/hvx_ieee_fp.h ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.h: uint32_t qf_min_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    - uint16_t qf_max_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    - uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    +@@ target/hexagon/mmvec/hvx_ieee_fp.h: uint16_t qf_min_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
    + int32_t conv_w_sf(float32 a, float_status *fp_status);
    + int16_t conv_h_hf(float16 a, float_status *fp_status);
      
     +/* IEEE - FP compare instructions */
     +uint32_t cmpgt_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
     +uint16_t cmpgt_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
     +
    - /*
    -  * IEEE - FP Convert instructions
    -  */
    + #endif
     
      ## target/hexagon/mmvec/macros.h ##
     @@
    @@ target/hexagon/mmvec/macros.h
     +
      #endif
     
    - ## target/hexagon/attribs_def.h.inc ##
    -@@ target/hexagon/attribs_def.h.inc: DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "")
    - DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "")
    - DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "")
    - DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "")
    -+DEF_ATTRIB(CVI_VA_2SRC, "Execs on multimedia vector engine; requires two srcs", "", "")
    - 
    - DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "")
    - 
    -@@ target/hexagon/attribs_def.h.inc: DEF_ATTRIB(HVX_IEEE_FP_ACC, "HVX IEEE FP accumulate instruction", "", "")
    - DEF_ATTRIB(HVX_IEEE_FP_OUT_16, "HVX IEEE FP 16-bit output", "", "")
    - DEF_ATTRIB(HVX_IEEE_FP_OUT_32, "HVX IEEE FP 32-bit output", "", "")
    - DEF_ATTRIB(CVI_VX_NO_TMP_LD, "HVX multiply without tmp load", "", "")
    -+DEF_ATTRIB(HVX_FLT, "This a floating point HVX instruction.", "", "")
    - 
    - /* Keep this as the last attribute: */
    - DEF_ATTRIB(ZZ_LASTATTRIB, "Last attribute in the file", "", "")
    -
      ## target/hexagon/mmvec/hvx_ieee_fp.c ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.c: int16_t conv_h_hf(uint16_t a, float_status *fp_status)
    +@@ target/hexagon/mmvec/hvx_ieee_fp.c: int16_t conv_h_hf(float16 a, float_status *fp_status)
          }
    -     return float16_to_int16_round_to_zero(f1, fp_status);
    +     return float16_to_int16_round_to_zero(a, fp_status);
      }
     +
     +/*
    @@ target/hexagon/mmvec/hvx_ieee_fp.c: int16_t conv_h_hf(uint16_t a, float_status *
     +    return float16_is_neg(f1) ? !result : result;
     +}
     +
    -+uint32_t cmpgt_sf(uint32_t a1, uint32_t a2, float_status *fp_status)
    ++uint32_t cmpgt_sf(float32 a1, float32 a2, float_status *fp_status)
     +{
    -+    float32 f1 = make_float32(a1);
    -+    float32 f2 = make_float32(a2);
    -+    if (float32_is_any_nan(f1) || float32_is_any_nan(f2)) {
    -+        return float32_nan_compare(f1, f2, fp_status);
    ++    if (float32_is_any_nan(a1) || float32_is_any_nan(a2)) {
    ++        return float32_nan_compare(a1, a2, fp_status);
     +    }
     +    return float32_compare(a1, a2, fp_status) == float_relation_greater;
     +}
     +
    -+uint16_t cmpgt_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
    ++uint16_t cmpgt_hf(float16 a1, float16 a2, float_status *fp_status)
     +{
    -+    float16 f1 = make_float16(a1);
    -+    float16 f2 = make_float16(a2);
    -+    if (float16_is_any_nan(f1) || float16_is_any_nan(f2)) {
    -+        return float16_nan_compare(f1, f2, fp_status);
    ++    if (float16_is_any_nan(a1) || float16_is_any_nan(a2)) {
    ++        return float16_nan_compare(a1, a2, fp_status);
     +    }
     +    return float16_compare(a1, a2, fp_status) == float_relation_greater;
     +}
     
    - ## target/hexagon/hex_common.py ##
    -@@ target/hexagon/hex_common.py: def need_env(tag):
    -             "A_CVI_GATHER" in attribdict[tag] or
    -             "A_CVI_SCATTER" in attribdict[tag] or
    -             "A_HVX_IEEE_FP" in attribdict[tag] or
    -+            "A_HVX_FLT" in attribdict[tag] or
    -             "A_IMPLICIT_WRITES_USR" in attribdict[tag])
    - 
    - 
    -
      ## target/hexagon/imported/mmvec/encode_ext.def ##
     @@ target/hexagon/imported/mmvec/encode_ext.def: DEF_ENC(V6_vconv_w_sf,"00011110--0--101PP1uuuuu001ddddd")
      DEF_ENC(V6_vconv_hf_h,"00011110--0--101PP1uuuuu100ddddd")
    @@ target/hexagon/imported/mmvec/encode_ext.def: DEF_ENC(V6_vconv_w_sf,"00011110--0
      ## target/hexagon/imported/mmvec/ext.idef ##
     @@ target/hexagon/imported/mmvec/ext.idef: ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_hf_h,"Vd32.hf=Vu32.h",
          "Vector conversion of int hw format to hf16",
    -     VdV.hf[i] = conv_hf_h(VuV.h[i], &env->hvx_fp_status))
    +     VdV.hf[i] = float16_val(int16_to_float16(VuV.h[i], &env->hvx_fp_status)))
      
     +/******************************************************************************
     + * IEEE FP compare instructions
 6:  b12d94be22 ! 11:  f84d180547 target/hexagon: add v73 HVX IEEE bfloat16 insns
    @@ Commit message
         Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
     
      ## target/hexagon/mmvec/hvx_ieee_fp.h ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.h: int16_t conv_hf_h(int16_t a, float_status *fp_status);
    - int32_t conv_w_sf(uint32_t a, float_status *fp_status);
    - int16_t conv_h_hf(uint16_t a, float_status *fp_status);
    +@@
    + 
    + #include "fpu/softfloat.h"
    + 
    ++#define FP32_DEF_NAN 0x7FFFFFFF
    ++
    + #define f16_to_f32(A) float16_to_float32((A), true, &env->hvx_fp_status)
    + #define f32_to_f16(A) float32_to_float16((A), true, &env->hvx_fp_status)
    ++#define bf_to_sf(A) bfloat16_to_float32(A, &env->hvx_fp_status)
    + 
    + float32 fp_mult_sf_hf(float16 a1, float16 a2, float_status *fp_status);
    + float32 fp_vdmpy(float16 a1, float16 a2, float16 a3, float16 a4,
    +@@ target/hexagon/mmvec/hvx_ieee_fp.h: int16_t conv_h_hf(float16 a, float_status *fp_status);
    + uint32_t cmpgt_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
    + uint16_t cmpgt_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
      
     +/* IEEE BFloat instructions */
     +
     +#define fp_mult_sf_bf(A, B) \
    -+    fp_mult_sf_sf(bfloat16_to_float32(A, &env->hvx_fp_status), \
    -+                  bfloat16_to_float32(B, &env->hvx_fp_status), \
    -+                  &env->hvx_fp_status)
    ++    float32_mul(bf_to_sf(A), bf_to_sf(B), &env->hvx_fp_status)
    ++
     +#define fp_add_sf_bf(A, B) \
    -+    fp_add_sf_sf(bfloat16_to_float32(A, &env->hvx_fp_status), \
    -+                 bfloat16_to_float32(B, &env->hvx_fp_status), \
    -+                 &env->hvx_fp_status)
    ++    float32_add(bf_to_sf(A), bf_to_sf(B), &env->hvx_fp_status)
    ++
     +#define fp_sub_sf_bf(A, B) \
    -+    fp_sub_sf_sf(bfloat16_to_float32(A, &env->hvx_fp_status), \
    -+                 bfloat16_to_float32(B, &env->hvx_fp_status), \
    -+                 &env->hvx_fp_status)
    ++    float32_sub(bf_to_sf(A), bf_to_sf(B), &env->hvx_fp_status)
     +
    -+uint32_t fp_mult_sf_bf_acc(uint16_t op1, uint16_t op2, uint32_t acc,
    -+                           float_status *fp_status);
    -+
    -+#define bf_to_sf(A, fp_status) bfloat16_to_float32(A, fp_status)
    ++#define fp_mult_sf_bf_acc(f1, f2, f3) \
    ++    float32_muladd(bf_to_sf(f1), bf_to_sf(f2), f3, 0, &env->hvx_fp_status)
     +
     +static inline uint16_t sf_to_bf(int32_t A, float_status *fp_status)
     +{
    @@ target/hexagon/mmvec/hvx_ieee_fp.h: int16_t conv_hf_h(int16_t a, float_status *f
     +}
     +
     +#define fp_min_bf(A, B) \
    -+    sf_to_bf(fp_min_sf(bf_to_sf(A, &env->hvx_fp_status), \
    -+                       bf_to_sf(B, &env->hvx_fp_status), \
    -+                       &env->hvx_fp_status), \
    ++    sf_to_bf(float32_min(bf_to_sf(A), bf_to_sf(B), &env->hvx_fp_status), \
     +             &env->hvx_fp_status);
     +
     +#define fp_max_bf(A, B) \
    -+    sf_to_bf(fp_max_sf(bf_to_sf(A, &env->hvx_fp_status), \
    -+                       bf_to_sf(B, &env->hvx_fp_status), \
    -+                       &env->hvx_fp_status), \
    ++    sf_to_bf(float32_max(bf_to_sf(A), bf_to_sf(B), &env->hvx_fp_status), \
     +             &env->hvx_fp_status);
     +
      #endif
    @@ target/hexagon/mmvec/macros.h
      ## target/hexagon/mmvec/mmvec.h ##
     @@ target/hexagon/mmvec/mmvec.h: typedef union {
          int8_t    b[MAX_VEC_SIZE_BYTES / 1];
    -     int32_t  sf[MAX_VEC_SIZE_BYTES / 4];   /* single float (32-bit) */
    -     int16_t  hf[MAX_VEC_SIZE_BYTES / 2];   /* half float (16-bit) */
    -+    uint16_t bf[MAX_VEC_SIZE_BYTES / 2];   /* bfloat16 */
    +     float32  sf[MAX_VEC_SIZE_BYTES / 4];
    +     float16  hf[MAX_VEC_SIZE_BYTES / 2];
    ++    bfloat16 bf[MAX_VEC_SIZE_BYTES / 2];
      } MMVector;
      
      typedef union {
     
    - ## target/hexagon/mmvec/hvx_ieee_fp.c ##
    -@@ target/hexagon/mmvec/hvx_ieee_fp.c: uint16_t cmpgt_hf(uint16_t a1, uint16_t a2, float_status *fp_status)
    -     }
    -     return float16_compare(a1, a2, fp_status) == float_relation_greater;
    - }
    -+
    -+DEF_FP_INSN_3(mult_sf_bf_acc, 32, 16, 16, 32,
    -+              float32_muladd(bf_to_sf(f1, fp_status), bf_to_sf(f2, fp_status),
    -+                             f3, 0, fp_status))
    -
      ## target/hexagon/imported/mmvec/encode_ext.def ##
     @@ target/hexagon/imported/mmvec/encode_ext.def: DEF_ENC(V6_vgthf_or,"00011100100vvvvvPP1uuuuu001101xx")
      DEF_ENC(V6_vgtsf_xor,"00011100100vvvvvPP1uuuuu111010xx")
    @@ target/hexagon/imported/mmvec/ext.idef: ITERATOR_INSN_SHIFT_SLOT_FLT(16, vconv_h
     +    VddV.v[1].sf[i] = fp_mult_sf_bf(VuV.bf[2*i+1], VvV.bf[2*i+1]); fBFLOAT())
     +ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_bf_acc,
     +    "Vxx32.sf+=vmpy(Vu32.bf,Vv32.bf)", "Vector IEEE fma: hf widen to sf",
    -+    VxxV.v[0].sf[i] = fp_mult_sf_bf_acc(VuV.bf[2*i], VvV.bf[2*i],
    -+                                        VxxV.v[0].sf[i], &env->hvx_fp_status);
    -+    VxxV.v[1].sf[i] = fp_mult_sf_bf_acc(VuV.bf[2*i+1], VvV.bf[2*i+1],
    -+                                        VxxV.v[1].sf[i], &env->hvx_fp_status);
    ++    VxxV.v[0].sf[i] = fp_mult_sf_bf_acc(VuV.bf[2*i], VvV.bf[2*i], VxxV.v[0].sf[i]);
    ++    VxxV.v[1].sf[i] = fp_mult_sf_bf_acc(VuV.bf[2*i+1], VvV.bf[2*i+1], VxxV.v[1].sf[i]);
     +    fCVI_VX_NO_TMP_LD(); fBFLOAT())
     +ITERATOR_INSN_IEEE_FP_16(32, vcvt_bf_sf,
     +    "Vd32.bf=vcvt(Vu32.sf,Vv32.sf)",   "Vector IEEE cvt: sf to bf",
 7:  0cfe85d9fb = 12:  e66f33dc97 tests/hexagon: add tests for v68 HVX IEEE float arithmetics
 8:  eb66aadfac = 13:  5055daa72b tests/hexagon: add tests for v68 HVX IEEE float min/max
 9:  166c7bc232 = 14:  e0d756ec35 tests/hexagon: add tests for v68 HVX IEEE float conversions
10:  cdc88a2115 = 15:  f46538124c tests/hexagon: add tests for v68 HVX IEEE float comparisons
11:  54d79eb29d = 16:  12d1c25d33 tests/hexagon: add tests for HVX bfloat
-- 
2.37.2