Add HVX IEEE floating-point arithmetic instructions:
- vmpy_sf_sf, vmpy_sf_hf, vmpy_hf_hf: multiply operations
- vdmpy_sf_hf: dot-product multiply
- vmpy_sf_hf_acc, vmpy_hf_hf_acc, vdmpy_sf_hf_acc: multiply-accumulate
- vadd_sf_sf, vsub_sf_sf, vadd_sf_hf, vsub_sf_hf: add/sub with sf output
- vadd_hf_hf, vsub_hf_hf: add/sub with hf output
Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
---
target/hexagon/mmvec/kvx_ieee.h | 47 ++++++++++
target/hexagon/mmvec/macros.h | 1 +
target/hexagon/mmvec/mmvec.h | 2 +
target/hexagon/attribs_def.h.inc | 4 +
target/hexagon/mmvec/kvx_ieee.c | 87 ++++++++++++++++++
target/hexagon/hex_common.py | 1 +
target/hexagon/imported/mmvec/encode_ext.def | 18 ++++
target/hexagon/imported/mmvec/ext.idef | 93 ++++++++++++++++++++
target/hexagon/meson.build | 1 +
9 files changed, 254 insertions(+)
create mode 100644 target/hexagon/mmvec/kvx_ieee.h
create mode 100644 target/hexagon/mmvec/kvx_ieee.c
diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h
new file mode 100644
index 0000000000..e92ddebeb9
--- /dev/null
+++ b/target/hexagon/mmvec/kvx_ieee.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_KVX_IEEE_H
+#define HEXAGON_KVX_IEEE_H
+
+#include "fpu/softfloat.h"
+
+/* Hexagon canonical NaN */
+#define FP32_DEF_NAN 0x7FFFFFFF
+#define FP16_DEF_NAN 0x7FFF
+
+/*
+ * IEEE - FP ADD/SUB/MPY instructions
+ */
+uint32_t fp_mult_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
+uint32_t fp_add_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
+uint32_t fp_sub_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
+
+uint16_t fp_mult_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
+uint16_t fp_add_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
+uint16_t fp_sub_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
+
+uint32_t fp_mult_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
+uint32_t fp_add_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
+uint32_t fp_sub_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
+
+/*
+ * IEEE - FP Accumulate instructions
+ */
+uint16_t fp_mult_hf_hf_acc(uint16_t a1, uint16_t a2, uint16_t acc,
+ float_status *fp_status);
+uint32_t fp_mult_sf_hf_acc(uint16_t a1, uint16_t a2, uint32_t acc,
+ float_status *fp_status);
+
+/*
+ * IEEE - FP Reduce instructions
+ */
+uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
+ float_status *fp_status);
+uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2, uint16_t a3,
+ uint16_t a4, float_status *fp_status);
+
+#endif
diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h
index c7840fbf2e..2af3d2d747 100644
--- a/target/hexagon/mmvec/macros.h
+++ b/target/hexagon/mmvec/macros.h
@@ -23,6 +23,7 @@
#include "mmvec/system_ext_mmvec.h"
#include "accel/tcg/getpc.h"
#include "accel/tcg/probe.h"
+#include "mmvec/kvx_ieee.h"
#ifndef QEMU_GENERATE
#define VdV (*(MMVector *restrict)(VdV_void))
diff --git a/target/hexagon/mmvec/mmvec.h b/target/hexagon/mmvec/mmvec.h
index 52d470709c..eaedfe0d6d 100644
--- a/target/hexagon/mmvec/mmvec.h
+++ b/target/hexagon/mmvec/mmvec.h
@@ -38,6 +38,8 @@ typedef union {
int16_t h[MAX_VEC_SIZE_BYTES / 2];
uint8_t ub[MAX_VEC_SIZE_BYTES / 1];
int8_t b[MAX_VEC_SIZE_BYTES / 1];
+ int32_t sf[MAX_VEC_SIZE_BYTES / 4]; /* single float (32-bit) */
+ int16_t hf[MAX_VEC_SIZE_BYTES / 2]; /* half float (16-bit) */
} MMVector;
typedef union {
diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc
index c85cd5d17c..d3c4bf6301 100644
--- a/target/hexagon/attribs_def.h.inc
+++ b/target/hexagon/attribs_def.h.inc
@@ -175,6 +175,10 @@ DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "")
/* HVX IEEE FP extension attributes */
DEF_ATTRIB(HVX_IEEE_FP, "HVX IEEE FP extension instruction", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_ACC, "HVX IEEE FP accumulate instruction", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_16, "HVX IEEE FP 16-bit output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_32, "HVX IEEE FP 32-bit output", "", "")
+DEF_ATTRIB(CVI_VX_NO_TMP_LD, "HVX multiply without tmp load", "", "")
/* Keep this as the last attribute: */
DEF_ATTRIB(ZZ_LASTATTRIB, "Last attribute in the file", "", "")
diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
new file mode 100644
index 0000000000..b763899aa3
--- /dev/null
+++ b/target/hexagon/mmvec/kvx_ieee.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "kvx_ieee.h"
+
+#define DEF_FP_INSN_2(name, rt, a1t, a2t, op) \
+    uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
+                           float_status *fp_status) { \
+        float##a1t f1 = make_float##a1t(a1); \
+        float##a2t f2 = make_float##a2t(a2); \
+        /* A NaN in either operand yields the canonical NaN. */ \
+        if (float##a1t##_is_any_nan(f1) || float##a2t##_is_any_nan(f2)) { \
+            return FP##rt##_DEF_NAN; \
+        } \
+        float##rt result = op; \
+        /* A NaN produced by op is canonicalized too. */ \
+        if (float##rt##_is_any_nan(result)) { \
+            return FP##rt##_DEF_NAN; \
+        } \
+        return float##rt##_val(result); /* unwrap to raw bits */ \
+    }
+
+#define DEF_FP_INSN_3(name, rt, a1t, a2t, a3t, op) \
+    uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
+                           uint##a3t##_t a3, float_status *fp_status) { \
+        float##a1t f1 = make_float##a1t(a1); \
+        float##a2t f2 = make_float##a2t(a2); \
+        float##a3t f3 = make_float##a3t(a3); \
+        /* A NaN in any of the three operands yields the canonical NaN. */ \
+        if (float##a1t##_is_any_nan(f1) || float##a2t##_is_any_nan(f2) || \
+            float##a3t##_is_any_nan(f3)) { \
+            return FP##rt##_DEF_NAN; \
+        } \
+        float##rt result = op; /* a NaN result is canonicalized below */ \
+        if (float##rt##_is_any_nan(result)) { \
+            return FP##rt##_DEF_NAN; \
+        } \
+        return float##rt##_val(result); /* unwrap to raw bits */ \
+    }
+/* Same-width sf operations: 32-bit operands, 32-bit result. */
+DEF_FP_INSN_2(mult_sf_sf, 32, 32, 32, float32_mul(f1, f2, fp_status))
+DEF_FP_INSN_2(add_sf_sf, 32, 32, 32, float32_add(f1, f2, fp_status))
+DEF_FP_INSN_2(sub_sf_sf, 32, 32, 32, float32_sub(f1, f2, fp_status))
+/* Same-width hf operations: 16-bit operands, 16-bit result. */
+DEF_FP_INSN_2(mult_hf_hf, 16, 16, 16, float16_mul(f1, f2, fp_status))
+DEF_FP_INSN_2(add_hf_hf, 16, 16, 16, float16_add(f1, f2, fp_status))
+DEF_FP_INSN_2(sub_hf_hf, 16, 16, 16, float16_sub(f1, f2, fp_status))
+/* Widening operations: hf operands are converted to float32 first. */
+DEF_FP_INSN_2(mult_sf_hf, 32, 16, 16,
+ float32_mul(float16_to_float32(f1, true, fp_status),
+ float16_to_float32(f2, true, fp_status),
+ fp_status))
+DEF_FP_INSN_2(add_sf_hf, 32, 16, 16,
+ float32_add(float16_to_float32(f1, true, fp_status),
+ float16_to_float32(f2, true, fp_status),
+ fp_status))
+DEF_FP_INSN_2(sub_sf_hf, 32, 16, 16,
+ float32_sub(float16_to_float32(f1, true, fp_status),
+ float16_to_float32(f2, true, fp_status),
+ fp_status))
+/* Multiply-accumulate forms: muladd of f1, f2 and f3 with flags 0. */
+DEF_FP_INSN_3(mult_hf_hf_acc, 16, 16, 16, 16,
+ float16_muladd(f1, f2, f3, 0, fp_status))
+DEF_FP_INSN_3(mult_sf_hf_acc, 32, 16, 16, 32,
+ float32_muladd(float16_to_float32(f1, true, fp_status),
+ float16_to_float32(f2, true, fp_status),
+ f3, 0, fp_status))
+
+uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
+                  float_status *fp_status)
+{
+    uint32_t prod1 = fp_mult_sf_hf(a1, a3, fp_status); /* a1 * a3, as sf */
+    uint32_t prod2 = fp_mult_sf_hf(a2, a4, fp_status); /* a2 * a4, as sf */
+    return fp_add_sf_sf(prod1, prod2, fp_status); /* sum the products */
+}
+
+uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2,
+                      uint16_t a3, uint16_t a4,
+                      float_status *fp_status)
+{
+    uint32_t red = fp_vdmpy(a1, a2, a3, a4, fp_status); /* a1*a3 + a2*a4 */
+    return fp_add_sf_sf(red, acc, fp_status); /* then add the accumulator */
+}
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index c0e9f26aeb..f6a2848db1 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -215,6 +215,7 @@ def need_env(tag):
"A_LOAD" in attribdict[tag] or
"A_CVI_GATHER" in attribdict[tag] or
"A_CVI_SCATTER" in attribdict[tag] or
+ "A_HVX_IEEE_FP" in attribdict[tag] or
"A_IMPLICIT_WRITES_USR" in attribdict[tag])
diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def
index 6d70086b5f..4ce87d09fd 100644
--- a/target/hexagon/imported/mmvec/encode_ext.def
+++ b/target/hexagon/imported/mmvec/encode_ext.def
@@ -804,5 +804,23 @@ DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd")
DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd")
DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd")
+/* IEEE FP multiply instructions */
+DEF_ENC(V6_vmpy_sf_sf,"00011111100vvvvvPP1uuuuu001ddddd")
+DEF_ENC(V6_vmpy_sf_hf,"00011111100vvvvvPP1uuuuu010ddddd")
+DEF_ENC(V6_vmpy_hf_hf,"00011111100vvvvvPP1uuuuu011ddddd")
+DEF_ENC(V6_vdmpy_sf_hf,"00011111101vvvvvPP1uuuuu110ddddd")
+
+/* IEEE FP multiply-accumulate instructions */
+DEF_ENC(V6_vmpy_sf_hf_acc,"00011100010vvvvvPP1uuuuu001xxxxx")
+DEF_ENC(V6_vmpy_hf_hf_acc,"00011100010vvvvvPP1uuuuu010xxxxx")
+DEF_ENC(V6_vdmpy_sf_hf_acc,"00011100010vvvvvPP1uuuuu011xxxxx")
+
+/* IEEE FP add/sub instructions */
+DEF_ENC(V6_vadd_sf_sf,"00011111100vvvvvPP1uuuuu110ddddd")
+DEF_ENC(V6_vsub_sf_sf,"00011111100vvvvvPP1uuuuu111ddddd")
+DEF_ENC(V6_vadd_sf_hf,"00011111100vvvvvPP1uuuuu100ddddd")
+DEF_ENC(V6_vsub_sf_hf,"00011111100vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vadd_hf_hf,"00011111101vvvvvPP1uuuuu111ddddd")
+DEF_ENC(V6_vsub_hf_hf,"00011111011vvvvvPP1uuuuu000ddddd")
#endif /* NO MMVEC */
diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
index 03d31f6181..3f0d8e366e 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -2895,9 +2895,102 @@ EXTINSN(V6_vprefixqw,"Vd32.w=prefixsum(Qv4)", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_
}
} )
+/* KVX - IEEE FP Instructions */
+/* Single pipe, 32-bit output */
+#define ITERATOR_INSN_IEEE_FP_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_32), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+/* Single pipe, 16-bit output */
+#define ITERATOR_INSN_IEEE_FP_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_16), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+/* Two pipes: P2 & P3, single output: P2, 32-bit output */
+#define ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/* Two pipes: P2 & P3, two outputs, 32-bit output */
+#define ITERATOR_INSN_IEEE_FP_DOUBLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/*
+ * single pipe, accumulate instruction, produces 16-bit output, requires 16-bit
+ * accumulate input
+ */
+#define ITERATOR_INSN_IEEE_FP_ACC_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_ACC,A_HVX_IEEE_FP_OUT_16,A_CVI_VX_NO_TMP_LD), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/*
+ * single pipe, accumulate instruction, produces 32-bit output, requires 32-bit
+ * accumulate input
+ */
+#define ITERATOR_INSN_IEEE_FP_ACC_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
+EXTINSN(V6_##TAG, SYNTAX, \
+ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_ACC,A_HVX_IEEE_FP_OUT_32,A_CVI_VX_NO_TMP_LD), \
+DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
+
+/* IEEE FP multiply instructions */
+ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(32, vmpy_sf_sf,
+ "Vd32.sf=vmpy(Vu32.sf,Vv32.sf)", "Vector IEEE mul: sf",
+ VdV.sf[i] = fp_mult_sf_sf(VuV.sf[i], VvV.sf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_hf,
+ "Vdd32.sf=vmpy(Vu32.hf,Vv32.hf)", "Vector IEEE mul: hf widen to sf",
+ VddV.v[0].sf[i] = fp_mult_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->fp_status);
+ VddV.v[1].sf[i] = fp_mult_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_16(16, vmpy_hf_hf, "Vd32.hf=vmpy(Vu32.hf,Vv32.hf)",
+ "Vector IEEE mul: hf",
+ VdV.hf[i] = fp_mult_hf_hf(VuV.hf[i], VvV.hf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_32(32, vdmpy_sf_hf, "Vd32.sf=vdmpy(Vu32.hf,Vv32.hf)",
+ "Vector IEEE mul reduction: hf widen to sf",
+ VdV.sf[i] = fp_vdmpy(VuV.hf[2*i+1], VuV.hf[2*i], VvV.hf[2*i+1],
+ VvV.hf[2*i], &env->fp_status))
+
+/* IEEE FP multiply-accumulate instructions */
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vmpy_sf_hf_acc,
+ "Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf)", "Vector IEEE fma: hf widen to sf",
+ VxxV.v[0].sf[i] = fp_mult_sf_hf_acc(VuV.hf[2*i], VvV.hf[2*i],
+ VxxV.v[0].sf[i], &env->fp_status);
+ VxxV.v[1].sf[i] = fp_mult_sf_hf_acc(VuV.hf[2*i+1], VvV.hf[2*i+1],
+ VxxV.v[1].sf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_ACC_16(16, vmpy_hf_hf_acc,
+ "Vx32.hf+=vmpy(Vu32.hf,Vv32.hf)", "Vector IEEE fma: hf",
+ VxV.hf[i] = fp_mult_hf_hf_acc(VuV.hf[i], VvV.hf[i], VxV.hf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_ACC_32(32, vdmpy_sf_hf_acc,
+ "Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf)", "Vector IEEE fma reduce: hf widen to sf",
+ VxV.sf[i] = fp_vdmpy_acc(VxV.sf[i], VuV.hf[2*i+1], VuV.hf[2*i], VvV.hf[2*i+1],
+ VvV.hf[2*i], &env->fp_status))
+
+/* IEEE FP add/sub instructions */
+ITERATOR_INSN_IEEE_FP_32(32, vadd_sf_sf, "Vd32.sf=vadd(Vu32.sf,Vv32.sf)",
+ "Vector IEEE add: sf",
+ VdV.sf[i] = fp_add_sf_sf(VuV.sf[i], VvV.sf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_32(32, vsub_sf_sf, "Vd32.sf=vsub(Vu32.sf,Vv32.sf)",
+ "Vector IEEE sub: sf",
+ VdV.sf[i] = fp_sub_sf_sf(VuV.sf[i], VvV.sf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_16(16, vadd_hf_hf, "Vd32.hf=vadd(Vu32.hf,Vv32.hf)",
+ "Vector IEEE add: hf",
+ VdV.hf[i] = fp_add_hf_hf(VuV.hf[i], VvV.hf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_16(16, vsub_hf_hf, "Vd32.hf=vsub(Vu32.hf,Vv32.hf)",
+ "Vector IEEE sub: hf",
+ VdV.hf[i] = fp_sub_hf_hf(VuV.hf[i], VvV.hf[i], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vadd_sf_hf,
+ "Vdd32.sf=vadd(Vu32.hf,Vv32.hf)", "Vector IEEE add: hf widen to sf",
+ VddV.v[0].sf[i] = fp_add_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->fp_status);
+ VddV.v[1].sf[i] = fp_add_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->fp_status))
+ITERATOR_INSN_IEEE_FP_DOUBLE_32(32, vsub_sf_hf,
+ "Vdd32.sf=vsub(Vu32.hf,Vv32.hf)", "Vector IEEE sub: hf widen to sf",
+ VddV.v[0].sf[i] = fp_sub_sf_hf(VuV.hf[2*i], VvV.hf[2*i], &env->fp_status);
+ VddV.v[1].sf[i] = fp_sub_sf_hf(VuV.hf[2*i+1], VvV.hf[2*i+1], &env->fp_status))
/******************************************************************************
DEBUG Vector/Register Printing
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index d169cf71b2..f9a93975ad 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -250,6 +250,7 @@ hexagon_ss.add(files(
'fma_emu.c',
'mmvec/decode_ext_mmvec.c',
'mmvec/system_ext_mmvec.c',
+ 'mmvec/kvx_ieee.c',
))
#
--
2.37.2
On Mon, Mar 23, 2026 at 7:15 AM Matheus Tavares Bernardino <
matheus.bernardino@oss.qualcomm.com> wrote:
> Add HVX IEEE floating-point arithmetic instructions:
> - vmpy_sf_sf, vmpy_sf_hf, vmpy_hf_hf: multiply operations
> - vdmpy_sf_hf: dot-product multiply
> - vmpy_sf_hf_acc, vmpy_hf_hf_acc, vdmpy_sf_hf_acc: multiply-accumulate
> - vadd_sf_sf, vsub_sf_sf, vadd_sf_hf, vsub_sf_hf: add/sub with sf output
> - vadd_hf_hf, vsub_hf_hf: add/sub with hf output
>
> Signed-off-by: Matheus Tavares Bernardino <
> matheus.bernardino@oss.qualcomm.com>
> ---
> target/hexagon/mmvec/kvx_ieee.h | 47 ++++++++++
> target/hexagon/mmvec/macros.h | 1 +
> target/hexagon/mmvec/mmvec.h | 2 +
> target/hexagon/attribs_def.h.inc | 4 +
> target/hexagon/mmvec/kvx_ieee.c | 87 ++++++++++++++++++
> target/hexagon/hex_common.py | 1 +
> target/hexagon/imported/mmvec/encode_ext.def | 18 ++++
> target/hexagon/imported/mmvec/ext.idef | 93 ++++++++++++++++++++
> target/hexagon/meson.build | 1 +
> 9 files changed, 254 insertions(+)
> create mode 100644 target/hexagon/mmvec/kvx_ieee.h
> create mode 100644 target/hexagon/mmvec/kvx_ieee.c
>
I'm curious why the prefix is kvx instead of hvx.
>
> diff --git a/target/hexagon/mmvec/kvx_ieee.h
> b/target/hexagon/mmvec/kvx_ieee.h
> new file mode 100644
> index 0000000000..e92ddebeb9
> --- /dev/null
> +++ b/target/hexagon/mmvec/kvx_ieee.h
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HEXAGON_KVX_IEEE_H
> +#define HEXAGON_KVX_IEEE_H
> +
> +#include "fpu/softfloat.h"
> +
> +/* Hexagon canonical NaN */
> +#define FP32_DEF_NAN 0x7FFFFFFF
> +#define FP16_DEF_NAN 0x7FFF
>
These are the same as the scalar core, right? If so, there's already a
call to set_float_default_nan_pattern in hexagon_cpu_reset_hold.
If the patterns are different, you'll need to call
set_float_default_nan_pattern before each scalar FP instruction and before
each HVX FP instruction.
> +
> +/*
> + * IEEE - FP ADD/SUB/MPY instructions
> + */
> +uint32_t fp_mult_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
> +uint32_t fp_add_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
> +uint32_t fp_sub_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
> +
> +uint16_t fp_mult_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
> +uint16_t fp_add_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
> +uint16_t fp_sub_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
> +
> +uint32_t fp_mult_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
> +uint32_t fp_add_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
> +uint32_t fp_sub_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
> +
> +/*
> + * IEEE - FP Accumulate instructions
> + */
> +uint16_t fp_mult_hf_hf_acc(uint16_t a1, uint16_t a2, uint16_t acc,
> + float_status *fp_status);
> +uint32_t fp_mult_sf_hf_acc(uint16_t a1, uint16_t a2, uint32_t acc,
> + float_status *fp_status);
> +
> +/*
> + * IEEE - FP Reduce instructions
> + */
> +uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
> + float_status *fp_status);
> +uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2, uint16_t a3,
> + uint16_t a4, float_status *fp_status);
> +
>
Consider using macros similar to the ones in the .c file to create these
protos.
> +#endif
> diff --git a/target/hexagon/mmvec/kvx_ieee.c
> b/target/hexagon/mmvec/kvx_ieee.c
> new file mode 100644
> index 0000000000..b763899aa3
> --- /dev/null
> +++ b/target/hexagon/mmvec/kvx_ieee.c
> @@ -0,0 +1,87 @@
> +/*
> + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "kvx_ieee.h"
> +
> +#define DEF_FP_INSN_2(name, rt, a1t, a2t, op) \
> + uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
> + float_status *fp_status) { \
> + float##a1t f1 = make_float##a1t(a1); \
> + float##a2t f2 = make_float##a2t(a2); \
> + \
> + if (float##a1t##_is_any_nan(f1) || float##a2t##_is_any_nan(f2)) {
> \
> + return FP##rt##_DEF_NAN; \
> + } \
>
These nan checks shouldn't be needed if you're using QEMU softfloat
properly.
> + float##rt result = op; \
> + \
> + if (float##rt##_is_any_nan(result)) { \
> + return FP##rt##_DEF_NAN; \
> + } \
>
Ditto
> + return result; \
> + }
> +
> +#define DEF_FP_INSN_3(name, rt, a1t, a2t, a3t, op) \
> + uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
> + uint##a3t##_t a3, float_status *fp_status) { \
> + float##a1t f1 = make_float##a1t(a1); \
> + float##a2t f2 = make_float##a2t(a2); \
> + float##a3t f3 = make_float##a3t(a3); \
> + \
> + if (float##a1t##_is_any_nan(f1) || float##a2t##_is_any_nan(f2) ||
> \
> + float##a3t##_is_any_nan(f3)) \
> + return FP##rt##_DEF_NAN; \
>
Ditto
> + \
> + float##rt result = op; \
> + \
> + if (float##rt##_is_any_nan(result)) \
> + return FP##rt##_DEF_NAN; \
>
Ditto
> + return result; \
> + }
> +
> diff --git a/target/hexagon/imported/mmvec/ext.idef
> b/target/hexagon/imported/mmvec/ext.idef
> index 03d31f6181..3f0d8e366e 100644
> --- a/target/hexagon/imported/mmvec/ext.idef
> +++ b/target/hexagon/imported/mmvec/ext.idef
> @@ -2895,9 +2895,102 @@ EXTINSN(V6_vprefixqw,"Vd32.w=prefixsum(Qv4)",
> ATTRIBS(A_EXTENSION,A_CVI,A_CVI_
> }
> } )
>
> +/* KVX - IEEE FP Instructions */
>
> +/* Single pipe, 32-bit output */
> +#define ITERATOR_INSN_IEEE_FP_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
> +EXTINSN(V6_##TAG, SYNTAX, \
> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_32), \
> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>
> +/* Single pipe, 16-bit output */
> +#define ITERATOR_INSN_IEEE_FP_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
> +EXTINSN(V6_##TAG, SYNTAX, \
> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_16), \
> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>
> +/* Two pipes: P2 & P3, single output: P2, 32-bit output */
> +#define
> ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
> +EXTINSN(V6_##TAG, SYNTAX, \
> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32),
> \
> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
> +
> +/* Two pipes: P2 & P3, two outputs, 32-bit output */
> +#define ITERATOR_INSN_IEEE_FP_DOUBLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
> +EXTINSN(V6_##TAG, SYNTAX, \
> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32),
> \
> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
> +
> +/*
> + * single pipe, accumulate instruction, produces 16-bit output, requires
> 16-bit
> + * accumulate input
> + */
> +#define ITERATOR_INSN_IEEE_FP_ACC_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
> +EXTINSN(V6_##TAG, SYNTAX, \
> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_ACC,A_HVX_IEEE_FP_OUT_16,A_CVI_VX_NO_TMP_LD),
> \
> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
> +
> +/*
> + * single pipe, accumulate instruction, produces 32-bit output, requires
> 32-bit
> + * accumulate input
> + */
> +#define ITERATOR_INSN_IEEE_FP_ACC_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
> +EXTINSN(V6_##TAG, SYNTAX, \
> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_ACC,A_HVX_IEEE_FP_OUT_32,A_CVI_VX_NO_TMP_LD),
> \
> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
> +
> +/* IEEE FP multiply instructions */
> +ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(32, vmpy_sf_sf,
> + "Vd32.sf=vmpy(Vu32.sf,Vv32.sf)", "Vector IEEE mul: sf",
> + VdV.sf[i] = fp_mult_sf_sf(VuV.sf[i], VvV.sf[i], &env->fp_status))
>
Do these instructions interact with the FP bits in USR (e.g., rounding
mode, FP exceptions)?
If so, you'll need something similar to arch_fpop_start/arch_fpop_end at
the start/end of each helper. This can be done in gen_helper_funcs.py.
Thanks,
Taylor
On Mon, Mar 23, 2026 at 5:29 PM Taylor Simpson <ltaylorsimpson@gmail.com> wrote:
>
>
>
> On Mon, Mar 23, 2026 at 7:15 AM Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com> wrote:
>>
>> Add HVX IEEE floating-point arithmetic instructions:
>> - vmpy_sf_sf, vmpy_sf_hf, vmpy_hf_hf: multiply operations
>> - vdmpy_sf_hf: dot-product multiply
>> - vmpy_sf_hf_acc, vmpy_hf_hf_acc, vdmpy_sf_hf_acc: multiply-accumulate
>> - vadd_sf_sf, vsub_sf_sf, vadd_sf_hf, vsub_sf_hf: add/sub with sf output
>> - vadd_hf_hf, vsub_hf_hf: add/sub with hf output
>>
>> Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
>> ---
>> target/hexagon/mmvec/kvx_ieee.h | 47 ++++++++++
>> target/hexagon/mmvec/macros.h | 1 +
>> target/hexagon/mmvec/mmvec.h | 2 +
>> target/hexagon/attribs_def.h.inc | 4 +
>> target/hexagon/mmvec/kvx_ieee.c | 87 ++++++++++++++++++
>> target/hexagon/hex_common.py | 1 +
>> target/hexagon/imported/mmvec/encode_ext.def | 18 ++++
>> target/hexagon/imported/mmvec/ext.idef | 93 ++++++++++++++++++++
>> target/hexagon/meson.build | 1 +
>> 9 files changed, 254 insertions(+)
>> create mode 100644 target/hexagon/mmvec/kvx_ieee.h
>> create mode 100644 target/hexagon/mmvec/kvx_ieee.c
>
>
> I'm curious why the prefix is kvx instead of hvx.
Actually, not sure either... These files were imported from the arch
simulator. Brian suggested it was a leftover acronym that didn't
catch on. I'll rename to hvx_ieee.
>>
>> diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h
>> new file mode 100644
>> index 0000000000..e92ddebeb9
>> --- /dev/null
>> +++ b/target/hexagon/mmvec/kvx_ieee.h
>> @@ -0,0 +1,47 @@
>> +/*
>> + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#ifndef HEXAGON_KVX_IEEE_H
>> +#define HEXAGON_KVX_IEEE_H
>> +
>> +#include "fpu/softfloat.h"
>> +
>> +/* Hexagon canonical NaN */
>> +#define FP32_DEF_NAN 0x7FFFFFFF
>> +#define FP16_DEF_NAN 0x7FFF
>
>
> These are the same as the scalar core, right? If so, there's already a call to set_float_default_nan_pattern in hexagon_cpu_reset_hold.
>
> If the patterns are different, you'll need to call set_float_default_nan_pattern before each scalar FP instruction and before each HVX FP instruction.
Not the same, no. I'll adjust the set_float_default_nan_pattern call
accordingly.
>> +
>> +/*
>> + * IEEE - FP ADD/SUB/MPY instructions
>> + */
>> +uint32_t fp_mult_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
>> +uint32_t fp_add_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
>> +uint32_t fp_sub_sf_sf(uint32_t a1, uint32_t a2, float_status *fp_status);
>> +
>> +uint16_t fp_mult_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
>> +uint16_t fp_add_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
>> +uint16_t fp_sub_hf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
>> +
>> +uint32_t fp_mult_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
>> +uint32_t fp_add_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
>> +uint32_t fp_sub_sf_hf(uint16_t a1, uint16_t a2, float_status *fp_status);
>> +
>> +/*
>> + * IEEE - FP Accumulate instructions
>> + */
>> +uint16_t fp_mult_hf_hf_acc(uint16_t a1, uint16_t a2, uint16_t acc,
>> + float_status *fp_status);
>> +uint32_t fp_mult_sf_hf_acc(uint16_t a1, uint16_t a2, uint32_t acc,
>> + float_status *fp_status);
>> +
>> +/*
>> + * IEEE - FP Reduce instructions
>> + */
>> +uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
>> + float_status *fp_status);
>> +uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2, uint16_t a3,
>> + uint16_t a4, float_status *fp_status);
>> +
>
>
> Consider using macros similar to the ones in the .c file to create these protos.
Hmm, I think in this case, the boilerplate size will outweigh the
benefit of the macros.
>>
>> +#endif
>> diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
>> new file mode 100644
>> index 0000000000..b763899aa3
>> --- /dev/null
>> +++ b/target/hexagon/mmvec/kvx_ieee.c
>> @@ -0,0 +1,87 @@
>> +/*
>> + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "kvx_ieee.h"
>> +
>> +#define DEF_FP_INSN_2(name, rt, a1t, a2t, op) \
>> + uint##rt##_t fp_##name(uint##a1t##_t a1, uint##a2t##_t a2, \
>> + float_status *fp_status) { \
>> + float##a1t f1 = make_float##a1t(a1); \
>> + float##a2t f2 = make_float##a2t(a2); \
>> + \
>> + if (float##a1t##_is_any_nan(f1) || float##a2t##_is_any_nan(f2)) { \
>> + return FP##rt##_DEF_NAN; \
>> + } \
>
>
> These nan checks shouldn't be needed if you're using QEMU softfloat properly.
Ah, indeed. Just checked that. Will change
>> +
>> diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
>> index 03d31f6181..3f0d8e366e 100644
>> --- a/target/hexagon/imported/mmvec/ext.idef
>> +++ b/target/hexagon/imported/mmvec/ext.idef
>> @@ -2895,9 +2895,102 @@ EXTINSN(V6_vprefixqw,"Vd32.w=prefixsum(Qv4)", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_
>> }
>> } )
>>
>> +/* KVX - IEEE FP Instructions */
>>
>> +/* Single pipe, 32-bit output */
>> +#define ITERATOR_INSN_IEEE_FP_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
>> +EXTINSN(V6_##TAG, SYNTAX, \
>> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_32), \
>> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>>
>> +/* Single pipe, 16-bit output */
>> +#define ITERATOR_INSN_IEEE_FP_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
>> +EXTINSN(V6_##TAG, SYNTAX, \
>> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_OUT_16), \
>> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>>
>> +/* Two pipes: P2 & P3, single output: P2, 32-bit output */
>> +#define ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
>> +EXTINSN(V6_##TAG, SYNTAX, \
>> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32), \
>> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>> +
>> +/* Two pipes: P2 & P3, two outputs, 32-bit output */
>> +#define ITERATOR_INSN_IEEE_FP_DOUBLE_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
>> +EXTINSN(V6_##TAG, SYNTAX, \
>> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX_DV,A_HVX_IEEE_FP_OUT_32), \
>> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>> +
>> +/*
>> + * single pipe, accumulate instruction, produces 16-bit output, requires 16-bit
>> + * accumulate input
>> + */
>> +#define ITERATOR_INSN_IEEE_FP_ACC_16(WIDTH,TAG,SYNTAX,DESCR,CODE) \
>> +EXTINSN(V6_##TAG, SYNTAX, \
>> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_ACC,A_HVX_IEEE_FP_OUT_16,A_CVI_VX_NO_TMP_LD), \
>> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>> +
>> +/*
>> + * single pipe, accumulate instruction, produces 32-bit output, requires 32-bit
>> + * accumulate input
>> + */
>> +#define ITERATOR_INSN_IEEE_FP_ACC_32(WIDTH,TAG,SYNTAX,DESCR,CODE) \
>> +EXTINSN(V6_##TAG, SYNTAX, \
>> +ATTRIBS(A_EXTENSION,A_HVX_IEEE_FP,A_CVI,A_CVI_VX,A_HVX_IEEE_FP_ACC,A_HVX_IEEE_FP_OUT_32,A_CVI_VX_NO_TMP_LD), \
>> +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE))
>> +
>> +/* IEEE FP multiply instructions */
>> +ITERATOR_INSN_IEEE_FP_DOUBLE_SINGLE_32(32, vmpy_sf_sf,
>> + "Vd32.sf=vmpy(Vu32.sf,Vv32.sf)", "Vector IEEE mul: sf",
>> + VdV.sf[i] = fp_mult_sf_sf(VuV.sf[i], VvV.sf[i], &env->fp_status))
>
>
> Do these instructions interact with the FP bits in USR (e.g., rounding mode, FP exceptions)?
They do not. I'll add a new env->hvx_fp_status and use that for the
default nan. This way we can avoid messing up with the scalar
fp_status.
On Tue, Mar 24, 2026 at 1:30 PM Matheus Bernardino <
matheus.bernardino@oss.qualcomm.com> wrote:
> On Mon, Mar 23, 2026 at 5:29 PM Taylor Simpson <ltaylorsimpson@gmail.com>
> wrote:
> >> +/*
> >> + * IEEE - FP Reduce instructions
> >> + */
> >> +uint32_t fp_vdmpy(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4,
> >> + float_status *fp_status);
> >> +uint32_t fp_vdmpy_acc(uint32_t acc, uint16_t a1, uint16_t a2, uint16_t
> a3,
> >> + uint16_t a4, float_status *fp_status);
> >> +
> >
> >
> > Consider using macros similar to the ones in the .c file to create these
> protos.
>
> Hmm, I think in this case, the boilerplate size will outweigh the
> benefit of the macros.
>
OK
> >
> > Do these instructions interact with the FP bits in USR (e.g., rounding
> mode, FP exceptions)?
>
> They do not. I'll add a new env->hvx_fp_status and use that for the
> default nan. This way we can avoid messing up with the scalar
> fp_status.
>
That will work for the nan. Is there any programmer-visible state for
rounding mode or FP exceptions?
Thanks,
Taylor
On Tue, Mar 24, 2026 at 4:51 PM Taylor Simpson <ltaylorsimpson@gmail.com> wrote:
>
>
>
> On Tue, Mar 24, 2026 at 1:30 PM Matheus Bernardino <matheus.bernardino@oss.qualcomm.com> wrote:
>>
>> On Mon, Mar 23, 2026 at 5:29 PM Taylor Simpson <ltaylorsimpson@gmail.com> wrote:
>>
>> >
>> > Do these instructions interact with the FP bits in USR (e.g., rounding mode, FP exceptions)?
>>
>> They do not. I'll add a new env->hvx_fp_status and use that for the
>> default nan. This way we can avoid messing up with the scalar
>> fp_status.
>
>
> That will work for the nan. Is there any programmer-visible state for rounding mode or FP exceptions?
No, rounding is always float_round_nearest_even (the default) and
FWICT the HVX IEEE FP functions don't track or report any FP
exceptions.
On Tue, Mar 24, 2026 at 2:00 PM Matheus Bernardino <
matheus.bernardino@oss.qualcomm.com> wrote:
> On Tue, Mar 24, 2026 at 4:51 PM Taylor Simpson <ltaylorsimpson@gmail.com>
> wrote:
> >
> >
> >
> > On Tue, Mar 24, 2026 at 1:30 PM Matheus Bernardino <
> matheus.bernardino@oss.qualcomm.com> wrote:
> >>
> >> On Mon, Mar 23, 2026 at 5:29 PM Taylor Simpson <
> ltaylorsimpson@gmail.com> wrote:
> >>
> >> >
> >> > Do these instructions interact with the FP bits in USR (e.g.,
> rounding mode, FP exceptions)?
> >>
> >> They do not. I'll add a new env->hvx_fp_status and use that for the
> >> default nan. This way we can avoid messing up with the scalar
> >> fp_status.
> >
> >
> > That will work for the nan. Is there any programmer-visible state for
> rounding mode or FP exceptions?
>
> No, rounding is always float_round_nearest_even (the default) and
> FWICT the HVX IEEE FP functions don't track or report any FP
> exceptions.
>
OK, then disregard my comments about checking the exceptions.
© 2016 - 2026 Red Hat, Inc.