[v1] hexagon: add missing HVX float instructions

[PATCH 10/13] tests/hexagon: add tests for v68 HVX IEEE float arithmetics

Posted by Matheus Tavares Bernardino 1 week, 4 days ago

Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
---
 tests/tcg/hexagon/hvx_misc.h        |  12 +++
 tests/tcg/hexagon/fp_hvx.c          | 129 ++++++++++++++++++++++++++++
 tests/tcg/hexagon/fp_hvx_disabled.c |  32 +++++++
 tests/tcg/hexagon/Makefile.target   |   8 ++
 4 files changed, 181 insertions(+)
 create mode 100644 tests/tcg/hexagon/fp_hvx.c
 create mode 100644 tests/tcg/hexagon/fp_hvx_disabled.c

diff --git a/tests/tcg/hexagon/hvx_misc.h b/tests/tcg/hexagon/hvx_misc.h
index 2e868340fd..771a4a22b6 100644
--- a/tests/tcg/hexagon/hvx_misc.h
+++ b/tests/tcg/hexagon/hvx_misc.h
@@ -34,8 +34,10 @@ typedef union {
     uint64_t ud[MAX_VEC_SIZE_BYTES / 8];
     int64_t   d[MAX_VEC_SIZE_BYTES / 8];
     uint32_t uw[MAX_VEC_SIZE_BYTES / 4];
+    uint32_t sf[MAX_VEC_SIZE_BYTES / 4]; /* convenience alias */
     int32_t   w[MAX_VEC_SIZE_BYTES / 4];
     uint16_t uh[MAX_VEC_SIZE_BYTES / 2];
+    uint16_t hf[MAX_VEC_SIZE_BYTES / 2]; /* convenience alias */
     int16_t   h[MAX_VEC_SIZE_BYTES / 2];
     uint8_t  ub[MAX_VEC_SIZE_BYTES / 1];
     int8_t    b[MAX_VEC_SIZE_BYTES / 1];
@@ -63,7 +65,9 @@ static inline void check_output_##FIELD(int line, size_t num_vectors) \
 
 CHECK_OUTPUT_FUNC(d,  8)
 CHECK_OUTPUT_FUNC(w,  4)
+CHECK_OUTPUT_FUNC(sf, 4)
 CHECK_OUTPUT_FUNC(h,  2)
+CHECK_OUTPUT_FUNC(hf, 2)
 CHECK_OUTPUT_FUNC(b,  1)
 
 static inline void init_buffers(void)
@@ -175,4 +179,12 @@ static inline void test_##NAME(bool invert) \
     check_output_b(__LINE__, BUFSIZE); \
 }
 
+#define float_sf(x) ({ typeof(x) _x = (x); *((float *)&(_x)); })
+#define float_hf(x) ({ typeof(x) _x = (x); *((_Float16 *) &(_x)); })
+#define raw_sf(x) ({ typeof(x) _x = (x); *((uint32_t *)&(_x)); })
+#define raw_hf(x) ({ typeof(x) _x = (x); *((uint16_t *)&(_x)); })
+#define float_hf_to_sf(x) ((float)(x)) /* value conversion, not a bit cast */
+#define bytes_hf 2 /* size in bytes of one hf element */
+#define bytes_sf 4 /* size in bytes of one sf element */
+
 #endif
diff --git a/tests/tcg/hexagon/fp_hvx.c b/tests/tcg/hexagon/fp_hvx.c
new file mode 100644
index 0000000000..85b8ff78ed
--- /dev/null
+++ b/tests/tcg/hexagon/fp_hvx.c
@@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <hexagon_types.h>
+#include <hvx_hexagon_protos.h>
+
+int err;
+#include "hvx_misc.h"
+
+#if __HEXAGON_ARCH__ > 75
+#error "After v75, compiler will replace some FP HVX instructions."
+#endif
+
+/******************************************************************************
+ * NAN handling
+ *****************************************************************************/
+
+#define isnan(X) \
+     (sizeof(X) == bytes_hf ? ((raw_hf(X) & ~0x8000) > 0x7c00) : \
+                              ((raw_sf(X) & ~(1U << 31)) > 0x7f800000UL))
+/* Replace a NaN result with the given default NaN; pass others through. */
+#define CHECK_NAN(A, DEF_NAN) (isnan(A) ? (DEF_NAN) : (A))
+#define NAN_SF float_sf(0x7FFFFFFF)
+#define NAN_HF float_hf(0x7FFF)
+
+/******************************************************************************
+ * Binary operations
+ *****************************************************************************/
+
+#define DEF_TEST_OP_2(vop, op, type_res, type_arg) \
+    static void test_##vop##_##type_res##_##type_arg(void) \
+    { \
+        memset(expect, 0xff, sizeof(expect)); \
+        memset(output, 0xff, sizeof(output)); \
+        HVX_Vector *hvx_output = (HVX_Vector *)&output[0]; \
+        HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[0]; \
+        HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[0]; \
+        /* Invoke the intrinsic under test on vector 0 of each input. */ \
+        *hvx_output = \
+            Q6_V##type_res##_##vop##_V##type_arg##V##type_arg(hvx_buffer0, \
+                                                              hvx_buffer1); \
+        /* Compute the reference lane by lane with plain C arithmetic. */ \
+        for (int i = 0; i < MAX_VEC_SIZE_BYTES / bytes_##type_res; i++) { \
+            expect[0].type_res[i] = \
+                raw_##type_res(op(float_##type_arg(buffer0[0].type_arg[i]), \
+                                  float_##type_arg(buffer1[0].type_arg[i]))); \
+        } \
+        check_output_##type_res(__LINE__, 1); \
+    }
+
+#define SUM(X, Y, DEF_NAN) CHECK_NAN((X) + (Y), DEF_NAN)
+#define SUB(X, Y, DEF_NAN) CHECK_NAN((X) - (Y), DEF_NAN)
+#define MULT(X, Y, DEF_NAN) CHECK_NAN((X) * (Y), DEF_NAN)
+/* Per-type wrappers: bind each operation to the default NaN of its type. */
+#define SUM_SF(X, Y) SUM(X, Y, NAN_SF)
+#define SUM_HF(X, Y) SUM(X, Y, NAN_HF)
+#define SUB_SF(X, Y) SUB(X, Y, NAN_SF)
+#define SUB_HF(X, Y) SUB(X, Y, NAN_HF)
+#define MULT_SF(X, Y) MULT(X, Y, NAN_SF)
+#define MULT_HF(X, Y) MULT(X, Y, NAN_HF)
+/* Emit test_<vop>_<type_res>_<type_arg>() for every operation/type pair. */
+DEF_TEST_OP_2(vadd, SUM_SF, sf, sf);
+DEF_TEST_OP_2(vadd, SUM_HF, hf, hf);
+DEF_TEST_OP_2(vsub, SUB_SF, sf, sf);
+DEF_TEST_OP_2(vsub, SUB_HF, hf, hf);
+DEF_TEST_OP_2(vmpy, MULT_SF, sf, sf);
+DEF_TEST_OP_2(vmpy, MULT_HF, hf, hf);
+
+/******************************************************************************
+ * Dot product (vdmpy, or vdmpyacc when acc is true)
+ *****************************************************************************/
+
+static void test_vdmpy_sf_hf(bool acc)
+{
+    HVX_Vector *hvx_output = (HVX_Vector *)&output[0];
+    HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[0];
+    HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[0];
+
+    uint32_t PREFIL_VAL = 0x111222; /* raw sf bits seeding the accumulator */
+    memset(expect, 0xff, sizeof(expect));
+    *hvx_output = Q6_V_vsplat_R(PREFIL_VAL);
+
+    if (!acc) {
+        *hvx_output = Q6_Vsf_vdmpy_VhfVhf(hvx_buffer0, hvx_buffer1);
+    } else {
+        *hvx_output = Q6_Vsf_vdmpyacc_VsfVhfVhf(*hvx_output, hvx_buffer0,
+                                                hvx_buffer1);
+    }
+    /* Reference: sf lane i combines hf lanes 2i and 2i+1 of both inputs. */
+    for (int i = 0; i < MAX_VEC_SIZE_BYTES / bytes_sf; i++) {
+        float a1 = float_hf_to_sf(float_hf(buffer0[0].hf[2 * i + 1]));
+        float a2 = float_hf_to_sf(float_hf(buffer0[0].hf[2 * i]));
+        float a3 = float_hf_to_sf(float_hf(buffer1[0].hf[2 * i + 1]));
+        float a4 = float_hf_to_sf(float_hf(buffer1[0].hf[2 * i]));
+        float prev = acc ? float_sf(PREFIL_VAL) : 0;
+        expect[0].sf[i] = raw_sf(CHECK_NAN(a1 * a3 + a2 * a4 + prev, NAN_SF));
+    }
+
+    check_output_sf(__LINE__, 1);
+}
+
+int main(void)
+{
+    init_buffers();
+
+    /* add/sub: sf and hf element types */
+    test_vadd_sf_sf();
+    test_vadd_hf_hf();
+    test_vsub_sf_sf();
+    test_vsub_hf_hf();
+
+    /* multiply: sf and hf element types */
+    test_vmpy_sf_sf();
+    test_vmpy_hf_hf();
+
+    /* dot product: plain (false), then accumulating (true) */
+    test_vdmpy_sf_hf(false);
+    test_vdmpy_sf_hf(true);
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/fp_hvx_disabled.c b/tests/tcg/hexagon/fp_hvx_disabled.c
new file mode 100644
index 0000000000..af409ab8d2
--- /dev/null
+++ b/tests/tcg/hexagon/fp_hvx_disabled.c
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <hexagon_types.h>
+#include <hvx_hexagon_protos.h>
+
+int err;
+#include "hvx_misc.h"
+
+int main(void)
+{
+    asm volatile("r0 = #0xff\n"
+                 "v0 = vsplat(r0)\n"
+                 "vmem(%1 + #0) = v0\n"          /* %1 is expect: store splat */
+                 "r1 = #0x1\n"
+                 "v1 = vsplat(r1)\n"
+                 "v2 = vsplat(r1)\n"
+                 "v0.sf = vadd(v1.sf, v2.sf)\n"  /* FP instruction under test */
+                 "vmem(%0 + #0) = v0\n"          /* %0 is output: store v0 */
+                 :
+                 : "r"(output), "r"(expect)
+                 : "r0", "r1", "v0", "v1", "v2", "memory");
+    /* NOTE(review): presumably passes only if the vadd left v0 unchanged. */
+    check_output_w(__LINE__, 1);
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index a70ef2f660..16072c96fd 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -50,6 +50,8 @@ HEX_TESTS += vector_add_int
 HEX_TESTS += scatter_gather
 HEX_TESTS += hvx_misc
 HEX_TESTS += hvx_histogram
+HEX_TESTS += fp_hvx
+HEX_TESTS += fp_hvx_disabled
 HEX_TESTS += invalid-slots
 HEX_TESTS += invalid-encoding
 HEX_TESTS += multiple-writes
@@ -123,6 +125,12 @@ v68_hvx: CFLAGS += -mhvx -Wno-unused-function
 v69_hvx: v69_hvx.c hvx_misc.h
 v69_hvx: CFLAGS += -mhvx -Wno-unused-function
 v73_scalar: CFLAGS += -Wno-unused-function
+fp_hvx: fp_hvx.c hvx_misc.h
+fp_hvx: CFLAGS += -mhvx -mhvx-ieee-fp
+fp_hvx_disabled: fp_hvx_disabled.c hvx_misc.h
+fp_hvx_disabled: CFLAGS += -mhvx -mhvx-ieee-fp
+
+run-fp_hvx_disabled: QEMU_OPTS += -cpu v73,ieee-fp=false
 
 hvx_histogram: hvx_histogram.c hvx_histogram_row.S
 	$(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS)
-- 
2.37.2

Re: [PATCH 10/13] tests/hexagon: add tests for v68 HVX IEEE float arithmetics

Posted by Taylor Simpson 1 week, 3 days ago

On Mon, Mar 23, 2026 at 7:16 AM Matheus Tavares Bernardino <
matheus.bernardino@oss.qualcomm.com> wrote:

> Signed-off-by: Matheus Tavares Bernardino <
> matheus.bernardino@oss.qualcomm.com>
> ---
>  tests/tcg/hexagon/hvx_misc.h        |  12 +++
>  tests/tcg/hexagon/fp_hvx.c          | 129 ++++++++++++++++++++++++++++
>  tests/tcg/hexagon/fp_hvx_disabled.c |  32 +++++++
>  tests/tcg/hexagon/Makefile.target   |   8 ++
>  4 files changed, 181 insertions(+)
>  create mode 100644 tests/tcg/hexagon/fp_hvx.c
>  create mode 100644 tests/tcg/hexagon/fp_hvx_disabled.c
>
> diff --git a/tests/tcg/hexagon/fp_hvx.c b/tests/tcg/hexagon/fp_hvx.c
> new file mode 100644
> index 0000000000..85b8ff78ed
> --- /dev/null
> +++ b/tests/tcg/hexagon/fp_hvx.c
> @@ -0,0 +1,129 @@
> +/*
> + *  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
> + *
> + *  SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <hexagon_types.h>
> +#include <hvx_hexagon_protos.h>
> +
> +int err;
> +#include "hvx_misc.h"
> +
> +#if __HEXAGON_ARCH__ > 75
> +#error "After v75, compiler will replace some FP HVX instructions."
> +#endif
> +
> +/******************************************************************************
> + * NAN handling
> + *****************************************************************************/
> +
> +#define isnan(X) \
> +     (sizeof(X) == bytes_hf ? ((raw_hf(X) & ~0x8000) > 0x7c00) : \
> +                              ((raw_sf(X) & ~(1 << 31)) > 0x7f800000UL))
> +
> +#define CHECK_NAN(A, DEF_NAN) (isnan(A) ? DEF_NAN : (A))
> +#define NAN_SF float_sf(0x7FFFFFFF)
> +#define NAN_HF float_hf(0x7FFF)
> +
> +/******************************************************************************
> + * Binary operations
> + *****************************************************************************/
> +
> +#define DEF_TEST_OP_2(vop, op, type_res, type_arg) \
> +    static void test_##vop##_##type_res##_##type_arg(void) \
> +    { \
> +        memset(expect, 0xff, sizeof(expect)); \
> +        memset(output, 0xff, sizeof(expect)); \
>

sizeof(output)


> +        HVX_Vector *hvx_output = (HVX_Vector *)&output[0]; \
> +        HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[0]; \
> +        HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[0]; \
> +        \
> +        *hvx_output = \
> +            Q6_V##type_res##_##vop##_V##type_arg##V##type_arg(hvx_buffer0, \
> +                                                              hvx_buffer1); \
> +        \
> +        for (int i = 0; i < MAX_VEC_SIZE_BYTES / bytes_##type_res; i++) { \
> +            expect[0].type_res[i] = \
> +                raw_##type_res(op(float_##type_arg(buffer0[0].type_arg[i]), \
> +                                  float_##type_arg(buffer1[0].type_arg[i]))); \
> +        } \
>

Put this in a loop over the input buffers to get more input values.  Then
change the second argument to check_output below.


> +        check_output_##type_res(__LINE__, 1); \
> +    }
> +
> +#define SUM(X, Y, DEF_NAN) CHECK_NAN((X) + (Y), DEF_NAN)
> +#define SUB(X, Y, DEF_NAN) CHECK_NAN((X) - (Y), DEF_NAN)
> +#define MULT(X, Y, DEF_NAN) CHECK_NAN((X) * (Y), DEF_NAN)
> +
> +#define SUM_SF(X, Y) SUM(X, Y, NAN_SF)
> +#define SUM_HF(X, Y) SUM(X, Y, NAN_HF)
> +#define SUB_SF(X, Y) SUB(X, Y, NAN_SF)
> +#define SUB_HF(X, Y) SUB(X, Y, NAN_HF)
> +#define MULT_SF(X, Y) MULT(X, Y, NAN_SF)
> +#define MULT_HF(X, Y) MULT(X, Y, NAN_HF)
> +
> +DEF_TEST_OP_2(vadd, SUM_SF, sf, sf);
> +DEF_TEST_OP_2(vadd, SUM_HF, hf, hf);
> +DEF_TEST_OP_2(vsub, SUB_SF, sf, sf);
> +DEF_TEST_OP_2(vsub, SUB_HF, hf, hf);
> +DEF_TEST_OP_2(vmpy, MULT_SF, sf, sf);
> +DEF_TEST_OP_2(vmpy, MULT_HF, hf, hf);
> +
> +/******************************************************************************
> + * Other tests
> + *****************************************************************************/
> +
> +void test_vdmpy_sf_hf(bool acc)
> +{
> +    HVX_Vector *hvx_output = (HVX_Vector *)&output[0];
> +    HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[0];
> +    HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[0];
> +
> +    uint32_t PREFIL_VAL = 0x111222;
> +    memset(expect, 0xff, sizeof(expect));
> +    *hvx_output = Q6_V_vsplat_R(PREFIL_VAL);
> +
> +    if (!acc) {
> +        *hvx_output = Q6_Vsf_vdmpy_VhfVhf(hvx_buffer0, hvx_buffer1);
> +    } else {
> +        *hvx_output = Q6_Vsf_vdmpyacc_VsfVhfVhf(*hvx_output, hvx_buffer0,
> +                                                hvx_buffer1);
> +    }
> +
> +    for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
> +        float a1 = float_hf_to_sf(float_hf(buffer0[0].hf[2 * i + 1]));
> +        float a2 = float_hf_to_sf(float_hf(buffer0[0].hf[2 * i]));
> +        float a3 = float_hf_to_sf(float_hf(buffer1[0].hf[2 * i + 1]));
> +        float a4 = float_hf_to_sf(float_hf(buffer1[0].hf[2 * i]));
> +        float prev = acc ? float_sf(PREFIL_VAL) : 0;
> +        expect[0].sf[i] = raw_sf(CHECK_NAN((a1 * a3) + (a2 * a4) + prev, NAN_SF));
> +    }
>

Put this into a loop also.


> +
> +    check_output_sf(__LINE__, 1);
> +}
> +
> +int main(void)
> +{
> +    init_buffers();
>

The init_buffers function is designed to create inputs for non-FP functions.
Create a new function to initialize the buffers with interesting FP values
(e.g., NaN, large FP values that will lead to overflow).
Also, see my prior comment about FP flags.  We'll want to check those here.
We should also add some tests with packets.  See my prior comment about
.new values.


> +
> +    /* add/sub */
> +    test_vadd_sf_sf();
> +    test_vadd_hf_hf();
> +    test_vsub_sf_sf();
> +    test_vsub_hf_hf();
> +
> +    /* multiply */
> +    test_vmpy_sf_sf();
> +    test_vmpy_hf_hf();
> +
> +    /* dot product */
> +    test_vdmpy_sf_hf(false);
> +    test_vdmpy_sf_hf(true);
> +
> +    puts(err ? "FAIL" : "PASS");
> +    return err ? 1 : 0;
> +}
> diff --git a/tests/tcg/hexagon/fp_hvx_disabled.c b/tests/tcg/hexagon/fp_hvx_disabled.c
> new file mode 100644
> index 0000000000..af409ab8d2
> --- /dev/null
> +++ b/tests/tcg/hexagon/fp_hvx_disabled.c
> @@ -0,0 +1,32 @@
> +/*
> + *  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
> + *
> + *  SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <stdio.h>
> +#include <string.h>
> +#include <hexagon_types.h>
> +#include <hvx_hexagon_protos.h>
> +
> +int err;
> +#include "hvx_misc.h"
> +
> +int main(void)
> +{
> +    asm volatile("r0 = #0xff\n"
> +                 "v0 = vsplat(r0)\n"
> +                 "vmem(%1 + #0) = v0\n"
> +                 "r1 = #0x1\n"
> +                 "v1 = vsplat(r1)\n"
> +                 "v2 = vsplat(r1)\n"
> +                 "v0.sf = vadd(v1.sf, v2.sf)\n"
> +                 "vmem(%0 + #0) = v0\n"
> +                 :
> +                 : "r"(output), "r"(expect)
> +                 : "r0", "r1", "v0", "v1", "v2", "memory");
>

Add a test where the result is used in a .new context.


> +
> +    check_output_w(__LINE__, 1);
> +    puts(err ? "FAIL" : "PASS");
> +    return err ? 1 : 0;
> +}
>
>

Re: [PATCH 10/13] tests/hexagon: add tests for v68 HVX IEEE float arithmetics

Posted by Matheus Bernardino 1 week ago

On Tue, Mar 24, 2026 at 4:05 PM Taylor Simpson <ltaylorsimpson@gmail.com> wrote:
>
>
>
> On Mon, Mar 23, 2026 at 7:16 AM Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com> wrote:
>>
>> Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
>> ---
>>
>> +        HVX_Vector *hvx_output = (HVX_Vector *)&output[0]; \
>> +        HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[0]; \
>> +        HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[0]; \
>> +        \
>> +        *hvx_output = \
>> +            Q6_V##type_res##_##vop##_V##type_arg##V##type_arg(hvx_buffer0, \
>> +                                                              hvx_buffer1); \
>> +        \
>> +        for (int i = 0; i < MAX_VEC_SIZE_BYTES / bytes_##type_res; i++) { \
>> +            expect[0].type_res[i] = \
>> +                raw_##type_res(op(float_##type_arg(buffer0[0].type_arg[i]), \
>> +                                  float_##type_arg(buffer1[0].type_arg[i]))); \
>> +        } \
>
>
> Put this in a loop over the input buffers to get more input values.  Then change the second argument to check_output below.

Will do!

>>
>> +
>> +    check_output_sf(__LINE__, 1);
>> +}
>> +
>> +int main(void)
>> +{
>> +    init_buffers();
>
>
> The init_buffers function is designed to create inputs for non-FP functions.
> Create a new function to initialize the buffers with interesting FP values (e.g., NaN, large FP values that will lead to overflow).
> Also, see my prior comment about FP flags.  We'll want to check those here.
> We should also add some tests with packets.  See my prior comment about .new values.

Great ideas! Ok, will implement both.