[v2] hexagon: add missing HVX float instructions

[PATCH v2 12/16] tests/hexagon: add tests for v68 HVX IEEE float arithmetic
Posted by Matheus Tavares Bernardino 1 day, 10 hours ago
Signed-off-by: Matheus Tavares Bernardino <matheus.bernardino@oss.qualcomm.com>
---
 tests/tcg/hexagon/hex_test.h        |   4 +
 tests/tcg/hexagon/hvx_misc.h        |  41 ++++++++
 tests/tcg/hexagon/fp_hvx.c          | 155 ++++++++++++++++++++++++++++
 tests/tcg/hexagon/fp_hvx_disabled.c |  57 ++++++++++
 tests/tcg/hexagon/Makefile.target   |   8 ++
 5 files changed, 265 insertions(+)
 create mode 100644 tests/tcg/hexagon/fp_hvx.c
 create mode 100644 tests/tcg/hexagon/fp_hvx_disabled.c

diff --git a/tests/tcg/hexagon/hex_test.h b/tests/tcg/hexagon/hex_test.h
index cfed06a58b..e7a6644d41 100644
--- a/tests/tcg/hexagon/hex_test.h
+++ b/tests/tcg/hexagon/hex_test.h
@@ -19,6 +19,8 @@
 #ifndef HEX_TEST_H
 #define HEX_TEST_H
 
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
 static inline void __check32(int line, uint32_t val, uint32_t expect)
 {
     if (val != expect) {
@@ -110,6 +112,7 @@ static inline void __check64_ne(int line, uint64_t val, uint64_t expect)
 
 /* Some useful floating point values */
 const uint32_t SF_INF =              0x7f800000;
+const uint32_t SF_INF_neg =          0xff800000;
 const uint32_t SF_QNaN =             0x7fc00000;
 const uint32_t SF_QNaN_special =     0x7f800001;
 const uint32_t SF_SNaN =             0x7fb00000;
@@ -128,6 +131,7 @@ const uint32_t SF_large_pos =        0x5afa572e;
 const uint32_t SF_any =              0x3f800000;
 const uint32_t SF_denorm =           0x00000001;
 const uint32_t SF_random =           0x346001d6;
+const uint32_t SF_neg_two =          0xc0000000;
 
 const uint64_t DF_QNaN =             0x7ff8000000000000ULL;
 const uint64_t DF_SNaN =             0x7ff7000000000000ULL;
diff --git a/tests/tcg/hexagon/hvx_misc.h b/tests/tcg/hexagon/hvx_misc.h
index 2e868340fd..0330cb289d 100644
--- a/tests/tcg/hexagon/hvx_misc.h
+++ b/tests/tcg/hexagon/hvx_misc.h
@@ -18,6 +18,8 @@
 #ifndef HVX_MISC_H
 #define HVX_MISC_H
 
+#include "hex_test.h"
+
 static inline void check(int line, int i, int j,
                          uint64_t result, uint64_t expect)
 {
@@ -34,8 +36,10 @@ typedef union {
     uint64_t ud[MAX_VEC_SIZE_BYTES / 8];
     int64_t   d[MAX_VEC_SIZE_BYTES / 8];
     uint32_t uw[MAX_VEC_SIZE_BYTES / 4];
+    uint32_t sf[MAX_VEC_SIZE_BYTES / 4]; /* convenience alias */
     int32_t   w[MAX_VEC_SIZE_BYTES / 4];
     uint16_t uh[MAX_VEC_SIZE_BYTES / 2];
+    uint16_t hf[MAX_VEC_SIZE_BYTES / 2]; /* convenience alias */
     int16_t   h[MAX_VEC_SIZE_BYTES / 2];
     uint8_t  ub[MAX_VEC_SIZE_BYTES / 1];
     int8_t    b[MAX_VEC_SIZE_BYTES / 1];
@@ -63,7 +67,9 @@ static inline void check_output_##FIELD(int line, size_t num_vectors) \
 
 CHECK_OUTPUT_FUNC(d,  8)
 CHECK_OUTPUT_FUNC(w,  4)
+CHECK_OUTPUT_FUNC(sf, 4)
 CHECK_OUTPUT_FUNC(h,  2)
+CHECK_OUTPUT_FUNC(hf, 2)
 CHECK_OUTPUT_FUNC(b,  1)
 
 static inline void init_buffers(void)
@@ -81,6 +87,33 @@ static inline void init_buffers(void)
     }
 }
 
+static const uint32_t FP_VALUES[] = {
+    SF_INF, SF_INF_neg, SF_QNaN, SF_QNaN_special, SF_SNaN, SF_QNaN_neg,
+    SF_SNaN_neg, SF_HEX_NaN, SF_zero, SF_zero_neg, SF_one, SF_one_recip,
+    SF_one_invsqrta, SF_two, SF_four, SF_small_neg, SF_large_pos, SF_any,
+    SF_denorm, SF_random, SF_neg_two,
+};
+#define FP_VALUES_MAX ARRAY_SIZE(FP_VALUES)
+
+static inline void init_buffers_fp(void)
+{
+    _Static_assert(BUFSIZE * (MAX_VEC_SIZE_BYTES / 4) >=
+                   FP_VALUES_MAX * FP_VALUES_MAX,
+                   "test arrays can't fit all FP_VALUES combinations");
+    size_t counter1 = 0, counter2 = 0;
+    /* Fill (buffer0, buffer1) with every ordered pair of FP_VALUES. */
+    for (int i = 0; i < BUFSIZE; i++) {
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            buffer0[i].sf[j] = FP_VALUES[counter1];
+            buffer1[i].sf[j] = FP_VALUES[counter2];
+            if (++counter2 == FP_VALUES_MAX) {
+                counter2 = 0;
+                counter1 = (counter1 + 1) % FP_VALUES_MAX;
+            }
+        }
+    }
+}
+
 #define VEC_OP1(ASM, EL, IN, OUT) \
     asm("v2 = vmem(%0 + #0)\n\t" \
         "v2" #EL " = " #ASM "(v2" #EL ")\n\t" \
@@ -175,4 +208,12 @@ static inline void test_##NAME(bool invert) \
     check_output_b(__LINE__, BUFSIZE); \
 }
 
+#define float_sf(x) ({ typeof(x) _x = (x); *((float *)&(_x)); })
+#define float_hf(x) ({ typeof(x) _x = (x); *((_Float16 *)&(_x)); })
+#define raw_sf(x) ({ typeof(x) _x = (x); *((uint32_t *)&(_x)); })
+#define raw_hf(x) ({ typeof(x) _x = (x); *((uint16_t *)&(_x)); })
+#define float_hf_to_sf(x) ((float)(x)) /* value conversion, not a bit cast */
+#define bytes_hf 2 /* element width of the hf[] view */
+#define bytes_sf 4 /* element width of the sf[] view */
+
 #endif
diff --git a/tests/tcg/hexagon/fp_hvx.c b/tests/tcg/hexagon/fp_hvx.c
new file mode 100644
index 0000000000..0365833753
--- /dev/null
+++ b/tests/tcg/hexagon/fp_hvx.c
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <hexagon_types.h>
+#include <hvx_hexagon_protos.h>
+
+int err;
+#include "hvx_misc.h"
+
+#if __HEXAGON_ARCH__ > 75
+#error "After v75, compiler will replace some FP HVX instructions."
+#endif
+
+/******************************************************************************
+ * NAN handling
+ *****************************************************************************/
+
+#define isnan(X) \
+    (sizeof(X) == bytes_hf ? ((raw_hf(X) & ~0x8000) > 0x7c00) : \
+                             ((raw_sf(X) & ~(1U << 31)) > 0x7f800000UL))
+/* Map every NaN to the single DEF_NAN encoding; pass other values through. */
+#define CHECK_NAN(A, DEF_NAN) (isnan(A) ? (DEF_NAN) : (A))
+#define NAN_SF float_sf(0x7FFFFFFF) /* sf NaN pattern the tests expect */
+#define NAN_HF float_hf(0x7FFF) /* hf NaN pattern the tests expect */
+
+/******************************************************************************
+ * Binary operations
+ *****************************************************************************/
+
+#define DEF_TEST_OP_2(vop, op, type_res, type_arg) \
+    static void test_##vop##_##type_res##_##type_arg(void) \
+    { \
+        memset(expect, 0xff, sizeof(expect)); \
+        memset(output, 0xff, sizeof(output)); \
+        for (int i = 0; i < BUFSIZE; i++) { \
+            HVX_Vector *hvx_output = (HVX_Vector *)&output[i]; \
+            HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[i]; \
+            HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[i]; \
+            *hvx_output = \
+                Q6_V##type_res##_##vop##_V##type_arg##V##type_arg(hvx_buffer0, \
+                                                                  hvx_buffer1); \
+            for (int j = 0; j < MAX_VEC_SIZE_BYTES / bytes_##type_res; j++) { \
+                expect[i].type_res[j] = \
+                    raw_##type_res(op(float_##type_arg(buffer0[i].type_arg[j]), \
+                                      float_##type_arg(buffer1[i].type_arg[j]))); \
+            } \
+        } \
+        check_output_##type_res(__LINE__, BUFSIZE); \
+    }
+
+#define SUM(X, Y, DEF_NAN) CHECK_NAN((X) + (Y), DEF_NAN)
+#define SUB(X, Y, DEF_NAN) CHECK_NAN((X) - (Y), DEF_NAN)
+#define MULT(X, Y, DEF_NAN) CHECK_NAN((X) * (Y), DEF_NAN)
+
+#define SUM_SF(X, Y) SUM(X, Y, NAN_SF)
+#define SUM_HF(X, Y) SUM(X, Y, NAN_HF)
+#define SUB_SF(X, Y) SUB(X, Y, NAN_SF)
+#define SUB_HF(X, Y) SUB(X, Y, NAN_HF)
+#define MULT_SF(X, Y) MULT(X, Y, NAN_SF)
+#define MULT_HF(X, Y) MULT(X, Y, NAN_HF)
+
+DEF_TEST_OP_2(vadd, SUM_SF, sf, sf);
+DEF_TEST_OP_2(vadd, SUM_HF, hf, hf);
+DEF_TEST_OP_2(vsub, SUB_SF, sf, sf);
+DEF_TEST_OP_2(vsub, SUB_HF, hf, hf);
+DEF_TEST_OP_2(vmpy, MULT_SF, sf, sf);
+DEF_TEST_OP_2(vmpy, MULT_HF, hf, hf);
+
+/******************************************************************************
+ * Other tests
+ *****************************************************************************/
+
+static void test_vdmpy_sf_hf(bool acc)
+{
+    memset(expect, 0xff, sizeof(expect));
+
+    for (int i = 0; i < BUFSIZE; i++) {
+        HVX_Vector hvx_buffer0 = *(HVX_Vector *)&buffer0[i];
+        HVX_Vector hvx_buffer1 = *(HVX_Vector *)&buffer1[i];
+        HVX_Vector *hvx_output = (HVX_Vector *)&output[i];
+
+        uint32_t PREFIL_VAL = 0x111222; /* accumulator seed, raw sf bits */
+        *hvx_output = Q6_V_vsplat_R(PREFIL_VAL);
+
+        if (!acc) {
+            *hvx_output = Q6_Vsf_vdmpy_VhfVhf(hvx_buffer0, hvx_buffer1);
+        } else {
+            *hvx_output = Q6_Vsf_vdmpyacc_VsfVhfVhf(*hvx_output, hvx_buffer0,
+                                                    hvx_buffer1);
+        }
+        /* Reference: sf lane j = dot product of the hf pairs at 2j, 2j+1. */
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            float a1 = float_hf_to_sf(float_hf(buffer0[i].hf[2 * j + 1]));
+            float a2 = float_hf_to_sf(float_hf(buffer0[i].hf[2 * j]));
+            float a3 = float_hf_to_sf(float_hf(buffer1[i].hf[2 * j + 1]));
+            float a4 = float_hf_to_sf(float_hf(buffer1[i].hf[2 * j]));
+            /*
+             * IEEE FP gives +0.0 + -0.0 == +0.0, so the non-accumulating case
+             * adds -0.0 instead: that leaves the sign of a zero sum untouched.
+             */
+            float prev = acc ? float_sf(PREFIL_VAL) : -0.0;
+            expect[i].sf[j] = raw_sf(CHECK_NAN((a1 * a3) + (a2 * a4) + prev, NAN_SF));
+        }
+    }
+    check_output_sf(__LINE__, BUFSIZE);
+}
+
+static void test_new(void)
+{
+    asm volatile("r0 = #0x2\n"
+                 "v0 = vsplat(r0)\n"
+                 "vmem(%1 + #0) = v0\n" /* %1 is expect */
+                 "r1 = #0x1\n"
+                 "v1 = vsplat(r1)\n"
+                 "v2 = vsplat(r1)\n"
+                 "{\n"
+                 "  v0.sf = vadd(v1.sf, v2.sf)\n"
+                 "  vmem(%0 + #0) = v0.new\n" /* %0 is output */
+                 "}\n"
+                 :
+                 : "r"(output), "r"(expect)
+                 : "r0", "r1", "v0", "v1", "v2", "memory");
+    check_output_w(__LINE__, 1);
+}
+
+int main(void)
+{
+    init_buffers_fp();
+
+    /* add/sub */
+    test_vadd_sf_sf();
+    test_vadd_hf_hf();
+    test_vsub_sf_sf();
+    test_vsub_hf_hf();
+
+    /* multiply */
+    test_vmpy_sf_sf();
+    test_vmpy_hf_hf();
+
+    /* dot product */
+    test_vdmpy_sf_hf(false);
+    test_vdmpy_sf_hf(true);
+
+    test_new();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/fp_hvx_disabled.c b/tests/tcg/hexagon/fp_hvx_disabled.c
new file mode 100644
index 0000000000..388a42e2b7
--- /dev/null
+++ b/tests/tcg/hexagon/fp_hvx_disabled.c
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <hexagon_types.h>
+#include <hvx_hexagon_protos.h>
+
+int err;
+#include "hvx_misc.h"
+
+static void test_disabled(void)
+{
+    memset(output, 0xAA, sizeof(output));
+    memset(expect, 0, sizeof(expect));
+    asm volatile("r0 = #0xff\n"
+                 "v0 = vsplat(r0)\n"
+                 "r1 = #0x1\n"
+                 "v1 = vsplat(r1)\n"
+                 "v2 = vsplat(r1)\n"
+                 "v0.sf = vadd(v1.sf, v2.sf)\n"
+                 "vmem(%0 + #0) = v0\n"
+                 :
+                 : "r"(output)
+                 : "r0", "r1", "v0", "v1", "v2", "memory");
+    check_output_w(__LINE__, 1);
+}
+
+static void test_disabled_with_new(void)
+{
+    memset(output, 0xAA, sizeof(output));
+    memset(expect, 0, sizeof(expect));
+    asm volatile("r0 = #0xff\n"
+                 "v0 = vsplat(r0)\n"
+                 "r1 = #0x1\n"
+                 "v1 = vsplat(r1)\n"
+                 "v2 = vsplat(r1)\n"
+                 "{\n"
+                 "    v0.sf = vadd(v1.sf, v2.sf)\n"
+                 "    vmem(%0 + #0) = v0.new\n"
+                 "}\n"
+                 :
+                 : "r"(output)
+                 : "r0", "r1", "v0", "v1", "v2", "memory");
+    check_output_w(__LINE__, 1);
+}
+
+int main(void)
+{
+    test_disabled();
+    test_disabled_with_new();
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index 549c95082f..789721bdac 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -50,6 +50,8 @@ HEX_TESTS += vector_add_int
 HEX_TESTS += scatter_gather
 HEX_TESTS += hvx_misc
 HEX_TESTS += hvx_histogram
+HEX_TESTS += fp_hvx
+HEX_TESTS += fp_hvx_disabled
 HEX_TESTS += invalid-slots
 HEX_TESTS += invalid-encoding
 HEX_TESTS += multiple-writes
@@ -126,6 +128,12 @@ v68_hvx: CFLAGS += -mhvx -Wno-unused-function
 v69_hvx: v69_hvx.c hvx_misc.h
 v69_hvx: CFLAGS += -mhvx -Wno-unused-function
 v73_scalar: CFLAGS += -Wno-unused-function
+fp_hvx: fp_hvx.c hvx_misc.h
+fp_hvx: CFLAGS += -mhvx -mhvx-ieee-fp
+fp_hvx_disabled: fp_hvx_disabled.c hvx_misc.h
+fp_hvx_disabled: CFLAGS += -mhvx -mhvx-ieee-fp
+
+run-fp_hvx_disabled: QEMU_OPTS += -cpu v73,ieee-fp=false
 
 hvx_histogram: hvx_histogram.c hvx_histogram_row.S
 	$(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS)
-- 
2.37.2