From: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240506010403.6204-20-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/helper.h | 1 +
target/arm/tcg/a64.decode | 6 ++++
target/arm/tcg/translate-a64.c | 60 ++++++++++++++++++++++------------
target/arm/tcg/vec_helper.c | 6 ++++
4 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 8d076011c18..ff6e3094f41 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -724,6 +724,7 @@ DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 7fc3277be67..a852b5f06f0 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -728,6 +728,9 @@ FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd
FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h
FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
+FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
+FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
+
### Advanced SIMD three same
FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -778,6 +781,9 @@ FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd
FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h
FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
+FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
+FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 4094003759d..9d1ddfbdddc 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5010,6 +5010,31 @@ static const FPScalar f_scalar_facgt = {
};
TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
+static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subh(d, n, m, s); /* FABD step 1: subh helper on (n, m), result in d; s forwarded */
+ gen_vfp_absh(d, d); /* FABD step 2: abs applied to d in place */
+}
+
+static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subs(d, n, m, s); /* FABD step 1: subs helper on (n, m), result in d; s forwarded */
+ gen_vfp_abss(d, d); /* FABD step 2: abs applied to d in place */
+}
+
+static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
+{
+ gen_helper_vfp_subd(d, n, m, s); /* FABD step 1: subd helper on (n, m), result in d; s forwarded */
+ gen_vfp_absd(d, d); /* FABD step 2: abs applied to d in place */
+}
+
+static const FPScalar f_scalar_fabd = {
+ gen_fabd_h, /* _h generator: TCGv_i32 operands */
+ gen_fabd_s, /* _s generator: TCGv_i32 operands */
+ gen_fabd_d, /* _d generator: TCGv_i64 operands */
+};
+TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) /* scalar FABD: routed through do_fp3_scalar with the table above */
+
static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
@@ -5150,6 +5175,13 @@ static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
};
TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt)
+static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
+ gen_helper_gvec_fabd_h, /* float16 elements */
+ gen_helper_gvec_fabd_s, /* float32 elements */
+ gen_helper_gvec_fabd_d, /* float64 elements */
+};
+TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd) /* vector FABD: routed through do_fp3_vector with the table above */
+
/*
* Advanced SIMD scalar/vector x indexed element
*/
@@ -9296,10 +9328,6 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_absd(tcg_res, tcg_res);
- break;
default:
case 0x18: /* FMAXNM */
case 0x19: /* FMLA */
@@ -9315,6 +9343,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
g_assert_not_reached();
@@ -9337,10 +9366,6 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x3f: /* FRSQRTS */
gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x7a: /* FABD */
- gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
- gen_vfp_abss(tcg_res, tcg_res);
- break;
default:
case 0x18: /* FMAXNM */
case 0x19: /* FMLA */
@@ -9356,6 +9381,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
g_assert_not_reached();
@@ -9398,7 +9424,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
switch (fpopcode) {
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
- case 0x7a: /* FABD */
break;
default:
case 0x1b: /* FMULX */
@@ -9406,6 +9431,7 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
case 0x7d: /* FACGT */
case 0x1c: /* FCMEQ */
case 0x5c: /* FCMGE */
+ case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
unallocated_encoding(s);
return;
@@ -9561,13 +9587,13 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
switch (fpopcode) {
case 0x07: /* FRECPS */
case 0x0f: /* FRSQRTS */
- case 0x1a: /* FABD */
break;
default:
case 0x03: /* FMULX */
case 0x04: /* FCMEQ (reg) */
case 0x14: /* FCMGE (reg) */
case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT (reg) */
case 0x1d: /* FACGT */
unallocated_encoding(s);
@@ -9595,15 +9621,12 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
case 0x0f: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
default:
case 0x03: /* FMULX */
case 0x04: /* FCMEQ (reg) */
case 0x14: /* FCMGE (reg) */
case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT (reg) */
case 0x1d: /* FACGT */
g_assert_not_reached();
@@ -11265,7 +11288,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
return;
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
- case 0x7a: /* FABD */
if (!fp_access_check(s)) {
return;
}
@@ -11307,6 +11329,7 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
case 0x7d: /* FACGT */
case 0x7c: /* FCMGT */
unallocated_encoding(s);
@@ -11652,7 +11675,6 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
switch (fpopcode) {
case 0x7: /* FRECPS */
case 0xf: /* FRSQRTS */
- case 0x1a: /* FABD */
pairwise = false;
break;
case 0x10: /* FMAXNMP */
@@ -11677,6 +11699,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT */
case 0x1d: /* FACGT */
unallocated_encoding(s);
@@ -11750,10 +11773,6 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0xf: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x1a: /* FABD */
- gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
- tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
- break;
default:
case 0x0: /* FMAXNM */
case 0x1: /* FMLA */
@@ -11769,6 +11788,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x14: /* FCMGE */
case 0x15: /* FACGE */
case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
case 0x1c: /* FCMGT */
case 0x1d: /* FACGT */
g_assert_not_reached();
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index dabefa3526d..e9d7922f303 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -1154,6 +1154,11 @@ static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
return float32_abs(float32_sub(op1, op2, stat));
}
+static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
+{
+ return float64_abs(float64_sub(op1, op2, stat)); /* |op1 - op2|; stat is passed to the subtraction only */
+}
+
/*
* Reciprocal step. These are the AArch32 version which uses a
* non-fused multiply-and-subtract.
@@ -1238,6 +1243,7 @@ DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
DO_3OP(gvec_fabd_h, float16_abd, float16)
DO_3OP(gvec_fabd_s, float32_abd, float32)
+DO_3OP(gvec_fabd_d, float64_abd, float64)
DO_3OP(gvec_fceq_h, float16_ceq, float16)
DO_3OP(gvec_fceq_s, float32_ceq, float32)
--
2.34.1