Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/a64.decode | 5 +
target/arm/tcg/translate-a64.c | 241 ++++++++++-----------------------
2 files changed, 76 insertions(+), 170 deletions(-)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index f330919851..4b2a6ba302 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -960,6 +960,8 @@ USMMLA 0100 1110 100 ..... 10101 1 ..... ..... @rrr_q1e0
FCADD_90 0.10 1110 ..0 ..... 11100 1 ..... ..... @qrrr_e
FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e
+FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
@@ -1041,6 +1043,9 @@ USDOT_vi 0.00 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s
BFDOT_vi 0.00 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_s
BFMLAL_vi 0.00 1111 11 .. .... 1111 . 0 ..... ..... @qrrx_h
+FCMLA_vi 0 q:1 10 1111 10 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl
+FCMLA_vi 0 q:1 10 1111 01 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2
+
# Floating-point conditional select
FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index a1b338263f..0a54a9ef8f 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5631,6 +5631,39 @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
+static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
+{
+ gen_helper_gvec_4_ptr *fn;
+
+ if (!dc_isar_feature(aa64_fcma, s)) {
+ return false;
+ }
+ switch (a->esz) {
+ case MO_64:
+ if (!a->q) {
+ return false;
+ }
+ fn = gen_helper_gvec_fcmlad;
+ break;
+ case MO_32:
+ fn = gen_helper_gvec_fcmlas;
+ break;
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ fn = gen_helper_gvec_fcmlah;
+ break;
+ default:
+ return false;
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ a->esz == MO_16, a->rot, fn);
+ }
+ return true;
+}
+
/*
* Advanced SIMD scalar/vector x indexed element
*/
@@ -5985,6 +6018,36 @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
return true;
}
+static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
+{
+ gen_helper_gvec_4_ptr *fn;
+
+ if (!dc_isar_feature(aa64_fcma, s)) {
+ return false;
+ }
+ switch (a->esz) {
+ case MO_16:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ return false;
+ }
+ fn = gen_helper_gvec_fcmlah_idx;
+ break;
+ case MO_32:
+ if (!a->q && a->idx) {
+ return false;
+ }
+ fn = gen_helper_gvec_fcmlas_idx;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (fp_access_check(s)) {
+ gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+ a->esz == MO_16, (a->idx << 2) | a->rot, fn);
+ }
+ return true;
+}
+
/*
* Advanced SIMD scalar pairwise
*/
@@ -10942,90 +11005,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
}
}
-/* AdvSIMD three same extra
- * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
- * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
- * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
- */
-static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 4);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- bool u = extract32(insn, 29, 1);
- bool is_q = extract32(insn, 30, 1);
- bool feature;
- int rot;
-
- switch (u * 16 + opcode) {
- case 0x18: /* FCMLA, #0 */
- case 0x19: /* FCMLA, #90 */
- case 0x1a: /* FCMLA, #180 */
- case 0x1b: /* FCMLA, #270 */
- if (size == 0
- || (size == 1 && !dc_isar_feature(aa64_fp16, s))
- || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- feature = dc_isar_feature(aa64_fcma, s);
- break;
- default:
- case 0x02: /* SDOT (vector) */
- case 0x03: /* USDOT */
- case 0x04: /* SMMLA */
- case 0x05: /* USMMLA */
- case 0x10: /* SQRDMLAH (vector) */
- case 0x11: /* SQRDMLSH (vector) */
- case 0x12: /* UDOT (vector) */
- case 0x14: /* UMMLA */
- case 0x1c: /* FCADD, #90 */
- case 0x1d: /* BFMMLA */
- case 0x1e: /* FCADD, #270 */
- case 0x1f: /* BFDOT / BFMLAL */
- unallocated_encoding(s);
- return;
- }
- if (!feature) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
-
- switch (opcode) {
- case 0x8: /* FCMLA, #0 */
- case 0x9: /* FCMLA, #90 */
- case 0xa: /* FCMLA, #180 */
- case 0xb: /* FCMLA, #270 */
- rot = extract32(opcode, 0, 2);
- switch (size) {
- case 1:
- gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
- gen_helper_gvec_fcmlah);
- break;
- case 2:
- gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
- gen_helper_gvec_fcmlas);
- break;
- case 3:
- gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
- gen_helper_gvec_fcmlad);
- break;
- default:
- g_assert_not_reached();
- }
- return;
-
- default:
- g_assert_not_reached();
- }
-}
-
static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
int size, int rn, int rd)
{
@@ -12001,10 +11980,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
bool is_long = false;
- int is_fp = 0;
- bool is_fp16 = false;
int index;
- TCGv_ptr fpst;
switch (16 * u + opcode) {
case 0x02: /* SMLAL, SMLAL2 */
@@ -12024,16 +12000,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 0x0b: /* SQDMULL, SQDMULL2 */
is_long = true;
break;
- case 0x11: /* FCMLA #0 */
- case 0x13: /* FCMLA #90 */
- case 0x15: /* FCMLA #180 */
- case 0x17: /* FCMLA #270 */
- if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
- unallocated_encoding(s);
- return;
- }
- is_fp = 2;
- break;
default:
case 0x00: /* FMLAL */
case 0x01: /* FMLA */
@@ -12046,7 +12012,11 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 0x0e: /* SDOT */
case 0x0f: /* SUDOT / BFDOT / USDOT / BFMLAL */
case 0x10: /* MLA */
+ case 0x11: /* FCMLA #0 */
+ case 0x13: /* FCMLA #90 */
case 0x14: /* MLS */
+ case 0x15: /* FCMLA #180 */
+ case 0x17: /* FCMLA #270 */
case 0x18: /* FMLAL2 */
case 0x19: /* FMULX */
case 0x1c: /* FMLSL2 */
@@ -12057,46 +12027,12 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
return;
}
- switch (is_fp) {
- case 1: /* normal fp */
- unallocated_encoding(s); /* in decodetree */
- return;
-
- case 2: /* complex fp */
- /* Each indexable element is a complex pair. */
- size += 1;
- switch (size) {
- case MO_32:
- if (h && !is_q) {
- unallocated_encoding(s);
- return;
- }
- is_fp16 = true;
- break;
- case MO_64:
- break;
- default:
- unallocated_encoding(s);
- return;
- }
- break;
-
- default: /* integer */
- switch (size) {
- case MO_8:
- case MO_64:
- unallocated_encoding(s);
- return;
- }
- break;
- }
- if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
- unallocated_encoding(s);
- return;
- }
-
/* Given MemOp size, adjust register and indexing. */
switch (size) {
+ case MO_8:
+ case MO_64:
+ unallocated_encoding(s);
+ return;
case MO_16:
index = h << 2 | l << 1 | m;
break;
@@ -12104,14 +12040,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
index = h << 1 | l;
rm |= m << 4;
break;
- case MO_64:
- if (l || !is_q) {
- unallocated_encoding(s);
- return;
- }
- index = h;
- rm |= m << 4;
- break;
default:
g_assert_not_reached();
}
@@ -12120,32 +12048,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
return;
}
- if (is_fp) {
- fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
- } else {
- fpst = NULL;
- }
-
- switch (16 * u + opcode) {
- case 0x11: /* FCMLA #0 */
- case 0x13: /* FCMLA #90 */
- case 0x15: /* FCMLA #180 */
- case 0x17: /* FCMLA #270 */
- {
- int rot = extract32(insn, 13, 2);
- int data = (index << 2) | rot;
- tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- vec_full_reg_offset(s, rd), fpst,
- is_q ? 16 : 8, vec_full_reg_size(s), data,
- size == MO_64
- ? gen_helper_gvec_fcmlas_idx
- : gen_helper_gvec_fcmlah_idx);
- }
- return;
- }
-
if (size == 3) {
g_assert_not_reached();
} else if (!is_long) {
@@ -12407,7 +12309,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
*/
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
- { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
{ 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
--
2.34.1
On Tue, 25 Jun 2024 at 06:09, Richard Henderson <richard.henderson@linaro.org> wrote: > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/tcg/a64.decode | 5 + > target/arm/tcg/translate-a64.c | 241 ++++++++++----------------------- > 2 files changed, 76 insertions(+), 170 deletions(-) > > diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode > index f330919851..4b2a6ba302 100644 > --- a/target/arm/tcg/a64.decode > +++ b/target/arm/tcg/a64.decode > @@ -960,6 +960,8 @@ USMMLA 0100 1110 100 ..... 10101 1 ..... ..... @rrr_q1e0 > FCADD_90 0.10 1110 ..0 ..... 11100 1 ..... ..... @qrrr_e > FCADD_270 0.10 1110 ..0 ..... 11110 1 ..... ..... @qrrr_e > > +FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5 > + > ### Advanced SIMD scalar x indexed element > > FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h > @@ -1041,6 +1043,9 @@ USDOT_vi 0.00 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s > BFDOT_vi 0.00 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_s > BFMLAL_vi 0.00 1111 11 .. .... 1111 . 0 ..... ..... @qrrx_h > > +FCMLA_vi 0 q:1 10 1111 10 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl > +FCMLA_vi 0 q:1 10 1111 01 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 This doesn't look right. Bits [23:22] are the size field, but here you have 10 and esz=1, and 01 and esz=2. I think the 01 and 10 should be the other way around. In the size 10 case (MO_32), we should UNDEF for L=1 or Q=0, which is to say that L must be 0 and Q must be 1. We hard-wire the L bit (bit 21) to 0 in the decode, but we leave q:1 as it is; I think it would be better for the second line here to have a forced 1 in the q bit and set q=1 at the end of the line. We also don't have the check for H==1 && Q==0 for the size 01 case. I think we can do that in the decode file by splitting the 01 case into two lines, one for Q=0 and one for Q=1. > +static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) > +{ > + gen_helper_gvec_4_ptr *fn; > + > + if (!dc_isar_feature(aa64_fcma, s)) { > + return false; > + } > + switch (a->esz) { > + case MO_16: > + if (!dc_isar_feature(aa64_fp16, s)) { > + return false; > + } > + fn = gen_helper_gvec_fcmlah_idx; > + break; > + case MO_32: > + if (!a->q && a->idx) { > + return false; > + } I suspect this was supposed to be the size 01 H==1 && Q==0 check, but it's in the wrong case. So my suggested fixup for this patch is: diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 4b2a6ba302d..223eac3cac2 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -1043,8 +1043,9 @@ USDOT_vi 0.00 1111 10 .. .... 1111 . 0 ..... ..... @qrrx_s BFDOT_vi 0.00 1111 01 .. .... 1111 . 0 ..... ..... @qrrx_s BFMLAL_vi 0.00 1111 11 .. .... 1111 . 0 ..... ..... @qrrx_h -FCMLA_vi 0 q:1 10 1111 10 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl -FCMLA_vi 0 q:1 10 1111 01 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 +FCMLA_vi 0 0 10 1111 01 idx:1 rm:5 0 rot:2 1 0 0 rn:5 rd:5 esz=1 q=0 +FCMLA_vi 0 1 10 1111 01 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl q=1 +FCMLA_vi 0 1 10 1111 10 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2 q=1 # Floating-point conditional select diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 0a54a9ef8f7..161fa2659c4 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -6033,9 +6033,6 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) fn = gen_helper_gvec_fcmlah_idx; break; case MO_32: - if (!a->q && a->idx) { - return false; - } fn = gen_helper_gvec_fcmlas_idx; break; default: thanks -- PMM
© 2016 - 2024 Red Hat, Inc.