From nobody Thu Dec 18 19:36:04 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1530287442403879.3819124589374; Fri, 29 Jun 2018 08:50:42 -0700 (PDT) Received: from localhost ([::1]:43077 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fYvfV-0003NH-IT for importer@patchew.org; Fri, 29 Jun 2018 11:50:41 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34213) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fYun4-0007li-Bp for qemu-devel@nongnu.org; Fri, 29 Jun 2018 10:54:28 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fYun2-0007VG-Qk for qemu-devel@nongnu.org; Fri, 29 Jun 2018 10:54:26 -0400 Received: from orth.archaic.org.uk ([2001:8b0:1d0::2]:43120) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fYun2-0007Qj-EL for qemu-devel@nongnu.org; Fri, 29 Jun 2018 10:54:24 -0400 Received: from pm215 by orth.archaic.org.uk with local (Exim 4.89) (envelope-from ) id 1fYun1-0004lu-CO for qemu-devel@nongnu.org; Fri, 29 Jun 2018 15:54:23 +0100 From: Peter Maydell To: qemu-devel@nongnu.org Date: Fri, 29 Jun 2018 15:53:33 +0100 Message-Id: <20180629145347.652-42-peter.maydell@linaro.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20180629145347.652-1-peter.maydell@linaro.org> References: <20180629145347.652-1-peter.maydell@linaro.org> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2001:8b0:1d0::2 Subject: [Qemu-devel] [PULL 41/55] target/arm: Implement ARMv8.2-DotProd X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" From: Richard Henderson We've already added the helpers with an SVE patch, all that remains is to wire up the aa64 and aa32 translators. Enable the feature within -cpu max for CONFIG_USER_ONLY. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20180627043328.11531-36-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/cpu.h | 1 + linux-user/elfload.c | 1 + target/arm/cpu.c | 1 + target/arm/cpu64.c | 1 + target/arm/translate-a64.c | 36 +++++++++++++++++++ target/arm/translate.c | 74 +++++++++++++++++++++++++++----------- 6 files changed, 93 insertions(+), 21 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a4507a2d6f0..6a8441c2dd6 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1480,6 +1480,7 @@ enum arm_features { ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */ ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */ ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */ + ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */ ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */ ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index d1231ad07a3..942a1b661f4 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -583,6 +583,7 @@ static uint32_t get_elf_hwcap(void) ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP); GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS); GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM); + GET_FEATURE(ARM_FEATURE_V8_DOTPROD, ARM_HWCAP_A64_ASIMDDP); GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA); GET_FEATURE(ARM_FEATURE_SVE, ARM_HWCAP_A64_SVE); #undef GET_FEATURE diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 6dcc552e143..aa62315cea2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1805,6 +1805,7 @@ static void arm_max_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_V8_RDM); + set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); #endif } diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 0360d7efc5e..3b4bc73ffa6 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -250,6 +250,7 @@ static void aarch64_max_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS); set_feature(&cpu->env, ARM_FEATURE_V8_RDM); + set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD); set_feature(&cpu->env, ARM_FEATURE_V8_FP16); set_feature(&cpu->env, ARM_FEATURE_V8_FCMA); set_feature(&cpu->env, ARM_FEATURE_SVE); diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index eb3a4ab2f08..f9863408324 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -640,6 +640,16 @@ static void gen_gvec_op3(DisasContext *s, bool is_q, i= nt rd, vec_full_reg_size(s), gvec_op); } =20 +/* Expand a 3-operand operation using an out-of-line helper. */ +static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, + int rn, int rm, int data, gen_helper_gvec_3 *= fn) +{ + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), data, fn); +} + /* Expand a 3-operand + env pointer operation using * an out-of-line helper. */ @@ -11336,6 +11346,14 @@ static void disas_simd_three_reg_same_extra(DisasC= ontext *s, uint32_t insn) } feature =3D ARM_FEATURE_V8_RDM; break; + case 0x02: /* SDOT (vector) */ + case 0x12: /* UDOT (vector) */ + if (size !=3D MO_32) { + unallocated_encoding(s); + return; + } + feature =3D ARM_FEATURE_V8_DOTPROD; + break; case 0x8: /* FCMLA, #0 */ case 0x9: /* FCMLA, #90 */ case 0xa: /* FCMLA, #180 */ @@ -11389,6 +11407,11 @@ static void disas_simd_three_reg_same_extra(DisasC= ontext *s, uint32_t insn) } return; =20 + case 0x2: /* SDOT / UDOT */ + gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, + u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot= _b); + return; + case 0x8: /* FCMLA, #0 */ case 0x9: /* FCMLA, #90 */ case 0xa: /* FCMLA, #180 */ @@ -12568,6 +12591,13 @@ static void disas_simd_indexed(DisasContext *s, ui= nt32_t insn) return; } break; + case 0x0e: /* SDOT */ + case 0x1e: /* UDOT */ + if (size !=3D MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD))= { + unallocated_encoding(s); + return; + } + break; case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ @@ -12665,6 +12695,12 @@ static void disas_simd_indexed(DisasContext *s, ui= nt32_t insn) } =20 switch (16 * u + opcode) { + case 0x0e: /* SDOT */ + case 0x1e: /* UDOT */ + gen_gvec_op3_ool(s, is_q, rd, rn, rm, index, + u ? gen_helper_gvec_udot_idx_b + : gen_helper_gvec_sdot_idx_b); + return; case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ diff --git a/target/arm/translate.c b/target/arm/translate.c index a7a980b1f29..f845da7c638 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7762,9 +7762,10 @@ static int disas_neon_data_insn(DisasContext *s, uin= t32_t insn) */ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) { - gen_helper_gvec_3_ptr *fn_gvec_ptr; - int rd, rn, rm, rot, size, opr_sz; - TCGv_ptr fpst; + gen_helper_gvec_3 *fn_gvec =3D NULL; + gen_helper_gvec_3_ptr *fn_gvec_ptr =3D NULL; + int rd, rn, rm, opr_sz; + int data =3D 0; bool q; =20 q =3D extract32(insn, 6, 1); @@ -7777,8 +7778,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s,= uint32_t insn) =20 if ((insn & 0xfe200f10) =3D=3D 0xfc200800) { /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ - size =3D extract32(insn, 20, 1); - rot =3D extract32(insn, 23, 2); + int size =3D extract32(insn, 20, 1); + data =3D extract32(insn, 23, 2); /* rot */ if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { return 1; @@ -7786,13 +7787,20 @@ static int disas_neon_insn_3same_ext(DisasContext *= s, uint32_t insn) fn_gvec_ptr =3D size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fc= mlah; } else if ((insn & 0xfea00f10) =3D=3D 0xfc800800) { /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ - size =3D extract32(insn, 20, 1); - rot =3D extract32(insn, 24, 1); + int size =3D extract32(insn, 20, 1); + data =3D extract32(insn, 24, 1); /* rot */ if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { return 1; } fn_gvec_ptr =3D size ? gen_helper_gvec_fcadds : gen_helper_gvec_fc= addh; + } else if ((insn & 0xfeb00f00) =3D=3D 0xfc200d00) { + /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ + bool u =3D extract32(insn, 4, 1); + if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + return 1; + } + fn_gvec =3D u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; } else { return 1; } @@ -7807,12 +7815,19 @@ static int disas_neon_insn_3same_ext(DisasContext *= s, uint32_t insn) } =20 opr_sz =3D (1 + q) * 8; - fpst =3D get_fpstatus_ptr(1); - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), fpst, - opr_sz, opr_sz, rot, fn_gvec_ptr); - tcg_temp_free_ptr(fpst); + if (fn_gvec_ptr) { + TCGv_ptr fpst =3D get_fpstatus_ptr(1); + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), + vfp_reg_offset(1, rn), + vfp_reg_offset(1, rm), fpst, + opr_sz, opr_sz, data, fn_gvec_ptr); + tcg_temp_free_ptr(fpst); + } else { + tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), + vfp_reg_offset(1, rn), + vfp_reg_offset(1, rm), + opr_sz, opr_sz, data, fn_gvec); + } return 0; } =20 @@ -7826,9 +7841,9 @@ static int disas_neon_insn_3same_ext(DisasContext *s,= uint32_t insn) =20 static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) { - gen_helper_gvec_3_ptr *fn_gvec_ptr; + gen_helper_gvec_3 *fn_gvec =3D NULL; + gen_helper_gvec_3_ptr *fn_gvec_ptr =3D NULL; int rd, rn, rm, opr_sz, data; - TCGv_ptr fpst; bool q; =20 q =3D extract32(insn, 6, 1); @@ -7862,6 +7877,16 @@ static int disas_neon_insn_2reg_scalar_ext(DisasCont= ext *s, uint32_t insn) data =3D (index << 2) | rot; fn_gvec_ptr =3D (size ? gen_helper_gvec_fcmlas_idx : gen_helper_gvec_fcmlah_idx); + } else if ((insn & 0xffb00f00) =3D=3D 0xfe200d00) { + /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ + int u =3D extract32(insn, 4, 1); + if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) { + return 1; + } + fn_gvec =3D u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_= idx_b; + /* rm is just Vm, and index is M. */ + data =3D extract32(insn, 5, 1); /* index */ + rm =3D extract32(insn, 0, 4); } else { return 1; } @@ -7876,12 +7901,19 @@ static int disas_neon_insn_2reg_scalar_ext(DisasCon= text *s, uint32_t insn) } =20 opr_sz =3D (1 + q) * 8; - fpst =3D get_fpstatus_ptr(1); - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), fpst, - opr_sz, opr_sz, data, fn_gvec_ptr); - tcg_temp_free_ptr(fpst); + if (fn_gvec_ptr) { + TCGv_ptr fpst =3D get_fpstatus_ptr(1); + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), + vfp_reg_offset(1, rn), + vfp_reg_offset(1, rm), fpst, + opr_sz, opr_sz, data, fn_gvec_ptr); + tcg_temp_free_ptr(fpst); + } else { + tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), + vfp_reg_offset(1, rn), + vfp_reg_offset(1, rm), + opr_sz, opr_sz, data, fn_gvec); + } return 0; } =20 --=20 2.17.1