From nobody Wed Nov 5 10:41:22 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1533789518806929.0339036122601; Wed, 8 Aug 2018 21:38:38 -0700 (PDT) Received: from localhost ([::1]:47307 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fncib-0006D4-Jo for importer@patchew.org; Thu, 09 Aug 2018 00:38:37 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:54139) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fncT7-000128-N2 for qemu-devel@nongnu.org; Thu, 09 Aug 2018 00:22:40 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fncT4-0007Su-ON for qemu-devel@nongnu.org; Thu, 09 Aug 2018 00:22:37 -0400 Received: from mail-pf1-x432.google.com ([2607:f8b0:4864:20::432]:34670) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1fncT4-0007Rl-DS for qemu-devel@nongnu.org; Thu, 09 Aug 2018 00:22:34 -0400 Received: by mail-pf1-x432.google.com with SMTP id k19-v6so2208678pfi.1 for ; Wed, 08 Aug 2018 21:22:34 -0700 (PDT) Received: from cloudburst.twiddle.net (97-113-8-179.tukw.qwest.net. [97.113.8.179]) by smtp.gmail.com with ESMTPSA id m30-v6sm7355799pff.121.2018.08.08.21.22.31 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Wed, 08 Aug 2018 21:22:31 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=MYhgJucgZ+U6nSJDfeKc+2O2o0Gq7XMWGUI3jo+jjRM=; b=AoKV7ib9qW0+WTnSISN/3yJNKiMNgj122pJhHB1uD1mlsng4halNLOxt1cENI7RVVS pl+Pd2EgvaSx5ejdDtCMIifIeqmsUNxRLlKbre1qlcRiKS5N9ZLkUtQa84D0prqytqmV xJ9S04Snx4HYbQtljsVkfuI7p6sH3Dw4Rc/+0= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=MYhgJucgZ+U6nSJDfeKc+2O2o0Gq7XMWGUI3jo+jjRM=; b=ihz7lhOlsh0edosquR4icfAuEbvmgyZZAsJvT5QN68gwPGi8VtjVITt07Pbyo2oZIy H4C4JjRnC3cSg3r7mR8JdJMgv3WbRrtv8BCgmDTFBUUO+lgTkYRd9lAhOlMkzxgp0iwN BAD2UR9KSpnoU+NeGPuw3u5otcSlVP4tnZkpepCZIbdN3PDWnRmlZohcD9Sh5M4ogRgf os2fcRQ77orgCP3s2T4dpH13VvfjPA1dCTa+rY99CjiyLnErUCLPwFwT3n13HQGi+NGZ F+Fu+XN6yiG6N9rmF5b7JfZjNqH6ucgAI/zCLKKaoyfy9UfmFBgpIJRE7TGVdvg6iXJh bDMA== X-Gm-Message-State: AOUpUlGSncQz1tjfrD3XXudr7JXL0U/o6L60I0CuFGvdOe3CF0naQHFo FxgVXVQBZ1PC2uplJSdqp+gXDqKC3Ys= X-Google-Smtp-Source: AA+uWPy3sWwE5vdtDcO51lrOpgW+SvWIsMiIclCaQw6uJlWCzWPv/8RcDtzW8sxtJQfh31p1F4jrYw== X-Received: by 2002:a65:5907:: with SMTP id f7-v6mr497734pgu.83.1533788552598; Wed, 08 Aug 2018 21:22:32 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Wed, 8 Aug 2018 21:22:03 -0700 Message-Id: <20180809042206.15726-18-richard.henderson@linaro.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20180809042206.15726-1-richard.henderson@linaro.org> References: <20180809042206.15726-1-richard.henderson@linaro.org> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2607:f8b0:4864:20::432 Subject: [Qemu-devel] [PATCH 17/20] target/arm: Rewrite vector gather loads X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: laurent.desnogues@gmail.com, peter.maydell@linaro.org, alex.bennee@linaro.org Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) X-ZohoMail: RDMRC_1 RDKM_2 RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This fixes the endianness problem for softmmu, and does move the main loop out of a macro and into an inlined function. Signed-off-by: Richard Henderson --- target/arm/helper-sve.h | 84 +++++++++---- target/arm/sve_helper.c | 218 +++++++++++++++++++++++---------- target/arm/translate-sve.c | 244 +++++++++++++++++++++++++------------ 3 files changed, 380 insertions(+), 166 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 1ad043101a..49d1c09e30 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1292,69 +1292,111 @@ DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG,= void, env, ptr, tl, i32) =20 DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhsu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldssu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) =20 DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhsu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldssu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) =20 DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) =20 DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) =20 DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsdu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldddu_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldhds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_ldsds_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) =20 DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 92c0e961a9..76d3f021e4 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -4984,80 +4984,166 @@ DO_STN_2(4, dd, 8, 8) =20 /* Loads with a vector index. */ =20 -#define DO_LD1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz =3D simd_oprsz(desc); \ - unsigned scale =3D simd_data(desc); \ - uintptr_t ra =3D GETPC(); \ - for (i =3D 0; i < oprsz; ) { \ - uint16_t pg =3D *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - TYPEM m =3D 0; \ - if (pg & 1) { \ - target_ulong off =3D *(TYPEI *)(vm + H1_4(i)); \ - m =3D FN(env, base + (off << scale), ra); \ - } \ - *(uint32_t *)(vd + H1_4(i)) =3D m; \ - i +=3D 4, pg >>=3D 4; = \ - } while (i & 15); \ - } \ +typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); + +static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) +{ + return *(uint32_t *)(reg + H1_4(reg_ofs)); } =20 -#define DO_LD1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz =3D simd_oprsz(desc) / 8; \ - unsigned scale =3D simd_data(desc); \ - uintptr_t ra =3D GETPC(); \ - uint64_t *d =3D vd, *m =3D vm; uint8_t *pg =3D vg; = \ - for (i =3D 0; i < oprsz; i++) { \ - TYPEM mm =3D 0; \ - if (pg[H1(i)] & 1) { \ - target_ulong off =3D (TYPEI)m[i]; \ - mm =3D FN(env, base + (off << scale), ra); \ - } \ - d[i] =3D mm; \ - } \ +static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) +{ + return *(int32_t *)(reg + H1_4(reg_ofs)); } =20 -DO_LD1_ZPZ_S(sve_ldbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_S(sve_ldssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_S(sve_ldbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra) +static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) +{ + return (uint32_t)*(uint64_t *)(reg + reg_ofs); +} =20 -DO_LD1_ZPZ_S(sve_ldbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_S(sve_ldssu_zss, int32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_S(sve_ldbss_zss, int32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_S(sve_ldhss_zss, int32_t, int16_t, cpu_lduw_data_ra) +static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) +{ + return (int32_t)*(uint64_t *)(reg + reg_ofs); +} =20 -DO_LD1_ZPZ_D(sve_ldbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_D(sve_ldddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra) -DO_LD1_ZPZ_D(sve_ldbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra) +static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) +{ + return *(uint64_t *)(reg + reg_ofs); +} =20 -DO_LD1_ZPZ_D(sve_ldbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_D(sve_ldddu_zss, int32_t, uint64_t, cpu_ldq_data_ra) -DO_LD1_ZPZ_D(sve_ldbds_zss, int32_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhds_zss, int32_t, int16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsds_zss, int32_t, int32_t, cpu_ldl_data_ra) +static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const int mmu_idx =3D cpu_mmu_index(env, false); + intptr_t i, oprsz =3D simd_oprsz(desc); + unsigned scale =3D simd_data(desc); + ARMVectorReg scratch =3D { }; =20 -DO_LD1_ZPZ_D(sve_ldbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra) -DO_LD1_ZPZ_D(sve_ldddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra) -DO_LD1_ZPZ_D(sve_ldbds_zd, uint64_t, int8_t, cpu_ldub_data_ra) -DO_LD1_ZPZ_D(sve_ldhds_zd, uint64_t, int16_t, cpu_lduw_data_ra) -DO_LD1_ZPZ_D(sve_ldsds_zd, uint64_t, int32_t, cpu_ldl_data_ra) + set_helper_retaddr(ra); + for (i =3D 0; i < oprsz; ) { + uint16_t pg =3D *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (pg & 1) { + target_ulong off =3D off_fn(vm, i); + tlb_fn(env, &scratch, i, base + (off << scale), mmu_idx, r= a); + } + i +=3D 4, pg >>=3D 4; + } while (i & 15); + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(vd, &scratch, oprsz); +} + +static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const int mmu_idx =3D cpu_mmu_index(env, false); + intptr_t i, oprsz =3D simd_oprsz(desc) / 8; + unsigned scale =3D simd_data(desc); + ARMVectorReg scratch =3D { }; + + set_helper_retaddr(ra); + for (i =3D 0; i < oprsz; i++) { + uint8_t pg =3D *(uint8_t *)(vg + H1(i)); + if (pg & 1) { + target_ulong off =3D off_fn(vm, i * 8); + tlb_fn(env, &scratch, i * 8, base + (off << scale), mmu_idx, r= a); + } + } + set_helper_retaddr(0); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(vd, &scratch, oprsz * 8); +} + +#define DO_LD1_ZPZ_S(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb); \ +} + +#define DO_LD1_ZPZ_D(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb); \ +} + +DO_LD1_ZPZ_S(bsu, zsu) +DO_LD1_ZPZ_S(bsu, zss) +DO_LD1_ZPZ_D(bdu, zsu) +DO_LD1_ZPZ_D(bdu, zss) +DO_LD1_ZPZ_D(bdu, zd) + +DO_LD1_ZPZ_S(bss, zsu) +DO_LD1_ZPZ_S(bss, zss) +DO_LD1_ZPZ_D(bds, zsu) +DO_LD1_ZPZ_D(bds, zss) +DO_LD1_ZPZ_D(bds, zd) + +DO_LD1_ZPZ_S(hsu_le, zsu) +DO_LD1_ZPZ_S(hsu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zsu) +DO_LD1_ZPZ_D(hdu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zd) + +DO_LD1_ZPZ_S(hsu_be, zsu) +DO_LD1_ZPZ_S(hsu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zsu) +DO_LD1_ZPZ_D(hdu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zd) + +DO_LD1_ZPZ_S(hss_le, zsu) +DO_LD1_ZPZ_S(hss_le, zss) +DO_LD1_ZPZ_D(hds_le, zsu) +DO_LD1_ZPZ_D(hds_le, zss) +DO_LD1_ZPZ_D(hds_le, zd) + +DO_LD1_ZPZ_S(hss_be, zsu) +DO_LD1_ZPZ_S(hss_be, zss) +DO_LD1_ZPZ_D(hds_be, zsu) +DO_LD1_ZPZ_D(hds_be, zss) +DO_LD1_ZPZ_D(hds_be, zd) + +DO_LD1_ZPZ_S(ss_le, zsu) +DO_LD1_ZPZ_S(ss_le, zss) +DO_LD1_ZPZ_D(sdu_le, zsu) +DO_LD1_ZPZ_D(sdu_le, zss) +DO_LD1_ZPZ_D(sdu_le, zd) + +DO_LD1_ZPZ_S(ss_be, zsu) +DO_LD1_ZPZ_S(ss_be, zss) +DO_LD1_ZPZ_D(sdu_be, zsu) +DO_LD1_ZPZ_D(sdu_be, zss) +DO_LD1_ZPZ_D(sdu_be, zd) + +DO_LD1_ZPZ_D(sds_le, zsu) +DO_LD1_ZPZ_D(sds_le, zss) +DO_LD1_ZPZ_D(sds_le, zd) + +DO_LD1_ZPZ_D(sds_be, zsu) +DO_LD1_ZPZ_D(sds_be, zss) +DO_LD1_ZPZ_D(sds_be, zd) + +DO_LD1_ZPZ_D(dd_le, zsu) +DO_LD1_ZPZ_D(dd_le, zss) +DO_LD1_ZPZ_D(dd_le, zd) + +DO_LD1_ZPZ_D(dd_be, zsu) +DO_LD1_ZPZ_D(dd_be, zss) +DO_LD1_ZPZ_D(dd_be, zd) + +#undef DO_LD1_ZPZ_S +#undef DO_LD1_ZPZ_D =20 /* First fault loads with a vector index. */ =20 diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index acb85731f8..d4d7e9d3ae 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -5077,91 +5077,176 @@ static void do_mem_zpz(DisasContext *s, int zt, in= t pg, int zm, int scale, tcg_temp_free_i32(desc); } =20 -/* Indexed by [ff][xs][u][msz]. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = =3D { - { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_zsu, - gen_helper_sve_ldssu_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_zss, - gen_helper_sve_ldssu_zss, } } }, +/* Indexed by [be][ff][xs][u][msz]. */ +static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3]= =3D { + /* Little-endian */ + { { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, =20 - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_zsu, - gen_helper_sve_ldffssu_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_zss, - gen_helper_sve_ldffssu_zss, } } } + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_zsu, + gen_helper_sve_ldffssu_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_zss, + gen_helper_sve_ldffssu_zss, } } } }, + + /* Big-endian */ + { { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_zsu, + gen_helper_sve_ldffssu_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_zss, + gen_helper_sve_ldffssu_zss, } } } }, }; =20 /* Note that we overload xs=3D2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = =3D { - { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_zsu, - gen_helper_sve_ldsds_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_zsu, - gen_helper_sve_ldsdu_zsu, - gen_helper_sve_ldddu_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_zss, - gen_helper_sve_ldsds_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_zss, - gen_helper_sve_ldsdu_zss, - gen_helper_sve_ldddu_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_zd, - gen_helper_sve_ldsds_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_zd, - gen_helper_sve_ldsdu_zd, - gen_helper_sve_ldddu_zd, } } }, +static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4]= =3D { + /* Little-endian */ + { { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, =20 - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_zsu, - gen_helper_sve_ldffsds_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_zsu, - gen_helper_sve_ldffsdu_zsu, - gen_helper_sve_ldffddu_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_zss, - gen_helper_sve_ldffsds_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_zss, - gen_helper_sve_ldffsdu_zss, - gen_helper_sve_ldffddu_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_zd, - gen_helper_sve_ldffsds_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_zd, - gen_helper_sve_ldffsdu_zd, - gen_helper_sve_ldffddu_zd, } } } + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_zsu, + gen_helper_sve_ldffsds_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_zsu, + gen_helper_sve_ldffsdu_zsu, + gen_helper_sve_ldffddu_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_zss, + gen_helper_sve_ldffsds_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_zss, + gen_helper_sve_ldffsdu_zss, + gen_helper_sve_ldffddu_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_zd, + gen_helper_sve_ldffsds_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_zd, + gen_helper_sve_ldffsdu_zd, + gen_helper_sve_ldffddu_zd, } } } }, + + /* Big-endian */ + { { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_zsu, + gen_helper_sve_ldffsds_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_zsu, + gen_helper_sve_ldffsdu_zsu, + gen_helper_sve_ldffddu_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_zss, + gen_helper_sve_ldffsds_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_zss, + gen_helper_sve_ldffsdu_zss, + gen_helper_sve_ldffddu_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_zd, + gen_helper_sve_ldffsds_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_zd, + gen_helper_sve_ldffsdu_zd, + gen_helper_sve_ldffddu_zd, } } } }, }; =20 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn =3D NULL; + int be =3D s->be_data =3D=3D MO_BE; =20 if (!sve_access_check(s)) { return true; @@ -5169,10 +5254,10 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1= _zprz *a, uint32_t insn) =20 switch (a->esz) { case MO_32: - fn =3D gather_load_fn32[a->ff][a->xs][a->u][a->msz]; + fn =3D gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn =3D gather_load_fn64[a->ff][a->xs][a->u][a->msz]; + fn =3D gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn !=3D NULL); @@ -5185,6 +5270,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_z= prz *a, uint32_t insn) static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn =3D NULL; + int be =3D s->be_data =3D=3D MO_BE; TCGv_i64 imm; =20 if (a->esz < a->msz || (a->esz =3D=3D a->msz && !a->u)) { @@ -5196,10 +5282,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1= _zpiz *a, uint32_t insn) =20 switch (a->esz) { case MO_32: - fn =3D gather_load_fn32[a->ff][0][a->u][a->msz]; + fn =3D gather_load_fn32[be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn =3D gather_load_fn64[a->ff][2][a->u][a->msz]; + fn =3D gather_load_fn64[be][a->ff][2][a->u][a->msz]; break; } assert(fn !=3D NULL); --=20 2.17.1