From nobody Wed May 7 19:10:51 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: <qemu-devel-bounces+importer=patchew.org@nongnu.org> Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1539969082483984.1530200718795; Fri, 19 Oct 2018 10:11:22 -0700 (PDT) Received: from localhost ([::1]:51759 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from <qemu-devel-bounces+importer=patchew.org@nongnu.org>) id 1gDYIz-0006UC-A6 for importer@patchew.org; Fri, 19 Oct 2018 13:11:21 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47766) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from <pm215@archaic.org.uk>) id 1gDY6c-00024y-8v for qemu-devel@nongnu.org; Fri, 19 Oct 2018 12:58:35 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from <pm215@archaic.org.uk>) id 1gDY6b-0002tX-8Q for qemu-devel@nongnu.org; Fri, 19 Oct 2018 12:58:34 -0400 Received: from orth.archaic.org.uk ([2001:8b0:1d0::2]:51984) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from <pm215@archaic.org.uk>) id 1gDY6a-0002bn-TL for qemu-devel@nongnu.org; Fri, 19 Oct 2018 12:58:33 -0400 Received: from pm215 by orth.archaic.org.uk with local (Exim 4.89) (envelope-from <pm215@archaic.org.uk>) id 1gDY6J-0006oJ-0D for qemu-devel@nongnu.org; Fri, 19 Oct 2018 17:58:15 +0100 From: Peter Maydell <peter.maydell@linaro.org> To: qemu-devel@nongnu.org Date: Fri, 19 Oct 2018 17:57:28 +0100 Message-Id: <20181019165735.22511-39-peter.maydell@linaro.org> X-Mailer: git-send-email 2.19.1 In-Reply-To: <20181019165735.22511-1-peter.maydell@linaro.org> References: <20181019165735.22511-1-peter.maydell@linaro.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2001:8b0:1d0::2 Subject: [Qemu-devel] [PULL 38/45] target/arm: Use gvec for NEON VLD all lanes X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: <qemu-devel.nongnu.org> List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>, <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe> List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel/> List-Post: <mailto:qemu-devel@nongnu.org> List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help> List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>, <mailto:qemu-devel-request@nongnu.org?subject=subscribe> Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+importer=patchew.org@nongnu.org> X-ZohoMail: RDMRC_1 RSF_0 Z_629925259 SPT_0 Content-Type: text/plain; charset="utf-8" From: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20181011205206.3552-18-richard.henderson@linaro.org [PMM: added parens in ?: expression] Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Peter Maydell <peter.maydell@linaro.org> --- target/arm/translate.c | 81 ++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index e6b06910369..e5d723d03b7 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -2993,19 +2993,6 @@ static void gen_vfp_msr(TCGv_i32 tmp) tcg_temp_free_i32(tmp); } =20 -static void gen_neon_dup_u8(TCGv_i32 var, int shift) -{ - TCGv_i32 tmp =3D tcg_temp_new_i32(); - if (shift) - tcg_gen_shri_i32(var, var, shift); - tcg_gen_ext8u_i32(var, var); - tcg_gen_shli_i32(tmp, var, 8); - tcg_gen_or_i32(var, var, tmp); - tcg_gen_shli_i32(tmp, var, 16); - tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); -} - static void gen_neon_dup_low16(TCGv_i32 var) { TCGv_i32 tmp =3D tcg_temp_new_i32(); @@ -3024,28 +3011,6 @@ static void gen_neon_dup_high16(TCGv_i32 var) tcg_temp_free_i32(tmp); } =20 -static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int= size) -{ - /* Load a single Neon element and replicate into a 32 bit TCG reg */ - TCGv_i32 tmp =3D tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - return tmp; -} - static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t r= m, uint32_t dp) { @@ -4949,6 +4914,7 @@ static int disas_neon_ls_insn(DisasContext *s, uint32= _t insn) int load; int shift; int n; + int vec_size; TCGv_i32 addr; TCGv_i32 tmp; TCGv_i32 tmp2; @@ -5118,28 +5084,33 @@ static int disas_neon_ls_insn(DisasContext *s, uint= 32_t insn) } addr =3D tcg_temp_new_i32(); load_reg_var(s, addr, rn); - if (nregs =3D=3D 1) { - /* VLD1 to all lanes: bit 5 indicates how many Dregs to wr= ite */ - tmp =3D gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - if (insn & (1 << 5)) { - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0= )); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1= )); - } - tcg_temp_free_i32(tmp); - } else { - /* VLD2/3/4 to all lanes: bit 5 indicates register stride = */ - stride =3D (insn & (1 << 5)) ? 2 : 1; - for (reg =3D 0; reg < nregs; reg++) { - tmp =3D gen_load_and_replicate(s, addr, size); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); - tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); - tcg_temp_free_i32(tmp); - tcg_gen_addi_i32(addr, addr, 1 << size); - rd +=3D stride; + + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. + * VLD2/3/4 to all lanes: bit 5 indicates register stride. + */ + stride =3D (insn & (1 << 5)) ? 2 : 1; + vec_size =3D nregs =3D=3D 1 ? stride * 8 : 8; + + tmp =3D tcg_temp_new_i32(); + for (reg =3D 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((rd & 1) && vec_size =3D=3D 16) { + /* We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0), + neon_reg_offset(rd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0), + vec_size, vec_size, tmp); } + tcg_gen_addi_i32(addr, addr, 1 << size); + rd +=3D stride; } + tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); stride =3D (1 << size) * nregs; } else { --=20 2.19.1