From nobody Thu Nov 6 06:22:12 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1539291935533194.29401872344283; Thu, 11 Oct 2018 14:05:35 -0700 (PDT) Received: from localhost ([::1]:37132 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gAi9B-000828-H7 for importer@patchew.org; Thu, 11 Oct 2018 17:05:29 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45197) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gAhwq-0006sC-2O for qemu-devel@nongnu.org; Thu, 11 Oct 2018 16:52:48 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1gAhwm-0005zz-72 for qemu-devel@nongnu.org; Thu, 11 Oct 2018 16:52:43 -0400 Received: from mail-pf1-x443.google.com ([2607:f8b0:4864:20::443]:40800) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1gAhwl-0003SW-Mi for qemu-devel@nongnu.org; Thu, 11 Oct 2018 16:52:39 -0400 Received: by mail-pf1-x443.google.com with SMTP id s5-v6so5005658pfj.7 for ; Thu, 11 Oct 2018 13:52:15 -0700 (PDT) Received: from cloudburst.twiddle.net (97-113-8-179.tukw.qwest.net. [97.113.8.179]) by smtp.gmail.com with ESMTPSA id h87-v6sm34707866pfj.78.2018.10.11.13.52.13 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Thu, 11 Oct 2018 13:52:13 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=Smv7/EDiE77NiuFaVj/NIlvKHR40VLp0vXf5RsdOZ4o=; b=ArsT5d70H3y9YxQJvWJMKD5lyxLZBdthWvekNUo3yoUPzC7GnsoIjhE6Gz+w4gXVIh 5QAOtiTHlz49Yzpm4PiUCYGVyFJ67NWOpMtxcITgX8hUZ+g+rSBss64JGm6KyeS3VRs+ wjWd3rKX23RkzDzQAjjQYjspqK+Hnycate/Y8= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=Smv7/EDiE77NiuFaVj/NIlvKHR40VLp0vXf5RsdOZ4o=; b=snMNOb4F9Q9AhojIPCUzaQiQSQaRlYMvP77xPHT9m1rZqzJwghkGsWrXh7pghY87gK +SDcynnawSBeILnK6nDfVzv6ZGZfDztfpFuzJJJz8XDImtJ+ov/dXExGa++QnV0GpfL3 atcboTqI646Wjb+FC+KICfxrwO1AnZWXXYl4U/aGXgAVfFB8ujeZvQy9Ksq/jhimyObD 4iQW4of7xGg/g+vQnRTRz43zEpPN8m4MkE2X0MZVqnfqo4Wbv0Pf3WbWbvYs4e2lnsUV xe4evY36xgn0Anfn0VN9JrV5MTdW40rOmRYegHf+/vya5bvZDuPTcvK95qbmsVK1nAoB VwbQ== X-Gm-Message-State: ABuFfoh7SOSSSz0DwmBEkpDW+qlMGXYuq5RuMjqYDhMfQ+9VEJNfHiHu 6uKW8RJInbDSgZZ2hI7qeElWOmg2VmE= X-Google-Smtp-Source: ACcGV62FK7GyOVQTD9G6dj/vRJdscU80s5Q9w9+GWlu/kb9aRJF2lv9sUBnnSp1kjYILSSIxceYTvA== X-Received: by 2002:aa7:86cc:: with SMTP id h12-v6mr3054589pfo.58.1539291134647; Thu, 11 Oct 2018 13:52:14 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Thu, 11 Oct 2018 13:51:50 -0700 Message-Id: <20181011205206.3552-5-richard.henderson@linaro.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20181011205206.3552-1-richard.henderson@linaro.org> References: <20181011205206.3552-1-richard.henderson@linaro.org> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2607:f8b0:4864:20::443 Subject: [Qemu-devel] [PATCH 04/20] target/arm: Promote consecutive memory ops for aa64 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: peter.maydell@linaro.org Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) X-ZohoMail: RDMRC_1 RDKM_2 RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" For a sequence of loads or stores from a single register, little-endian operations can be promoted to an 8-byte op. This can reduce the number of operations by a factor of 8. Signed-off-by: Richard Henderson --- target/arm/translate-a64.c | 66 +++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index fff99ca303..2f4041462e 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1200,25 +1200,23 @@ static void write_vec_element_i32(DisasContext *s, = TCGv_i32 tcg_src, =20 /* Store from vector register to memory */ static void do_vec_st(DisasContext *s, int srcidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop =3D s->be_data + size; TCGv_i64 tcg_tmp =3D tcg_temp_new_i64(); =20 read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size= ); =20 tcg_temp_free_i64(tcg_tmp); } =20 /* Load from memory to vector register */ static void do_vec_ld(DisasContext *s, int destidx, int element, - TCGv_i64 tcg_addr, int size) + TCGv_i64 tcg_addr, int size, TCGMemOp endian) { - TCGMemOp memop =3D s->be_data + size; TCGv_i64 tcg_tmp =3D tcg_temp_new_i64(); =20 - tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size= ); write_vec_element(s, tcg_tmp, destidx, element, size); =20 tcg_temp_free_i64(tcg_tmp); @@ -3013,9 +3011,10 @@ static void disas_ldst_multiple_struct(DisasContext = *s, uint32_t insn) bool is_postidx =3D extract32(insn, 23, 1); bool is_q =3D extract32(insn, 30, 1); TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; + TCGMemOp endian =3D s->be_data; =20 - int ebytes =3D 1 << size; - int elements =3D (is_q ? 128 : 64) / (8 << size); + int ebytes; /* bytes per element */ + int elements; /* elements per vector */ int rpt; /* num iterations */ int selem; /* structure elements */ int r; @@ -3074,6 +3073,20 @@ static void disas_ldst_multiple_struct(DisasContext = *s, uint32_t insn) gen_check_sp_alignment(s); } =20 + /* For our purposes, bytes are always little-endian. */ + if (size =3D=3D 0) { + endian =3D MO_LE; + } + + /* Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + if (selem =3D=3D 1 && endian =3D=3D MO_LE) { + size =3D 3; + } + ebytes =3D 1 << size; + elements =3D (is_q ? 16 : 8) / ebytes; + tcg_rn =3D cpu_reg_sp(s, rn); tcg_addr =3D tcg_temp_new_i64(); tcg_gen_mov_i64(tcg_addr, tcg_rn); @@ -3082,32 +3095,33 @@ static void disas_ldst_multiple_struct(DisasContext= *s, uint32_t insn) for (r =3D 0; r < rpt; r++) { int e; for (e =3D 0; e < elements; e++) { - int tt =3D (rt + r) % 32; int xs; for (xs =3D 0; xs < selem; xs++) { + int tt =3D (rt + r + xs) % 32; if (is_store) { - do_vec_st(s, tt, e, tcg_addr, size); + do_vec_st(s, tt, e, tcg_addr, size, endian); } else { - do_vec_ld(s, tt, e, tcg_addr, size); - - /* For non-quad operations, setting a slice of the low - * 64 bits of the register clears the high 64 bits (in - * the ARM ARM pseudocode this is implicit in the fact - * that 'rval' is a 64 bit wide variable). - * For quad operations, we might still need to zero the - * high bits of SVE. We optimize by noticing that we = only - * need to do this the first time we touch a register. - */ - if (e =3D=3D 0 && (r =3D=3D 0 || xs =3D=3D selem - 1))= { - clear_vec_high(s, is_q, tt); - } + do_vec_ld(s, tt, e, tcg_addr, size, endian); } tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); - tt =3D (tt + 1) % 32; } } } =20 + if (!is_store) { + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). + * For quad operations, we might still need to zero the + * high bits of SVE. + */ + for (r =3D 0; r < rpt * selem; r++) { + int tt =3D (rt + r) % 32; + clear_vec_high(s, is_q, tt); + } + } + if (is_postidx) { int rm =3D extract32(insn, 16, 5); if (rm =3D=3D 31) { @@ -3228,9 +3242,9 @@ static void disas_ldst_single_struct(DisasContext *s,= uint32_t insn) } else { /* Load/store one element per register */ if (is_load) { - do_vec_ld(s, rt, index, tcg_addr, scale); + do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data); } else { - do_vec_st(s, rt, index, tcg_addr, scale); + do_vec_st(s, rt, index, tcg_addr, scale, s->be_data); } } tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes); --=20 2.17.1