From nobody Mon Feb 9 22:45:38 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1518893437364173.34977810488374; Sat, 17 Feb 2018 10:50:37 -0800 (PST) Received: from localhost ([::1]:48265 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1en7ZE-0005Jz-DW for importer@patchew.org; Sat, 17 Feb 2018 13:50:36 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40072) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1en79c-0000ho-1p for qemu-devel@nongnu.org; Sat, 17 Feb 2018 13:24:10 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1en79Z-0001kR-Fa for qemu-devel@nongnu.org; Sat, 17 Feb 2018 13:24:08 -0500 Received: from mail-pf0-x244.google.com ([2607:f8b0:400e:c00::244]:44407) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1en79Z-0001k9-6V for qemu-devel@nongnu.org; Sat, 17 Feb 2018 13:24:05 -0500 Received: by mail-pf0-x244.google.com with SMTP id 17so590931pfw.11 for ; Sat, 17 Feb 2018 10:24:05 -0800 (PST) Received: from cloudburst.twiddle.net ([50.0.192.64]) by smtp.gmail.com with ESMTPSA id h15sm13466712pfi.56.2018.02.17.10.24.02 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Sat, 17 Feb 2018 10:24:02 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; h=from:to:cc:subject:date:message-id:in-reply-to:references; 
bh=rsh86Uyni96VeArd/ErWXmUyZyr8qyIKJCMXUaKWZU0=; b=ghV2xgAlpAyAP7tkVG4GrFsiHoirQYM8f+3fMFeHpeaGG20n5nCteUNfgOcoOjMrf2 7Y4+pB6+l4ikquN+LOzSWDNiHwYhcjNDK+AZ69QMN8jyV4QIB2wBEdGa7QgDI16lqh1M GCIuHHH5atOAJH+aiJbArC+p4rzp22kKOmcLQ= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=rsh86Uyni96VeArd/ErWXmUyZyr8qyIKJCMXUaKWZU0=; b=D1bxWO31C63itXncltWZbHQf/fVmQ7q7D47e0KAM4awxcdhZFBeFbvpry6FcDi17zQ aTFL7x2hF9kgAuruQZz4ldYelSZc74pFb1GG3W8QJYbDtavJxXQ9MlYg8PYOfhnslada sh7wSXuWn6+PQEOJOonJAHAXnEuG+YdmEyaO7fltWw5eOHAgnMjhTDi8a8xLTm9Vzbec hI8vS3SY1xvB5WU7cSEEnZNXas2qTmYFOQDRDSjkCnjP0JPBrXTra4QPB+RLnpTJs7kp nNxk2nM4OcjMVnzABFC2B8cvW50fT7p4LFm5OhKUltjsm2WZrawHCansqhihUE1WL63O UjCA== X-Gm-Message-State: APf1xPAHJWrYsRnEVJSu8/I52sRrWIj8sQQwfjPvUREAzfH/6ePNaxZn PnTjW5XJ0cMON4eD74D/tArOz2yF83U= X-Google-Smtp-Source: AH8x226NKR9aWxSYjHKhD1vLp83ga+UGr+MVEywWmGUnVCXU6psNu+jESciHoqFh2V+aaqlLq+9LiQ== X-Received: by 10.98.208.3 with SMTP id p3mr9790778pfg.8.1518891843780; Sat, 17 Feb 2018 10:24:03 -0800 (PST) From: Richard Henderson To: qemu-devel@nongnu.org Date: Sat, 17 Feb 2018 10:22:39 -0800 Message-Id: <20180217182323.25885-24-richard.henderson@linaro.org> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180217182323.25885-1-richard.henderson@linaro.org> References: <20180217182323.25885-1-richard.henderson@linaro.org> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. 
X-Received-From: 2607:f8b0:400e:c00::244 Subject: [Qemu-devel] [PATCH v2 23/67] target/arm: Implement SVE Element Count Group X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: qemu-arm@nongnu.org Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) X-ZohoMail: RDKM_2 RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell --- target/arm/helper-sve.h | 11 ++ target/arm/sve_helper.c | 136 ++++++++++++++++++++++ target/arm/translate-sve.c | 274 +++++++++++++++++++++++++++++++++++++++++= +++- target/arm/sve.decode | 30 ++++- 4 files changed, 448 insertions(+), 3 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 4f1bd5a62f..2831e1643b 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -393,6 +393,17 @@ DEF_HELPER_FLAGS_4(sve_ftssel_h, TCG_CALL_NO_RWG, void= , ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_ftssel_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_ftssel_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) =20 +DEF_HELPER_FLAGS_4(sve_sqaddi_b, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32) +DEF_HELPER_FLAGS_4(sve_sqaddi_h, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32) +DEF_HELPER_FLAGS_4(sve_sqaddi_s, TCG_CALL_NO_RWG, void, ptr, ptr, s64, i32) +DEF_HELPER_FLAGS_4(sve_sqaddi_d, TCG_CALL_NO_RWG, void, ptr, ptr, s64, i32) + +DEF_HELPER_FLAGS_4(sve_uqaddi_b, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32) +DEF_HELPER_FLAGS_4(sve_uqaddi_h, TCG_CALL_NO_RWG, void, ptr, ptr, s32, i32) +DEF_HELPER_FLAGS_4(sve_uqaddi_s, TCG_CALL_NO_RWG, void, ptr, ptr, s64, i32) +DEF_HELPER_FLAGS_4(sve_uqaddi_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(sve_uqsubi_d, TCG_CALL_NO_RWG, 
void, ptr, ptr, i64, i32) + DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr= , i32) DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr= , i32) DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr= , i32) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index b4f70af23f..cfda16d520 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -1225,3 +1225,139 @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void = *vm, uint32_t desc) d[i] =3D nn ^ (mm & 2) << 62; } } + +/* + * Signed saturating addition with scalar operand. + */ + +void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(int8_t)) { + int r =3D *(int8_t *)(a + i) + b; + if (r > INT8_MAX) { + r =3D INT8_MAX; + } else if (r < INT8_MIN) { + r =3D INT8_MIN; + } + *(int8_t *)(d + i) =3D r; + } +} + +void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(int16_t)) { + int r =3D *(int16_t *)(a + i) + b; + if (r > INT16_MAX) { + r =3D INT16_MAX; + } else if (r < INT16_MIN) { + r =3D INT16_MIN; + } + *(int16_t *)(d + i) =3D r; + } +} + +void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(int32_t)) { + int64_t r =3D *(int32_t *)(a + i) + b; + if (r > INT32_MAX) { + r =3D INT32_MAX; + } else if (r < INT32_MIN) { + r =3D INT32_MIN; + } + *(int32_t *)(d + i) =3D r; + } +} + +void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(int64_t)) { + int64_t ai =3D *(int64_t *)(a + i); + int64_t r =3D ai + b; + if (((r ^ ai) & ~(ai ^ b)) < 0) { + /* Signed overflow. */ + r =3D (r < 0 ? 
INT64_MAX : INT64_MIN); + } + *(int64_t *)(d + i) =3D r; + } +} + +/* + * Unsigned saturating addition with scalar operand. + */ + +void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(uint8_t)) { + int r =3D *(uint8_t *)(a + i) + b; + if (r > UINT8_MAX) { + r =3D UINT8_MAX; + } else if (r < 0) { + r =3D 0; + } + *(uint8_t *)(d + i) =3D r; + } +} + +void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(uint16_t)) { + int r =3D *(uint16_t *)(a + i) + b; + if (r > UINT16_MAX) { + r =3D UINT16_MAX; + } else if (r < 0) { + r =3D 0; + } + *(uint16_t *)(d + i) =3D r; + } +} + +void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(uint32_t)) { + int64_t r =3D *(uint32_t *)(a + i) + b; + if (r > UINT32_MAX) { + r =3D UINT32_MAX; + } else if (r < 0) { + r =3D 0; + } + *(uint32_t *)(d + i) =3D r; + } +} + +void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(uint64_t)) { + uint64_t r =3D *(uint64_t *)(a + i) + b; + if (r < b) { + r =3D UINT64_MAX; + } + *(uint64_t *)(d + i) =3D r; + } +} + +void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz =3D simd_oprsz(desc); + + for (i =3D 0; i < oprsz; i +=3D sizeof(uint64_t)) { + uint64_t ai =3D *(uint64_t *)(a + i); + *(uint64_t *)(d + i) =3D (ai < b ? 
0 : ai - b); + } +} diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index e32be385fd..702f20e97b 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -61,6 +61,11 @@ static int tszimm_shl(int x) return x - (8 << tszimm_esz(x)); } =20 +static inline int plus1(int x) +{ + return x + 1; +} + /* * Include the generated decoder. */ @@ -127,7 +132,9 @@ static void do_vector3_z(DisasContext *s, GVecGen3Fn *g= vec_fn, /* Invoke a vector move on two Zregs. */ static void do_mov_z(DisasContext *s, int rd, int rn) { - do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn); + if (rd !=3D rn) { + do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn); + } } =20 /* Initialize a Zreg with replications of a 64-bit immediate. */ @@ -168,7 +175,9 @@ static void do_vecop4_p(DisasContext *s, const GVecGen4= *gvec_op, /* Invoke a vector move on two Pregs. */ static void do_mov_p(DisasContext *s, int rd, int rn) { - do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn); + if (rd !=3D rn) { + do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn); + } } =20 /* Set the cpu flags as per a return from an SVE helper. */ @@ -1378,6 +1387,267 @@ static void trans_PNEXT(DisasContext *s, arg_rr_esz= *a, uint32_t insn) do_pfirst_pnext(s, a, gen_helper_sve_pnext); } =20 +/* + *** SVE Element Count Group + */ + +/* Perform an inline saturating addition of a 32-bit value within + * a 64-bit register. The second operand is known to be positive, + * which halves the comparisons we must perform to bound the result. + */ +static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) +{ + int64_t ibound; + TCGv_i64 bound; + TCGCond cond; + + /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ + if (u) { + tcg_gen_ext32u_i64(reg, reg); + } else { + tcg_gen_ext32s_i64(reg, reg); + } + if (d) { + tcg_gen_sub_i64(reg, reg, val); + ibound =3D (u ? 0 : INT32_MIN); + cond =3D TCG_COND_LT; + } else { + tcg_gen_add_i64(reg, reg, val); + ibound =3D (u ? 
UINT32_MAX : INT32_MAX); + cond =3D TCG_COND_GT; + } + bound =3D tcg_const_i64(ibound); + tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg); + tcg_temp_free_i64(bound); +} + +/* Similarly with 64-bit values. */ +static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) +{ + TCGv_i64 t0 =3D tcg_temp_new_i64(); + TCGv_i64 t1 =3D tcg_temp_new_i64(); + TCGv_i64 t2; + + if (u) { + if (d) { + tcg_gen_sub_i64(t0, reg, val); + tcg_gen_movi_i64(t1, 0); + tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0); + } else { + tcg_gen_add_i64(t0, reg, val); + tcg_gen_movi_i64(t1, -1); + tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0); + } + } else { + if (d) { + /* Detect signed overflow for subtraction. */ + tcg_gen_xor_i64(t0, reg, val); + tcg_gen_sub_i64(t1, reg, val); + tcg_gen_xor_i64(reg, reg, t1); + tcg_gen_and_i64(t0, t0, reg); + + /* Bound the result. */ + tcg_gen_movi_i64(reg, INT64_MIN); + t2 =3D tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); + } else { + /* Detect signed overflow for addition. */ + tcg_gen_xor_i64(t0, reg, val); + tcg_gen_add_i64(reg, reg, val); + tcg_gen_xor_i64(t1, reg, val); + tcg_gen_andc_i64(t0, t1, t0); + + /* Bound the result. */ + tcg_gen_movi_i64(t1, INT64_MAX); + t2 =3D tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); + } + tcg_temp_free_i64(t2); + } + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + +/* Similarly with a vector and a scalar operand. 
*/ +static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, + TCGv_i64 val, bool u, bool d) +{ + unsigned vsz =3D vec_full_reg_size(s); + TCGv_ptr dptr, nptr; + TCGv_i32 t32, desc; + TCGv_i64 t64; + + dptr =3D tcg_temp_new_ptr(); + nptr =3D tcg_temp_new_ptr(); + tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); + tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); + desc =3D tcg_const_i32(simd_desc(vsz, vsz, 0)); + + switch (esz) { + case MO_8: + t32 =3D tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t32, val); + if (d) { + tcg_gen_neg_i32(t32, t32); + } + if (u) { + gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); + } else { + gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); + } + tcg_temp_free_i32(t32); + break; + + case MO_16: + t32 =3D tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t32, val); + if (d) { + tcg_gen_neg_i32(t32, t32); + } + if (u) { + gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); + } else { + gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); + } + tcg_temp_free_i32(t32); + break; + + case MO_32: + t64 =3D tcg_temp_new_i64(); + if (d) { + tcg_gen_neg_i64(t64, val); + } else { + tcg_gen_mov_i64(t64, val); + } + if (u) { + gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); + } else { + gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); + } + tcg_temp_free_i64(t64); + break; + + case MO_64: + if (u) { + if (d) { + gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); + } else { + gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); + } + } else if (d) { + t64 =3D tcg_temp_new_i64(); + tcg_gen_neg_i64(t64, val); + gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); + tcg_temp_free_i64(t64); + } else { + gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); + } + break; + + default: + g_assert_not_reached(); + } + + tcg_temp_free_ptr(dptr); + tcg_temp_free_ptr(nptr); + tcg_temp_free_i32(desc); +} + +static void trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn) +{ + unsigned fullsz =3D vec_full_reg_size(s); + unsigned numelem =3D 
decode_pred_count(fullsz, a->pat, a->esz); + + tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); +} + +static void trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t in= sn) +{ + unsigned fullsz =3D vec_full_reg_size(s); + unsigned numelem =3D decode_pred_count(fullsz, a->pat, a->esz); + int inc =3D numelem * a->imm * (a->d ? -1 : 1); + TCGv_i64 reg =3D cpu_reg(s, a->rd); + + tcg_gen_addi_i64(reg, reg, inc); +} + +static void trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a, + uint32_t insn) +{ + unsigned fullsz =3D vec_full_reg_size(s); + unsigned numelem =3D decode_pred_count(fullsz, a->pat, a->esz); + int inc =3D numelem * a->imm; + TCGv_i64 reg =3D cpu_reg(s, a->rd); + + /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ + if (inc =3D=3D 0) { + if (a->u) { + tcg_gen_ext32u_i64(reg, reg); + } else { + tcg_gen_ext32s_i64(reg, reg); + } + } else { + TCGv_i64 t =3D tcg_const_i64(inc); + do_sat_addsub_32(reg, t, a->u, a->d); + tcg_temp_free_i64(t); + } +} + +static void trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a, + uint32_t insn) +{ + unsigned fullsz =3D vec_full_reg_size(s); + unsigned numelem =3D decode_pred_count(fullsz, a->pat, a->esz); + int inc =3D numelem * a->imm; + TCGv_i64 reg =3D cpu_reg(s, a->rd); + + if (inc !=3D 0) { + TCGv_i64 t =3D tcg_const_i64(inc); + do_sat_addsub_64(reg, t, a->u, a->d); + tcg_temp_free_i64(t); + } +} + +static void trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t i= nsn) +{ + unsigned fullsz =3D vec_full_reg_size(s); + unsigned numelem =3D decode_pred_count(fullsz, a->pat, a->esz); + int inc =3D numelem * a->imm; + + if (a->esz =3D=3D 0) { + unallocated_encoding(s); + return; + } + if (inc !=3D 0) { + TCGv_i64 t =3D tcg_const_i64(a->d ? 
-inc : inc); + tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), t, fullsz, fullsz= ); + tcg_temp_free_i64(t); + } else { + do_mov_z(s, a->rd, a->rn); + } +} + +static void trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a, + uint32_t insn) +{ + unsigned fullsz =3D vec_full_reg_size(s); + unsigned numelem =3D decode_pred_count(fullsz, a->pat, a->esz); + int inc =3D numelem * a->imm; + + if (a->esz =3D=3D 0) { + unallocated_encoding(s); + return; + } + if (inc !=3D 0) { + TCGv_i64 t =3D tcg_const_i64(inc); + do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d); + tcg_temp_free_i64(t); + } else { + do_mov_z(s, a->rd, a->rn); + } +} + /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group */ diff --git a/target/arm/sve.decode b/target/arm/sve.decode index 4ea3f33919..5690b5fcb9 100644 --- a/target/arm/sve.decode +++ b/target/arm/sve.decode @@ -22,6 +22,7 @@ ########################################################################### # Named fields. These are primarily for disjoint fields. =20 +%imm4_16_p1 16:4 !function=3Dplus1 %imm6_22_5 22:1 5:5 %imm9_16_10 16:s6 10:3 %preg4_5 5:4 @@ -58,6 +59,8 @@ &rprrr_esz rd pg rn rm ra esz &rpri_esz rd pg rn imm esz &ptrue rd esz pat s +&incdec_cnt rd pat esz imm d u +&incdec2_cnt rd rn pat esz imm d u =20 ########################################################################### # Named instruction formats. These are generally used to @@ -115,6 +118,13 @@ @rd_rn_i9 ........ ........ ...... rn:5 rd:5 \ &rri imm=3D%imm9_16_10 =20 +# One register, pattern, and uint4+1. +# User must fill in U and D. +@incdec_cnt ........ esz:2 .. .... ...... pat:5 rd:5 \ + &incdec_cnt imm=3D%imm4_16_p1 +@incdec2_cnt ........ esz:2 .. .... ...... pat:5 rd:5 \ + &incdec2_cnt imm=3D%imm4_16_p1 rn=3D%reg_movprfx + ########################################################################### # Instruction patterns. Grouped according to the SVE encodingindex.xhtml. 
=20 @@ -301,7 +311,25 @@ FEXPA 00000100 .. 1 00000 101110 ..... ..... @rd_rn # Note esz !=3D 0 FTSSEL 00000100 .. 1 ..... 101100 ..... ..... @rd_rn_rm =20 -### SVE Predicate Logical Operations Group +### SVE Element Count Group + +# SVE element count +CNT_r 00000100 .. 10 .... 1110 0 0 ..... ..... @incdec_cnt d=3D0 u=3D1 + +# SVE inc/dec register by element count +INCDEC_r 00000100 .. 11 .... 1110 0 d:1 ..... ..... @incdec_cnt u=3D1 + +# SVE saturating inc/dec register by element count +SINCDEC_r_32 00000100 .. 10 .... 1111 d:1 u:1 ..... ..... @incdec_cnt +SINCDEC_r_64 00000100 .. 11 .... 1111 d:1 u:1 ..... ..... @incdec_cnt + +# SVE inc/dec vector by element count +# Note this requires esz !=3D 0. +INCDEC_v 00000100 .. 1 1 .... 1100 0 d:1 ..... ..... @incdec2_cnt u=3D1 + +# SVE saturating inc/dec vector by element count +# Note these require esz !=3D 0. +SINCDEC_v 00000100 .. 1 0 .... 1100 d:1 u:1 ..... ..... @incdec2_cnt =20 # SVE predicate logical operations AND_pppp 00100101 0. 00 .... 01 .... 0 .... 0 .... @pd_pg_pn_pm_s --=20 2.14.3