From nobody Sat Jun 29 02:51:50 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=redhat.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1552295597728693.9663686634432; Mon, 11 Mar 2019 02:13:17 -0700 (PDT) Received: from localhost ([127.0.0.1]:57904 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1h3Gzf-0001r1-Hv for importer@patchew.org; Mon, 11 Mar 2019 05:13:11 -0400 Received: from eggs.gnu.org ([209.51.188.92]:43538) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1h3Grt-0003uB-F2 for qemu-devel@nongnu.org; Mon, 11 Mar 2019 05:05:14 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1h3Grq-0006V6-Ry for qemu-devel@nongnu.org; Mon, 11 Mar 2019 05:05:08 -0400 Received: from mx1.redhat.com ([209.132.183.28]:35100) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1h3Grq-0006T2-Gb; Mon, 11 Mar 2019 05:05:06 -0400 Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 9EC2781F2F; Mon, 11 Mar 2019 09:05:05 +0000 (UTC) Received: from localhost (ovpn-117-96.ams2.redhat.com [10.36.117.96]) by smtp.corp.redhat.com (Postfix) with ESMTPS id CECB817250; Mon, 11 Mar 2019 09:05:00 +0000 (UTC) From: Cornelia Huck To: Peter Maydell Date: Mon, 11 Mar 2019 10:03:09 +0100 Message-Id: 
<20190311090322.21603-21-cohuck@redhat.com> In-Reply-To: <20190311090322.21603-1-cohuck@redhat.com> References: <20190311090322.21603-1-cohuck@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.16 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.25]); Mon, 11 Mar 2019 09:05:05 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [PULL 20/33] s390x/tcg: Implement VECTOR PACK * X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: qemu-s390x@nongnu.org, Cornelia Huck , qemu-devel@nongnu.org, David Hildenbrand Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" From: David Hildenbrand This is a big one. Luckily we only have a limited set of such nasty instructions. We'll implement all variants with helpers, except when sources and the destination don't overlap for VECTOR PACK. Provide different helpers when the cc is to be modified. We'll return the cc then via env->cc_op. 
Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand Message-Id: <20190307121539.12842-20-david@redhat.com> Signed-off-by: Cornelia Huck --- target/s390x/helper.h | 15 +++++ target/s390x/insn-data.def | 6 ++ target/s390x/translate_vx.inc.c | 89 +++++++++++++++++++++++++++ target/s390x/vec_helper.c | 105 ++++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 6c745ba0f6ad..315495f49fc2 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -126,6 +126,21 @@ DEF_HELPER_FLAGS_1(stck, TCG_CALL_NO_RWG_SE, i64, env) =20 /* =3D=3D=3D Vector Support Instructions =3D=3D=3D */ DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64) +DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpk64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vpks16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i3= 2) +DEF_HELPER_FLAGS_4(gvec_vpks32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i3= 2) +DEF_HELPER_FLAGS_4(gvec_vpks64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i3= 2) +DEF_HELPER_5(gvec_vpks_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpks_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpks_cc64, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vpkls16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i= 32) +DEF_HELPER_FLAGS_4(gvec_vpkls32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i= 32) +DEF_HELPER_FLAGS_4(gvec_vpkls64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i= 32) +DEF_HELPER_5(gvec_vpkls_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpkls_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32) =20 #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index 
f7232f861575..39cd6f27c159 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1014,6 +1014,12 @@ F(0xe761, VMRH, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC) /* VECTOR MERGE LOW */ F(0xe760, VMRL, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC) +/* VECTOR PACK */ + F(0xe794, VPK, VRR_c, V, 0, 0, 0, 0, vpk, 0, IF_VEC) +/* VECTOR PACK SATURATE */ + F(0xe797, VPKS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC) +/* VECTOR PACK LOGICAL SATURATE */ + F(0xe795, VPKLS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC) =20 #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.in= c.c index 9c2cf8a77b8f..7ae38f71f75e 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -135,6 +135,12 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint= 8_t reg, TCGv_i64 enr, tcg_temp_free_i64(tmp); } =20 +#define gen_gvec_3_ool(v1, v2, v3, data, fn) \ + tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, data, fn) +#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \ + tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), ptr, 16, 16, data, fn) #define gen_gvec_dup_i64(es, v1, c) \ tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c) #define gen_gvec_mov(v1, v2) \ @@ -574,3 +580,86 @@ static DisasJumpType op_vmr(DisasContext *s, DisasOps = *o) tcg_temp_free_i64(tmp); return DISAS_NEXT; } + +static DisasJumpType op_vpk(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 =3D get_field(s->fields, v1); + const uint8_t v2 =3D get_field(s->fields, v2); + const uint8_t v3 =3D get_field(s->fields, v3); + const uint8_t es =3D get_field(s->fields, m4); + static gen_helper_gvec_3 * const vpk[3] =3D { + gen_helper_gvec_vpk16, + gen_helper_gvec_vpk32, + gen_helper_gvec_vpk64, + }; + static gen_helper_gvec_3 * const vpks[3] =3D { + gen_helper_gvec_vpks16, + gen_helper_gvec_vpks32, + 
gen_helper_gvec_vpks64, + }; + static gen_helper_gvec_3_ptr * const vpks_cc[3] =3D { + gen_helper_gvec_vpks_cc16, + gen_helper_gvec_vpks_cc32, + gen_helper_gvec_vpks_cc64, + }; + static gen_helper_gvec_3 * const vpkls[3] =3D { + gen_helper_gvec_vpkls16, + gen_helper_gvec_vpkls32, + gen_helper_gvec_vpkls64, + }; + static gen_helper_gvec_3_ptr * const vpkls_cc[3] =3D { + gen_helper_gvec_vpkls_cc16, + gen_helper_gvec_vpkls_cc32, + gen_helper_gvec_vpkls_cc64, + }; + + if (es =3D=3D ES_8 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields->op2) { + case 0x97: + if (get_field(s->fields, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]); + } + break; + case 0x95: + if (get_field(s->fields, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]); + } + break; + case 0x94: + /* If sources and destination don't overlap -> fast path */ + if (v1 !=3D v2 && v1 !=3D v3) { + const uint8_t src_es =3D get_field(s->fields, m4); + const uint8_t dst_es =3D src_es - 1; + TCGv_i64 tmp =3D tcg_temp_new_i64(); + int dst_idx, src_idx; + + for (dst_idx =3D 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_id= x++) { + src_idx =3D dst_idx; + if (src_idx < NUM_VEC_ELEMENTS(src_es)) { + read_vec_element_i64(tmp, v2, src_idx, src_es); + } else { + src_idx -=3D NUM_VEC_ELEMENTS(src_es); + read_vec_element_i64(tmp, v3, src_idx, src_es); + } + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + tcg_temp_free_i64(tmp); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]); + } + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec_helper.c b/target/s390x/vec_helper.c index 5b2333a8d666..7f680201b5f5 100644 --- a/target/s390x/vec_helper.c +++ b/target/s390x/vec_helper.c @@ -15,6 +15,7 @@ #include 
"internal.h" #include "vec.h" #include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" @@ -42,3 +43,107 @@ void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t= addr, uint64_t bytes) *(S390Vector *)v1 =3D tmp; } } + +#define DEF_VPK_HFN(BITS, TBITS) = \ +typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *); = \ +static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2, = \ + const S390Vector *v3, vpk##BITS##_fn fn) = \ +{ = \ + int i, saturated =3D 0; = \ + S390Vector tmp; = \ + = \ + for (i =3D 0; i < (128 / TBITS); i++) { = \ + uint##BITS##_t src; = \ + = \ + if (i < (128 / BITS)) { = \ + src =3D s390_vec_read_element##BITS(v2, i); = \ + } else { = \ + src =3D s390_vec_read_element##BITS(v3, i - (128 / BITS)); = \ + } = \ + s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated)); = \ + } = \ + *v1 =3D tmp; = \ + return saturated; = \ +} +DEF_VPK_HFN(64, 32) +DEF_VPK_HFN(32, 16) +DEF_VPK_HFN(16, 8) + +#define DEF_VPK(BITS, TBITS) = \ +static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated) = \ +{ = \ + return src; = \ +} = \ +void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3, = \ + uint32_t desc) = \ +{ = \ + vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e); = \ +} +DEF_VPK(64, 32) +DEF_VPK(32, 16) +DEF_VPK(16, 8) + +#define DEF_VPKS(BITS, TBITS) = \ +static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated) = \ +{ = \ + if ((int##BITS##_t)src > INT##TBITS##_MAX) { = \ + (*saturated)++; = \ + return INT##TBITS##_MAX; = \ + } else if ((int##BITS##_t)src < INT##TBITS##_MIN) { = \ + (*saturated)++; = \ + return INT##TBITS##_MIN; = \ + } = \ + return src; = \ +} = \ +void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3, = \ + uint32_t desc) = \ +{ = \ + vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); = \ +} = \ +void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3, = \ + CPUS390XState 
*env, uint32_t desc) = \ +{ = \ + int saturated =3D vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); = \ + = \ + if (saturated =3D=3D (128 / TBITS)) { = \ + env->cc_op =3D 3; = \ + } else if (saturated) { = \ + env->cc_op =3D 1; = \ + } else { = \ + env->cc_op =3D 0; = \ + } = \ +} +DEF_VPKS(64, 32) +DEF_VPKS(32, 16) +DEF_VPKS(16, 8) + +#define DEF_VPKLS(BITS, TBITS) = \ +static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated) = \ +{ = \ + if (src > UINT##TBITS##_MAX) { = \ + (*saturated)++; = \ + return UINT##TBITS##_MAX; = \ + } = \ + return src; = \ +} = \ +void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3, = \ + uint32_t desc) = \ +{ = \ + vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); = \ +} = \ +void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3,= \ + CPUS390XState *env, uint32_t desc) = \ +{ = \ + int saturated =3D vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); = \ + = \ + if (saturated =3D=3D (128 / TBITS)) { = \ + env->cc_op =3D 3; = \ + } else if (saturated) { = \ + env->cc_op =3D 1; = \ + } else { = \ + env->cc_op =3D 0; = \ + } = \ +} +DEF_VPKLS(64, 32) +DEF_VPKLS(32, 16) +DEF_VPKLS(16, 8) --=20 2.17.2