From nobody Sat Feb 7 05:37:10 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1679973179753125.11723906543205; Mon, 27 Mar 2023 20:12:59 -0700 (PDT) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1pgzg4-0007Mx-6Q; Mon, 27 Mar 2023 23:07:16 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1pgzfw-0007IF-BK for qemu-devel@nongnu.org; Mon, 27 Mar 2023 23:07:09 -0400 Received: from mail.loongson.cn ([114.242.206.163] helo=loongson.cn) by eggs.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1pgzft-0000kC-8W for qemu-devel@nongnu.org; Mon, 27 Mar 2023 23:07:08 -0400 Received: from loongson.cn (unknown [10.2.5.185]) by gateway (Coremail) with SMTP id _____8AxJMTNWSJkjNoSAA--.17544S3; Tue, 28 Mar 2023 11:06:53 +0800 (CST) Received: from localhost.localdomain (unknown [10.2.5.185]) by localhost.localdomain (Coremail) with SMTP id AQAAf8Dxyr24WSJkZukOAA--.10252S36; Tue, 28 Mar 2023 11:06:52 +0800 (CST) From: Song Gao To: qemu-devel@nongnu.org Cc: richard.henderson@linaro.org Subject: [RFC PATCH v2 34/44] target/loongarch: Implement LSX fpu arith instructions Date: Tue, 28 Mar 2023 11:06:21 +0800 Message-Id: <20230328030631.3117129-35-gaosong@loongson.cn> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20230328030631.3117129-1-gaosong@loongson.cn> References: <20230328030631.3117129-1-gaosong@loongson.cn> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-CM-TRANSID: AQAAf8Dxyr24WSJkZukOAA--.10252S36 X-CM-SenderInfo: 5jdr20tqj6z05rqj20fqof0/ X-Coremail-Antispam: 1Uk129KBjvAXoWfJrW3Cw18ur1fKF4kXFWxCrg_yoW8Aw4xAo WfC3W5Jw48GrySkry2ka4vvF97t340yw4DJaykur4aqa4xAry7Kr45K3s5tayfA3yYgry3 J39rZ3W5tw1a9ryDn29KB7ZKAUJUUUU8529EdanIXcx71UUUUU7KY7ZEXasCq-sGcSsGvf J3Ic02F40EFcxC0VAKzVAqx4xG6I80ebIjqfuFe4nvWSU5nxnvy29KBjDU0xBIdaVrnRJU UUqG1xkIjI8I6I8E6xAIw20EY4v20xvaj40_Wr0E3s1l8cAvFVAK0II2c7xJM28CjxkF64 kEwVA0rcxSw2x7M28EF7xvwVC0I7IYx2IY67AKxVW7JVWDJwA2z4x0Y4vE2Ix0cI8IcVCY 1x0267AKxVWxJVW8Jr1l84ACjcxK6I8E87Iv67AKxVW8Jr0_Cr1UM28EF7xvwVC2z280aV CY1x0267AKxVW8Jr0_Cr1UM2AIxVAIcxkEcVAq07x20xvEncxIr21l57IF6xkI12xvs2x2 6I8E6xACxx1l5I8CrVACY4xI64kE6c02F40Ex7xfMcIj6x8ErcxFaVAv8VWrMcvjeVCFs4 IE7xkEbVWUJVW8JwACjcxG0xvY0x0EwIxGrwCF04k20xvY0x0EwIxGrwCF04k20xvE74AG Y7Cv6cx26rWl4I8I3I0E4IkC6x0Yz7v_Jr0_Gr1lx2IqxVAqx4xG67AKxVWUJVWUGwC20s 026x8GjcxK67AKxVWUGVWUWwC2zVAF1VAY17CE14v26r1Y6r17MIIYrxkI7VAKI48JMIIF 0xvE2Ix0cI8IcVAFwI0_Ar0_tr1lIxAIcVC0I7IYx2IY6xkF7I0E14v26F4j6r4UJwCI42 IY6xAIw20EY4v20xvaj40_Jr0_JF4lIxAIcVC2z280aVAFwI0_Cr0_Gr1UMIIF0xvEx4A2 jsIEc7CjxVAFwI0_Gr0_Gr1UYxBIdaVFxhVjvjDU0xZFpf9x0zRVWlkUUUUU= Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=114.242.206.163; envelope-from=gaosong@loongson.cn; helo=loongson.cn X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_PASS=-0.001, SPF_PASS=-0.001 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: qemu-devel-bounces+importer=patchew.org@nongnu.org X-ZM-MESSAGEID: 1679973181726100003 Content-Type: text/plain; charset="utf-8" This patch includes: - VF{ADD/SUB/MUL/DIV}.{S/D}; - VF{MADD/MSUB/NMADD/NMSUB}.{S/D}; - VF{MAX/MIN}.{S/D}; - VF{MAXA/MINA}.{S/D}; - VFLOGB.{S/D}; - VFCLASS.{S/D}; - VF{SQRT/RECIP/RSQRT}.{S/D}. Signed-off-by: Song Gao Reviewed-by: Richard Henderson --- target/loongarch/cpu.h | 4 + target/loongarch/disas.c | 46 +++++ target/loongarch/fpu_helper.c | 2 +- target/loongarch/helper.h | 41 +++++ target/loongarch/insn_trans/trans_lsx.c.inc | 55 ++++++ target/loongarch/insns.decode | 43 +++++ target/loongarch/internals.h | 1 + target/loongarch/lsx_helper.c | 187 ++++++++++++++++++++ 8 files changed, 378 insertions(+), 1 deletion(-) diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index 2e5326f474..abbe79f783 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -55,6 +55,10 @@ FIELD(FCSR0, CAUSE, 24, 5) do { \ (REG) =3D FIELD_DP32(REG, FCSR0, CAUSE, V); \ } while (0) +#define UPDATE_FP_CAUSE(REG, V) \ + do { \ + (REG) |=3D FIELD_DP32(0, FCSR0, CAUSE, V); \ + } while (0) =20 #define GET_FP_ENABLES(REG) FIELD_EX32(REG, FCSR0, ENABLES) #define SET_FP_ENABLES(REG, V) \ diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c index be2bb9cc42..b57b284e49 100644 --- a/target/loongarch/disas.c +++ b/target/loongarch/disas.c @@ -807,6 +807,11 @@ static void output_vv(DisasContext *ctx, arg_vv *a, co= nst char *mnemonic) output(ctx, mnemonic, "v%d, v%d", a->vd, a->vj); } =20 +static void output_vvvv(DisasContext *ctx, arg_vvvv *a, const char *mnemon= ic) +{ + output(ctx, mnemonic, "v%d, v%d, v%d, v%d", a->vd, a->vj, a->vk, a->va= ); +} + INSN_LSX(vadd_b, vvv) INSN_LSX(vadd_h, vvv) INSN_LSX(vadd_w, vvv) @@ -1302,3 +1307,44 @@ INSN_LSX(vfrstp_b, vvv) INSN_LSX(vfrstp_h, vvv) INSN_LSX(vfrstpi_b, vv_i) INSN_LSX(vfrstpi_h, vv_i) + +INSN_LSX(vfadd_s, vvv) +INSN_LSX(vfadd_d, vvv) +INSN_LSX(vfsub_s, vvv) +INSN_LSX(vfsub_d, vvv) +INSN_LSX(vfmul_s, vvv) +INSN_LSX(vfmul_d, vvv) +INSN_LSX(vfdiv_s, vvv) +INSN_LSX(vfdiv_d, vvv) + +INSN_LSX(vfmadd_s, vvvv) +INSN_LSX(vfmadd_d, vvvv) +INSN_LSX(vfmsub_s, vvvv) +INSN_LSX(vfmsub_d, vvvv) +INSN_LSX(vfnmadd_s, vvvv) +INSN_LSX(vfnmadd_d, vvvv) +INSN_LSX(vfnmsub_s, vvvv) +INSN_LSX(vfnmsub_d, vvvv) + +INSN_LSX(vfmax_s, vvv) +INSN_LSX(vfmax_d, vvv) +INSN_LSX(vfmin_s, vvv) +INSN_LSX(vfmin_d, vvv) + +INSN_LSX(vfmaxa_s, vvv) +INSN_LSX(vfmaxa_d, vvv) +INSN_LSX(vfmina_s, vvv) +INSN_LSX(vfmina_d, vvv) + +INSN_LSX(vflogb_s, vv) +INSN_LSX(vflogb_d, vv) + +INSN_LSX(vfclass_s, vv) +INSN_LSX(vfclass_d, vv) + +INSN_LSX(vfsqrt_s, vv) +INSN_LSX(vfsqrt_d, vv) +INSN_LSX(vfrecip_s, vv) +INSN_LSX(vfrecip_d, vv) +INSN_LSX(vfrsqrt_s, vv) +INSN_LSX(vfrsqrt_d, vv) diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c index 4b9637210a..f6753c5875 100644 --- a/target/loongarch/fpu_helper.c +++ b/target/loongarch/fpu_helper.c @@ -33,7 +33,7 @@ void restore_fp_status(CPULoongArchState *env) set_flush_to_zero(0, &env->fp_status); } =20 -static int ieee_ex_to_loongarch(int xcpt) +int ieee_ex_to_loongarch(int xcpt) { int ret =3D 0; if (xcpt & float_flag_invalid) { diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h index d8b783ebc7..2c59fb09c0 100644 --- a/target/loongarch/helper.h +++ b/target/loongarch/helper.h @@ -530,3 +530,44 @@ DEF_HELPER_4(vfrstp_b, void, env, i32, i32, i32) DEF_HELPER_4(vfrstp_h, void, env, i32, i32, i32) DEF_HELPER_4(vfrstpi_b, void, env, i32, i32, i32) DEF_HELPER_4(vfrstpi_h, void, env, i32, i32, i32) + +DEF_HELPER_4(vfadd_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfadd_d, void, env, i32, i32, i32) +DEF_HELPER_4(vfsub_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfsub_d, void, env, i32, i32, i32) +DEF_HELPER_4(vfmul_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfmul_d, void, env, i32, i32, i32) +DEF_HELPER_4(vfdiv_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfdiv_d, void, env, i32, i32, i32) + +DEF_HELPER_5(vfmadd_s, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfmadd_d, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfmsub_s, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfmsub_d, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfnmadd_s, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfnmadd_d, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfnmsub_s, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vfnmsub_d, void, env, i32, i32, i32, i32) + +DEF_HELPER_4(vfmax_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfmax_d, void, env, i32, i32, i32) +DEF_HELPER_4(vfmin_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfmin_d, void, env, i32, i32, i32) + +DEF_HELPER_4(vfmaxa_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfmaxa_d, void, env, i32, i32, i32) +DEF_HELPER_4(vfmina_s, void, env, i32, i32, i32) +DEF_HELPER_4(vfmina_d, void, env, i32, i32, i32) + +DEF_HELPER_3(vflogb_s, void, env, i32, i32) +DEF_HELPER_3(vflogb_d, void, env, i32, i32) + +DEF_HELPER_3(vfclass_s, void, env, i32, i32) +DEF_HELPER_3(vfclass_d, void, env, i32, i32) + +DEF_HELPER_3(vfsqrt_s, void, env, i32, i32) +DEF_HELPER_3(vfsqrt_d, void, env, i32, i32) +DEF_HELPER_3(vfrecip_s, void, env, i32, i32) +DEF_HELPER_3(vfrecip_d, void, env, i32, i32) +DEF_HELPER_3(vfrsqrt_s, void, env, i32, i32) +DEF_HELPER_3(vfrsqrt_d, void, env, i32, i32) diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch= /insn_trans/trans_lsx.c.inc index 9ba9113ca3..34a272ce00 100644 --- a/target/loongarch/insn_trans/trans_lsx.c.inc +++ b/target/loongarch/insn_trans/trans_lsx.c.inc @@ -15,6 +15,20 @@ #define CHECK_SXE #endif =20 +static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, + TCGv_i32, TCGv_i32)) +{ + TCGv_i32 vd =3D tcg_constant_i32(a->vd); + TCGv_i32 vj =3D tcg_constant_i32(a->vj); + TCGv_i32 vk =3D tcg_constant_i32(a->vk); + TCGv_i32 va =3D tcg_constant_i32(a->va); + + CHECK_SXE; + func(cpu_env, vd, vj, vk, va); + return true; +} + static bool gen_vvv(DisasContext *ctx, arg_vvv *a, void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) { @@ -2829,3 +2843,44 @@ TRANS(vfrstp_b, gen_vvv, gen_helper_vfrstp_b) TRANS(vfrstp_h, gen_vvv, gen_helper_vfrstp_h) TRANS(vfrstpi_b, gen_vv_i, gen_helper_vfrstpi_b) TRANS(vfrstpi_h, gen_vv_i, gen_helper_vfrstpi_h) + +TRANS(vfadd_s, gen_vvv, gen_helper_vfadd_s) +TRANS(vfadd_d, gen_vvv, gen_helper_vfadd_d) +TRANS(vfsub_s, gen_vvv, gen_helper_vfsub_s) +TRANS(vfsub_d, gen_vvv, gen_helper_vfsub_d) +TRANS(vfmul_s, gen_vvv, gen_helper_vfmul_s) +TRANS(vfmul_d, gen_vvv, gen_helper_vfmul_d) +TRANS(vfdiv_s, gen_vvv, gen_helper_vfdiv_s) +TRANS(vfdiv_d, gen_vvv, gen_helper_vfdiv_d) + +TRANS(vfmadd_s, gen_vvvv, gen_helper_vfmadd_s) +TRANS(vfmadd_d, gen_vvvv, gen_helper_vfmadd_d) +TRANS(vfmsub_s, gen_vvvv, gen_helper_vfmsub_s) +TRANS(vfmsub_d, gen_vvvv, gen_helper_vfmsub_d) +TRANS(vfnmadd_s, gen_vvvv, gen_helper_vfnmadd_s) +TRANS(vfnmadd_d, gen_vvvv, gen_helper_vfnmadd_d) +TRANS(vfnmsub_s, gen_vvvv, gen_helper_vfnmsub_s) +TRANS(vfnmsub_d, gen_vvvv, gen_helper_vfnmsub_d) + +TRANS(vfmax_s, gen_vvv, gen_helper_vfmax_s) +TRANS(vfmax_d, gen_vvv, gen_helper_vfmax_d) +TRANS(vfmin_s, gen_vvv, gen_helper_vfmin_s) +TRANS(vfmin_d, gen_vvv, gen_helper_vfmin_d) + +TRANS(vfmaxa_s, gen_vvv, gen_helper_vfmaxa_s) +TRANS(vfmaxa_d, gen_vvv, gen_helper_vfmaxa_d) +TRANS(vfmina_s, gen_vvv, gen_helper_vfmina_s) +TRANS(vfmina_d, gen_vvv, gen_helper_vfmina_d) + +TRANS(vflogb_s, gen_vv, gen_helper_vflogb_s) +TRANS(vflogb_d, gen_vv, gen_helper_vflogb_d) + +TRANS(vfclass_s, gen_vv, gen_helper_vfclass_s) +TRANS(vfclass_d, gen_vv, gen_helper_vfclass_d) + +TRANS(vfsqrt_s, gen_vv, gen_helper_vfsqrt_s) +TRANS(vfsqrt_d, gen_vv, gen_helper_vfsqrt_d) +TRANS(vfrecip_s, gen_vv, gen_helper_vfrecip_s) +TRANS(vfrecip_d, gen_vv, gen_helper_vfrecip_d) +TRANS(vfrsqrt_s, gen_vv, gen_helper_vfrsqrt_s) +TRANS(vfrsqrt_d, gen_vv, gen_helper_vfrsqrt_d) diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode index 4cb286ffe5..bcc531dd25 100644 --- a/target/loongarch/insns.decode +++ b/target/loongarch/insns.decode @@ -493,6 +493,7 @@ dbcl 0000 00000010 10101 ............... = @i15 &vv vd vj &vvv vd vj vk &vv_i vd vj imm +&vvvv vd vj vk va =20 # # LSX Formats @@ -506,6 +507,7 @@ dbcl 0000 00000010 10101 ............... = @i15 @vv_ui7 .... ........ ... imm:7 vj:5 vd:5 &vv_i @vv_ui8 .... ........ .. imm:8 vj:5 vd:5 &vv_i @vv_i5 .... ........ ..... imm:s5 vj:5 vd:5 &vv_i +@vvvv .... ........ va:5 vk:5 vj:5 vd:5 &vvvv =20 vadd_b 0111 00000000 10100 ..... ..... ..... @vvv vadd_h 0111 00000000 10101 ..... ..... ..... @vvv @@ -1003,3 +1005,44 @@ vfrstp_b 0111 00010010 10110 ..... ..... ...= .. @vvv vfrstp_h 0111 00010010 10111 ..... ..... ..... @vvv vfrstpi_b 0111 00101001 10100 ..... ..... ..... @vv_ui5 vfrstpi_h 0111 00101001 10101 ..... ..... ..... @vv_ui5 + +vfadd_s 0111 00010011 00001 ..... ..... ..... @vvv +vfadd_d 0111 00010011 00010 ..... ..... ..... @vvv +vfsub_s 0111 00010011 00101 ..... ..... ..... @vvv +vfsub_d 0111 00010011 00110 ..... ..... ..... @vvv +vfmul_s 0111 00010011 10001 ..... ..... ..... @vvv +vfmul_d 0111 00010011 10010 ..... ..... ..... @vvv +vfdiv_s 0111 00010011 10101 ..... ..... ..... @vvv +vfdiv_d 0111 00010011 10110 ..... ..... ..... @vvv + +vfmadd_s 0000 10010001 ..... ..... ..... ..... @vvvv +vfmadd_d 0000 10010010 ..... ..... ..... ..... @vvvv +vfmsub_s 0000 10010101 ..... ..... ..... ..... @vvvv +vfmsub_d 0000 10010110 ..... ..... ..... ..... @vvvv +vfnmadd_s 0000 10011001 ..... ..... ..... ..... @vvvv +vfnmadd_d 0000 10011010 ..... ..... ..... ..... @vvvv +vfnmsub_s 0000 10011101 ..... ..... ..... ..... @vvvv +vfnmsub_d 0000 10011110 ..... ..... ..... ..... @vvvv + +vfmax_s 0111 00010011 11001 ..... ..... ..... @vvv +vfmax_d 0111 00010011 11010 ..... ..... ..... @vvv +vfmin_s 0111 00010011 11101 ..... ..... ..... @vvv +vfmin_d 0111 00010011 11110 ..... ..... ..... @vvv + +vfmaxa_s 0111 00010100 00001 ..... ..... ..... @vvv +vfmaxa_d 0111 00010100 00010 ..... ..... ..... @vvv +vfmina_s 0111 00010100 00101 ..... ..... ..... @vvv +vfmina_d 0111 00010100 00110 ..... ..... ..... @vvv + +vflogb_s 0111 00101001 11001 10001 ..... ..... @vv +vflogb_d 0111 00101001 11001 10010 ..... ..... @vv + +vfclass_s 0111 00101001 11001 10101 ..... ..... @vv +vfclass_d 0111 00101001 11001 10110 ..... ..... @vv + +vfsqrt_s 0111 00101001 11001 11001 ..... ..... @vv +vfsqrt_d 0111 00101001 11001 11010 ..... ..... @vv +vfrecip_s 0111 00101001 11001 11101 ..... ..... @vv +vfrecip_d 0111 00101001 11001 11110 ..... ..... @vv +vfrsqrt_s 0111 00101001 11010 00001 ..... ..... @vv +vfrsqrt_d 0111 00101001 11010 00010 ..... ..... @vv diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h index f01635aed6..c492863cc5 100644 --- a/target/loongarch/internals.h +++ b/target/loongarch/internals.h @@ -31,6 +31,7 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, =20 const char *loongarch_exception_name(int32_t exception); =20 +int ieee_ex_to_loongarch(int xcpt); void restore_fp_status(CPULoongArchState *env); =20 #ifndef CONFIG_USER_ONLY diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c index d6143a0016..b66a896a28 100644 --- a/target/loongarch/lsx_helper.c +++ b/target/loongarch/lsx_helper.c @@ -9,6 +9,8 @@ #include "cpu.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "fpu/softfloat.h" +#include "internals.h" =20 void helper_vadd_q(CPULoongArchState *env, uint32_t vd, uint32_t vj, uint32_t vk) @@ -2329,3 +2331,188 @@ void HELPER(NAME)(CPULoongArchState *env, = \ =20 VFRSTPI(vfrstpi_b, 8, B) VFRSTPI(vfrstpi_h, 16, H) + +static void vec_update_fcsr0_mask(CPULoongArchState *env, + uintptr_t pc, int mask) +{ + int flags =3D get_float_exception_flags(&env->fp_status); + + set_float_exception_flags(0, &env->fp_status); + + flags &=3D ~mask; + + if (flags) { + flags =3D ieee_ex_to_loongarch(flags); + UPDATE_FP_CAUSE(env->fcsr0, flags); + } + + if (GET_FP_ENABLES(env->fcsr0) & flags) { + do_raise_exception(env, EXCCODE_FPE, pc); + } else { + UPDATE_FP_FLAGS(env->fcsr0, flags); + } +} + +static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc) +{ + vec_update_fcsr0_mask(env, pc, 0); +} + +static inline void vec_clear_cause(CPULoongArchState *env) +{ + SET_FP_CAUSE(env->fcsr0, 0); +} + +#define DO_3OP_F(NAME, BIT, T, E, FN) \ +void HELPER(NAME)(CPULoongArchState *env, \ + uint32_t vd, uint32_t vj, uint32_t vk) \ +{ \ + int i; \ + VReg *Vd =3D &(env->fpr[vd].vreg); \ + VReg *Vj =3D &(env->fpr[vj].vreg); \ + VReg *Vk =3D &(env->fpr[vk].vreg); \ + \ + vec_clear_cause(env); \ + for (i =3D 0; i < LSX_LEN/BIT; i++) { \ + Vd->E(i) =3D FN((T)Vj->E(i), (T)Vk->E(i), &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + } \ +} + +DO_3OP_F(vfadd_s, 32, uint32_t, W, float32_add) +DO_3OP_F(vfadd_d, 64, uint64_t, D, float64_add) +DO_3OP_F(vfsub_s, 32, uint32_t, W, float32_sub) +DO_3OP_F(vfsub_d, 64, uint64_t, D, float64_sub) +DO_3OP_F(vfmul_s, 32, uint32_t, W, float32_mul) +DO_3OP_F(vfmul_d, 64, uint64_t, D, float64_mul) +DO_3OP_F(vfdiv_s, 32, uint32_t, W, float32_div) +DO_3OP_F(vfdiv_d, 64, uint64_t, D, float64_div) +DO_3OP_F(vfmax_s, 32, uint32_t, W, float32_maxnum) +DO_3OP_F(vfmax_d, 64, uint64_t, D, float64_maxnum) +DO_3OP_F(vfmin_s, 32, uint32_t, W, float32_minnum) +DO_3OP_F(vfmin_d, 64, uint64_t, D, float64_minnum) +DO_3OP_F(vfmaxa_s, 32, uint32_t, W, float32_maxnummag) +DO_3OP_F(vfmaxa_d, 64, uint64_t, D, float64_maxnummag) +DO_3OP_F(vfmina_s, 32, uint32_t, W, float32_minnummag) +DO_3OP_F(vfmina_d, 64, uint64_t, D, float64_minnummag) + +#define DO_4OP_F(NAME, BIT, T, E, FN, flags) \ +void HELPER(NAME)(CPULoongArchState *env, \ + uint32_t vd, uint32_t vj, uint32_t vk, uint32_t va) \ +{ \ + int i; \ + VReg *Vd =3D &(env->fpr[vd].vreg); \ + VReg *Vj =3D &(env->fpr[vj].vreg); \ + VReg *Vk =3D &(env->fpr[vk].vreg); \ + VReg *Va =3D &(env->fpr[va].vreg); \ + \ + vec_clear_cause(env); \ + for (i =3D 0; i < LSX_LEN/BIT; i++) { \ + Vd->E(i) =3D FN((T)Vj->E(i), (T)Vk->E(i), (T)Va->E(i), \ + flags, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + } \ +} + +DO_4OP_F(vfmadd_s, 32, uint32_t, W, float32_muladd, 0) +DO_4OP_F(vfmadd_d, 64, uint64_t, D, float64_muladd, 0) +DO_4OP_F(vfmsub_s, 32, uint32_t, W, float32_muladd, float_muladd_negate_c) +DO_4OP_F(vfmsub_d, 64, uint64_t, D, float64_muladd, float_muladd_negate_c) +DO_4OP_F(vfnmadd_s, 32, uint32_t, W, float32_muladd, float_muladd_negate_r= esult) +DO_4OP_F(vfnmadd_d, 64, uint64_t, D, float64_muladd, float_muladd_negate_r= esult) +DO_4OP_F(vfnmsub_s, 32, uint32_t, W, float32_muladd, + float_muladd_negate_c | float_muladd_negate_result) +DO_4OP_F(vfnmsub_d, 64, uint64_t, D, float64_muladd, + float_muladd_negate_c | float_muladd_negate_result) + +#define DO_2OP_F(NAME, BIT, T, E, FN) \ +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ +{ \ + int i; \ + VReg *Vd =3D &(env->fpr[vd].vreg); \ + VReg *Vj =3D &(env->fpr[vj].vreg); \ + \ + vec_clear_cause(env); \ + for (i =3D 0; i < LSX_LEN/BIT; i++) { \ + Vd->E(i) =3D FN(env, (T)Vj->E(i)); \ + } \ +} + +#define FLOGB(BIT, T) \ +static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fp, fd; \ + float_status *status =3D &env->fp_status; \ + FloatRoundMode old_mode =3D get_float_rounding_mode(status); \ + \ + set_float_rounding_mode(float_round_down, status); \ + fp =3D float ## BIT ##_log2(fj, status); \ + fd =3D float ## BIT ##_round_to_int(fp, status); \ + set_float_rounding_mode(old_mode, status); \ + vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \ + return fd; \ +} + +FLOGB(32, uint32_t) +FLOGB(64, uint64_t) + +#define FCLASS(NAME, BIT, T, E, FN) \ +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ +{ \ + int i; \ + VReg *Vd =3D &(env->fpr[vd].vreg); \ + VReg *Vj =3D &(env->fpr[vj].vreg); \ + \ + for (i =3D 0; i < LSX_LEN/BIT; i++) { \ + Vd->E(i) =3D FN(env, (T)Vj->E(i)); \ + } \ +} + +FCLASS(vfclass_s, 32, uint32_t, W, helper_fclass_s) +FCLASS(vfclass_d, 64, uint64_t, D, helper_fclass_d) + +#define FSQRT(BIT, T) \ +static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fd; \ + fd =3D float ## BIT ##_sqrt(fj, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +FSQRT(32, uint32_t) +FSQRT(64, uint64_t) + +#define FRECIP(BIT, T) \ +static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fd; \ + fd =3D float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +FRECIP(32, uint32_t) +FRECIP(64, uint64_t) + +#define FRSQRT(BIT, T) \ +static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fd, fp; \ + fp =3D float ## BIT ##_sqrt(fj, &env->fp_status); \ + fd =3D float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +FRSQRT(32, uint32_t) +FRSQRT(64, uint64_t) + +DO_2OP_F(vflogb_s, 32, uint32_t, W, do_flogb_32) +DO_2OP_F(vflogb_d, 64, uint64_t, D, do_flogb_64) +DO_2OP_F(vfsqrt_s, 32, uint32_t, W, do_fsqrt_32) +DO_2OP_F(vfsqrt_d, 64, uint64_t, D, do_fsqrt_64) +DO_2OP_F(vfrecip_s, 32, uint32_t, W, do_frecip_32) +DO_2OP_F(vfrecip_d, 64, uint64_t, D, do_frecip_64) +DO_2OP_F(vfrsqrt_s, 32, uint32_t, W, do_frsqrt_32) +DO_2OP_F(vfrsqrt_d, 64, uint64_t, D, do_frsqrt_64) --=20 2.31.1