From nobody Tue Feb 10 08:03:31 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org ARC-Seal: i=1; a=rsa-sha256; t=1562094569; cv=none; d=zoho.com; s=zohoarc; b=l9wfpOFXqvfn4J24UmMCgJh6PkgSukHYFdsX7KjRG+5dZS2eG3ZbSYkWgUJGt3lNdUwFCYc4h4lMtWYZtc1fvHps+wCuOcF52z1dY8DbBX973ehGEBORpXAWdHNBEWNnDfRq9s73twmo96EW/nzFv80FDOt3oxYO/QdoZ3KIU/I= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zoho.com; s=zohoarc; t=1562094569; h=Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:Message-ID:References:Sender:Subject:To:ARC-Authentication-Results; bh=Bumo5oX1lFjxM9cHwA8u/odlpuNaK+M3HoQtTuE5SBs=; b=Y3SrI3foV/RdXwpZWlQYxdfV90akrSyVwsMsJhLQvi8Lyeq+E+mqYM7OJZF9v4piI5il0W6+9NhSAU6kpOkU1XYqvCAdBoIlupas1NZHNR7jo8p+iMn5EqgHeO3mD2kd34eFxov+xah7rUdEdEoSJ/c+porxg67s+LFtqe73FQk= ARC-Authentication-Results: i=1; mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1562094569959114.73137938659102; Tue, 2 Jul 2019 12:09:29 -0700 (PDT) Received: from localhost ([::1]:56404 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hiO9c-0002Eu-5y for importer@patchew.org; Tue, 02 Jul 2019 15:09:25 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:50480) by lists.gnu.org with esmtp (Exim 4.86_2) (envelope-from ) id 1hiLlQ-00038x-Bh for qemu-devel@nongnu.org; Tue, 02 Jul 2019 12:36:18 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1hiLlO-0000aS-0Q for qemu-devel@nongnu.org; Tue, 02 Jul 2019 12:36:16 -0400 Received: from mx2.rt-rk.com ([89.216.37.149]:59850 helo=mail.rt-rk.com) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1hiLlN-0007pv-9W for qemu-devel@nongnu.org; Tue, 02 Jul 2019 12:36:13 -0400 Received: from localhost (localhost [127.0.0.1]) by mail.rt-rk.com (Postfix) with ESMTP id EB68F1A2245; Tue, 2 Jul 2019 18:35:33 +0200 (CEST) Received: from rtrkw774-lin.domain.local (rtrkw774-lin.domain.local [10.10.13.43]) by mail.rt-rk.com (Postfix) with ESMTPSA id B7AE31A221C; Tue, 2 Jul 2019 18:35:33 +0200 (CEST) X-Virus-Scanned: amavisd-new at rt-rk.com From: Aleksandar Markovic To: qemu-devel@nongnu.org Date: Tue, 2 Jul 2019 18:35:27 +0200 Message-Id: <1562085328-5126-12-git-send-email-aleksandar.markovic@rt-rk.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1562085328-5126-1-git-send-email-aleksandar.markovic@rt-rk.com> References: <1562085328-5126-1-git-send-email-aleksandar.markovic@rt-rk.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x X-Received-From: 89.216.37.149 Subject: [Qemu-devel] [PULL 11/12] target/mips: Unroll loops for MSA float max/min instructions X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: peter.maydell@linaro.org, amarkovic@wavecomp.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" From: Aleksandar Markovic Slight preformance improvement for MSA float max/min instructions. Signed-off-by: Aleksandar Markovic Reviewed-by: Aleksandar Rikalo Message-Id: <1562068213-11307-7-git-send-email-aleksandar.markovic@rt-rk.co= m> --- target/mips/msa_helper.c | 198 ++++++++++++++++++++++++++++++-------------= ---- 1 file changed, 125 insertions(+), 73 deletions(-) diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c index 5377bc1..97f840b 100644 --- a/target/mips/msa_helper.c +++ b/target/mips/msa_helper.c @@ -3850,35 +3850,65 @@ void helper_msa_fmin_df(CPUMIPSState *env, uint32_t= df, uint32_t wd, wr_t *pwd =3D &(env->active_fpu.fpr[wd].wr); wr_t *pws =3D &(env->active_fpu.fpr[ws].wr); wr_t *pwt =3D &(env->active_fpu.fpr[wt].wr); - uint32_t i; =20 clear_msacsr_cause(env); =20 - switch (df) { - case DF_WORD: - for (i =3D 0; i < DF_ELEMENTS(DF_WORD); i++) { - if (NUMBER_QNAN_PAIR(pws->w[i], pwt->w[i], 32, status)) { - MSA_FLOAT_MAXOP(pwx->w[i], min, pws->w[i], pws->w[i], 32); - } else if (NUMBER_QNAN_PAIR(pwt->w[i], pws->w[i], 32, status))= { - MSA_FLOAT_MAXOP(pwx->w[i], min, pwt->w[i], pwt->w[i], 32); - } else { - MSA_FLOAT_MAXOP(pwx->w[i], min, pws->w[i], pwt->w[i], 32); - } + if (df =3D=3D DF_WORD) { + + if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32); } - break; - case DF_DOUBLE: - for (i =3D 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { - if (NUMBER_QNAN_PAIR(pws->d[i], pwt->d[i], 64, status)) { - MSA_FLOAT_MAXOP(pwx->d[i], min, pws->d[i], pws->d[i], 64); - } else if (NUMBER_QNAN_PAIR(pwt->d[i], pws->d[i], 64, status))= { - MSA_FLOAT_MAXOP(pwx->d[i], min, pwt->d[i], pwt->d[i], 64); - } else { - MSA_FLOAT_MAXOP(pwx->d[i], min, pws->d[i], pwt->d[i], 64); - } + + if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32); } - break; - default: + + if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32); + } + + if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32); + } + + } else if (df =3D=3D DF_DOUBLE) { + + if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64); + } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64); + } else { + MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64); + } + + if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64); + } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64); + } else { + MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64); + } + + } else { + assert(0); + } =20 check_msacsr_cause(env, GETPC()); @@ -3894,22 +3924,18 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32= _t df, uint32_t wd, wr_t *pwd =3D &(env->active_fpu.fpr[wd].wr); wr_t *pws =3D &(env->active_fpu.fpr[ws].wr); wr_t *pwt =3D &(env->active_fpu.fpr[wt].wr); - uint32_t i; =20 clear_msacsr_cause(env); =20 - switch (df) { - case DF_WORD: - for (i =3D 0; i < DF_ELEMENTS(DF_WORD); i++) { - FMAXMIN_A(min, max, pwx->w[i], pws->w[i], pwt->w[i], 32, statu= s); - } - break; - case DF_DOUBLE: - for (i =3D 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { - FMAXMIN_A(min, max, pwx->d[i], pws->d[i], pwt->d[i], 64, statu= s); - } - break; - default: + if (df =3D=3D DF_WORD) { + FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status); + FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status); + FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status); + FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status); + } else if (df =3D=3D DF_DOUBLE) { + FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status); + FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status); + } else { assert(0); } =20 @@ -3921,40 +3947,70 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32= _t df, uint32_t wd, void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t ws, uint32_t wt) { - float_status *status =3D &env->active_tc.msa_fp_status; + float_status *status =3D &env->active_tc.msa_fp_status; wr_t wx, *pwx =3D &wx; wr_t *pwd =3D &(env->active_fpu.fpr[wd].wr); wr_t *pws =3D &(env->active_fpu.fpr[ws].wr); wr_t *pwt =3D &(env->active_fpu.fpr[wt].wr); - uint32_t i; =20 clear_msacsr_cause(env); =20 - switch (df) { - case DF_WORD: - for (i =3D 0; i < DF_ELEMENTS(DF_WORD); i++) { - if (NUMBER_QNAN_PAIR(pws->w[i], pwt->w[i], 32, status)) { - MSA_FLOAT_MAXOP(pwx->w[i], max, pws->w[i], pws->w[i], 32); - } else if (NUMBER_QNAN_PAIR(pwt->w[i], pws->w[i], 32, status))= { - MSA_FLOAT_MAXOP(pwx->w[i], max, pwt->w[i], pwt->w[i], 32); - } else { - MSA_FLOAT_MAXOP(pwx->w[i], max, pws->w[i], pwt->w[i], 32); - } + if (df =3D=3D DF_WORD) { + + if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32); } - break; - case DF_DOUBLE: - for (i =3D 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { - if (NUMBER_QNAN_PAIR(pws->d[i], pwt->d[i], 64, status)) { - MSA_FLOAT_MAXOP(pwx->d[i], max, pws->d[i], pws->d[i], 64); - } else if (NUMBER_QNAN_PAIR(pwt->d[i], pws->d[i], 64, status))= { - MSA_FLOAT_MAXOP(pwx->d[i], max, pwt->d[i], pwt->d[i], 64); - } else { - MSA_FLOAT_MAXOP(pwx->d[i], max, pws->d[i], pwt->d[i], 64); - } + + if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32); } - break; - default: + + if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32); + } + + if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32); + } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) { + MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32); + } else { + MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32); + } + + } else if (df =3D=3D DF_DOUBLE) { + + if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64); + } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64); + } else { + MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64); + } + + if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64); + } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) { + MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64); + } else { + MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64); + } + + } else { + assert(0); + } =20 check_msacsr_cause(env, GETPC()); @@ -3970,22 +4026,18 @@ void helper_msa_fmax_a_df(CPUMIPSState *env, uint32= _t df, uint32_t wd, wr_t *pwd =3D &(env->active_fpu.fpr[wd].wr); wr_t *pws =3D &(env->active_fpu.fpr[ws].wr); wr_t *pwt =3D &(env->active_fpu.fpr[wt].wr); - uint32_t i; =20 clear_msacsr_cause(env); =20 - switch (df) { - case DF_WORD: - for (i =3D 0; i < DF_ELEMENTS(DF_WORD); i++) { - FMAXMIN_A(max, min, pwx->w[i], pws->w[i], pwt->w[i], 32, statu= s); - } - break; - case DF_DOUBLE: - for (i =3D 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { - FMAXMIN_A(max, min, pwx->d[i], pws->d[i], pwt->d[i], 64, statu= s); - } - break; - default: + if (df =3D=3D DF_WORD) { + FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status); + FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status); + FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status); + FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status); + } else if (df =3D=3D DF_DOUBLE) { + FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status); + FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status); + } else { assert(0); } =20 --=20 2.7.4