From nobody Mon Feb 9 22:39:06 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 153947314698040.07381974329803; Sat, 13 Oct 2018 16:25:46 -0700 (PDT) Received: from localhost ([::1]:46564 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gBTI1-0006Bq-TJ for importer@patchew.org; Sat, 13 Oct 2018 19:25:45 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:57877) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gBTCf-0002TH-LL for qemu-devel@nongnu.org; Sat, 13 Oct 2018 19:20:14 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1gBTCd-0007xL-4a for qemu-devel@nongnu.org; Sat, 13 Oct 2018 19:20:13 -0400 Received: from out3-smtp.messagingengine.com ([66.111.4.27]:57231) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1gBTCc-0007wT-Ta for qemu-devel@nongnu.org; Sat, 13 Oct 2018 19:20:10 -0400 Received: from compute4.internal (compute4.nyi.internal [10.202.2.44]) by mailout.nyi.internal (Postfix) with ESMTP id 8A16B21C1B; Sat, 13 Oct 2018 19:20:09 -0400 (EDT) Received: from mailfrontend2 ([10.202.2.163]) by compute4.internal (MEProxy); Sat, 13 Oct 2018 19:20:09 -0400 Received: from localhost (flamenco.cs.columbia.edu [128.59.20.216]) by mail.messagingengine.com (Postfix) with ESMTPA id 1E526102DE; Sat, 13 Oct 2018 19:20:09 -0400 (EDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=braap.org; h= 
from:to:cc:subject:date:message-id:in-reply-to:references; s= mesmtp; bh=ibaUUqpgQWDre7FcVXKtql0Sh+oAI7RDp9M9SMSkCw4=; b=JKkj9 0UvyBhYC3N8vp/0ublS9ka1eC1M0y1ZNiC2W0A9jElqWlTSMd7tdE4ttSQKlwgNW TN9JyrBiPeyRQCwsoLaB1agep9s65jnr3pZl5IyVSu5DQxxz/3KKmcbuOIek6AhE T+B3dw5OXA7nVBr3Gm83btePWsjIWHAZpjjzaQ= DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:date:from:in-reply-to:message-id :references:subject:to:x-me-proxy:x-me-proxy:x-me-sender :x-me-sender:x-sasl-enc; s=fm1; bh=ibaUUqpgQWDre7FcVXKtql0Sh+oAI 7RDp9M9SMSkCw4=; b=qnU2ZpJ8rbjGcCIMsq/udr6em95b6C1GYuSGZb6J08A2x /XLzTdpiUbUxbR9YJQOPqU78yWqmqNLmTRDQxVdDAklHVZo45wHopKb0Q49OVNKq q68mqdJq7Bj8uWstsMPkLck0mFBdo0GV4bvuOq53Yc7HCyy6pin5RaO/x9tbCc85 DbFMSf3qt9qMzg2btF1sJQ2jS2cj3PEXlKOmdErTjH4P1I1F+VyMzYf5ws8bpYGw sQXXKBz4FZ7Kjc9GOyqfWY/5QLGFSTMolekU5NVcJOgS6TAgwlLecIesaOIPTQsI gg1HfR2iQBWQxUId3ghrPWYFMCA3VWUunt6+HNqIg== X-ME-Sender: X-ME-Proxy: From: "Emilio G. Cota" To: qemu-devel@nongnu.org Date: Sat, 13 Oct 2018 19:19:30 -0400 Message-Id: <20181013231933.28789-11-cota@braap.org> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20181013231933.28789-1-cota@braap.org> References: <20181013231933.28789-1-cota@braap.org> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 66.111.4.27 Subject: [Qemu-devel] [PATCH v5 10/13] hardfloat: implement float32/64 division X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: =?UTF-8?q?Alex=20Benn=C3=A9e?= , Richard Henderson Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Performance results for fp-bench: 1. 
Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz - before: div-single: 34.84 MFlops div-double: 34.04 MFlops - after: div-single: 275.23 MFlops div-double: 216.38 MFlops 2. ARM Aarch64 A57 @ 2.4GHz - before: div-single: 9.33 MFlops div-double: 9.30 MFlops - after: div-single: 51.55 MFlops div-double: 15.09 MFlops 3. IBM POWER8E @ 2.1 GHz - before: div-single: 25.65 MFlops div-double: 24.91 MFlops - after: div-single: 96.83 MFlops div-double: 31.01 MFlops Here setting QEMU_HARDFLOAT_2F64_USE_FP to 1 pays off for x86_64: [1] 215.97 vs [0] 62.15 MFlops Signed-off-by: Emilio G. Cota --- fpu/softfloat.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 78837fa9d8..8ef0571c6e 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1678,7 +1678,8 @@ float16 float16_div(float16 a, float16 b, float_statu= s *status) return float16_round_pack_canonical(pr, status); } =20 -float32 float32_div(float32 a, float32 b, float_status *status) +static float32 QEMU_SOFTFLOAT_ATTR +soft_float32_div(float32 a, float32 b, float_status *status) { FloatParts pa =3D float32_unpack_canonical(a, status); FloatParts pb =3D float32_unpack_canonical(b, status); @@ -1687,7 +1688,8 @@ float32 float32_div(float32 a, float32 b, float_statu= s *status) return float32_round_pack_canonical(pr, status); } =20 -float64 float64_div(float64 a, float64 b, float_status *status) +static float64 QEMU_SOFTFLOAT_ATTR +soft_float64_div(float64 a, float64 b, float_status *status) { FloatParts pa =3D float64_unpack_canonical(a, status); FloatParts pb =3D float64_unpack_canonical(b, status); @@ -1696,6 +1698,88 @@ float64 float64_div(float64 a, float64 b, float_stat= us *status) return float64_round_pack_canonical(pr, status); } =20 +static float float_div(float a, float b) +{ + return a / b; +} + +static double double_div(double a, double b) +{ + return a / b; +} + +static bool f32_div_pre(float32 a, float32 b, const struct 
float_status *s) +{ + return likely(float32_is_zero_or_normal(a) && + float32_is_normal(b) && + can_use_fpu(s)); +} + +static bool f64_div_pre(float64 a, float64 b, const struct float_status *s) +{ + return likely(float64_is_zero_or_normal(a) && + float64_is_normal(b) && + can_use_fpu(s)); +} + +static bool float_div_pre(float a, float b, const struct float_status *s) +{ + return likely((fpclassify(a) =3D=3D FP_NORMAL || fpclassify(a) =3D=3D = FP_ZERO) && + fpclassify(b) =3D=3D FP_NORMAL && + can_use_fpu(s)); +} + +static bool double_div_pre(double a, double b, const struct float_status *= s) +{ + return likely((fpclassify(a) =3D=3D FP_NORMAL || fpclassify(a) =3D=3D = FP_ZERO) && + fpclassify(b) =3D=3D FP_NORMAL && + can_use_fpu(s)); +} + +static bool f32_div_post(float32 a, float32 b, const struct float_status *= s) +{ + return !float32_is_zero(a); +} + +static bool f64_div_post(float64 a, float64 b, const struct float_status *= s) +{ + return !float64_is_zero(a); +} + +static bool float_div_post(float a, float b, const struct float_status *s) +{ + return fpclassify(a) !=3D FP_ZERO; +} + +static bool double_div_post(double a, double b, const struct float_status = *s) +{ + return fpclassify(a) !=3D FP_ZERO; +} + +float32 __attribute__((flatten)) +float32_div(float32 a, float32 b, float_status *s) +{ + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return float_gen2(a, b, s, float_div, soft_float32_div, float_div_= pre, + float_div_post, NULL, NULL); + } else { + return f32_gen2(a, b, s, float_div, soft_float32_div, f32_div_pre, + f32_div_post, NULL, NULL); + } +} + +float64 __attribute__((flatten)) +float64_div(float64 a, float64 b, float_status *s) +{ + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return double_gen2(a, b, s, double_div, soft_float64_div, + double_div_pre, double_div_post, NULL, NULL); + } else { + return f64_gen2(a, b, s, double_div, soft_float64_div, f64_div_pre, + f64_div_post, NULL, NULL); + } +} + /* * Float to Float conversions * --=20 2.17.1