From nobody Tue Feb 10 08:26:58 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1528768595889776.5716722083265; Mon, 11 Jun 2018 18:56:35 -0700 (PDT) Received: from localhost ([::1]:52253 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fSYXz-0001QT-4a for importer@patchew.org; Mon, 11 Jun 2018 21:56:35 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40800) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fSYQm-00046e-Q8 for qemu-devel@nongnu.org; Mon, 11 Jun 2018 21:49:13 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fSYQi-0003MO-9j for qemu-devel@nongnu.org; Mon, 11 Jun 2018 21:49:08 -0400 Received: from out1-smtp.messagingengine.com ([66.111.4.25]:45721) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fSYQi-0003LE-0V for qemu-devel@nongnu.org; Mon, 11 Jun 2018 21:49:04 -0400 Received: from compute4.internal (compute4.nyi.internal [10.202.2.44]) by mailout.nyi.internal (Postfix) with ESMTP id 634B921D81; Mon, 11 Jun 2018 21:49:03 -0400 (EDT) Received: from mailfrontend2 ([10.202.2.163]) by compute4.internal (MEProxy); Mon, 11 Jun 2018 21:49:03 -0400 Received: from localhost (flamenco.cs.columbia.edu [128.59.20.216]) by mail.messagingengine.com (Postfix) with ESMTPA id 1E21E10266; Mon, 11 Jun 2018 21:49:03 -0400 (EDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=braap.org; h=cc 
:date:from:in-reply-to:message-id:references:subject:to :x-me-sender:x-me-sender:x-sasl-enc; s=mesmtp; bh=3b6ajoTs1izHZZ lG5rWS1xP1WwDE6H5Aak5ItkBclgk=; b=el4+QtQXEZPPZve7dzu5BXhLazvyIM te79RlAgm8CVUaktugzA5i1h3Bo1QBpiKnwVOXM9+tAX8xlYanNYnVEndxIIRNDz 89V2nm/gbMCXHwFHzhkCaZQIfm6r0FShdqARJINGc7GI7qoBv6l0k/GtvmUddTsB hAi0fuuzAluJg= DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:date:from:in-reply-to:message-id :references:subject:to:x-me-sender:x-me-sender:x-sasl-enc; s= fm3; bh=3b6ajoTs1izHZZlG5rWS1xP1WwDE6H5Aak5ItkBclgk=; b=rVnrl2xd KeUhdXOhPnngVhL8qogf5lJpEaIyqcutNMYpKRTDqLzPi8WkfArAH/7jXRkXiVVD mkptFvDvAqy/ZdsoEeVoAh83ktjZGVXd4khFx1NUgajaGTb3LPnKHHTpNj6H03tk st/0/vRIZ7y3Y/+vf1fehnrsNs0c5HtTZMBG+4T4PwCIIdnHJq40tNemHylEcmks h8/iAx2XykKsNfryypTjKYnkWZaujJgo/NBvzUYS3J+yt0Bn15seGtni3bPVoxIZ mpIhO05praqygO8G8oWM7YkNb2ISD+E4Mdfq9e2898cdWuiQ/L70BOVcYTArwXb+ xo8cS3dwPyOMSw== X-ME-Proxy: X-ME-Proxy: X-ME-Proxy: X-ME-Proxy: X-ME-Proxy: X-ME-Proxy: X-ME-Sender: From: "Emilio G. 
Cota" To: qemu-devel@nongnu.org Date: Mon, 11 Jun 2018 21:48:57 -0400 Message-Id: <1528768140-17894-12-git-send-email-cota@braap.org> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1528768140-17894-1-git-send-email-cota@braap.org> References: <1528768140-17894-1-git-send-email-cota@braap.org> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 66.111.4.25 Subject: [Qemu-devel] [PATCH v4 11/14] hardfloat: support float32/64 division X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Peter Maydell , Mark Cave-Ayland , Richard Henderson , Laurent Vivier , Paolo Bonzini , =?UTF-8?q?Alex=20Benn=C3=A9e?= , Aurelien Jarno Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Performance results for fp-bench: 1. Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz - before: div-single: 34.84 MFlops div-double: 34.04 MFlops - after: div-single: 275.23 MFlops div-double: 216.38 MFlops 2. ARM Aarch64 A57 @ 2.4GHz - before: div-single: 9.33 MFlops div-double: 9.30 MFlops - after: div-single: 51.55 MFlops div-double: 15.09 MFlops 3. IBM POWER8E @ 2.1 GHz - before: div-single: 25.65 MFlops div-double: 24.91 MFlops - after: div-single: 96.83 MFlops div-double: 31.01 MFlops Here setting QEMU_HARDFLOAT_2F64_USE_FP to 1 pays off for x86_64: [1] 215.97 vs [0] 62.15 MFlops Signed-off-by: Emilio G.
Cota --- fpu/softfloat.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++= ++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 4fcabf6..fa6c3b6 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1659,7 +1659,8 @@ float16 float16_div(float16 a, float16 b, float_statu= s *status) return float16_round_pack_canonical(pr, status); } =20 -float32 float32_div(float32 a, float32 b, float_status *status) +static float32 QEMU_SOFTFLOAT_ATTR +soft_float32_div(float32 a, float32 b, float_status *status) { FloatParts pa =3D float32_unpack_canonical(a, status); FloatParts pb =3D float32_unpack_canonical(b, status); @@ -1668,7 +1669,8 @@ float32 float32_div(float32 a, float32 b, float_statu= s *status) return float32_round_pack_canonical(pr, status); } =20 -float64 float64_div(float64 a, float64 b, float_status *status) +static float64 QEMU_SOFTFLOAT_ATTR +soft_float64_div(float64 a, float64 b, float_status *status) { FloatParts pa =3D float64_unpack_canonical(a, status); FloatParts pb =3D float64_unpack_canonical(b, status); @@ -1677,6 +1679,88 @@ float64 float64_div(float64 a, float64 b, float_stat= us *status) return float64_round_pack_canonical(pr, status); } =20 +static float float_div(float a, float b) +{ + return a / b; +} + +static double double_div(double a, double b) +{ + return a / b; +} + +static bool f32_div_pre(float32 a, float32 b, const struct float_status *s) +{ + return likely(float32_is_zero_or_normal(a) && + float32_is_normal(b) && + can_use_fpu(s)); +} + +static bool f64_div_pre(float64 a, float64 b, const struct float_status *s) +{ + return likely(float64_is_zero_or_normal(a) && + float64_is_normal(b) && + can_use_fpu(s)); +} + +static bool float_div_pre(float a, float b, const struct float_status *s) +{ + return likely((fpclassify(a) =3D=3D FP_NORMAL || fpclassify(a) =3D=3D = FP_ZERO) && + fpclassify(b) =3D=3D FP_NORMAL && + can_use_fpu(s)); +} + +static bool double_div_pre(double a, 
double b, const struct float_status *= s) +{ + return likely((fpclassify(a) =3D=3D FP_NORMAL || fpclassify(a) =3D=3D = FP_ZERO) && + fpclassify(b) =3D=3D FP_NORMAL && + can_use_fpu(s)); +} + +static bool f32_div_post(float32 a, float32 b, const struct float_status *= s) +{ + return !float32_is_zero(a); +} + +static bool f64_div_post(float64 a, float64 b, const struct float_status *= s) +{ + return !float64_is_zero(a); +} + +static bool float_div_post(float a, float b, const struct float_status *s) +{ + return fpclassify(a) !=3D FP_ZERO; +} + +static bool double_div_post(double a, double b, const struct float_status = *s) +{ + return fpclassify(a) !=3D FP_ZERO; +} + +float32 __attribute__((flatten)) +float32_div(float32 a, float32 b, float_status *s) +{ + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return float_gen2(a, b, s, float_div, soft_float32_div, float_div_= pre, + float_div_post, NULL, NULL); + } else { + return f32_gen2(a, b, s, float_div, soft_float32_div, f32_div_pre, + f32_div_post, NULL, NULL); + } +} + +float64 __attribute__((flatten)) +float64_div(float64 a, float64 b, float_status *s) +{ + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return double_gen2(a, b, s, double_div, soft_float64_div, + double_div_pre, double_div_post, NULL, NULL); + } else { + return f64_gen2(a, b, s, double_div, soft_float64_div, f64_div_pre, + f64_div_post, NULL, NULL); + } +} + /* * Float to Float conversions * --=20 2.7.4