From nobody Thu Oct 30 15:17:03 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=linaro.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 152608693192025.95660126010796; Fri, 11 May 2018 18:02:11 -0700 (PDT) Received: from localhost ([::1]:59652 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fHIvH-0005LE-8E for importer@patchew.org; Fri, 11 May 2018 21:02:07 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:33410) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fHIdK-0004Xq-Sn for qemu-devel@nongnu.org; Fri, 11 May 2018 20:43:36 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fHIdH-0002uj-Nz for qemu-devel@nongnu.org; Fri, 11 May 2018 20:43:34 -0400 Received: from mail-pg0-x241.google.com ([2607:f8b0:400e:c05::241]:45673) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1fHIdH-0002uW-GW for qemu-devel@nongnu.org; Fri, 11 May 2018 20:43:31 -0400 Received: by mail-pg0-x241.google.com with SMTP id w3-v6so3074280pgv.12 for ; Fri, 11 May 2018 17:43:31 -0700 (PDT) Received: from cloudburst.twiddle.net (97-113-2-170.tukw.qwest.net. 
[97.113.2.170]) by smtp.gmail.com with ESMTPSA id k84-v6sm10756406pfh.93.2018.05.11.17.43.28 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Fri, 11 May 2018 17:43:29 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=o3UcWRqkJ7P7qbc1bIGacUuqmFllGIe/9UbjgPzL8Ao=; b=aJUhKdriHlXr8G8ABxgilPhudTwlubFrHU/9ICtwicQfbZb/ltN0nycLBT2M2Uh7Uz gbHEJ7Mc4Y5UkM7NeUL3vv9hYBArWb5NRfLmbvql49Yjy9UXoFTAFTZ/8Rnhj/uVGS8j jOM1FcZs9F4fkMFBqUveGp65NcqL5tirLJh+Y= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=o3UcWRqkJ7P7qbc1bIGacUuqmFllGIe/9UbjgPzL8Ao=; b=hVxfC/JDx5Qd1gYC+xESeE1WcXstieULnWHN5YfuYxmVPvYuTTPoKBO6I6jGO4jd7U kxaA/cNZEE0IAQn1AN6VMfHPO8/VhfxghfprGVhVxkspwzGH6O7YH1Yc8JQQqKxvFtAN /Y732Kobe3XAztKuF64YQg1OB5tYMZcV3YBCn2lRIq8DwPYZp7buKQKRAhxliURnnRpG Mo6KdTa3HXcqTk1/mt00ccRmqu4B4oMfHaiv05UGzrLwa5KVuSfjhjjqkziPWBjbO88a Q8ns/YoOavcELgCMy0gA9wrajn4zxd4LdWVdTH98eykSzqwYjAY8MOg+ywU7rM8jY/HG L5Jw== X-Gm-Message-State: ALKqPwdx6cYyThoDAoi+u8/V7Yb1Y2BhZy2t3JPFXp04y7dfYIm5yZfP g/4LmaoriGcQOHyXGYqJdrxfgbBLwLk= X-Google-Smtp-Source: AB8JxZrKkm82+vdowL+R6GZGDzXQHqXqnelBT6Tbj2MNGtH8tINilZxHsYQ/iveCqIp1WmgkCBpm8g== X-Received: by 2002:a62:e04c:: with SMTP id f73-v6mr1007270pfh.88.1526085810122; Fri, 11 May 2018 17:43:30 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Fri, 11 May 2018 17:42:55 -0700 Message-Id: <20180512004311.9299-12-richard.henderson@linaro.org> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20180512004311.9299-1-richard.henderson@linaro.org> References: <20180512004311.9299-1-richard.henderson@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable X-detected-operating-system: by 
eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 2607:f8b0:400e:c05::241 Subject: [Qemu-devel] [PATCH v2 11/27] fpu/softfloat: support ARM Alternative half-precision X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: peter.maydell@linaro.org, alex.bennee@linaro.org Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) X-ZohoMail: RDKM_2 RSF_0 Z_629925259 SPT_0 From: Alex Benn=C3=A9e For float16, ARM supports an alternative half-precision format which sacrifices the ability to represent NaN/Inf in return for a higher dynamic range. To support this, I've added an additional FloatFmt (float16_params_ahp). The new FloatFmt flag (arm_althp) is then used to modify the behaviour of canonicalize and round_canonical with respect to representation and exception raising. Finally, the float16_to_floatN and floatN_to_float16 conversion routines select the new alternative FloatFmt when !ieee. 
Signed-off-by: Alex Benn=C3=A9e Signed-off-by: Richard Henderson --- v3 - squash NaN to 0 if destination is AHP F16 v4 - handle inf -> ahp max in float_to_float not round_canonical - assert no nan and inf for ahp in round_canonical - check ahp before snan in float_to_float --- fpu/softfloat.c | 95 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 14 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index aa219223ff..15a272759d 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -211,8 +211,10 @@ typedef struct { * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_PO= INT * The following are computed based the size of fraction * frac_lsb: least significant bit of fraction - * fram_lsbm1: the bit bellow the least significant bit (for rounding) + * frac_lsbm1: the bit bellow the least significant bit (for rounding) * round_mask/roundeven_mask: masks used for rounding + * The following optional modifiers are available: + * arm_althp: handle ARM Alternative Half Precision */ typedef struct { int exp_size; @@ -224,6 +226,7 @@ typedef struct { uint64_t frac_lsbm1; uint64_t round_mask; uint64_t roundeven_mask; + bool arm_althp; } FloatFmt; =20 /*------------------------------------------------------------------------= ---- @@ -252,6 +255,11 @@ static const FloatFmt float16_params =3D { FLOAT_PARAMS(5, 10) }; =20 +static const FloatFmt float16_params_ahp =3D { + FLOAT_PARAMS(5, 10), + .arm_althp =3D true +}; + static const FloatFmt float32_params =3D { FLOAT_PARAMS(8, 23) }; @@ -315,7 +323,7 @@ static inline float64 float64_pack_raw(FloatParts p) static FloatParts canonicalize(FloatParts part, const FloatFmt *parm, float_status *status) { - if (part.exp =3D=3D parm->exp_max) { + if (part.exp =3D=3D parm->exp_max && !parm->arm_althp) { if (part.frac =3D=3D 0) { part.cls =3D float_class_inf; } else { @@ -404,7 +412,15 @@ static FloatParts round_canonical(FloatParts p, float_= status *s, } frac >>=3D frac_shift; 
=20 - if (unlikely(exp >=3D exp_max)) { + if (parm->arm_althp) { + /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ + if (unlikely(exp > exp_max)) { + /* Overflow. Return the maximum normal. */ + flags =3D float_flag_invalid; + exp =3D exp_max; + frac =3D -1; + } + } else if (unlikely(exp >=3D exp_max)) { flags |=3D float_flag_overflow | float_flag_inexact; if (overflow_norm) { exp =3D exp_max - 1; @@ -455,12 +471,14 @@ static FloatParts round_canonical(FloatParts p, float= _status *s, =20 case float_class_inf: do_inf: + assert(!parm->arm_althp); exp =3D exp_max; frac =3D 0; break; =20 case float_class_qnan: case float_class_snan: + assert(!parm->arm_althp); exp =3D exp_max; frac >>=3D parm->frac_shift; break; @@ -475,14 +493,27 @@ static FloatParts round_canonical(FloatParts p, float= _status *s, return p; } =20 +/* Explicit FloatFmt version */ +static FloatParts float16a_unpack_canonical(float16 f, float_status *s, + const FloatFmt *params) +{ + return canonicalize(float16_unpack_raw(f), params, s); +} + static FloatParts float16_unpack_canonical(float16 f, float_status *s) { - return canonicalize(float16_unpack_raw(f), &float16_params, s); + return float16a_unpack_canonical(f, s, &float16_params); +} + +static float16 float16a_round_pack_canonical(FloatParts p, float_status *s, + const FloatFmt *params) +{ + return float16_pack_raw(round_canonical(p, s, params)); } =20 static float16 float16_round_pack_canonical(FloatParts p, float_status *s) { - return float16_pack_raw(round_canonical(p, s, &float16_params)); + return float16a_round_pack_canonical(p, s, &float16_params); } =20 static FloatParts float32_unpack_canonical(float32 f, float_status *s) @@ -1174,7 +1205,33 @@ static FloatParts float_to_float(FloatParts a, const FloatFmt *srcf, const FloatFmt *dst= f, float_status *s) { - if (is_nan(a.cls)) { + if (dstf->arm_althp) { + switch (a.cls) { + case float_class_qnan: + case float_class_snan: + /* There is no NaN in the destination format. 
Raise Invalid + * and return a zero with the sign of the input NaN. + */ + s->float_exception_flags |=3D float_flag_invalid; + a.cls =3D float_class_zero; + a.frac =3D 0; + a.exp =3D 0; + break; + + case float_class_inf: + /* There is no Inf in the destination format. Raise Invalid + * and return the maximum normal with the correct sign. + */ + s->float_exception_flags |=3D float_flag_invalid; + a.cls =3D float_class_normal; + a.exp =3D dstf->exp_max; + a.frac =3D ((1ull << dstf->frac_size) - 1) << dstf->frac_shift; + break; + + default: + break; + } + } else if (is_nan(a.cls)) { if (is_snan(a.cls)) { s->float_exception_flags |=3D float_flag_invalid; a =3D parts_silence_nan(a, s); @@ -1186,25 +1243,34 @@ static FloatParts float_to_float(FloatParts a, return a; } =20 +/* + * Currently non-ieee implies ARM Alternative Half Precision handling + * for float16 values. If more are needed we'll need to expand the API + * into softfloat. + */ + float32 float16_to_float32(float16 a, bool ieee, float_status *s) { - FloatParts p =3D float16_unpack_canonical(a, s); - FloatParts pr =3D float_to_float(p, &float16_params, &float32_params, = s); + const FloatFmt *fmt16 =3D ieee ? &float16_params : &float16_params_ahp; + FloatParts p =3D float16a_unpack_canonical(a, s, fmt16); + FloatParts pr =3D float_to_float(p, fmt16, &float32_params, s); return float32_round_pack_canonical(pr, s); } =20 float64 float16_to_float64(float16 a, bool ieee, float_status *s) { - FloatParts p =3D float16_unpack_canonical(a, s); - FloatParts pr =3D float_to_float(p, &float16_params, &float64_params, = s); + const FloatFmt *fmt16 =3D ieee ? &float16_params : &float16_params_ahp; + FloatParts p =3D float16a_unpack_canonical(a, s, fmt16); + FloatParts pr =3D float_to_float(p, fmt16, &float64_params, s); return float64_round_pack_canonical(pr, s); } =20 float16 float32_to_float16(float32 a, bool ieee, float_status *s) { + const FloatFmt *fmt16 =3D ieee ? 
&float16_params : &float16_params_ahp; FloatParts p =3D float32_unpack_canonical(a, s); - FloatParts pr =3D float_to_float(p, &float32_params, &float16_params, = s); - return float16_round_pack_canonical(pr, s); + FloatParts pr =3D float_to_float(p, &float32_params, fmt16, s); + return float16a_round_pack_canonical(pr, s, fmt16); } =20 float64 float32_to_float64(float32 a, float_status *s) @@ -1216,9 +1282,10 @@ float64 float32_to_float64(float32 a, float_status *= s) =20 float16 float64_to_float16(float64 a, bool ieee, float_status *s) { + const FloatFmt *fmt16 =3D ieee ? &float16_params : &float16_params_ahp; FloatParts p =3D float64_unpack_canonical(a, s); - FloatParts pr =3D float_to_float(p, &float64_params, &float16_params, = s); - return float16_round_pack_canonical(pr, s); + FloatParts pr =3D float_to_float(p, &float64_params, fmt16, s); + return float16a_round_pack_canonical(pr, s, fmt16); } =20 float32 float64_to_float32(float64 a, float_status *s) --=20 2.17.0