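From: Max Chou <max.chou@sifive.com>

Add softfloat support for the OCP FP8 E4M3 format: the float8_e4m3 type,
conversions to bfloat16 and from bfloat16/float32, and a new
float_expmax_e4m3 kind in which exp==max with frac==max encodes NaN and
there is no infinity.  On overflow (including an infinite input) the
result saturates to the maximum normal when requested; otherwise it is
the default NaN.

Signed-off-by: Max Chou <max.chou@sifive.com>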
[rth: Split out of a larger patch; adjust overflow detection.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-types.h | 1 +
include/fpu/softfloat.h | 4 +++
fpu/softfloat.c | 62 +++++++++++++++++++++++++++++++++++
fpu/softfloat-parts.c.inc | 45 +++++++++++++++++++++++--
4 files changed, 109 insertions(+), 3 deletions(-)
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index be7e2de6e3..9c84a101e5 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -122,6 +122,7 @@ typedef uint16_t bfloat16;
/*
* Open Compute Project (OCP) Microscaling Formats
*/
+typedef uint8_t float8_e4m3;
typedef uint8_t float8_e5m2;
/*
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 4385462992..31d3f76d3f 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -193,6 +193,10 @@ float128 uint128_to_float128(Int128, float_status *status);
| OCP FP8 conversion routines.
*----------------------------------------------------------------------------*/
+bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status);
+float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool sat, float_status *status);
+float8_e4m3 float32_to_float8_e4m3(float32, bool sat, float_status *status);
+
bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status);
float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool sat, float_status *status);
float8_e5m2 float32_to_float8_e5m2(float32, bool sat, float_status *status);
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 0dc769283d..6e21882ab2 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -528,6 +528,8 @@ typedef enum __attribute__((__packed__)) {
float_expmax_ieee,
/* exp==max is a normal number; no infinity or nan representation. */
float_expmax_normal,
+ /* exp==max: frac==max is a NaN, otherwise a normal number; no infinity. */
+ float_expmax_e4m3,
} FloatFmtExpMaxKind;
/*
@@ -572,6 +574,14 @@ typedef struct {
.frac_shift = (-F - 1) & 63, \
.round_mask = (1ull << ((-F - 1) & 63)) - 1
+static const FloatFmt float8_e4m3_params = {
+ FLOAT_PARAMS(4, 3),
+ .exp_max_kind = float_expmax_e4m3
+};
+
+/* Largest normal fraction: 0b110 << frac_shift, with the implicit bit set */
+#define E4M3_NORMAL_FRAC_MAX 0xe000000000000000ull
+
static const FloatFmt float8_e5m2_params = {
FLOAT_PARAMS(5, 2)
};
@@ -631,6 +641,11 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
};
}
+static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f)
+{
+ unpack_raw64(p, &float8_e4m3_params, f);
+}
+
static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f)
{
unpack_raw64(p, &float8_e5m2_params, f);
@@ -693,6 +708,11 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
return ret;
}
+static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const FloatParts64 *p)
+{
+ return pack_raw64(p, &float8_e4m3_params);
+}
+
static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p)
{
return pack_raw64(p, &float8_e5m2_params);
@@ -1689,6 +1709,13 @@ static const uint16_t rsqrt_tab[128] = {
* Pack/unpack routines with a specific FloatFmt.
*/
+static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f,
+ float_status *s)
+{
+ float8_e4m3_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float8_e4m3_params);
+}
+
static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f,
float_status *s)
{
@@ -1716,6 +1743,14 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
parts_canonicalize(p, s, &bfloat16_params);
}
+static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p,
+ float_status *s,
+ bool saturate)
+{
+ parts_uncanon(p, s, &float8_e4m3_params, saturate);
+ return float8_e4m3_pack_raw(p);
+}
+
static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p,
float_status *s,
bool saturate)
@@ -2894,6 +2929,15 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
}
}
+bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s)
+{
+ FloatParts64 p;
+
+ float8_e4m3_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return bfloat16_round_pack_canonical(&p, s);
+}
+
bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s)
{
FloatParts64 p;
@@ -2923,6 +2967,15 @@ float64 float16_to_float64(float16 a, bool ieee, float_status *s)
return float64_round_pack_canonical(&p, s);
}
+float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s)
+{
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float8_e4m3_round_pack_canonical(&p, s, saturate);
+}
+
float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s)
{
FloatParts64 p;
@@ -2999,6 +3052,15 @@ float32 float64_to_float32(float64 a, float_status *s)
return float32_round_pack_canonical(&p, s);
}
+float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float8_e4m3_round_pack_canonical(&p, s, saturate);
+}
+
float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s)
{
FloatParts64 p;
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 09be686645..61b07307bf 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -242,6 +242,15 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
return;
case float_expmax_normal:
break;
+ case float_expmax_e4m3:
+ if (p->frac_hi == 0b111) {
+ frac_shl(p, fmt->frac_shift);
+ p->cls = (parts_is_snan_frac(p->frac_hi, status)
+ ? float_class_snan : float_class_qnan);
+ return;
+ }
+ /* otherwise normal */
+ break;
default:
g_assert_not_reached();
}
@@ -262,6 +271,21 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
* The saturate parameter controls saturation behavior for formats that
* support it -- when true, overflow produces max normal instead of infinity.
*/
+
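+/*
+ * Helper for uncanon_normal and uncanon, for FP8 E4M3.
+ * Raises overflow and inexact.  When saturating, the result is the
+ * largest normal (exp_max with E4M3_NORMAL_FRAC_MAX); otherwise it is
+ * the default NaN, since E4M3 has no infinity.
+ * NOTE(review): frac_hi is stored still shifted up by frac_shift --
+ * confirm that every caller shifts it back down before packing.
+ */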
+static void partsN(uncanon_e4m3_overflow)(FloatPartsN *p, float_status *s,
+ const FloatFmt *fmt, bool saturate)
+{
+ assert(N == 64);
+ float_raise(float_flag_overflow | float_flag_inexact, s);
+ if (saturate) {
+ p->exp = fmt->exp_max;
+ p->frac_hi = E4M3_NORMAL_FRAC_MAX;
+ } else {
+ parts_default_nan(p, s);
+ }
+}
+
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
const FloatFmt *fmt, bool saturate)
{
@@ -360,6 +384,12 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
}
break;
+ case float_expmax_e4m3:
+ if (exp > exp_max || p->frac_hi > E4M3_NORMAL_FRAC_MAX) {
+ partsN(uncanon_e4m3_overflow)(p, s, fmt, overflow_norm);
+ }
+ break;
+
default:
g_assert_not_reached();
}
@@ -459,9 +489,18 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
frac_clear(p);
return;
case float_class_inf:
- assert(fmt->exp_max_kind == float_expmax_ieee);
- p->exp = fmt->exp_max;
- frac_clear(p);
+ switch (fmt->exp_max_kind) {
+ case float_expmax_ieee:
+ p->exp = fmt->exp_max;
+ frac_clear(p);
+ break;
+ case float_expmax_e4m3:
+ partsN(uncanon_e4m3_overflow)(p, s, fmt, saturate);
+ break;
+ case float_expmax_normal:
+ default:
+ g_assert_not_reached();
+ }
return;
case float_class_qnan:
case float_class_snan:
--
2.43.0
On Mon, Feb 23, 2026 at 07:21:53PM +1100, Richard Henderson wrote:
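> From: Max Chou <max.chou@sifive.com>
>
> Add softfloat support for the OCP FP8 E4M3 format: the float8_e4m3 type,
> conversions to bfloat16 and from bfloat16/float32, and a new
> float_expmax_e4m3 kind in which exp==max with frac==max encodes NaN and
> there is no infinity.  On overflow (including an infinite input) the
> result saturates to the maximum normal when requested; otherwise it is
> the default NaN.
>
> Signed-off-by: Max Chou <max.chou@sifive.com>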
> [rth: Split out of a larger patch; adjust overflow detection.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/fpu/softfloat-types.h | 1 +
> include/fpu/softfloat.h | 4 +++
> fpu/softfloat.c | 62 +++++++++++++++++++++++++++++++++++
> fpu/softfloat-parts.c.inc | 45 +++++++++++++++++++++++--
> 4 files changed, 109 insertions(+), 3 deletions(-)
>
> diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
> index be7e2de6e3..9c84a101e5 100644
> --- a/include/fpu/softfloat-types.h
> +++ b/include/fpu/softfloat-types.h
> @@ -122,6 +122,7 @@ typedef uint16_t bfloat16;
> /*
> * Open Compute Project (OCP) Microscaling Formats
> */
> +typedef uint8_t float8_e4m3;
> typedef uint8_t float8_e5m2;
>
> /*
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 4385462992..31d3f76d3f 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -193,6 +193,10 @@ float128 uint128_to_float128(Int128, float_status *status);
> | OCP FP8 conversion routines.
> *----------------------------------------------------------------------------*/
>
> +bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status);
> +float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool sat, float_status *status);
> +float8_e4m3 float32_to_float8_e4m3(float32, bool sat, float_status *status);
> +
> bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status);
> float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool sat, float_status *status);
> float8_e5m2 float32_to_float8_e5m2(float32, bool sat, float_status *status);
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 0dc769283d..6e21882ab2 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -528,6 +528,8 @@ typedef enum __attribute__((__packed__)) {
> float_expmax_ieee,
> /* exp==max is a normal number; no infinity or nan representation. */
> float_expmax_normal,
> + /* exp==max: frac==max is a NaN, otherwise a normal number; no infinity. */
> + float_expmax_e4m3,
> } FloatFmtExpMaxKind;
>
> /*
> @@ -572,6 +574,14 @@ typedef struct {
> .frac_shift = (-F - 1) & 63, \
> .round_mask = (1ull << ((-F - 1) & 63)) - 1
>
> +static const FloatFmt float8_e4m3_params = {
> + FLOAT_PARAMS(4, 3),
> + .exp_max_kind = float_expmax_e4m3
> +};
> +
> +/* Largest normal fraction: 0b110 << frac_shift, with the implicit bit set */
> +#define E4M3_NORMAL_FRAC_MAX 0xe000000000000000ull
> +
> static const FloatFmt float8_e5m2_params = {
> FLOAT_PARAMS(5, 2)
> };
> @@ -631,6 +641,11 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
> };
> }
>
> +static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f)
> +{
> + unpack_raw64(p, &float8_e4m3_params, f);
> +}
> +
> static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f)
> {
> unpack_raw64(p, &float8_e5m2_params, f);
> @@ -693,6 +708,11 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
> return ret;
> }
>
> +static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const FloatParts64 *p)
> +{
> + return pack_raw64(p, &float8_e4m3_params);
> +}
> +
> static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p)
> {
> return pack_raw64(p, &float8_e5m2_params);
> @@ -1689,6 +1709,13 @@ static const uint16_t rsqrt_tab[128] = {
> * Pack/unpack routines with a specific FloatFmt.
> */
>
> +static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f,
> + float_status *s)
> +{
> + float8_e4m3_unpack_raw(p, f);
> + parts_canonicalize(p, s, &float8_e4m3_params);
> +}
> +
> static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f,
> float_status *s)
> {
> @@ -1716,6 +1743,14 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
> parts_canonicalize(p, s, &bfloat16_params);
> }
>
> +static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p,
> + float_status *s,
> + bool saturate)
> +{
> + parts_uncanon(p, s, &float8_e4m3_params, saturate);
> + return float8_e4m3_pack_raw(p);
> +}
> +
> static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p,
> float_status *s,
> bool saturate)
> @@ -2894,6 +2929,15 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
> }
> }
>
> +bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s)
> +{
> + FloatParts64 p;
> +
> + float8_e4m3_unpack_canonical(&p, a, s);
> + parts_float_to_float(&p, s);
> + return bfloat16_round_pack_canonical(&p, s);
> +}
> +
> bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s)
> {
> FloatParts64 p;
> @@ -2923,6 +2967,15 @@ float64 float16_to_float64(float16 a, bool ieee, float_status *s)
> return float64_round_pack_canonical(&p, s);
> }
>
> +float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s)
> +{
> + FloatParts64 p;
> +
> + float32_unpack_canonical(&p, a, s);
> + parts_float_to_float(&p, s);
> + return float8_e4m3_round_pack_canonical(&p, s, saturate);
> +}
> +
> float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s)
> {
> FloatParts64 p;
> @@ -2999,6 +3052,15 @@ float32 float64_to_float32(float64 a, float_status *s)
> return float32_round_pack_canonical(&p, s);
> }
>
> +float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status *s)
> +{
> + FloatParts64 p;
> +
> + bfloat16_unpack_canonical(&p, a, s);
> + parts_float_to_float(&p, s);
> + return float8_e4m3_round_pack_canonical(&p, s, saturate);
> +}
> +
> float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s)
> {
> FloatParts64 p;
> diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
> index 09be686645..61b07307bf 100644
> --- a/fpu/softfloat-parts.c.inc
> +++ b/fpu/softfloat-parts.c.inc
> @@ -242,6 +242,15 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
> return;
> case float_expmax_normal:
> break;
> + case float_expmax_e4m3:
> + if (p->frac_hi == 0b111) {
> + frac_shl(p, fmt->frac_shift);
> + p->cls = (parts_is_snan_frac(p->frac_hi, status)
> + ? float_class_snan : float_class_qnan);
> + return;
> + }
> + /* otherwise normal */
> + break;
> default:
> g_assert_not_reached();
> }
> @@ -262,6 +271,21 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
> * The saturate parameter controls saturation behavior for formats that
> * support it -- when true, overflow produces max normal instead of infinity.
> */
> +
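> +/*
> + * Helper for uncanon_normal and uncanon, for FP8 E4M3.
> + * Raises overflow and inexact.  When saturating, the result is the
> + * largest normal (exp_max with E4M3_NORMAL_FRAC_MAX); otherwise it is
> + * the default NaN, since E4M3 has no infinity.
> + * NOTE(review): frac_hi is stored still shifted up by frac_shift --
> + * confirm that every caller shifts it back down before packing.
> + */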
> +static void partsN(uncanon_e4m3_overflow)(FloatPartsN *p, float_status *s,
> + const FloatFmt *fmt, bool saturate)
> +{
> + assert(N == 64);
> + float_raise(float_flag_overflow | float_flag_inexact, s);
> + if (saturate) {
> + p->exp = fmt->exp_max;
> + p->frac_hi = E4M3_NORMAL_FRAC_MAX;
> + } else {
> + parts_default_nan(p, s);
> + }
> +}
> +
> static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
> const FloatFmt *fmt, bool saturate)
> {
> @@ -360,6 +384,12 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
> }
> break;
>
> + case float_expmax_e4m3:
> + if (exp > exp_max || p->frac_hi > E4M3_NORMAL_FRAC_MAX) {
> + partsN(uncanon_e4m3_overflow)(p, s, fmt, overflow_norm);
> + }
> + break;
> +
> default:
> g_assert_not_reached();
> }
> @@ -459,9 +489,18 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
> frac_clear(p);
> return;
> case float_class_inf:
> - assert(fmt->exp_max_kind == float_expmax_ieee);
> - p->exp = fmt->exp_max;
> - frac_clear(p);
> + switch (fmt->exp_max_kind) {
> + case float_expmax_ieee:
> + p->exp = fmt->exp_max;
> + frac_clear(p);
> + break;
> + case float_expmax_e4m3:
> + partsN(uncanon_e4m3_overflow)(p, s, fmt, saturate);
> + break;
> + case float_expmax_normal:
> + default:
> + g_assert_not_reached();
> + }
> return;
> case float_class_qnan:
> case float_class_snan:
> --
> 2.43.0
>
Reviewed-by: Chao Liu <chao.liu.zevorn@gmail.com>
Thanks,
Chao
© 2016 - 2026 Red Hat, Inc.