This commit provides the implementation-defined behavior flags and basic
operation support for the OCP float8 data types (E4M3 & E5M2).
According to the OFP8 spec, the result of converting an infinity from a
wider format depends on the saturation mode defined in the spec.
Signed-off-by: Max Chou <max.chou@sifive.com>
---
fpu/softfloat-parts.c.inc | 159 +++++++++++++++++++++------
fpu/softfloat-specialize.c.inc | 75 +++++++++++++
fpu/softfloat.c | 191 +++++++++++++++++++++++++++++++--
include/fpu/softfloat-types.h | 12 +++
include/fpu/softfloat.h | 81 ++++++++++++++
5 files changed, 480 insertions(+), 38 deletions(-)
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 5e0438fc0b..eee7daae4d 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -227,11 +227,28 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
p->exp = fmt->frac_shift - fmt->exp_bias
- shift + !has_pseudo_denormals;
}
- } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
+ } else if (likely(p->exp < fmt->exp_max)) {
p->cls = float_class_normal;
p->exp -= fmt->exp_bias;
frac_shl(p, fmt->frac_shift);
p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ } else if (fmt->limited_nan) {
+ /*
+ * Formats with limited NaN encodings (E4M3, E2M1, ARM Alt HP).
+ */
+ frac_shl(p, fmt->frac_shift);
+ p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
+ if (fmt->normal_frac_max == NORMAL_FRAC_MAX_ALL ||
+ p->frac_hi <= fmt->normal_frac_max) {
+ p->cls = float_class_normal;
+ p->exp -= fmt->exp_bias;
+ } else {
+ if (parts_is_snan_frac(p->frac_hi, status)) {
+ p->cls = float_class_snan;
+ } else {
+ p->cls = float_class_qnan;
+ }
+ }
} else if (likely(frac_eqz(p))) {
p->cls = float_class_inf;
} else {
@@ -241,14 +258,39 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
}
}
+/*
+ * Load the largest finite magnitude of @fmt into @p.  Only p->exp and the
+ * fraction are written here.
+ *  - Format with infinity (!no_infinity): exp = exp_max - 1, frac all ones
+ *  - Limited NaN formats (E4M3): exp = exp_max, frac = normal_frac_max
+ *  - No NaN/Inf formats (E2M1, ARM AHP): exp = exp_max, frac all ones
+ */
+static void partsN(set_max_normal)(FloatPartsN *p, const FloatFmt *fmt)
+{
+    if (!fmt->no_infinity) {
+        /* exp_max is taken by Inf/NaN, so the top finite exponent is below. */
+        frac_allones(p);
+        p->exp = fmt->exp_max - 1;
+        return;
+    }
+
+    p->exp = fmt->exp_max;
+    if (fmt->normal_frac_max == NORMAL_FRAC_MAX_ALL) {
+        frac_allones(p);
+    } else {
+        frac_clear(p);
+        p->frac_hi = fmt->normal_frac_max;
+    }
+}
+
/*
* Round and uncanonicalize a floating-point number by parts. There
* are FRAC_SHIFT bits that may require rounding at the bottom of the
* fraction; these bits will be removed. The exponent will be biased
* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
+ *
+ * The saturate parameter controls saturation behavior for formats that
+ * support it (OCP FP8 E4M3/E5M2). When true, overflow produces max normal
+ * instead of infinity (E5M2) or NaN (E4M3).
*/
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
- const FloatFmt *fmt)
+ const FloatFmt *fmt, bool saturate)
{
const int exp_max = fmt->exp_max;
const int frac_shift = fmt->frac_shift;
@@ -256,8 +298,8 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
const uint64_t frac_lsb = round_mask + 1;
const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
const uint64_t roundeven_mask = round_mask | frac_lsb;
+ bool overflow_norm = saturate;
uint64_t inc;
- bool overflow_norm = false;
int exp, flags = 0;
switch (s->float_rounding_mode) {
@@ -313,30 +355,64 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
}
p->frac_lo &= ~round_mask;
}
+ p->exp = exp;
- if (fmt->arm_althp) {
- /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
- if (unlikely(exp > exp_max)) {
- /* Overflow. Return the maximum normal. */
- flags = float_flag_invalid;
- exp = exp_max;
- frac_allones(p);
- p->frac_lo &= ~round_mask;
+ /*
+ * Unified overflow handling based on format capabilities.
+ * 1. Format has infinity -> overflow to infinity (or saturate)
+ * 2. Format has NaN but no infinity -> overflow to NaN (or saturate)
+ * 3. Format has neither -> always saturate
+ */
+ if (!fmt->no_infinity) {
+ if (unlikely(exp >= exp_max)) {
+ flags |= float_flag_overflow;
+ if (s->rebias_overflow) {
+ exp -= fmt->exp_re_bias;
+ } else if (overflow_norm) {
+ flags |= float_flag_inexact;
+ parts_set_max_normal(p, fmt);
+ exp = p->exp;
+ p->frac_lo &= ~round_mask;
+ } else {
+ flags |= float_flag_inexact;
+ p->cls = float_class_inf;
+ exp = exp_max;
+ frac_clear(p);
+ }
}
- } else if (unlikely(exp >= exp_max)) {
- flags |= float_flag_overflow;
- if (s->rebias_overflow) {
- exp -= fmt->exp_re_bias;
- } else if (overflow_norm) {
+ } else if (fmt_has_nan_encoding(fmt)) {
+ bool is_overflow = (exp > exp_max) ||
+ (exp == exp_max &&
+ p->frac_hi > fmt->normal_frac_max);
+
+ if (unlikely(is_overflow)) {
+ flags |= float_flag_overflow;
flags |= float_flag_inexact;
- exp = exp_max - 1;
- frac_allones(p);
+
+ if (overflow_norm) {
+ parts_set_max_normal(p, fmt);
+ exp = p->exp;
+ } else {
+ uint8_t dnan = s->default_nan_pattern;
+ p->cls = float_class_qnan;
+ p->sign = dnan >> 7;
+ exp = exp_max;
+ frac_allones(p);
+ }
+ }
+ } else {
+ if (unlikely(exp > exp_max)) {
+ if (fmt->overflow_raises_invalid) {
+ /* ARM Alt HP: raise Invalid, not Overflow */
+ flags = float_flag_invalid;
+ } else {
+ flags |= float_flag_overflow;
+ flags |= float_flag_inexact;
+ }
+
+ parts_set_max_normal(p, fmt);
+ exp = p->exp;
p->frac_lo &= ~round_mask;
- } else {
- flags |= float_flag_inexact;
- p->cls = float_class_inf;
- exp = exp_max;
- frac_clear(p);
}
}
frac_shr(p, frac_shift);
@@ -422,11 +498,11 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
float_raise(flags, s);
}
-static void partsN(uncanon)(FloatPartsN *p, float_status *s,
- const FloatFmt *fmt)
+static void partsN(uncanon_sat)(FloatPartsN *p, float_status *s,
+ const FloatFmt *fmt, bool saturate)
{
if (likely(is_anynorm(p->cls))) {
- parts_uncanon_normal(p, s, fmt);
+ parts_uncanon_normal(p, s, fmt, saturate);
} else {
switch (p->cls) {
case float_class_zero:
@@ -434,13 +510,30 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
frac_clear(p);
return;
case float_class_inf:
- g_assert(!fmt->arm_althp);
- p->exp = fmt->exp_max;
- frac_clear(p);
+ /*
+ * Unified infinity handling using format capabilities.
+ * Formats with no_infinity must convert infinity to something else
+ */
+ if (!fmt->no_infinity) {
+ p->exp = fmt->exp_max;
+ frac_clear(p);
+ } else if (fmt_has_nan_encoding(fmt)) {
+ if (saturate) {
+ parts_set_max_normal(p, fmt);
+ } else {
+ uint8_t dnan = s->default_nan_pattern;
+ p->cls = float_class_qnan;
+ p->sign = dnan >> 7;
+ p->exp = fmt->exp_max;
+ frac_allones(p);
+ }
+ } else {
+ parts_set_max_normal(p, fmt);
+ }
return;
case float_class_qnan:
case float_class_snan:
- g_assert(!fmt->arm_althp);
+ g_assert(fmt_has_nan_encoding(fmt));
p->exp = fmt->exp_max;
frac_shr(p, fmt->frac_shift);
return;
@@ -451,6 +544,12 @@ static void partsN(uncanon)(FloatPartsN *p, float_status *s,
}
}
+/*
+ * Non-saturating entry point: forwards to partsN(uncanon_sat) with
+ * saturate = false.
+ */
+static void partsN(uncanon)(FloatPartsN *p, float_status *s,
+ const FloatFmt *fmt)
+{
+ partsN(uncanon_sat)(p, s, fmt, false);
+}
+
/*
* Returns the result of adding or subtracting the values of the
* floating-point values `a' and `b'. The operation is performed
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index ce7315c996..3648dc7467 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -242,6 +242,63 @@ static bool float16_is_snan_internal(float16 a, float_status *status)
return frac_msb == snan_bit_is_one(status);
}
+/*----------------------------------------------------------------------------
+| Internal helper: report whether the E4M3 value `a' counts as a signaling
+| NaN.  E4M3 has only one NaN encoding, so the answer is taken from the NaN
+| policy in `status' rather than from the fraction bits.
+*----------------------------------------------------------------------------*/
+
+static bool float8_e4m3_is_snan_internal(float8_e4m3 a, float_status *status)
+{
+    return float8_e4m3_is_any_nan(a)
+        && !no_signaling_nans(status)
+        && snan_bit_is_one(status);
+}
+
+/*----------------------------------------------------------------------------
+| Internal helper: report whether the E5M2 value `a' is a signaling NaN.
+| The most significant of the two fraction bits is compared against the
+| snan_bit_is_one() policy in `status'.
+*----------------------------------------------------------------------------*/
+
+static bool float8_e5m2_is_snan_internal(float8_e5m2 a, float_status *status)
+{
+    bool top_frac_bit;
+
+    if (!float8_e5m2_is_any_nan(a) || no_signaling_nans(status)) {
+        return false;
+    }
+    /* Bit 1 is the upper bit of the 2-bit fraction field. */
+    top_frac_bit = (float8_e5m2_val(a) >> 1) & 1;
+    return top_frac_bit == snan_bit_is_one(status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the OCP FP8 E4M3 value `a' is a NaN that is not classified as
+| signaling under `status'; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+bool float8_e4m3_is_quiet_nan(float8_e4m3 a_, float_status *status)
+{
+    return float8_e4m3_is_any_nan(a_)
+        && !float8_e4m3_is_snan_internal(a_, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the OCP FP8 E5M2 value `a' is a NaN that is not classified as
+| signaling under `status'; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+bool float8_e5m2_is_quiet_nan(float8_e5m2 a_, float_status *status)
+{
+    return float8_e5m2_is_any_nan(a_)
+        && !float8_e5m2_is_snan_internal(a_, status);
+}
+
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a quiet
| NaN; otherwise returns 0.
@@ -285,6 +342,24 @@ bool bfloat16_is_quiet_nan(bfloat16 a_, float_status *status)
return !bfloat16_is_snan_internal(a_, status);
}
+/*----------------------------------------------------------------------------
+| Returns 1 if the OCP FP8 E4M3 value `a' is a signaling NaN; otherwise 0.
+| Public wrapper: the decision is made by float8_e4m3_is_snan_internal(),
+| with `status' passed through unchanged.
+*----------------------------------------------------------------------------*/
+
+bool float8_e4m3_is_signaling_nan(float8_e4m3 a_, float_status *status)
+{
+ return float8_e4m3_is_snan_internal(a_, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the OCP FP8 E5M2 value `a' is a signaling NaN; otherwise 0.
+| Public wrapper: the decision is made by float8_e5m2_is_snan_internal(),
+| with `status' passed through unchanged.
+*----------------------------------------------------------------------------*/
+
+bool float8_e5m2_is_signaling_nan(float8_e5m2 a_, float_status *status)
+{
+ return float8_e5m2_is_snan_internal(a_, status);
+}
+
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 8094358c2e..533f96dcda 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -522,6 +522,13 @@ typedef struct {
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
+/*
+ * Sentinel value for normal_frac_max indicating "all fraction values at
+ * exp_max are normal" (i.e., the format has no NaN encoding at exp_max).
+ * Used by E2M1 and ARM Alternative Half Precision formats.
+ *
+ * fmt_has_nan_encoding() and the canonicalize/set_max_normal paths compare
+ * normal_frac_max against this value.
+ * NOTE(review): 0 looks safe as a sentinel because a real limit is
+ * documented to include the implicit bit and so is non-zero - confirm.
+ */
+#define NORMAL_FRAC_MAX_ALL 0
+
/* Structure holding all of the relevant parameters for a format.
* exp_size: the size of the exponent field
* exp_bias: the offset applied to the exponent field
@@ -542,11 +549,39 @@ typedef struct {
int exp_max;
int frac_size;
int frac_shift;
- bool arm_althp;
bool has_explicit_bit;
uint64_t round_mask;
+ /*
+ * Format capability flags:
+ * no_infinity: Format has no infinity encoding. When true, exp=exp_max
+ * with frac=0 is NOT infinity - it's either NaN or max normal.
+ *
+ * limited_nan: Format has limited or no NaN patterns. When combined
+ * with normal_frac_max, determines NaN encoding capability:
+ * - limited_nan=false: Standard IEEE NaN (exp=exp_max, frac!=0)
+ * - limited_nan=true && normal_frac_max!=0: Limited NaN (E4M3)
+ * - limited_nan=true && normal_frac_max==0: No NaN encoding (AHP, E2M1)
+ *
+ * overflow_raises_invalid: Raise Invalid (not Overflow) exception.
+ * ARM Alt HP uses this to signal overflow as an invalid operation.
+ *
+ * normal_frac_max: For formats with limited_nan, the maximum fraction
+ * value (after normalization shift, including implicit bit) that is
+ * still considered normal at exp=exp_max.
+ * Use NORMAL_FRAC_MAX_ALL (0) to indicate all frac values at exp_max
+ * are normal (E2M1, ARM Alt HP), which also implies no NaN encoding.
+ */
+ bool no_infinity;
+ bool limited_nan;
+ bool overflow_raises_invalid;
+ uint64_t normal_frac_max;
} FloatFmt;
+/*
+ * True when @fmt can encode at least one NaN: either it is not a
+ * limited-NaN format, or it is one whose normal_frac_max is a real limit
+ * rather than the NORMAL_FRAC_MAX_ALL sentinel.
+ */
+static inline bool fmt_has_nan_encoding(const FloatFmt *fmt)
+{
+    if (fmt->limited_nan) {
+        return fmt->normal_frac_max != NORMAL_FRAC_MAX_ALL;
+    }
+    return true;
+}
+
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E) \
.exp_size = E, \
@@ -560,13 +595,27 @@ typedef struct {
.frac_shift = (-F - 1) & 63, \
.round_mask = (1ull << ((-F - 1) & 63)) - 1
+/*
+ * OCP FP8 E4M3: 4 exponent bits, 3 fraction bits, no infinity encoding.
+ * normal_frac_max is expressed in canonical position: 0xE at the top is the
+ * implicit bit (bit 63) followed by fraction bits 110, so at exp_max only
+ * fraction 111 is treated as NaN.
+ */
+static const FloatFmt float8_e4m3_params = {
+ FLOAT_PARAMS(4, 3),
+ .no_infinity = true,
+ .limited_nan = true,
+ .normal_frac_max = 0xE000000000000000ULL,
+};
+
+/*
+ * OCP FP8 E5M2: 5 exponent bits, 2 fraction bits.  No capability flags are
+ * set, so infinity and NaN use the standard exp_max encodings.
+ */
+static const FloatFmt float8_e5m2_params = {
+ FLOAT_PARAMS(5, 2),
+};
+
static const FloatFmt float16_params = {
FLOAT_PARAMS(5, 10)
};
static const FloatFmt float16_params_ahp = {
FLOAT_PARAMS(5, 10),
- .arm_althp = true
+ .no_infinity = true,
+ .limited_nan = true,
+ .overflow_raises_invalid = true,
+ .normal_frac_max = NORMAL_FRAC_MAX_ALL,
};
static const FloatFmt bfloat16_params = {
@@ -614,6 +663,16 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
};
}
+/* Split the raw E4M3 bits of @f into @p using float8_e4m3_params. */
+static void QEMU_FLATTEN float8_e4m3_unpack_raw(FloatParts64 *p, float8_e4m3 f)
+{
+ unpack_raw64(p, &float8_e4m3_params, f);
+}
+
+/* Split the raw E5M2 bits of @f into @p using float8_e5m2_params. */
+static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f)
+{
+ unpack_raw64(p, &float8_e5m2_params, f);
+}
+
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
unpack_raw64(p, &float16_params, f);
@@ -671,6 +730,16 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
return ret;
}
+/* Assemble the fields of @p into a raw E4M3 value using float8_e4m3_params. */
+static float8_e4m3 QEMU_FLATTEN float8_e4m3_pack_raw(const FloatParts64 *p)
+{
+ return make_float8_e4m3(pack_raw64(p, &float8_e4m3_params));
+}
+
+/* Assemble the fields of @p into a raw E5M2 value using float8_e5m2_params. */
+static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p)
+{
+ return make_float8_e5m2(pack_raw64(p, &float8_e5m2_params));
+}
+
static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
return make_float16(pack_raw64(p, &float16_params));
@@ -758,12 +827,26 @@ static void parts128_canonicalize(FloatParts128 *p, float_status *status,
PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
- const FloatFmt *fmt);
+ const FloatFmt *fmt, bool saturate);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
- const FloatFmt *fmt);
+ const FloatFmt *fmt, bool saturate);
+
+#define parts_uncanon_normal(A, S, F, SAT) \
+ PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F, SAT)
-#define parts_uncanon_normal(A, S, F) \
- PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
+static void parts64_uncanon_sat(FloatParts64 *p, float_status *status,
+ const FloatFmt *fmt, bool saturate);
+static void parts128_uncanon_sat(FloatParts128 *p, float_status *status,
+ const FloatFmt *fmt, bool saturate);
+
+#define parts_uncanon_sat(A, S, F, SAT) \
+ PARTS_GENERIC_64_128(uncanon_sat, A)(A, S, F, SAT)
+
+static void parts64_set_max_normal(FloatParts64 *p, const FloatFmt *fmt);
+static void parts128_set_max_normal(FloatParts128 *p, const FloatFmt *fmt);
+
+#define parts_set_max_normal(P, F) \
+ PARTS_GENERIC_64_128(set_max_normal, P)(P, F)
static void parts64_uncanon(FloatParts64 *p, float_status *status,
const FloatFmt *fmt);
@@ -1662,6 +1745,20 @@ static const uint16_t rsqrt_tab[128] = {
* Pack/unpack routines with a specific FloatFmt.
*/
+/* Unpack the raw E4M3 value @f into @p, then canonicalize it for E4M3. */
+static void float8_e4m3_unpack_canonical(FloatParts64 *p, float8_e4m3 f,
+ float_status *s)
+{
+ float8_e4m3_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float8_e4m3_params);
+}
+
+/* Unpack the raw E5M2 value @f into @p, then canonicalize it for E5M2. */
+static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f,
+ float_status *s)
+{
+ float8_e5m2_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float8_e5m2_params);
+}
+
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
float_status *s, const FloatFmt *params)
{
@@ -1682,6 +1779,24 @@ static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
parts_canonicalize(p, s, &bfloat16_params);
}
+/*
+ * Round and uncanonicalize @p according to @params, forwarding @saturate to
+ * parts_uncanon_sat(), then pack the result as E4M3.  Packing always uses
+ * the E4M3 layout regardless of @params.
+ */
+static float8_e4m3 float8_e4m3_round_pack_canonical(FloatParts64 *p,
+ float_status *status,
+ const FloatFmt *params,
+ const bool saturate)
+{
+ parts_uncanon_sat(p, status, params, saturate);
+ return float8_e4m3_pack_raw(p);
+}
+
+/*
+ * Round and uncanonicalize @p according to @params, forwarding @saturate to
+ * parts_uncanon_sat(), then pack the result as E5M2.  Packing always uses
+ * the E5M2 layout regardless of @params.
+ */
+static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p,
+ float_status *status,
+ const FloatFmt *params,
+ const bool saturate)
+{
+ parts_uncanon_sat(p, status, params, saturate);
+ return float8_e5m2_pack_raw(p);
+}
+
static float16 float16a_round_pack_canonical(FloatParts64 *p,
float_status *s,
const FloatFmt *params)
@@ -1838,7 +1953,7 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
case float_class_normal:
case float_class_denormal:
if (s->floatx80_rounding_precision == floatx80_precision_x) {
- parts_uncanon_normal(p, s, fmt);
+ parts_uncanon_normal(p, s, fmt, false);
frac = p->frac_hi;
exp = p->exp;
} else {
@@ -1847,7 +1962,7 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
p64.sign = p->sign;
p64.exp = p->exp;
frac_truncjam(&p64, p);
- parts_uncanon_normal(&p64, s, fmt);
+ parts_uncanon_normal(&p64, s, fmt, false);
frac = p64.frac;
exp = p64.exp;
}
@@ -2823,6 +2938,66 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
}
}
+/*
+ * Convert an OCP FP8 E4M3 value to bfloat16: unpack and canonicalize as
+ * E4M3, run parts_float_to_float(), then round and pack as bfloat16.
+ */
+bfloat16 float8_e4m3_to_bfloat16(float8_e4m3 a, float_status *s)
+{
+ FloatParts64 p;
+
+ float8_e4m3_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+
+ return bfloat16_round_pack_canonical(&p, s);
+}
+
+/*
+ * Convert an OCP FP8 E5M2 value to bfloat16: unpack and canonicalize as
+ * E5M2, run parts_float_to_float(), then round and pack as bfloat16.
+ */
+bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s)
+{
+ FloatParts64 p;
+
+ float8_e5m2_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+
+ return bfloat16_round_pack_canonical(&p, s);
+}
+
+/*
+ * Convert bfloat16 to OCP FP8 E4M3.  @saturate is forwarded to the
+ * round/pack step, where it selects saturating overflow handling.
+ */
+float8_e4m3 bfloat16_to_float8_e4m3(bfloat16 a, bool saturate, float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float8_e4m3_round_pack_canonical(&p, s, &float8_e4m3_params,
+ saturate);
+}
+
+/*
+ * Convert bfloat16 to OCP FP8 E5M2.  @saturate is forwarded to the
+ * round/pack step, where it selects saturating overflow handling.
+ */
+float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float8_e5m2_round_pack_canonical(&p, s, &float8_e5m2_params,
+ saturate);
+}
+
+/*
+ * Convert float32 to OCP FP8 E4M3.  @saturate is forwarded to the
+ * round/pack step, where it selects saturating overflow handling.
+ */
+float8_e4m3 float32_to_float8_e4m3(float32 a, bool saturate, float_status *s)
+{
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float8_e4m3_round_pack_canonical(&p, s, &float8_e4m3_params,
+ saturate);
+}
+
+/*
+ * Convert float32 to OCP FP8 E5M2.  @saturate is forwarded to the
+ * round/pack step, where it selects saturating overflow handling.
+ */
+float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s)
+{
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, s);
+ parts_float_to_float(&p, s);
+ return float8_e5m2_round_pack_canonical(&p, s, &float8_e5m2_params,
+ saturate);
+}
+
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 8f82fdfc97..b781bf10b7 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -119,6 +119,18 @@ typedef struct {
*/
typedef uint16_t bfloat16;
+/*
+ * Software OCP (Open Compute Project) floating-point types
+ */
+typedef uint8_t float8_e4m3;
+typedef uint8_t float8_e5m2;
+#define float8_e4m3_val(x) (x)
+#define float8_e5m2_val(x) (x)
+#define make_float8_e4m3(x) (x)
+#define make_float8_e5m2(x) (x)
+#define const_float8_e4m3(x) (x)
+#define const_float8_e5m2(x) (x)
+
/*
* Software IEC/IEEE floating-point underflow tininess-detection mode.
*/
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index c18ab2cb60..30aca23057 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -189,6 +189,87 @@ float128 int128_to_float128(Int128, float_status *status);
float128 uint64_to_float128(uint64_t, float_status *status);
float128 uint128_to_float128(Int128, float_status *status);
+/*----------------------------------------------------------------------------
+| Software OCP conversion routines.
+*----------------------------------------------------------------------------*/
+
+bfloat16 float8_e4m3_to_bfloat16(float8_e4m3, float_status *status);
+bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status);
+float8_e4m3 bfloat16_to_float8_e4m3(bfloat16, bool saturate, float_status *status);
+float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool saturate, float_status *status);
+float8_e4m3 float32_to_float8_e4m3(float32, bool saturate, float_status *status);
+float8_e5m2 float32_to_float8_e5m2(float32, bool saturate, float_status *status);
+
+/*----------------------------------------------------------------------------
+| Software OCP operations.
+*----------------------------------------------------------------------------*/
+
+bool float8_e4m3_is_quiet_nan(float8_e4m3, float_status *status);
+bool float8_e4m3_is_signaling_nan(float8_e4m3, float_status *status);
+bool float8_e5m2_is_quiet_nan(float8_e5m2, float_status *status);
+bool float8_e5m2_is_signaling_nan(float8_e5m2, float_status *status);
+
+/* E4M3 NaN: every exponent and fraction bit set (0x7f), with either sign. */
+static inline bool float8_e4m3_is_any_nan(float8_e4m3 a)
+{
+    return (float8_e4m3_val(a) & 0x7f) == 0x7f;
+}
+
+/* E5M2 NaN: magnitude bits above the infinity pattern 0x7c, either sign. */
+static inline bool float8_e5m2_is_any_nan(float8_e5m2 a)
+{
+    return (float8_e5m2_val(a) & 0x7f) > 0x7c;
+}
+
+/* True when the sign bit (bit 7) of the E4M3 value is set. */
+static inline bool float8_e4m3_is_neg(float8_e4m3 a)
+{
+    return (float8_e4m3_val(a) & 0x80) != 0;
+}
+
+/* True when the sign bit (bit 7) of the E5M2 value is set. */
+static inline bool float8_e5m2_is_neg(float8_e5m2 a)
+{
+    return (float8_e5m2_val(a) & 0x80) != 0;
+}
+
+/* Always false: @a is not examined, E4M3 is treated as having no infinity. */
+static inline bool float8_e4m3_is_infinity(float8_e4m3 a)
+{
+ return false;
+}
+
+/* E5M2 infinity: exponent all ones, fraction zero (0x7c), either sign. */
+static inline bool float8_e5m2_is_infinity(float8_e5m2 a)
+{
+    uint8_t magnitude = float8_e5m2_val(a) & 0x7f;
+
+    return magnitude == 0x7c;
+}
+
+/* True for +0 and -0: all bits other than the sign are clear. */
+static inline bool float8_e4m3_is_zero(float8_e4m3 a)
+{
+    return !(float8_e4m3_val(a) & 0x7f);
+}
+
+/* True for +0 and -0: all bits other than the sign are clear. */
+static inline bool float8_e5m2_is_zero(float8_e5m2 a)
+{
+    return !(float8_e5m2_val(a) & 0x7f);
+}
+
+/* True when the 4-bit exponent field (mask 0x78) is zero. */
+static inline bool float8_e4m3_is_zero_or_denormal(float8_e4m3 a)
+{
+    return !(float8_e4m3_val(a) & 0x78);
+}
+
+/* True when the 5-bit exponent field (mask 0x7c) is zero. */
+static inline bool float8_e5m2_is_zero_or_denormal(float8_e5m2 a)
+{
+    return !(float8_e5m2_val(a) & 0x7c);
+}
+
+/*
+ * True for E4M3 normal numbers: magnitude bits from 0x08 (smallest non-zero
+ * exponent) up to 0x7e, i.e. everything except zero/denormal and 0x7f (NaN).
+ */
+static inline bool float8_e4m3_is_normal(float8_e4m3 a)
+{
+    uint8_t magnitude = float8_e4m3_val(a) & 0x7f;
+
+    return !(magnitude < 0x08 || magnitude == 0x7f);
+}
+
+/*
+ * True for E5M2 normal numbers: the 5-bit exponent field is neither zero
+ * (zero/denormal) nor all ones (infinity/NaN).
+ */
+static inline bool float8_e5m2_is_normal(float8_e5m2 a)
+{
+    uint8_t exponent = (float8_e5m2_val(a) >> 2) & 0x1f;
+
+    return exponent != 0 && exponent != 0x1f;
+}
+
/*----------------------------------------------------------------------------
| Software half-precision conversion routines.
*----------------------------------------------------------------------------*/
--
2.52.0
© 2016 - 2026 Red Hat, Inc.