[v2] target/riscv: NaN-boxing for multiple precison

[PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by Richard Henderson 5 years, 6 months ago

Make sure that all results from single-precision scalar helpers
are properly nan-boxed to 64-bits.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/internals.h  |  5 +++++
 target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 37d33820ad..9f4ba7d617 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
 #define SEW32 2
 #define SEW64 3
 
+static inline uint64_t nanbox_s(float32 f)
+{
+    return f | MAKE_64BIT_MASK(32, 32);
+}
+
 #endif
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 4379756dc4..72541958a7 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -81,10 +81,16 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm)
     set_float_rounding_mode(softrm, &env->fp_status);
 }
 
+static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
+                           uint64_t frs3, int flags)
+{
+    return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status));
+}
+
 uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                         uint64_t frs3)
 {
-    return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status);
+    return do_fmadd_s(env, frs1, frs2, frs3, 0);
 }
 
 uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
@@ -96,8 +102,7 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
 uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                         uint64_t frs3)
 {
-    return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c,
-                          &env->fp_status);
+    return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_c);
 }
 
 uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
@@ -110,8 +115,7 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
 uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                          uint64_t frs3)
 {
-    return float32_muladd(frs1, frs2, frs3, float_muladd_negate_product,
-                          &env->fp_status);
+    return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_product);
 }
 
 uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
@@ -124,8 +128,8 @@ uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
 uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
                          uint64_t frs3)
 {
-    return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c |
-                          float_muladd_negate_product, &env->fp_status);
+    return do_fmadd_s(env, frs1, frs2, frs3,
+                      float_muladd_negate_c | float_muladd_negate_product);
 }
 
 uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
@@ -137,37 +141,37 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
 
 uint64_t helper_fadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
-    return float32_add(frs1, frs2, &env->fp_status);
+    return nanbox_s(float32_add(frs1, frs2, &env->fp_status));
 }
 
 uint64_t helper_fsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
-    return float32_sub(frs1, frs2, &env->fp_status);
+    return nanbox_s(float32_sub(frs1, frs2, &env->fp_status));
 }
 
 uint64_t helper_fmul_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
-    return float32_mul(frs1, frs2, &env->fp_status);
+    return nanbox_s(float32_mul(frs1, frs2, &env->fp_status));
 }
 
 uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
-    return float32_div(frs1, frs2, &env->fp_status);
+    return nanbox_s(float32_div(frs1, frs2, &env->fp_status));
 }
 
 uint64_t helper_fmin_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
-    return float32_minnum(frs1, frs2, &env->fp_status);
+    return nanbox_s(float32_minnum(frs1, frs2, &env->fp_status));
 }
 
 uint64_t helper_fmax_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 {
-    return float32_maxnum(frs1, frs2, &env->fp_status);
+    return nanbox_s(float32_maxnum(frs1, frs2, &env->fp_status));
 }
 
 uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t frs1)
 {
-    return float32_sqrt(frs1, &env->fp_status);
+    return nanbox_s(float32_sqrt(frs1, &env->fp_status));
 }
 
 target_ulong helper_fle_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
@@ -209,23 +213,23 @@ uint64_t helper_fcvt_lu_s(CPURISCVState *env, uint64_t frs1)
 
 uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1)
 {
-    return int32_to_float32((int32_t)rs1, &env->fp_status);
+    return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status));
 }
 
 uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1)
 {
-    return uint32_to_float32((uint32_t)rs1, &env->fp_status);
+    return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status));
 }
 
 #if defined(TARGET_RISCV64)
 uint64_t helper_fcvt_s_l(CPURISCVState *env, uint64_t rs1)
 {
-    return int64_to_float32(rs1, &env->fp_status);
+    return nanbox_s(int64_to_float32(rs1, &env->fp_status));
 }
 
 uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1)
 {
-    return uint64_to_float32(rs1, &env->fp_status);
+    return nanbox_s(uint64_to_float32(rs1, &env->fp_status));
 }
 #endif
 
@@ -266,7 +270,7 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
 
 uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1)
 {
-    return float64_to_float32(rs1, &env->fp_status);
+    return nanbox_s(float64_to_float32(rs1, &env->fp_status));
 }
 
 uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1)
-- 
2.25.1

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by LIU Zhiwei 5 years, 6 months ago


On 2020/7/24 8:28, Richard Henderson wrote:
> Make sure that all results from single-precision scalar helpers
> are properly nan-boxed to 64-bits.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/riscv/internals.h  |  5 +++++
>   target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------
>   2 files changed, 28 insertions(+), 19 deletions(-)
>
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 37d33820ad..9f4ba7d617 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
>   #define SEW32 2
>   #define SEW64 3
>   
> +static inline uint64_t nanbox_s(float32 f)
> +{
> +    return f | MAKE_64BIT_MASK(32, 32);
> +}
> +
If define it here,  we can also define a more general  function with flen.

+static inline uint64_t nanbox_s(float32 f, uint32_t flen)
+{
+    return f | MAKE_64BIT_MASK(flen, 64 - flen);
+}
+

So we can reuse it in fp16 or bf16 scalar instruction and in vector 
instructions.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>

Zhiwei
>   #endif
> diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
> index 4379756dc4..72541958a7 100644
> --- a/target/riscv/fpu_helper.c
> +++ b/target/riscv/fpu_helper.c
> @@ -81,10 +81,16 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm)
>       set_float_rounding_mode(softrm, &env->fp_status);
>   }
>   
> +static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> +                           uint64_t frs3, int flags)
> +{
> +    return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status));
> +}
> +
>   uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                           uint64_t frs3)
>   {
> -    return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status);
> +    return do_fmadd_s(env, frs1, frs2, frs3, 0);
>   }
>   
>   uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> @@ -96,8 +102,7 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>   uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                           uint64_t frs3)
>   {
> -    return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c,
> -                          &env->fp_status);
> +    return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_c);
>   }
>   
>   uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> @@ -110,8 +115,7 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>   uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                            uint64_t frs3)
>   {
> -    return float32_muladd(frs1, frs2, frs3, float_muladd_negate_product,
> -                          &env->fp_status);
> +    return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_product);
>   }
>   
>   uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> @@ -124,8 +128,8 @@ uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>   uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>                            uint64_t frs3)
>   {
> -    return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c |
> -                          float_muladd_negate_product, &env->fp_status);
> +    return do_fmadd_s(env, frs1, frs2, frs3,
> +                      float_muladd_negate_c | float_muladd_negate_product);
>   }
>   
>   uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
> @@ -137,37 +141,37 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
>   
>   uint64_t helper_fadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   {
> -    return float32_add(frs1, frs2, &env->fp_status);
> +    return nanbox_s(float32_add(frs1, frs2, &env->fp_status));
>   }
>   
>   uint64_t helper_fsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   {
> -    return float32_sub(frs1, frs2, &env->fp_status);
> +    return nanbox_s(float32_sub(frs1, frs2, &env->fp_status));
>   }
>   
>   uint64_t helper_fmul_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   {
> -    return float32_mul(frs1, frs2, &env->fp_status);
> +    return nanbox_s(float32_mul(frs1, frs2, &env->fp_status));
>   }
>   
>   uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   {
> -    return float32_div(frs1, frs2, &env->fp_status);
> +    return nanbox_s(float32_div(frs1, frs2, &env->fp_status));
>   }
>   
>   uint64_t helper_fmin_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   {
> -    return float32_minnum(frs1, frs2, &env->fp_status);
> +    return nanbox_s(float32_minnum(frs1, frs2, &env->fp_status));
>   }
>   
>   uint64_t helper_fmax_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   {
> -    return float32_maxnum(frs1, frs2, &env->fp_status);
> +    return nanbox_s(float32_maxnum(frs1, frs2, &env->fp_status));
>   }
>   
>   uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t frs1)
>   {
> -    return float32_sqrt(frs1, &env->fp_status);
> +    return nanbox_s(float32_sqrt(frs1, &env->fp_status));
>   }
>   
>   target_ulong helper_fle_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
> @@ -209,23 +213,23 @@ uint64_t helper_fcvt_lu_s(CPURISCVState *env, uint64_t frs1)
>   
>   uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1)
>   {
> -    return int32_to_float32((int32_t)rs1, &env->fp_status);
> +    return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status));
>   }
>   
>   uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1)
>   {
> -    return uint32_to_float32((uint32_t)rs1, &env->fp_status);
> +    return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status));
>   }
>   
>   #if defined(TARGET_RISCV64)
>   uint64_t helper_fcvt_s_l(CPURISCVState *env, uint64_t rs1)
>   {
> -    return int64_to_float32(rs1, &env->fp_status);
> +    return nanbox_s(int64_to_float32(rs1, &env->fp_status));
>   }
>   
>   uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1)
>   {
> -    return uint64_to_float32(rs1, &env->fp_status);
> +    return nanbox_s(uint64_to_float32(rs1, &env->fp_status));
>   }
>   #endif
>   
> @@ -266,7 +270,7 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
>   
>   uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1)
>   {
> -    return float64_to_float32(rs1, &env->fp_status);
> +    return nanbox_s(float64_to_float32(rs1, &env->fp_status));
>   }
>   
>   uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1)

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by Richard Henderson 5 years, 6 months ago

On 7/23/20 7:35 PM, LIU Zhiwei wrote:
> 
> 
> On 2020/7/24 8:28, Richard Henderson wrote:
>> Make sure that all results from single-precision scalar helpers
>> are properly nan-boxed to 64-bits.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/riscv/internals.h  |  5 +++++
>>   target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------
>>   2 files changed, 28 insertions(+), 19 deletions(-)
>>
>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>> index 37d33820ad..9f4ba7d617 100644
>> --- a/target/riscv/internals.h
>> +++ b/target/riscv/internals.h
>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
>>   #define SEW32 2
>>   #define SEW64 3
>>   +static inline uint64_t nanbox_s(float32 f)
>> +{
>> +    return f | MAKE_64BIT_MASK(32, 32);
>> +}
>> +
> If define it here,  we can also define a more general  function with flen.
> 
> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)
> +{
> +    return f | MAKE_64BIT_MASK(flen, 64 - flen);
> +}
> +
> 
> So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions.

While we could do that, we will not encounter all possible lengths.  In the
cover letter, I mentioned defining a second function,

static inline uint64_t nanbox_h(float16 f)
{
   return f | MAKE_64BIT_MASK(16, 48);
}

Having two separate functions will, I believe, be easier to use in practice.


r~

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by LIU Zhiwei 5 years, 6 months ago


On 2020/7/24 11:55, Richard Henderson wrote:
> On 7/23/20 7:35 PM, LIU Zhiwei wrote:
>>
>> On 2020/7/24 8:28, Richard Henderson wrote:
>>> Make sure that all results from single-precision scalar helpers
>>> are properly nan-boxed to 64-bits.
>>>
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>    target/riscv/internals.h  |  5 +++++
>>>    target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------
>>>    2 files changed, 28 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>>> index 37d33820ad..9f4ba7d617 100644
>>> --- a/target/riscv/internals.h
>>> +++ b/target/riscv/internals.h
>>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
>>>    #define SEW32 2
>>>    #define SEW64 3
>>>    +static inline uint64_t nanbox_s(float32 f)
>>> +{
>>> +    return f | MAKE_64BIT_MASK(32, 32);
>>> +}
>>> +
>> If define it here,  we can also define a more general  function with flen.
>>
>> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)
>> +{
>> +    return f | MAKE_64BIT_MASK(flen, 64 - flen);
>> +}
>> +
>>
>> So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions.
> While we could do that, we will not encounter all possible lengths.  In the
> cover letter, I mentioned defining a second function,
>
> static inline uint64_t nanbox_h(float16 f)
> {
>     return f | MAKE_64BIT_MASK(16, 48);
> }
>
> Having two separate functions will, I believe, be easier to use in practice.
>
Get  it. Thanks.

Zhiwei
>
> r~

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by Chih-Min Chao 5 years, 6 months ago

On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:

>
>
> On 2020/7/24 11:55, Richard Henderson wrote:
> > On 7/23/20 7:35 PM, LIU Zhiwei wrote:
> >>
> >> On 2020/7/24 8:28, Richard Henderson wrote:
> >>> Make sure that all results from single-precision scalar helpers
> >>> are properly nan-boxed to 64-bits.
> >>>
> >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> >>> ---
> >>>    target/riscv/internals.h  |  5 +++++
> >>>    target/riscv/fpu_helper.c | 42
> +++++++++++++++++++++------------------
> >>>    2 files changed, 28 insertions(+), 19 deletions(-)
> >>>
> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> >>> index 37d33820ad..9f4ba7d617 100644
> >>> --- a/target/riscv/internals.h
> >>> +++ b/target/riscv/internals.h
> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
> >>>    #define SEW32 2
> >>>    #define SEW64 3
> >>>    +static inline uint64_t nanbox_s(float32 f)
> >>> +{
> >>> +    return f | MAKE_64BIT_MASK(32, 32);
> >>> +}
> >>> +
> >> If define it here,  we can also define a more general  function with
> flen.
> >>
> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)
> >> +{
> >> +    return f | MAKE_64BIT_MASK(flen, 64 - flen);
> >> +}
> >> +
> >>
> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector
> instructions.
> > While we could do that, we will not encounter all possible lengths.  In
> the
> > cover letter, I mentioned defining a second function,
> >
> > static inline uint64_t nanbox_h(float16 f)
> > {
> >     return f | MAKE_64BIT_MASK(16, 48);
> > }
> >
> > Having two separate functions will, I believe, be easier to use in
> practice.
> >
> Get  it. Thanks.
>
> Zhiwei
> >
> > r~
>
>
>
That is what has been implemented in spike.  It fills up the Nan-Box when
value is stored back internal structure and
unbox the value with difference floating type (half/single/double/quad).

By the way,  I prefer to keeping the suffix to tell different floating
type rather than pass arbitrary
since each floating type belong to each extension.

Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com>

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by LIU Zhiwei 5 years, 6 months ago


On 2020/8/6 14:09, Chih-Min Chao wrote:
> On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com 
> <mailto:zhiwei_liu@c-sky.com>> wrote:
>
>
>
>     On 2020/7/24 11:55, Richard Henderson wrote:
>     > On 7/23/20 7:35 PM, LIU Zhiwei wrote:
>     >>
>     >> On 2020/7/24 8:28, Richard Henderson wrote:
>     >>> Make sure that all results from single-precision scalar helpers
>     >>> are properly nan-boxed to 64-bits.
>     >>>
>     >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org
>     <mailto:richard.henderson@linaro.org>>
>     >>> ---
>     >>>    target/riscv/internals.h  |  5 +++++
>     >>>    target/riscv/fpu_helper.c | 42
>     +++++++++++++++++++++------------------
>     >>>    2 files changed, 28 insertions(+), 19 deletions(-)
>     >>>
>     >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>     >>> index 37d33820ad..9f4ba7d617 100644
>     >>> --- a/target/riscv/internals.h
>     >>> +++ b/target/riscv/internals.h
>     >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
>     >>>    #define SEW32 2
>     >>>    #define SEW64 3
>     >>>    +static inline uint64_t nanbox_s(float32 f)
>     >>> +{
>     >>> +    return f | MAKE_64BIT_MASK(32, 32);
>     >>> +}
>     >>> +
>     >> If define it here,  we can also define a more general  function
>     with flen.
>     >>
>     >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)
>     >> +{
>     >> +    return f | MAKE_64BIT_MASK(flen, 64 - flen);
>     >> +}
>     >> +
>     >>
>     >> So we can reuse it in fp16 or bf16 scalar instruction and in
>     vector instructions.
>     > While we could do that, we will not encounter all possible
>     lengths.  In the
>     > cover letter, I mentioned defining a second function,
>     >
>     > static inline uint64_t nanbox_h(float16 f)
>     > {
>     >     return f | MAKE_64BIT_MASK(16, 48);
>     > }
>     >
>     > Having two separate functions will, I believe, be easier to use
>     in practice.
>     >
>     Get  it. Thanks.
>
>     Zhiwei
>     >
>     > r~
>
>
>
> That is what has been implemented in spike.  It fills up the Nan-Box 
> when value is stored back internal structure and
> unbox the value with difference floating type (half/single/double/quad).
Hi Chih-Min,

Has half-precision been a part of RVV? Or do you know the ISA 
abbreviation of half-precision?

Thanks very much.

Best Regards,
Zhiwei
>
> By the way,  I prefer to keeping the suffix to tell different floating 
> type rather than pass arbitrary
> since each floating type belong to each extension.
>
> Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com 
> <mailto:chihmin.chao@sifive.com>>

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by Chih-Min Chao 5 years, 6 months ago

On Thu, Aug 6, 2020 at 3:05 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:

>
>
> On 2020/8/6 14:09, Chih-Min Chao wrote:
>
> On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:
>
>>
>>
>> On 2020/7/24 11:55, Richard Henderson wrote:
>> > On 7/23/20 7:35 PM, LIU Zhiwei wrote:
>> >>
>> >> On 2020/7/24 8:28, Richard Henderson wrote:
>> >>> Make sure that all results from single-precision scalar helpers
>> >>> are properly nan-boxed to 64-bits.
>> >>>
>> >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> >>> ---
>> >>>    target/riscv/internals.h  |  5 +++++
>> >>>    target/riscv/fpu_helper.c | 42
>> +++++++++++++++++++++------------------
>> >>>    2 files changed, 28 insertions(+), 19 deletions(-)
>> >>>
>> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>> >>> index 37d33820ad..9f4ba7d617 100644
>> >>> --- a/target/riscv/internals.h
>> >>> +++ b/target/riscv/internals.h
>> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
>> >>>    #define SEW32 2
>> >>>    #define SEW64 3
>> >>>    +static inline uint64_t nanbox_s(float32 f)
>> >>> +{
>> >>> +    return f | MAKE_64BIT_MASK(32, 32);
>> >>> +}
>> >>> +
>> >> If define it here,  we can also define a more general  function with
>> flen.
>> >>
>> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)
>> >> +{
>> >> +    return f | MAKE_64BIT_MASK(flen, 64 - flen);
>> >> +}
>> >> +
>> >>
>> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector
>> instructions.
>> > While we could do that, we will not encounter all possible lengths.  In
>> the
>> > cover letter, I mentioned defining a second function,
>> >
>> > static inline uint64_t nanbox_h(float16 f)
>> > {
>> >     return f | MAKE_64BIT_MASK(16, 48);
>> > }
>> >
>> > Having two separate functions will, I believe, be easier to use in
>> practice.
>> >
>> Get  it. Thanks.
>>
>> Zhiwei
>> >
>> > r~
>>
>>
>>
> That is what has been implemented in spike.  It fills up the Nan-Box when
> value is stored back internal structure and
> unbox the value with difference floating type (half/single/double/quad).
>
> Hi Chih-Min,
>
> Has half-precision been a part of RVV? Or do you know the ISA abbreviation
> of half-precision?
>
> Thanks very much.
>
> Best Regards,
> Zhiwei
>
>
> By the way,  I prefer to keeping the suffix to tell different floating
> type rather than pass arbitrary
> since each floating type belong to each extension.
>
> Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com>
>
>
Hi  ZhiWei,

It is still under branch https://github.com/riscv/riscv-isa-manual/tree/zfh and
I am not sure about the working group progress.
I have an implementation based on this draft and will send it as RFC patch
next week.

Thanks
Chih-Min

Re: [PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers

Posted by LIU Zhiwei 5 years, 6 months ago


On 2020/8/6 16:42, Chih-Min Chao wrote:
>
>
>
> On Thu, Aug 6, 2020 at 3:05 PM LIU Zhiwei <zhiwei_liu@c-sky.com 
> <mailto:zhiwei_liu@c-sky.com>> wrote:
>
>
>
>     On 2020/8/6 14:09, Chih-Min Chao wrote:
>>     On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com
>>     <mailto:zhiwei_liu@c-sky.com>> wrote:
>>
>>
>>
>>         On 2020/7/24 11:55, Richard Henderson wrote:
>>         > On 7/23/20 7:35 PM, LIU Zhiwei wrote:
>>         >>
>>         >> On 2020/7/24 8:28, Richard Henderson wrote:
>>         >>> Make sure that all results from single-precision scalar
>>         helpers
>>         >>> are properly nan-boxed to 64-bits.
>>         >>>
>>         >>> Signed-off-by: Richard Henderson
>>         <richard.henderson@linaro.org
>>         <mailto:richard.henderson@linaro.org>>
>>         >>> ---
>>         >>>    target/riscv/internals.h  |  5 +++++
>>         >>>    target/riscv/fpu_helper.c | 42
>>         +++++++++++++++++++++------------------
>>         >>>    2 files changed, 28 insertions(+), 19 deletions(-)
>>         >>>
>>         >>> diff --git a/target/riscv/internals.h
>>         b/target/riscv/internals.h
>>         >>> index 37d33820ad..9f4ba7d617 100644
>>         >>> --- a/target/riscv/internals.h
>>         >>> +++ b/target/riscv/internals.h
>>         >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);
>>         >>>    #define SEW32 2
>>         >>>    #define SEW64 3
>>         >>>    +static inline uint64_t nanbox_s(float32 f)
>>         >>> +{
>>         >>> +    return f | MAKE_64BIT_MASK(32, 32);
>>         >>> +}
>>         >>> +
>>         >> If define it here,  we can also define a more general 
>>         function with flen.
>>         >>
>>         >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)
>>         >> +{
>>         >> +    return f | MAKE_64BIT_MASK(flen, 64 - flen);
>>         >> +}
>>         >> +
>>         >>
>>         >> So we can reuse it in fp16 or bf16 scalar instruction and
>>         in vector instructions.
>>         > While we could do that, we will not encounter all possible
>>         lengths.  In the
>>         > cover letter, I mentioned defining a second function,
>>         >
>>         > static inline uint64_t nanbox_h(float16 f)
>>         > {
>>         >     return f | MAKE_64BIT_MASK(16, 48);
>>         > }
>>         >
>>         > Having two separate functions will, I believe, be easier to
>>         use in practice.
>>         >
>>         Get  it. Thanks.
>>
>>         Zhiwei
>>         >
>>         > r~
>>
>>
>>
>>     That is what has been implemented in spike.  It fills up the
>>     Nan-Box when value is stored back internal structure and
>>     unbox the value with difference floating type
>>     (half/single/double/quad).
>     Hi Chih-Min,
>
>     Has half-precision been a part of RVV? Or do you know the ISA
>     abbreviation of half-precision?
>
>     Thanks very much.
>
>     Best Regards,
>     Zhiwei
>>
>>     By the way,  I prefer to keeping the suffix to tell
>>     different floating type rather than pass arbitrary
>>     since each floating type belong to each extension.
>>
>>     Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com
>>     <mailto:chihmin.chao@sifive.com>>
>
>
> Hi  ZhiWei,
>
> It is still under branch 
> https://github.com/riscv/riscv-isa-manual/tree/zfh and I am not sure 
> about the working group progress.
> I have an implementation based on this draft and will send it as RFC 
> patch next week.
Hi Chih-Min,

Thanks for your information.

As Krste said once,  as we don't have RV16, FP16 separated won't make 
sense.  Obviously, it has changed.:-P

I also have implemented a version of FP16 ,“obvious set including 
existing FP instructions with format field set to "half" (fmt=10)“

If you want to send the patch, I will not send it again.:-)


Zhiwei
>
> Thanks
> Chih-Min

[PATCH v2 1/7] target/riscv: Generate nanboxed results from fp helpers
[PATCH v2 2/7] target/riscv: Generalize gen_nanbox_fpr to gen_nanbox_s
[PATCH v2 3/7] target/riscv: Generate nanboxed results from trans_rvf.inc.c
[PATCH v2 4/7] target/riscv: Check nanboxed inputs to fp helpers
[PATCH v2 5/7] target/riscv: Check nanboxed inputs in trans_rvf.inc.c
[PATCH v2 6/7] target/riscv: Clean up fmv.w.x
[PATCH v2 7/7] target/riscv: check before allocating TCG temps