[v4] Add ARMv8.2 half-precision functions

[Qemu-devel] [PATCH v4 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

Posted by Alex Bennée 7 years, 11 months ago

The helpers use the newly refactored muladd support in SoftFloat for
the float16 work.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v3
  - reworked the switch statement to fall through to the unallocated case
  - added is_fp16 bool for fpst
  - fixed up some long lines
v4
  - don't double-check for feature bit
---
 target/arm/translate-a64.c | 82 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 66 insertions(+), 16 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e96e6cdd15..6a264bc134 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11198,6 +11198,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     int rd = extract32(insn, 0, 5);
     bool is_long = false;
     bool is_fp = false;
+    bool is_fp16 = false;
     int index;
     TCGv_ptr fpst;
 
@@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
         /* fall through */
     case 0x9: /* FMUL, FMULX */
-        if (!extract32(size, 1, 1)) {
+        if (size == 1) {
             unallocated_encoding(s);
             return;
         }
@@ -11256,18 +11257,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 
     if (is_fp) {
-        /* low bit of size indicates single/double */
-        size = extract32(size, 0, 1) ? 3 : 2;
-        if (size == 2) {
+        /* convert insn encoded size to TCGMemOp size */
+        switch (size) {
+        case 2: /* single precision */
+            size = MO_32;
             index = h << 1 | l;
-        } else {
+            rm |= (m << 4);
+            break;
+        case 3: /* double precision */
+            size = MO_64;
             if (l || !is_q) {
                 unallocated_encoding(s);
                 return;
             }
             index = h;
+            rm |= (m << 4);
+            break;
+        case 0: /* half precision */
+            size = MO_16;
+            index = h << 2 | l << 1 | m;
+            is_fp16 = true;
+            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                break;
+            }
+            /* fallthru */
+        default: /* unallocated */
+            unallocated_encoding(s);
+            return;
         }
-        rm |= (m << 4);
     } else {
         switch (size) {
         case 1:
@@ -11288,7 +11305,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 
     if (is_fp) {
-        fpst = get_fpstatus_ptr(false);
+        fpst = get_fpstatus_ptr(is_fp16);
     } else {
         fpst = NULL;
     }
@@ -11390,18 +11407,51 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
                 break;
             }
             case 0x5: /* FMLS */
-                /* As usual for ARM, separate negation for fused multiply-add */
-                gen_helper_vfp_negs(tcg_op, tcg_op);
-                /* fall through */
             case 0x1: /* FMLA */
-                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
-                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                read_vec_element_i32(s, tcg_res, rd, pass,
+                                     is_scalar ? size : MO_32);
+                switch (size) {
+                case 1:
+                    if (opcode == 0x5) {
+                        /* As usual for ARM, separate negation for fused
+                         * multiply-add */
+                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
+                    }
+                    gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
+                                               tcg_res, fpst);
+                    break;
+                case 2:
+                    if (opcode == 0x5) {
+                        /* As usual for ARM, separate negation for
+                         * fused multiply-add */
+                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
+                    }
+                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
+                                           tcg_res, fpst);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
                 break;
             case 0x9: /* FMUL, FMULX */
-                if (u) {
-                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
-                } else {
-                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+                switch (size) {
+                case 1:
+                    if (u) {
+                        gen_helper_advsimd_mulxh(tcg_res, tcg_op, tcg_idx,
+                                                 fpst);
+                    } else {
+                        g_assert_not_reached();
+                    }
+                    break;
+                case 2:
+                    if (u) {
+                        gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+                    } else {
+                        gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+                    }
+                    break;
+                default:
+                    g_assert_not_reached();
                 }
                 break;
             case 0xc: /* SQDMULH */
-- 
2.15.1

Re: [Qemu-devel] [PATCH v4 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

Posted by Richard Henderson 7 years, 11 months ago

On 02/27/2018 06:38 AM, Alex Bennée wrote:
> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>          }
>          /* fall through */
>      case 0x9: /* FMUL, FMULX */
> -        if (!extract32(size, 1, 1)) {
> +        if (size == 1) {
>              unallocated_encoding(s);
>              return;
>          }

This is still redundant, since size == 1 is handled...

> @@ -11256,18 +11257,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>      }
>  
>      if (is_fp) {
> -        /* low bit of size indicates single/double */
> -        size = extract32(size, 0, 1) ? 3 : 2;
> -        if (size == 2) {
> +        /* convert insn encoded size to TCGMemOp size */
> +        switch (size) {
> +        case 2: /* single precision */
> +            size = MO_32;
>              index = h << 1 | l;
> -        } else {
> +            rm |= (m << 4);
> +            break;
> +        case 3: /* double precision */
> +            size = MO_64;
>              if (l || !is_q) {
>                  unallocated_encoding(s);
>                  return;
>              }
>              index = h;
> +            rm |= (m << 4);
> +            break;
> +        case 0: /* half precision */
> +            size = MO_16;
> +            index = h << 2 | l << 1 | m;
> +            is_fp16 = true;
> +            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
> +                break;
> +            }
> +            /* fallthru */
> +        default: /* unallocated */
> +            unallocated_encoding(s);
> +            return;
>          }

... here.  But it's not wrong and I can clean this up along with the additional
changes I need to make to this function for fcmla support.  So,

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

Re: [Qemu-devel] [PATCH v4 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

Posted by Alex Bennée 7 years, 11 months ago

Richard Henderson <richard.henderson@linaro.org> writes:

> On 02/27/2018 06:38 AM, Alex Bennée wrote:
>> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>>          }
>>          /* fall through */
>>      case 0x9: /* FMUL, FMULX */
>> -        if (!extract32(size, 1, 1)) {
>> +        if (size == 1) {
>>              unallocated_encoding(s);
>>              return;
>>          }
>
> This is still redundant, since size == 1 is handled...

doh!

will fix.

>
>> @@ -11256,18 +11257,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>>      }
>>
>>      if (is_fp) {
>> -        /* low bit of size indicates single/double */
>> -        size = extract32(size, 0, 1) ? 3 : 2;
>> -        if (size == 2) {
>> +        /* convert insn encoded size to TCGMemOp size */
>> +        switch (size) {
>> +        case 2: /* single precision */
>> +            size = MO_32;
>>              index = h << 1 | l;
>> -        } else {
>> +            rm |= (m << 4);
>> +            break;
>> +        case 3: /* double precision */
>> +            size = MO_64;
>>              if (l || !is_q) {
>>                  unallocated_encoding(s);
>>                  return;
>>              }
>>              index = h;
>> +            rm |= (m << 4);
>> +            break;
>> +        case 0: /* half precision */
>> +            size = MO_16;
>> +            index = h << 2 | l << 1 | m;
>> +            is_fp16 = true;
>> +            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
>> +                break;
>> +            }
>> +            /* fallthru */
>> +        default: /* unallocated */
>> +            unallocated_encoding(s);
>> +            return;
>>          }
>
> ... here.  But it's not wrong and I can clean this up along with the additional
> changes I need to make to this function for fcmla support.  So,
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
>
> r~


--
Alex Bennée

Re: [Qemu-devel] [PATCH v4 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

Posted by Peter Maydell 7 years, 11 months ago

On 27 February 2018 at 17:52, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Richard Henderson <richard.henderson@linaro.org> writes:
>
>> On 02/27/2018 06:38 AM, Alex Bennée wrote:
>>> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>>>          }
>>>          /* fall through */
>>>      case 0x9: /* FMUL, FMULX */
>>> -        if (!extract32(size, 1, 1)) {
>>> +        if (size == 1) {
>>>              unallocated_encoding(s);
>>>              return;
>>>          }
>>
>> This is still redundant, since size == 1 is handled...
>
> doh!
>
> will fix.

I'd prefer it if you didn't, because I'm in the process of putting
this version of the patchset into target-arm.next...

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

Posted by Alex Bennée 7 years, 11 months ago

Peter Maydell <peter.maydell@linaro.org> writes:

> On 27 February 2018 at 17:52, Alex Bennée <alex.bennee@linaro.org> wrote:
>>
>> Richard Henderson <richard.henderson@linaro.org> writes:
>>
>>> On 02/27/2018 06:38 AM, Alex Bennée wrote:
>>>> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>>>>          }
>>>>          /* fall through */
>>>>      case 0x9: /* FMUL, FMULX */
>>>> -        if (!extract32(size, 1, 1)) {
>>>> +        if (size == 1) {
>>>>              unallocated_encoding(s);
>>>>              return;
>>>>          }
>>>
>>> This is still redundant, since size == 1 is handled...
>>
>> doh!
>>
>> will fix.
>
> I'd prefer it if you didn't, because I'm in the process of putting
> this version of the patchset into target-arm.next...

Fair enough - you've picked up Richard's r-b?

--
Alex Bennée