[v1] target/arm: Implement v8.1-Atomics

[Qemu-devel] [PATCH 7/9] target/arm: Fill in disas_ldst_atomic

Posted by Richard Henderson 7 years, 9 months ago

This implements all of the v8.1-Atomics instructions except
for compare-and-swap, which is decoded elsewhere.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0706c8c394..6ed7627d79 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
 
 /* Note that the gvec expanders operate on offsets + sizes.  */
 typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     int rn = extract32(insn, 5, 5);
     int o3_opc = extract32(insn, 12, 4);
     int feature = ARM_FEATURE_V8_ATOMICS;
+    TCGv_i64 tcg_rn, tcg_rs;
+    AtomicThreeOpFn *fn;
 
     if (is_vector) {
         unallocated_encoding(s);
@@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     }
     switch (o3_opc) {
     case 000: /* LDADD */
+        fn = tcg_gen_atomic_fetch_add_i64;
+        break;
     case 001: /* LDCLR */
+        fn = tcg_gen_atomic_fetch_and_i64;
+        break;
     case 002: /* LDEOR */
+        fn = tcg_gen_atomic_fetch_xor_i64;
+        break;
     case 003: /* LDSET */
+        fn = tcg_gen_atomic_fetch_or_i64;
+        break;
     case 004: /* LDSMAX */
+        fn = tcg_gen_atomic_fetch_smax_i64;
+        break;
     case 005: /* LDSMIN */
+        fn = tcg_gen_atomic_fetch_smin_i64;
+        break;
     case 006: /* LDUMAX */
+        fn = tcg_gen_atomic_fetch_umax_i64;
+        break;
     case 007: /* LDUMIN */
+        fn = tcg_gen_atomic_fetch_umin_i64;
+        break;
     case 010: /* SWP */
+        fn = tcg_gen_atomic_xchg_i64;
+        break;
     default:
         unallocated_encoding(s);
         return;
@@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
         return;
     }
 
-    (void)rs;
-    (void)rn;
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_rn = cpu_reg_sp(s, rn);
+    tcg_rs = read_cpu_reg(s, rs, false);
+
+    if (o3_opc == 1) { /* LDCLR */
+        tcg_gen_not_i64(tcg_rs, tcg_rs);
+    }
+
+    /* The tcg atomic primitives are all full barriers.  Therefore we
+     * can ignore the Acquire and Release bits of this instruction.
+     */
+    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+       s->be_data | size | MO_ALIGN);
 }
 
 /* Load/store register (all forms) */
-- 
2.14.3

Re: [Qemu-devel] [Qemu-arm] [PATCH 7/9] target/arm: Fill in disas_ldst_atomic

Posted by Peter Maydell 7 years, 9 months ago

On 27 April 2018 at 01:26, Richard Henderson
<richard.henderson@linaro.org> wrote:
> This implements all of the v8.1-Atomics instructions except
> for compare-and-swap, which is decoded elsewhere.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
>  1 file changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 0706c8c394..6ed7627d79 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
>  typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
>  typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
>  typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
> +typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
>
>  /* Note that the gvec expanders operate on offsets + sizes.  */
>  typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
> @@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
>      int rn = extract32(insn, 5, 5);
>      int o3_opc = extract32(insn, 12, 4);
>      int feature = ARM_FEATURE_V8_ATOMICS;
> +    TCGv_i64 tcg_rn, tcg_rs;
> +    AtomicThreeOpFn *fn;
>
>      if (is_vector) {
>          unallocated_encoding(s);
> @@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
>      }
>      switch (o3_opc) {
>      case 000: /* LDADD */
> +        fn = tcg_gen_atomic_fetch_add_i64;
> +        break;
>      case 001: /* LDCLR */
> +        fn = tcg_gen_atomic_fetch_and_i64;
> +        break;
>      case 002: /* LDEOR */
> +        fn = tcg_gen_atomic_fetch_xor_i64;
> +        break;
>      case 003: /* LDSET */
> +        fn = tcg_gen_atomic_fetch_or_i64;
> +        break;
>      case 004: /* LDSMAX */
> +        fn = tcg_gen_atomic_fetch_smax_i64;
> +        break;
>      case 005: /* LDSMIN */
> +        fn = tcg_gen_atomic_fetch_smin_i64;
> +        break;
>      case 006: /* LDUMAX */
> +        fn = tcg_gen_atomic_fetch_umax_i64;
> +        break;
>      case 007: /* LDUMIN */
> +        fn = tcg_gen_atomic_fetch_umin_i64;
> +        break;
>      case 010: /* SWP */
> +        fn = tcg_gen_atomic_xchg_i64;
> +        break;
>      default:
>          unallocated_encoding(s);
>          return;
> @@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
>          return;
>      }
>
> -    (void)rs;
> -    (void)rn;
> +    if (rn == 31) {
> +        gen_check_sp_alignment(s);
> +    }
> +    tcg_rn = cpu_reg_sp(s, rn);
> +    tcg_rs = read_cpu_reg(s, rs, false);
> +
> +    if (o3_opc == 1) { /* LDCLR */
> +        tcg_gen_not_i64(tcg_rs, tcg_rs);
> +    }
> +
> +    /* The tcg atomic primitives are all full barriers.  Therefore we
> +     * can ignore the Acquire and Release bits of this instruction.
> +     */
> +    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
> +       s->be_data | size | MO_ALIGN);

Does this definitely do the arithmetic operation at the datatype
size and not the _i64 size? (It makes a difference, for example,
with LDEORB if Rs has high bits set: the result should always
have bits [31:8] zero.)

Still missing LDAPR*, but otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

>  }
>
>  /* Load/store register (all forms) */
> --
> 2.14.3

thanks
-- PMM

Re: [Qemu-devel] [Qemu-arm] [PATCH 7/9] target/arm: Fill in disas_ldst_atomic

Posted by Richard Henderson 7 years, 9 months ago

On 05/03/2018 07:14 AM, Peter Maydell wrote:
>> +    /* The tcg atomic primitives are all full barriers.  Therefore we
>> +     * can ignore the Acquire and Release bits of this instruction.
>> +     */
>> +    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
>> +       s->be_data | size | MO_ALIGN);
> 
> Does this definitely do the arithmetic operation at the datatype
> size and not the _i64 size? (It makes a difference, for example,
> with LDEORB if Rs has high bits set: the result should always
> have bits [31:8] zero.)

Yes.  Also recall that this returns the original data, not the result of the
expression.


r~