SetExclusiveMonitors in the pseudocode operates on the address + width,
and says nothing about the manner of the load. Therefore
ldxp w0, w1, [x2]
vs
ldxr x0, [x2]
must record the same metadata so that either may pair with
stxp w3, w0, w1, [x2]
vs
stxr w3, x0, [x2]
Fix this by ignoring cpu_exclusive_high except for 64-bit LDXP/STXP.
Also note that we were not providing the required single-copy atomic
semantics for 32-bit LDXP. This is trivially fixed alongside the
cpu_exclusive_val change.
At the same time, exclusive loads require the same alignment as
exclusive stores. For 64-bit LDXP, this means adding MO_ALIGN_16;
for the others adding MO_ALIGN.
Reported-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 65 +++++++++++++++++++++++++++++-----------------
1 file changed, 41 insertions(+), 24 deletions(-)
---
I have not yet constructed test cases for all of the combinations
listed above. I wanted to put this into your hands so that you could
test against your existing code using LDXP/STXP.
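As a starting point, something along these lines would exercise the
ldxr-then-stxp pairing (an untested sketch, not part of the patch; it
assumes a little-endian host and that nothing clears the monitor between
the two instructions, so a spurious stxp failure is possible on real
hardware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t mem __attribute__((aligned(8))) = 0x1122334455667788ull;
    uint64_t val, hi;
    uint32_t status;

    /* ldxr records a monitor on (addr, 8); the 32-bit stxp covers the
       same address + width, so with this patch it should succeed.  */
    asm volatile(
        "ldxr %[val], [%[addr]]\n\t"
        "lsr  %[hi], %[val], #32\n\t"
        "stxp %w[st], %w[val], %w[hi], [%[addr]]"
        : [st] "=&r" (status), [val] "=&r" (val), [hi] "=&r" (hi)
        : [addr] "r" (&mem)
        : "memory");

    printf("stxp after ldxr: %s\n", status == 0 ? "stored" : "failed");
    return status;
}

The same shape, with the load and store variants swapped, would cover
the other combinations listed above.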
r~
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 58ed4c6d05..f3643ac8dc 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1853,29 +1853,45 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
TCGv_i64 addr, int size, bool is_pair)
{
- TCGv_i64 tmp = tcg_temp_new_i64();
- TCGMemOp memop = s->be_data + size;
+ int idx = get_mem_index(s);
+ TCGMemOp memop = s->be_data;
g_assert(size <= 3);
- tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
-
if (is_pair) {
- TCGv_i64 addr2 = tcg_temp_new_i64();
- TCGv_i64 hitmp = tcg_temp_new_i64();
-
g_assert(size >= 2);
- tcg_gen_addi_i64(addr2, addr, 1 << size);
- tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
- tcg_temp_free_i64(addr2);
- tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
- tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
- tcg_temp_free_i64(hitmp);
- }
+ if (size == 2) {
+ /* The pair must be single-copy atomic for the doubleword. */
+ memop |= MO_64 | MO_ALIGN;
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
+ tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
+ } else {
+ tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
+ tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
+ }
+ } else {
+ /* The pair must be single-copy atomic for *each* doubleword,
+ but not the entire quadword. It must, however, be aligned. */
+ TCGv_i64 addr2;
- tcg_gen_mov_i64(cpu_exclusive_val, tmp);
- tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
+ memop |= MO_64;
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
+ memop | MO_ALIGN_16);
- tcg_temp_free_i64(tmp);
+ addr2 = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr2, addr, 8);
+ tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
+ tcg_temp_free_i64(addr2);
+
+ tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
+ tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
+ }
+ } else {
+ memop |= size | MO_ALIGN;
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
+ }
tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
@@ -1908,14 +1924,15 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tmp = tcg_temp_new_i64();
if (is_pair) {
if (size == 2) {
- TCGv_i64 val = tcg_temp_new_i64();
- tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
- tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
- tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
+ } else {
+ tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
+ }
+ tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, tmp,
get_mem_index(s),
- size | MO_ALIGN | s->be_data);
- tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
- tcg_temp_free_i64(val);
+ MO_64 | MO_ALIGN | s->be_data);
+ tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
} else if (s->be_data == MO_LE) {
gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
cpu_reg(s, rt2));
--
2.13.4
On Sat, Aug 12, 2017 at 8:41 AM, Richard Henderson <rth7680@gmail.com> wrote:
> [...]
>
> I have not yet constructed test cases for all of the combinations
> listed above. I wanted to put this into your hands so that you could
> test against your existing code using LDXP/STXP.

I can test it on Monday when I'm back in the office. I'll let you know
what I find.

> +            tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, tmp,
>                                         get_mem_index(s),
> -                                       size | MO_ALIGN | s->be_data);
> -            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
> -            tcg_temp_free_i64(val);
> +                                       MO_64 | MO_ALIGN | s->be_data);
> +            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);

Now we aren't ever checking cpu_exclusive_high. Is it even worth having?

Otherwise the patch looks good, let me test it next week and I'll get
back to you.

Thanks,
Alistair
On 08/12/2017 09:29 AM, Alistair Francis wrote:
> Now we aren't ever checking cpu_exclusive_high. Is it even worth having?

We are checking cpu_exclusive_high for 64-bit STXP.
See paired_cmpxchg64_{l,b}e in target/arm/helper-a64.c.

r~
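The check those helpers perform amounts to roughly the following (a
simplified sketch only, not the actual code in helper-a64.c, which also
has to handle the parallel-TCG and endian-swapped cases):

/* Sketch of the 64-bit STXP success test: compare both halves of the
   previously loaded pair against {exclusive_val, exclusive_high} and
   store only on a match.  Returns the STXP status (0 = success). */
static uint64_t paired_cmpxchg64_sketch(CPUARMState *env, uint64_t addr,
                                        uint64_t new_lo, uint64_t new_hi)
{
    uint64_t o0 = cpu_ldq_data(env, addr);
    uint64_t o1 = cpu_ldq_data(env, addr + 8);
    bool success = (o0 == env->exclusive_val && o1 == env->exclusive_high);

    if (success) {
        cpu_stq_data(env, addr, new_lo);
        cpu_stq_data(env, addr + 8, new_hi);
    }
    return !success;
}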
On Sat, Aug 12, 2017 at 8:41 AM, Richard Henderson <rth7680@gmail.com> wrote:
> [...]
>
> Reported-by: Alistair Francis <alistair.francis@xilinx.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

This passes the same tests that my patch series passes.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Tested-by: Alistair Francis <alistair.francis@xilinx.com>

Thanks,
Alistair