SetExclusiveMonitors in the pseudocode operates on the address + width,
and says nothing about the manner of the load. Therefore
ldxp w0, w1, [x2]
vs
ldxr x0, [x2]
must record the same metadata so that either may pair with
stxp w3, w0, w1, [x2]
vs
stxr w3, x0, [x2]
Fix this by ignoring cpu_exclusive_high except for 64-bit LDXP/STXP.
Also note that we were not providing the required single-copy atomic
semantics for 32-bit LDXP. This is trivially fixed alongside the
cpu_exclusive_val change.
At the same time, exclusive loads require the same alignment as
exclusive stores. For 64-bit LDXP, this means adding MO_ALIGN_16;
for the others adding MO_ALIGN.
Reported-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 65 +++++++++++++++++++++++++++++-----------------
1 file changed, 41 insertions(+), 24 deletions(-)
---
I have not yet constructed test cases for all of the combinations
listed above. I wanted to put this into your hands so that you could
test against your existing code using LDXP/STXP.
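As a starting point, something along these lines would exercise the
ldxr-then-stxp pairing (an untested sketch, not part of the patch; it
assumes a little-endian host and that nothing clears the monitor between
the two instructions, so a spurious stxp failure is possible on real
hardware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t mem __attribute__((aligned(8))) = 0x1122334455667788ull;
    uint64_t val, hi;
    uint32_t status;

    /* ldxr records a monitor on (addr, 8); the 32-bit stxp covers the
       same address + width, so with this patch it should succeed.  */
    asm volatile(
        "ldxr %[val], [%[addr]]\n\t"
        "lsr  %[hi], %[val], #32\n\t"
        "stxp %w[st], %w[val], %w[hi], [%[addr]]"
        : [st] "=&r" (status), [val] "=&r" (val), [hi] "=&r" (hi)
        : [addr] "r" (&mem)
        : "memory");

    printf("stxp after ldxr: %s\n", status == 0 ? "stored" : "failed");
    return status;
}

The same shape, with the load and store variants swapped, would cover
the other combinations listed above.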
r~
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 58ed4c6d05..f3643ac8dc 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1853,29 +1853,45 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
TCGv_i64 addr, int size, bool is_pair)
{
- TCGv_i64 tmp = tcg_temp_new_i64();
- TCGMemOp memop = s->be_data + size;
+ int idx = get_mem_index(s);
+ TCGMemOp memop = s->be_data;
g_assert(size <= 3);
- tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
-
if (is_pair) {
- TCGv_i64 addr2 = tcg_temp_new_i64();
- TCGv_i64 hitmp = tcg_temp_new_i64();
-
g_assert(size >= 2);
- tcg_gen_addi_i64(addr2, addr, 1 << size);
- tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
- tcg_temp_free_i64(addr2);
- tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
- tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
- tcg_temp_free_i64(hitmp);
- }
+ if (size == 2) {
+ /* The pair must be single-copy atomic for the doubleword. */
+ memop |= MO_64 | MO_ALIGN;
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
+ tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
+ } else {
+ tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
+ tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
+ }
+ } else {
+ /* The pair must be single-copy atomic for *each* doubleword,
+ but not the entire quadword. It must, however, be aligned. */
+ TCGv_i64 addr2;
- tcg_gen_mov_i64(cpu_exclusive_val, tmp);
- tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
+ memop |= MO_64;
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
+ memop | MO_ALIGN_16);
- tcg_temp_free_i64(tmp);
+ addr2 = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr2, addr, 8);
+ tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
+ tcg_temp_free_i64(addr2);
+
+ tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
+ tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
+ }
+ } else {
+ memop |= size | MO_ALIGN;
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
+ }
tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
@@ -1908,14 +1924,15 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
tmp = tcg_temp_new_i64();
if (is_pair) {
if (size == 2) {
- TCGv_i64 val = tcg_temp_new_i64();
- tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
- tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
- tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
+ } else {
+ tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
+ }
+ tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, tmp,
get_mem_index(s),
- size | MO_ALIGN | s->be_data);
- tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
- tcg_temp_free_i64(val);
+ MO_64 | MO_ALIGN | s->be_data);
+ tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
} else if (s->be_data == MO_LE) {
gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
cpu_reg(s, rt2));
--
2.13.4
On Sat, Aug 12, 2017 at 8:41 AM, Richard Henderson <rth7680@gmail.com> wrote:
> [...]
>
> I have not yet constructed test cases for all of the combinations
> listed above. I wanted to put this into your hands so that you could
> test against your existing code using LDXP/STXP.

I can test it on Monday when I'm back in the office. I'll let you know
what I find.

> +            tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, tmp,
>                                         get_mem_index(s),
> -                                       size | MO_ALIGN | s->be_data);
> -            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
> -            tcg_temp_free_i64(val);
> +                                       MO_64 | MO_ALIGN | s->be_data);
> +            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);

Now we aren't ever checking cpu_exclusive_high. Is it even worth having?

Otherwise the patch looks good, let me test it next week and I'll get
back to you.

Thanks,
Alistair
On 08/12/2017 09:29 AM, Alistair Francis wrote:
> Now we aren't ever checking cpu_exclusive_high. Is it even worth having?

We are checking cpu_exclusive_high for 64-bit STXP.
See paired_cmpxchg64_{l,b}e in target/arm/helper-a64.c.

r~
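The check those helpers perform amounts to roughly the following (a
simplified sketch only, not the actual code in helper-a64.c, which also
has to handle the parallel-TCG and endian-swapped cases):

/* Sketch of the 64-bit STXP success test: compare both halves of the
   previously loaded pair against {exclusive_val, exclusive_high} and
   store only on a match.  Returns the STXP status (0 = success). */
static uint64_t paired_cmpxchg64_sketch(CPUARMState *env, uint64_t addr,
                                        uint64_t new_lo, uint64_t new_hi)
{
    uint64_t o0 = cpu_ldq_data(env, addr);
    uint64_t o1 = cpu_ldq_data(env, addr + 8);
    bool success = (o0 == env->exclusive_val && o1 == env->exclusive_high);

    if (success) {
        cpu_stq_data(env, addr, new_lo);
        cpu_stq_data(env, addr + 8, new_hi);
    }
    return !success;
}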
On Sat, Aug 12, 2017 at 8:41 AM, Richard Henderson <rth7680@gmail.com> wrote:
> [...]
>
> Reported-by: Alistair Francis <alistair.francis@xilinx.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

This passes the same tests that my patch series passes.

Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Tested-by: Alistair Francis <alistair.francis@xilinx.com>

Thanks,
Alistair