[PATCH v6 32/36] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG

Richard Henderson posted 36 patches 3 years ago
Maintainers: Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, Riku Voipio <riku.voipio@iki.fi>, Peter Maydell <peter.maydell@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Daniel Henrique Barboza <danielhb413@gmail.com>, "Cédric Le Goater" <clg@kaod.org>, David Gibson <david@gibson.dropbear.id.au>, Greg Kurz <groug@kaod.org>, David Hildenbrand <david@redhat.com>, Ilya Leoshkevich <iii@linux.ibm.com>, Thomas Huth <thuth@redhat.com>, WANG Xuerui <git@xen0n.name>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Aurelien Jarno <aurelien@aurel32.net>, Huacai Chen <chenhuacai@kernel.org>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>, Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <Alistair.Francis@wdc.com>, Stefan Weil <sw@weilnetz.de>
[PATCH v6 32/36] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG
Posted by Richard Henderson 3 years ago
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Cc: David Hildenbrand <david@redhat.com>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
---
 target/s390x/helper.h            |  2 --
 target/s390x/tcg/insn-data.h.inc |  2 +-
 target/s390x/tcg/mem_helper.c    | 52 ------------------------------
 target/s390x/tcg/translate.c     | 55 +++++++++++++++++++-------------
 4 files changed, 33 insertions(+), 78 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index bccd3bfca6..341bc51ec2 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -35,8 +35,6 @@ DEF_HELPER_3(cxgb, i128, env, s64, i32)
 DEF_HELPER_3(celgb, i64, env, i64, i32)
 DEF_HELPER_3(cdlgb, i64, env, i64, i32)
 DEF_HELPER_3(cxlgb, i128, env, i64, i32)
-DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
-DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
 DEF_HELPER_4(csst, i32, env, i32, i64, i64)
 DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index 893f4b48db..9d2d35f084 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -276,7 +276,7 @@
 /* COMPARE DOUBLE AND SWAP */
     D(0xbb00, CDS,     RS_a,  Z,   r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
     D(0xeb31, CDSY,    RSY_a, LD,  r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
-    C(0xeb3e, CDSG,    RSY_a, Z,   0, 0, 0, 0, cdsg, 0)
+    C(0xeb3e, CDSG,    RSY_a, Z,   la2, r3_D64, 0, r1_D64, cdsg, 0)
 /* COMPARE AND SWAP AND STORE */
     C(0xc802, CSST,    SSF,   CASS, la1, a2, 0, 0, csst, 0)
 
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 49969abda7..d6725fd18c 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -1771,58 +1771,6 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
     return cc;
 }
 
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
-                  uint32_t r1, uint32_t r3)
-{
-    uintptr_t ra = GETPC();
-    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
-    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
-    Int128 oldv;
-    uint64_t oldh, oldl;
-    bool fail;
-
-    check_alignment(env, addr, 16, ra);
-
-    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
-    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
-
-    oldv = int128_make128(oldl, oldh);
-    fail = !int128_eq(oldv, cmpv);
-    if (fail) {
-        newv = oldv;
-    }
-
-    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
-    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
-
-    env->cc_op = fail;
-    env->regs[r1] = int128_gethi(oldv);
-    env->regs[r1 + 1] = int128_getlo(oldv);
-}
-
-void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
-                           uint32_t r1, uint32_t r3)
-{
-    uintptr_t ra = GETPC();
-    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
-    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
-    int mem_idx;
-    MemOpIdx oi;
-    Int128 oldv;
-    bool fail;
-
-    assert(HAVE_CMPXCHG128);
-
-    mem_idx = cpu_mmu_index(env, false);
-    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
-    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-    fail = !int128_eq(oldv, cmpv);
-
-    env->cc_op = fail;
-    env->regs[r1] = int128_gethi(oldv);
-    env->regs[r1 + 1] = int128_getlo(oldv);
-}
-
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                         uint64_t a2, bool parallel)
 {
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index d422a1e62b..9ea28b3e52 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2224,31 +2224,25 @@ static DisasJumpType op_cs(DisasContext *s, DisasOps *o)
 static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
 {
     int r1 = get_field(s, r1);
-    int r3 = get_field(s, r3);
-    int d2 = get_field(s, d2);
-    int b2 = get_field(s, b2);
-    DisasJumpType ret = DISAS_NEXT;
-    TCGv_i64 addr;
-    TCGv_i32 t_r1, t_r3;
 
-    /* Note that R1:R1+1 = expected value and R3:R3+1 = new value.  */
-    addr = get_address(s, 0, b2, d2);
-    t_r1 = tcg_const_i32(r1);
-    t_r3 = tcg_const_i32(r3);
-    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
-        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
-    } else if (HAVE_CMPXCHG128) {
-        gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
-    } else {
-        gen_helper_exit_atomic(cpu_env);
-        ret = DISAS_NORETURN;
-    }
-    tcg_temp_free_i64(addr);
-    tcg_temp_free_i32(t_r1);
-    tcg_temp_free_i32(t_r3);
+    o->out_128 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
 
-    set_cc_static(s);
-    return ret;
+    /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value.  */
+    tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128,
+                                get_mem_index(s), MO_BE | MO_128 | MO_ALIGN);
+
+    /*
+     * Extract result into cc_dst:cc_src, compare vs the expected value
+     * in the as yet unmodified input registers, then update CC_OP.
+     */
+    tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128);
+    tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]);
+    tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]);
+    tcg_gen_or_i64(cc_dst, cc_dst, cc_src);
+    set_cc_nz_u64(s, cc_dst);
+
+    return DISAS_NEXT;
 }
 
 static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
@@ -5488,6 +5482,13 @@ static void wout_r1_D32(DisasContext *s, DisasOps *o)
 }
 #define SPEC_wout_r1_D32 SPEC_r1_even
 
+static void wout_r1_D64(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s, r1);
+    tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128);
+}
+#define SPEC_wout_r1_D64 SPEC_r1_even
+
 static void wout_r3_P32(DisasContext *s, DisasOps *o)
 {
     int r3 = get_field(s, r3);
@@ -5935,6 +5936,14 @@ static void in2_r3(DisasContext *s, DisasOps *o)
 }
 #define SPEC_in2_r3 0
 
+static void in2_r3_D64(DisasContext *s, DisasOps *o)
+{
+    int r3 = get_field(s, r3);
+    o->in2_128 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]);
+}
+#define SPEC_in2_r3_D64 SPEC_r3_even
+
 static void in2_r3_sr32(DisasContext *s, DisasOps *o)
 {
     o->in2 = tcg_temp_new_i64();
-- 
2.34.1
Re: [PATCH v6 32/36] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG
Posted by Ilya Leoshkevich 3 years ago
On Mon, 2023-01-30 at 11:48 -1000, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  target/s390x/helper.h            |  2 --
>  target/s390x/tcg/insn-data.h.inc |  2 +-
>  target/s390x/tcg/mem_helper.c    | 52 ------------------------------
>  target/s390x/tcg/translate.c     | 55
> +++++++++++++++++++-------------
>  4 files changed, 33 insertions(+), 78 deletions(-)

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>

I wrote a test for this a while ago [1], but apparently it was lost in
the mail. I will post a rebased version here.

[1] https://lists.gnu.org/archive/html/qemu-devel/2022-11/msg04506.html
[PATCH] tests/tcg/s390x: Add cdsg.c
Posted by Ilya Leoshkevich 3 years ago
Add a simple test to prevent regressions.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 tests/tcg/s390x/Makefile.target |  4 ++
 tests/tcg/s390x/cdsg.c          | 93 +++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 tests/tcg/s390x/cdsg.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..72ad309b273 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,10 @@ TESTS+=noexec
 TESTS+=div
 TESTS+=clst
 TESTS+=long-double
+TESTS+=cdsg
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
 
 Z13_TESTS=vistr
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..800618ff4b4
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,93 @@
+/*
+ * Test CDSG instruction.
+ *
+ * Increment the first half of aligned_quadword by 1, and the second half by 2
+ * from 2 threads. Verify that the result is consistent.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+static const int n_iterations = 1000000;
+
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+                       unsigned long new0, unsigned long new1,
+                       aligned_quadword *mem)
+{
+    register unsigned long r0 asm("r0");
+    register unsigned long r1 asm("r1");
+    register unsigned long r2 asm("r2");
+    register unsigned long r3 asm("r3");
+    int cc;
+
+    r0 = *orig0;
+    r1 = *orig1;
+    r2 = new0;
+    r3 = new1;
+    asm("cdsg %[r0],%[r2],%[db2]\n"
+        "ipm %[cc]"
+        : [r0] "+r" (r0)
+        , [r1] "+r" (r1)
+        , [db2] "+m" (*mem)
+        , [cc] "=r" (cc)
+        : [r2] "r" (r2)
+        , [r3] "r" (r3)
+        : "cc");
+    *orig0 = r0;
+    *orig1 = r1;
+
+    return (cc >> 28) & 3;
+}
+
+void *cdsg_loop(void *arg)
+{
+    unsigned long orig0, orig1, new0, new1;
+    int cc;
+    int i;
+
+    while (!start) {
+    }
+
+    orig0 = val[0];
+    orig1 = val[1];
+    for (i = 0; i < n_iterations;) {
+        new0 = orig0 + 1;
+        new1 = orig1 + 2;
+
+        cc = cdsg(&orig0, &orig1, new0, new1, &val);
+
+        if (cc == 0) {
+            orig0 = new0;
+            orig1 = new1;
+            i++;
+        } else {
+            assert(cc == 1);
+        }
+    }
+
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t thread;
+    int ret;
+
+    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+    assert(ret == 0);
+    start = true;
+    cdsg_loop(NULL);
+    ret = pthread_join(thread, NULL);
+    assert(ret == 0);
+
+    assert(val[0] == n_iterations * 2);
+    assert(val[1] == n_iterations * 4);
+
+    return EXIT_SUCCESS;
+}
-- 
2.39.1
Re: [PATCH] tests/tcg/s390x: Add cdsg.c
Posted by Richard Henderson 3 years ago
On 2/1/23 03:32, Ilya Leoshkevich wrote:
> Add a simple test to prevent regressions.
> 
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>

Thanks for re-posting, and sorry for missing the original.
Added to the patch set.


r~


> ---
>   tests/tcg/s390x/Makefile.target |  4 ++
>   tests/tcg/s390x/cdsg.c          | 93 +++++++++++++++++++++++++++++++++
>   2 files changed, 97 insertions(+)
>   create mode 100644 tests/tcg/s390x/cdsg.c
> 
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index 1d454270c0e..72ad309b273 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -27,6 +27,10 @@ TESTS+=noexec
>   TESTS+=div
>   TESTS+=clst
>   TESTS+=long-double
> +TESTS+=cdsg
> +
> +cdsg: CFLAGS+=-pthread
> +cdsg: LDFLAGS+=-pthread
>   
>   Z13_TESTS=vistr
>   $(Z13_TESTS): CFLAGS+=-march=z13 -O2
> diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
> new file mode 100644
> index 00000000000..800618ff4b4
> --- /dev/null
> +++ b/tests/tcg/s390x/cdsg.c
> @@ -0,0 +1,93 @@
> +/*
> + * Test CDSG instruction.
> + *
> + * Increment the first half of aligned_quadword by 1, and the second half by 2
> + * from 2 threads. Verify that the result is consistent.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +#include <assert.h>
> +#include <pthread.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +
> +static volatile bool start;
> +typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
> +static aligned_quadword val;
> +static const int n_iterations = 1000000;
> +
> +static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
> +                       unsigned long new0, unsigned long new1,
> +                       aligned_quadword *mem)
> +{
> +    register unsigned long r0 asm("r0");
> +    register unsigned long r1 asm("r1");
> +    register unsigned long r2 asm("r2");
> +    register unsigned long r3 asm("r3");
> +    int cc;
> +
> +    r0 = *orig0;
> +    r1 = *orig1;
> +    r2 = new0;
> +    r3 = new1;
> +    asm("cdsg %[r0],%[r2],%[db2]\n"
> +        "ipm %[cc]"
> +        : [r0] "+r" (r0)
> +        , [r1] "+r" (r1)
> +        , [db2] "+m" (*mem)
> +        , [cc] "=r" (cc)
> +        : [r2] "r" (r2)
> +        , [r3] "r" (r3)
> +        : "cc");
> +    *orig0 = r0;
> +    *orig1 = r1;
> +
> +    return (cc >> 28) & 3;
> +}
> +
> +void *cdsg_loop(void *arg)
> +{
> +    unsigned long orig0, orig1, new0, new1;
> +    int cc;
> +    int i;
> +
> +    while (!start) {
> +    }
> +
> +    orig0 = val[0];
> +    orig1 = val[1];
> +    for (i = 0; i < n_iterations;) {
> +        new0 = orig0 + 1;
> +        new1 = orig1 + 2;
> +
> +        cc = cdsg(&orig0, &orig1, new0, new1, &val);
> +
> +        if (cc == 0) {
> +            orig0 = new0;
> +            orig1 = new1;
> +            i++;
> +        } else {
> +            assert(cc == 1);
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +int main(void)
> +{
> +    pthread_t thread;
> +    int ret;
> +
> +    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
> +    assert(ret == 0);
> +    start = true;
> +    cdsg_loop(NULL);
> +    ret = pthread_join(thread, NULL);
> +    assert(ret == 0);
> +
> +    assert(val[0] == n_iterations * 2);
> +    assert(val[1] == n_iterations * 4);
> +
> +    return EXIT_SUCCESS;
> +}
Re: [PATCH v6 32/36] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG
Posted by David Hildenbrand 3 years ago
On 30.01.23 22:48, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Ilya Leoshkevich <iii@linux.ibm.com>
> ---

Acked-by: David Hildenbrand <david@redhat.com>

-- 
Thanks,

David / dhildenb