Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/helper.h | 2 --
target/s390x/tcg/mem_helper.c | 52 ---------------------------
target/s390x/tcg/translate.c | 60 ++++++++++++++++++++------------
target/s390x/tcg/insn-data.h.inc | 2 +-
4 files changed, 38 insertions(+), 78 deletions(-)
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 481b9019f9..e5001ffddc 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -35,8 +35,6 @@ DEF_HELPER_3(cxgb, i128, env, s64, i32)
DEF_HELPER_3(celgb, i64, env, i64, i32)
DEF_HELPER_3(cdlgb, i64, env, i64, i32)
DEF_HELPER_3(cxlgb, i128, env, i64, i32)
-DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
-DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
DEF_HELPER_4(csst, i32, env, i32, i64, i64)
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index caf8c408ef..ae4df8df3d 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -1771,58 +1771,6 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
return cc;
}
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
- uint32_t r1, uint32_t r3)
-{
- uintptr_t ra = GETPC();
- Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
- Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
- Int128 oldv;
- uint64_t oldh, oldl;
- bool fail;
-
- check_alignment(env, addr, 16, ra);
-
- oldh = cpu_ldq_data_ra(env, addr + 0, ra);
- oldl = cpu_ldq_data_ra(env, addr + 8, ra);
-
- oldv = int128_make128(oldl, oldh);
- fail = !int128_eq(oldv, cmpv);
- if (fail) {
- newv = oldv;
- }
-
- cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
- cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
-
- env->cc_op = fail;
- env->regs[r1] = int128_gethi(oldv);
- env->regs[r1 + 1] = int128_getlo(oldv);
-}
-
-void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
- uint32_t r1, uint32_t r3)
-{
- uintptr_t ra = GETPC();
- Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
- Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
- int mem_idx;
- MemOpIdx oi;
- Int128 oldv;
- bool fail;
-
- assert(HAVE_CMPXCHG128);
-
- mem_idx = cpu_mmu_index(env, false);
- oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
- fail = !int128_eq(oldv, cmpv);
-
- env->cc_op = fail;
- env->regs[r1] = int128_gethi(oldv);
- env->regs[r1 + 1] = int128_getlo(oldv);
-}
-
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
uint64_t a2, bool parallel)
{
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 6a351a5245..480c89dae3 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2224,31 +2224,22 @@ static DisasJumpType op_cs(DisasContext *s, DisasOps *o)
static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
{
int r1 = get_field(s, r1);
- int r3 = get_field(s, r3);
- int d2 = get_field(s, d2);
- int b2 = get_field(s, b2);
- DisasJumpType ret = DISAS_NEXT;
- TCGv_i64 addr;
- TCGv_i32 t_r1, t_r3;
- /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. */
- addr = get_address(s, 0, b2, d2);
- t_r1 = tcg_const_i32(r1);
- t_r3 = tcg_const_i32(r3);
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
- } else if (HAVE_CMPXCHG128) {
- gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
- } else {
- gen_helper_exit_atomic(cpu_env);
- ret = DISAS_NORETURN;
- }
- tcg_temp_free_i64(addr);
- tcg_temp_free_i32(t_r1);
- tcg_temp_free_i32(t_r3);
+ /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value. */
+ tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128,
+ get_mem_index(s), MO_BE | MO_128 | MO_ALIGN);
- set_cc_static(s);
- return ret;
+ /*
+ * Extract result into cc_dst:cc_src, compare vs the expected value
+ * in the as yet unmodified input registers, then update CC_OP.
+ */
+ tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128);
+ tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]);
+ tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]);
+ tcg_gen_or_i64(cc_dst, cc_dst, cc_src);
+ set_cc_nz_u64(s, cc_dst);
+
+ return DISAS_NEXT;
}
static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
@@ -5417,6 +5408,14 @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
}
#define SPEC_prep_r1_P SPEC_r1_even
+static void prep_r1_D64(DisasContext *s, DisasOps *o)
+{
+ int r1 = get_field(s, r1);
+ o->out_128 = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
+}
+#define SPEC_prep_r1_D64 SPEC_r1_even
+
static void prep_x1(DisasContext *s, DisasOps *o)
{
o->out_128 = load_freg_128(get_field(s, r1));
@@ -5486,6 +5485,13 @@ static void wout_r1_D32(DisasContext *s, DisasOps *o)
}
#define SPEC_wout_r1_D32 SPEC_r1_even
+static void wout_r1_D64(DisasContext *s, DisasOps *o)
+{
+ int r1 = get_field(s, r1);
+ tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128);
+}
+#define SPEC_wout_r1_D64 SPEC_r1_even
+
static void wout_r3_P32(DisasContext *s, DisasOps *o)
{
int r3 = get_field(s, r3);
@@ -5933,6 +5939,14 @@ static void in2_r3(DisasContext *s, DisasOps *o)
}
#define SPEC_in2_r3 0
+static void in2_r3_D64(DisasContext *s, DisasOps *o)
+{
+ int r3 = get_field(s, r3);
+ o->in2_128 = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]);
+}
+#define SPEC_in2_r3_D64 SPEC_r3_even
+
static void in2_r3_sr32(DisasContext *s, DisasOps *o)
{
o->in2 = tcg_temp_new_i64();
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index 1a2a55bf5e..7dfcbdd980 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -276,7 +276,7 @@
/* COMPARE DOUBLE AND SWAP */
D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
- C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
+ C(0xeb3e, CDSG, RSY_a, Z, la2, r3_D64, r1_D64, r1_D64, cdsg, 0)
/* COMPARE AND SWAP AND STORE */
C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
--
2.34.1
On Fri, Nov 11, 2022 at 06:08:19PM +1000, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/helper.h | 2 --
> target/s390x/tcg/mem_helper.c | 52 ---------------------------
> target/s390x/tcg/translate.c | 60 ++++++++++++++++++++------------
> target/s390x/tcg/insn-data.h.inc | 2 +-
> 4 files changed, 38 insertions(+), 78 deletions(-)
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
I was wondering what assembly this would generate in parallel mode and
wrote a small test. On my x86_64 machine it ended up being
helper_atomic_cmpxchgo_be() -> cpu_atomic_cmpxchgo_be_mmu() ->
lock cmpxchg16b, nothing surprising.
On an s390x host we fall back to cpu_exec_step_atomic(), because in the
configure test:
int main(void)
{
unsigned __int128 x = 0, y = 0;
__sync_val_compare_and_swap_16(&x, y, x);
return 0;
}
x and y are not aligned. I guess that's working as intended as well,
even though it would be nice to eventually make use of cdsg there.
I will post the test shortly.
On 11/28/22 15:40, Ilya Leoshkevich wrote:
> On an s390x host we fall back to cpu_exec_step_atomic(), because in the
> configure test:
>
> int main(void)
> {
> unsigned __int128 x = 0, y = 0;
> __sync_val_compare_and_swap_16(&x, y, x);
> return 0;
> }
>
> x and y are not aligned. I guess that's working as intended as well,
> even though it would be nice to eventually make use of cdsg there.
I have a fix for that as a part of '[PATCH for-8.0 00/29] tcg: Improve atomicity support':
https://lore.kernel.org/qemu-devel/20221118094754.242910-14-richard.henderson@linaro.org/
r~
Add a simple test to prevent regressions.
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
tests/tcg/s390x/Makefile.target | 4 ++
tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
TESTS+=div
TESTS+=clst
TESTS+=long-double
+TESTS+=cdsg
Z13_TESTS=vistr
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..83313699f7d
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,73 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+static unsigned long val[2] __attribute__((__aligned__(16)));
+
+void *cdsg_loop(void *arg)
+{
+ unsigned long orig0, orig1, new0, new1;
+ register unsigned long r0 asm("r0");
+ register unsigned long r1 asm("r1");
+ register unsigned long r2 asm("r2");
+ register unsigned long r3 asm("r3");
+ int cc;
+ int i;
+
+ while (!start) {
+ }
+
+ orig0 = val[0];
+ orig1 = val[1];
+ for (i = 0; i < 1000;) {
+ new0 = orig0 + 1;
+ new1 = orig1 + 2;
+
+ r0 = orig0;
+ r1 = orig1;
+ r2 = new0;
+ r3 = new1;
+ asm("cdsg %[r0],%[r2],%[db2]\n"
+ "ipm %[cc]"
+ : [r0] "+r" (r0)
+ , [r1] "+r" (r1)
+ , [db2] "=m" (val)
+ , [cc] "=r" (cc)
+ : [r2] "r" (r2)
+ , [r3] "r" (r3)
+ : "cc");
+ orig0 = r0;
+ orig1 = r1;
+ cc = (cc >> 28) & 3;
+
+ if (cc == 0) {
+ orig0 = new0;
+ orig1 = new1;
+ i++;
+ } else {
+ assert(cc == 1);
+ }
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ pthread_t thread;
+ int ret;
+
+ ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+ assert(ret == 0);
+ start = true;
+ cdsg_loop(NULL);
+ ret = pthread_join(thread, NULL);
+ assert(ret == 0);
+
+ assert(val[0] == 2000);
+ assert(val[1] == 4000);
+
+ return EXIT_SUCCESS;
+}
--
2.38.1
On 29.11.22 00:48, Ilya Leoshkevich wrote:
> Add a simple test to prevent regressions.
>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
> tests/tcg/s390x/Makefile.target | 4 ++
> tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++
> 2 files changed, 77 insertions(+)
> create mode 100644 tests/tcg/s390x/cdsg.c
>
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index 1d454270c0e..523214dac33 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -27,6 +27,7 @@ TESTS+=noexec
> TESTS+=div
> TESTS+=clst
> TESTS+=long-double
> +TESTS+=cdsg
>
> Z13_TESTS=vistr
> $(Z13_TESTS): CFLAGS+=-march=z13 -O2
> @@ -66,3 +67,6 @@ sha512-mvx: sha512.c
> $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>
> TESTS+=sha512-mvx
> +
> +cdsg: CFLAGS+=-pthread
> +cdsg: LDFLAGS+=-pthread
> diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
> new file mode 100644
> index 00000000000..83313699f7d
> --- /dev/null
> +++ b/tests/tcg/s390x/cdsg.c
> @@ -0,0 +1,73 @@
> +#include <assert.h>
> +#include <pthread.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +
> +static volatile bool start;
> +static unsigned long val[2] __attribute__((__aligned__(16)));
> +
> +void *cdsg_loop(void *arg)
> +{
> + unsigned long orig0, orig1, new0, new1;
> + register unsigned long r0 asm("r0");
> + register unsigned long r1 asm("r1");
> + register unsigned long r2 asm("r2");
> + register unsigned long r3 asm("r3");
> + int cc;
> + int i;
> +
> + while (!start) {
> + }
> +
> + orig0 = val[0];
> + orig1 = val[1];
> + for (i = 0; i < 1000;) {
Are 1000 iterations sufficient to catch the race window reliably?
> + new0 = orig0 + 1;
> + new1 = orig1 + 2;
> +
> + r0 = orig0;
> + r1 = orig1;
> + r2 = new0;
> + r3 = new1;
> + asm("cdsg %[r0],%[r2],%[db2]\n"
> + "ipm %[cc]"
> + : [r0] "+r" (r0)
> + , [r1] "+r" (r1)
> + , [db2] "=m" (val)
> + , [cc] "=r" (cc)
> + : [r2] "r" (r2)
> + , [r3] "r" (r3)
> + : "cc");
Nit: I'd suggest a simple cdsg helper function that makes this code
easier to digest.
> + orig0 = r0;
> + orig1 = r1;
> + cc = (cc >> 28) & 3;
> +
> + if (cc == 0) {
> + orig0 = new0;
> + orig1 = new1;
> + i++;
> + } else {
> + assert(cc == 1);
> + }
> + }
> +
> + return NULL;
> +}
> +
> +int main(void)
> +{
> + pthread_t thread;
> + int ret;
> +
> + ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
> + assert(ret == 0);
> + start = true;
> + cdsg_loop(NULL);
> + ret = pthread_join(thread, NULL);
> + assert(ret == 0);
> +
> + assert(val[0] == 2000);
> + assert(val[1] == 4000);
> +
> + return EXIT_SUCCESS;
> +}
--
Thanks,
David / dhildenb
On Tue, Nov 29, 2022 at 09:54:13AM +0100, David Hildenbrand wrote:
> On 29.11.22 00:48, Ilya Leoshkevich wrote:
> > Add a simple test to prevent regressions.
> >
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> > tests/tcg/s390x/Makefile.target | 4 ++
> > tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++
> > 2 files changed, 77 insertions(+)
> > create mode 100644 tests/tcg/s390x/cdsg.c
> >
> > diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> > index 1d454270c0e..523214dac33 100644
> > --- a/tests/tcg/s390x/Makefile.target
> > +++ b/tests/tcg/s390x/Makefile.target
> > @@ -27,6 +27,7 @@ TESTS+=noexec
> > TESTS+=div
> > TESTS+=clst
> > TESTS+=long-double
> > +TESTS+=cdsg
> > Z13_TESTS=vistr
> > $(Z13_TESTS): CFLAGS+=-march=z13 -O2
> > @@ -66,3 +67,6 @@ sha512-mvx: sha512.c
> > $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> > TESTS+=sha512-mvx
> > +
> > +cdsg: CFLAGS+=-pthread
> > +cdsg: LDFLAGS+=-pthread
> > diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
> > new file mode 100644
> > index 00000000000..83313699f7d
> > --- /dev/null
> > +++ b/tests/tcg/s390x/cdsg.c
> > @@ -0,0 +1,73 @@
> > +#include <assert.h>
> > +#include <pthread.h>
> > +#include <stdbool.h>
> > +#include <stdlib.h>
> > +
> > +static volatile bool start;
> > +static unsigned long val[2] __attribute__((__aligned__(16)));
> > +
> > +void *cdsg_loop(void *arg)
> > +{
> > + unsigned long orig0, orig1, new0, new1;
> > + register unsigned long r0 asm("r0");
> > + register unsigned long r1 asm("r1");
> > + register unsigned long r2 asm("r2");
> > + register unsigned long r3 asm("r3");
> > + int cc;
> > + int i;
> > +
> > + while (!start) {
> > + }
> > +
> > + orig0 = val[0];
> > + orig1 = val[1];
> > + for (i = 0; i < 1000;) {
>
> Are 1000 iterations sufficient to catch the race window reliably?
Good point, I had to raise it to 10k.
If I break the code like this:
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3509,7 +3509,7 @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
{
gen_atomic_cx_i128 gen;
- if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
+ if (true) {
tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
return;
}
the test with 10k iterations fails consistently.
And it's still fast:
$ time -p ./qemu-s390x ./tests/tcg/s390x-linux-user/cdsg
real 0.01
> > + new0 = orig0 + 1;
> > + new1 = orig1 + 2;
> > +
> > + r0 = orig0;
> > + r1 = orig1;
> > + r2 = new0;
> > + r3 = new1;
> > + asm("cdsg %[r0],%[r2],%[db2]\n"
> > + "ipm %[cc]"
> > + : [r0] "+r" (r0)
> > + , [r1] "+r" (r1)
> > + , [db2] "=m" (val)
> > + , [cc] "=r" (cc)
> > + : [r2] "r" (r2)
> > + , [r3] "r" (r3)
> > + : "cc");
>
> Nit: I'd suggest a simple cdsg helper function that makes this code easier
> to digest.
Ok.
>
> > + orig0 = r0;
> > + orig1 = r1;
> > + cc = (cc >> 28) & 3;
> > +
> > + if (cc == 0) {
> > + orig0 = new0;
> > + orig1 = new1;
> > + i++;
> > + } else {
> > + assert(cc == 1);
> > + }
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > +int main(void)
> > +{
> > + pthread_t thread;
> > + int ret;
> > +
> > + ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
> > + assert(ret == 0);
> > + start = true;
> > + cdsg_loop(NULL);
> > + ret = pthread_join(thread, NULL);
> > + assert(ret == 0);
> > +
> > + assert(val[0] == 2000);
> > + assert(val[1] == 4000);
> > +
> > + return EXIT_SUCCESS;
> > +}
>
> --
> Thanks,
>
> David / dhildenb
>
>
Add a simple test to prevent regressions.
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
tests/tcg/s390x/Makefile.target | 4 ++
tests/tcg/s390x/cdsg.c | 84 +++++++++++++++++++++++++++++++++
2 files changed, 88 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
TESTS+=div
TESTS+=clst
TESTS+=long-double
+TESTS+=cdsg
Z13_TESTS=vistr
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..28b5ac9a000
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,84 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+ unsigned long new0, unsigned long new1,
+ aligned_quadword *mem)
+{
+ register unsigned long r0 asm("r0");
+ register unsigned long r1 asm("r1");
+ register unsigned long r2 asm("r2");
+ register unsigned long r3 asm("r3");
+ int cc;
+
+ r0 = *orig0;
+ r1 = *orig1;
+ r2 = new0;
+ r3 = new1;
+ asm("cdsg %[r0],%[r2],%[db2]\n"
+ "ipm %[cc]"
+ : [r0] "+r" (r0)
+ , [r1] "+r" (r1)
+ , [db2] "+m" (*mem)
+ , [cc] "=r" (cc)
+ : [r2] "r" (r2)
+ , [r3] "r" (r3)
+ : "cc");
+ *orig0 = r0;
+ *orig1 = r1;
+
+ return (cc >> 28) & 3;
+}
+
+void *cdsg_loop(void *arg)
+{
+ unsigned long orig0, orig1, new0, new1;
+ int cc;
+ int i;
+
+ while (!start) {
+ }
+
+ orig0 = val[0];
+ orig1 = val[1];
+ for (i = 0; i < 1000;) {
+ new0 = orig0 + 1;
+ new1 = orig1 + 2;
+
+ cc = cdsg(&orig0, &orig1, new0, new1, &val);
+
+ if (cc == 0) {
+ orig0 = new0;
+ orig1 = new1;
+ i++;
+ } else {
+ assert(cc == 1);
+ }
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ pthread_t thread;
+ int ret;
+
+ ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+ assert(ret == 0);
+ start = true;
+ cdsg_loop(NULL);
+ ret = pthread_join(thread, NULL);
+ assert(ret == 0);
+
+ assert(val[0] == 2000);
+ assert(val[1] == 4000);
+
+ return EXIT_SUCCESS;
+}
--
2.38.1
Add a simple test to prevent regressions.
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
Sorry, I just realized that the iteration count was not actually increased
in the v2 that I sent. For v3 I've decided to bump it further, to 1M (one
million) iterations, since the test is still fast enough:
$ time -p ./qemu-s390x ./tests/tcg/s390x-linux-user/cdsg
real 0.15
v2 -> v3: Increase iteration count to 1M (1,000,000).
v1 -> v2: Add cdsg() wrapper.
tests/tcg/s390x/Makefile.target | 4 ++
tests/tcg/s390x/cdsg.c | 85 +++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
TESTS+=div
TESTS+=clst
TESTS+=long-double
+TESTS+=cdsg
Z13_TESTS=vistr
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..c7a5246181d
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,85 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+static const int n_iterations = 1000000;
+
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+ unsigned long new0, unsigned long new1,
+ aligned_quadword *mem)
+{
+ register unsigned long r0 asm("r0");
+ register unsigned long r1 asm("r1");
+ register unsigned long r2 asm("r2");
+ register unsigned long r3 asm("r3");
+ int cc;
+
+ r0 = *orig0;
+ r1 = *orig1;
+ r2 = new0;
+ r3 = new1;
+ asm("cdsg %[r0],%[r2],%[db2]\n"
+ "ipm %[cc]"
+ : [r0] "+r" (r0)
+ , [r1] "+r" (r1)
+ , [db2] "+m" (*mem)
+ , [cc] "=r" (cc)
+ : [r2] "r" (r2)
+ , [r3] "r" (r3)
+ : "cc");
+ *orig0 = r0;
+ *orig1 = r1;
+
+ return (cc >> 28) & 3;
+}
+
+void *cdsg_loop(void *arg)
+{
+ unsigned long orig0, orig1, new0, new1;
+ int cc;
+ int i;
+
+ while (!start) {
+ }
+
+ orig0 = val[0];
+ orig1 = val[1];
+ for (i = 0; i < n_iterations;) {
+ new0 = orig0 + 1;
+ new1 = orig1 + 2;
+
+ cc = cdsg(&orig0, &orig1, new0, new1, &val);
+
+ if (cc == 0) {
+ orig0 = new0;
+ orig1 = new1;
+ i++;
+ } else {
+ assert(cc == 1);
+ }
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ pthread_t thread;
+ int ret;
+
+ ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+ assert(ret == 0);
+ start = true;
+ cdsg_loop(NULL);
+ ret = pthread_join(thread, NULL);
+ assert(ret == 0);
+
+ assert(val[0] == n_iterations * 2);
+ assert(val[1] == n_iterations * 4);
+
+ return EXIT_SUCCESS;
+}
--
2.38.1
© 2016 - 2026 Red Hat, Inc.