tests/tcg/s390x/Makefile.target | 4 ++ tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tests/tcg/s390x/cdsg.c
Add a simple test to prevent regressions.
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
tests/tcg/s390x/Makefile.target | 4 ++
tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
TESTS+=div
TESTS+=clst
TESTS+=long-double
+TESTS+=cdsg
Z13_TESTS=vistr
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..83313699f7d
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,73 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+static unsigned long val[2] __attribute__((__aligned__(16)));
+
+/*
+ * Thread body shared by main() and the thread it spawns.
+ *
+ * Waits for the start flag, then performs 1000 successful CDSG updates of
+ * val; every successful update adds 1 to val[0] and 2 to val[1] as one
+ * 128-bit operation. Failed attempts are retried and do not count.
+ *
+ * arg is unused. Always returns NULL.
+ */
+void *cdsg_loop(void *arg)
+{
+    unsigned long orig0, orig1, new0, new1;
+    /* r0/r1 carry the expected quadword, r2/r3 the replacement. */
+    register unsigned long r0 asm("r0");
+    register unsigned long r1 asm("r1");
+    register unsigned long r2 asm("r2");
+    register unsigned long r3 asm("r3");
+    int cc;
+    int i;
+
+    /* Spin until main() sets the flag so both threads start together. */
+    while (!start) {
+    }
+
+    orig0 = val[0];
+    orig1 = val[1];
+    /* i only advances when the swap succeeded, hence no increment here. */
+    for (i = 0; i < 1000;) {
+        new0 = orig0 + 1;
+        new1 = orig1 + 2;
+
+        r0 = orig0;
+        r1 = orig1;
+        r2 = new0;
+        r3 = new1;
+        /*
+         * The quadword is compared before it is conditionally replaced, so
+         * it has to be a read-write ("+m") operand. With a write-only "=m"
+         * operand the compiler is allowed to assume the previous contents
+         * of val are dead.
+         */
+        asm("cdsg %[r0],%[r2],%[db2]\n"
+            "ipm %[cc]"
+            : [r0] "+r" (r0)
+            , [r1] "+r" (r1)
+            , [db2] "+m" (val)
+            , [cc] "=r" (cc)
+            : [r2] "r" (r2)
+            , [r3] "r" (r3)
+            : "cc");
+        /* Pick up whatever the instruction left in the expected pair. */
+        orig0 = r0;
+        orig1 = r1;
+        /* Keep only bits 28-29 of the IPM result: the condition code. */
+        cc = (cc >> 28) & 3;
+
+        if (cc == 0) {
+            /* Swap done: memory now holds the new pair. */
+            orig0 = new0;
+            orig1 = new1;
+            i++;
+        } else {
+            /* Only "comparison failed" is acceptable; retry with r0/r1. */
+            assert(cc == 1);
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Run cdsg_loop() on a second thread and on the calling thread at the same
+ * time, then verify the totals accumulated in val.
+ */
+int main(void)
+{
+    pthread_t worker;
+    int ret = pthread_create(&worker, NULL, cdsg_loop, NULL);
+
+    assert(ret == 0);
+
+    /* Open the gate cdsg_loop() spins on, then take part in the race. */
+    start = true;
+    cdsg_loop(NULL);
+
+    ret = pthread_join(worker, NULL);
+    assert(ret == 0);
+
+    /* Two threads, 1000 successful swaps each, adding 1 and 2 per swap. */
+    assert(val[0] == 2000);
+    assert(val[1] == 4000);
+
+    return EXIT_SUCCESS;
+}
--
2.38.1
On 29.11.22 00:48, Ilya Leoshkevich wrote: > Add a simple test to prevent regressions. > > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> > --- > tests/tcg/s390x/Makefile.target | 4 ++ > tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++ > 2 files changed, 77 insertions(+) > create mode 100644 tests/tcg/s390x/cdsg.c > > diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target > index 1d454270c0e..523214dac33 100644 > --- a/tests/tcg/s390x/Makefile.target > +++ b/tests/tcg/s390x/Makefile.target > @@ -27,6 +27,7 @@ TESTS+=noexec > TESTS+=div > TESTS+=clst > TESTS+=long-double > +TESTS+=cdsg > > Z13_TESTS=vistr > $(Z13_TESTS): CFLAGS+=-march=z13 -O2 > @@ -66,3 +67,6 @@ sha512-mvx: sha512.c > $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) > > TESTS+=sha512-mvx > + > +cdsg: CFLAGS+=-pthread > +cdsg: LDFLAGS+=-pthread > diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c > new file mode 100644 > index 00000000000..83313699f7d > --- /dev/null > +++ b/tests/tcg/s390x/cdsg.c > @@ -0,0 +1,73 @@ > +#include <assert.h> > +#include <pthread.h> > +#include <stdbool.h> > +#include <stdlib.h> > + > +static volatile bool start; > +static unsigned long val[2] __attribute__((__aligned__(16))); > + > +void *cdsg_loop(void *arg) > +{ > + unsigned long orig0, orig1, new0, new1; > + register unsigned long r0 asm("r0"); > + register unsigned long r1 asm("r1"); > + register unsigned long r2 asm("r2"); > + register unsigned long r3 asm("r3"); > + int cc; > + int i; > + > + while (!start) { > + } > + > + orig0 = val[0]; > + orig1 = val[1]; > + for (i = 0; i < 1000;) { Are 1000 iterations sufficient to catch the race window reliably? 
> + new0 = orig0 + 1; > + new1 = orig1 + 2; > + > + r0 = orig0; > + r1 = orig1; > + r2 = new0; > + r3 = new1; > + asm("cdsg %[r0],%[r2],%[db2]\n" > + "ipm %[cc]" > + : [r0] "+r" (r0) > + , [r1] "+r" (r1) > + , [db2] "=m" (val) > + , [cc] "=r" (cc) > + : [r2] "r" (r2) > + , [r3] "r" (r3) > + : "cc"); Nit: I'd suggest a simple cdsg helper function that makes this code easier to digest. > + orig0 = r0; > + orig1 = r1; > + cc = (cc >> 28) & 3; > + > + if (cc == 0) { > + orig0 = new0; > + orig1 = new1; > + i++; > + } else { > + assert(cc == 1); > + } > + } > + > + return NULL; > +} > + > +int main(void) > +{ > + pthread_t thread; > + int ret; > + > + ret = pthread_create(&thread, NULL, cdsg_loop, NULL); > + assert(ret == 0); > + start = true; > + cdsg_loop(NULL); > + ret = pthread_join(thread, NULL); > + assert(ret == 0); > + > + assert(val[0] == 2000); > + assert(val[1] == 4000); > + > + return EXIT_SUCCESS; > +} -- Thanks, David / dhildenb
On Tue, Nov 29, 2022 at 09:54:13AM +0100, David Hildenbrand wrote: > On 29.11.22 00:48, Ilya Leoshkevich wrote: > > Add a simple test to prevent regressions. > > > > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> > > --- > > tests/tcg/s390x/Makefile.target | 4 ++ > > tests/tcg/s390x/cdsg.c | 73 +++++++++++++++++++++++++++++++++ > > 2 files changed, 77 insertions(+) > > create mode 100644 tests/tcg/s390x/cdsg.c > > > > diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target > > index 1d454270c0e..523214dac33 100644 > > --- a/tests/tcg/s390x/Makefile.target > > +++ b/tests/tcg/s390x/Makefile.target > > @@ -27,6 +27,7 @@ TESTS+=noexec > > TESTS+=div > > TESTS+=clst > > TESTS+=long-double > > +TESTS+=cdsg > > Z13_TESTS=vistr > > $(Z13_TESTS): CFLAGS+=-march=z13 -O2 > > @@ -66,3 +67,6 @@ sha512-mvx: sha512.c > > $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) > > TESTS+=sha512-mvx > > + > > +cdsg: CFLAGS+=-pthread > > +cdsg: LDFLAGS+=-pthread > > diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c > > new file mode 100644 > > index 00000000000..83313699f7d > > --- /dev/null > > +++ b/tests/tcg/s390x/cdsg.c > > @@ -0,0 +1,73 @@ > > +#include <assert.h> > > +#include <pthread.h> > > +#include <stdbool.h> > > +#include <stdlib.h> > > + > > +static volatile bool start; > > +static unsigned long val[2] __attribute__((__aligned__(16))); > > + > > +void *cdsg_loop(void *arg) > > +{ > > + unsigned long orig0, orig1, new0, new1; > > + register unsigned long r0 asm("r0"); > > + register unsigned long r1 asm("r1"); > > + register unsigned long r2 asm("r2"); > > + register unsigned long r3 asm("r3"); > > + int cc; > > + int i; > > + > > + while (!start) { > > + } > > + > > + orig0 = val[0]; > > + orig1 = val[1]; > > + for (i = 0; i < 1000;) { > > Are 1000 iterations sufficient to catch the race window reliably? Good point, I had to raise it to 10k. 
If I break the code like this: --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -3509,7 +3509,7 @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv, { gen_atomic_cx_i128 gen; - if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { + if (true) { tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop); return; } the test with 10k iterations fails consistently. And it's still fast: $ time -p ./qemu-s390x ./tests/tcg/s390x-linux-user/cdsg real 0.01 > > + new0 = orig0 + 1; > > + new1 = orig1 + 2; > > + > > + r0 = orig0; > > + r1 = orig1; > > + r2 = new0; > > + r3 = new1; > > + asm("cdsg %[r0],%[r2],%[db2]\n" > > + "ipm %[cc]" > > + : [r0] "+r" (r0) > > + , [r1] "+r" (r1) > > + , [db2] "=m" (val) > > + , [cc] "=r" (cc) > > + : [r2] "r" (r2) > > + , [r3] "r" (r3) > > + : "cc"); > > Nit: I'd suggest a simple cdsg helper function that makes this code easier > to digest. Ok. > > > + orig0 = r0; > > + orig1 = r1; > > + cc = (cc >> 28) & 3; > > + > > + if (cc == 0) { > > + orig0 = new0; > > + orig1 = new1; > > + i++; > > + } else { > > + assert(cc == 1); > > + } > > + } > > + > > + return NULL; > > +} > > + > > +int main(void) > > +{ > > + pthread_t thread; > > + int ret; > > + > > + ret = pthread_create(&thread, NULL, cdsg_loop, NULL); > > + assert(ret == 0); > > + start = true; > > + cdsg_loop(NULL); > > + ret = pthread_join(thread, NULL); > > + assert(ret == 0); > > + > > + assert(val[0] == 2000); > > + assert(val[1] == 4000); > > + > > + return EXIT_SUCCESS; > > +} > > -- > Thanks, > > David / dhildenb > >
Add a simple test to prevent regressions.
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
tests/tcg/s390x/Makefile.target | 4 ++
tests/tcg/s390x/cdsg.c | 84 +++++++++++++++++++++++++++++++++
2 files changed, 88 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
TESTS+=div
TESTS+=clst
TESTS+=long-double
+TESTS+=cdsg
Z13_TESTS=vistr
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..28b5ac9a000
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,84 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+
+/*
+ * Execute one CDSG on *mem.
+ *
+ * orig0/orig1: in: the pair expected in memory; out: the contents of r0/r1
+ *              after the instruction.
+ * new0/new1:   the pair to store.
+ * mem:         the 16-byte aligned quadword to operate on.
+ *
+ * Returns the two condition-code bits extracted from the IPM result; the
+ * caller treats 0 as "swapped" and 1 as "retry".
+ */
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+                       unsigned long new0, unsigned long new1,
+                       aligned_quadword *mem)
+{
+    /* The instruction names only r0 and r2; r1 and r3 are pinned as well. */
+    register unsigned long cmp0 asm("r0") = *orig0;
+    register unsigned long cmp1 asm("r1") = *orig1;
+    register unsigned long swp0 asm("r2") = new0;
+    register unsigned long swp1 asm("r3") = new1;
+    int ipm_word;
+
+    asm("cdsg %[cmp],%[swp],%[qw]\n"
+        "ipm %[ipm]"
+        : [cmp] "+r" (cmp0)
+        , [cmp_next] "+r" (cmp1)
+        , [qw] "+m" (*mem)
+        , [ipm] "=r" (ipm_word)
+        : [swp] "r" (swp0)
+        , [swp_next] "r" (swp1)
+        : "cc");
+
+    /* Hand the (possibly refreshed) expected pair back to the caller. */
+    *orig0 = cmp0;
+    *orig1 = cmp1;
+
+    return (ipm_word >> 28) & 3;
+}
+
+/*
+ * Thread body: once start is set, keep calling cdsg() on val until 1000
+ * swaps have succeeded, each adding 1 to val[0] and 2 to val[1].
+ *
+ * arg is unused. Always returns NULL.
+ */
+void *cdsg_loop(void *arg)
+{
+    unsigned long seen0, seen1;
+    int done = 0;
+
+    /* Busy-wait for main() to release both threads. */
+    while (!start) {
+    }
+
+    seen0 = val[0];
+    seen1 = val[1];
+    while (done < 1000) {
+        const unsigned long next0 = seen0 + 1;
+        const unsigned long next1 = seen1 + 2;
+        int cc = cdsg(&seen0, &seen1, next0, next1, &val);
+
+        if (cc != 0) {
+            /* Lost the race: cdsg() refreshed seen0/seen1, so try again. */
+            assert(cc == 1);
+            continue;
+        }
+
+        /* Swap done: memory now holds next0/next1. */
+        seen0 = next0;
+        seen1 = next1;
+        done++;
+    }
+
+    return NULL;
+}
+
+/*
+ * Start one extra thread running cdsg_loop(), run the same loop here, and
+ * check that no update of val was lost.
+ */
+int main(void)
+{
+    pthread_t peer;
+    int ret;
+
+    ret = pthread_create(&peer, NULL, cdsg_loop, NULL);
+    assert(ret == 0);
+
+    /* Both threads spin on this flag before touching val. */
+    start = true;
+    cdsg_loop(NULL);
+
+    ret = pthread_join(peer, NULL);
+    assert(ret == 0);
+
+    /* 2 threads x 1000 swaps; each swap adds 1 to val[0], 2 to val[1]. */
+    assert(val[0] == 2000);
+    assert(val[1] == 4000);
+
+    return EXIT_SUCCESS;
+}
--
2.38.1
Add a simple test to prevent regressions.
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
Sorry, I just realized that the v2 I sent did not actually increase the
iteration count. For v3 I've decided to bump it further, to 1 million,
since the test is still fast enough:
$ time -p ./qemu-s390x ./tests/tcg/s390x-linux-user/cdsg
real 0.15
v2 -> v3: Increase iteration count to 1m.
v1 -> v2: Add cdsg() wrapper.
tests/tcg/s390x/Makefile.target | 4 ++
tests/tcg/s390x/cdsg.c | 85 +++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
TESTS+=div
TESTS+=clst
TESTS+=long-double
+TESTS+=cdsg
Z13_TESTS=vistr
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..c7a5246181d
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,85 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+static const int n_iterations = 1000000;
+
+/*
+ * Thin wrapper around a single CDSG instruction applied to *mem.
+ *
+ * *orig0 / *orig1 supply the pair expected in memory and receive the
+ * contents of r0/r1 after the instruction; new0 / new1 are the pair to
+ * store. The return value is the condition code taken from bits 28-29 of
+ * the IPM result (callers treat 0 as success and 1 as "retry").
+ *
+ * NOTE(review): the quadword uses a plain "m" constraint; it may be worth
+ * checking whether a constraint that forbids an index register would be
+ * more appropriate for this instruction - TODO confirm.
+ */
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+                       unsigned long new0, unsigned long new1,
+                       aligned_quadword *mem)
+{
+    enum { CC_SHIFT = 28, CC_MASK = 3 };
+    register unsigned long r0 asm("r0") = *orig0;
+    register unsigned long r1 asm("r1") = *orig1;
+    register unsigned long r2 asm("r2") = new0;
+    register unsigned long r3 asm("r3") = new1;
+    int ipm_out;
+
+    /* r1 and r3 are not named in the template but must stay pinned. */
+    asm("cdsg %[expect],%[desire],%[target]\n"
+        "ipm %[flags]"
+        : [expect] "+r" (r0)
+        , [expect2] "+r" (r1)
+        , [target] "+m" (*mem)
+        , [flags] "=r" (ipm_out)
+        : [desire] "r" (r2)
+        , [desire2] "r" (r3)
+        : "cc");
+
+    *orig0 = r0;
+    *orig1 = r1;
+
+    ipm_out >>= CC_SHIFT;
+    return ipm_out & CC_MASK;
+}
+
+/*
+ * Thread body: after start is set, perform n_iterations successful cdsg()
+ * updates of val, each adding 1 to val[0] and 2 to val[1]. Attempts that
+ * fail with condition code 1 are retried and not counted.
+ *
+ * arg is unused. Always returns NULL.
+ */
+void *cdsg_loop(void *arg)
+{
+    unsigned long cur0, cur1;
+    int remaining = n_iterations;
+
+    /* Hold here until main() flips the flag. */
+    while (!start) {
+    }
+
+    cur0 = val[0];
+    cur1 = val[1];
+    while (remaining > 0) {
+        unsigned long want0 = cur0 + 1;
+        unsigned long want1 = cur1 + 2;
+        int cc = cdsg(&cur0, &cur1, want0, want1, &val);
+
+        if (cc == 0) {
+            /* Our pair was stored; continue from it. */
+            cur0 = want0;
+            cur1 = want1;
+            remaining--;
+        } else {
+            /* cur0/cur1 were refreshed by cdsg(); just go around again. */
+            assert(cc == 1);
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Race two instances of cdsg_loop() (one on a new thread, one on this
+ * thread) and verify that val ends up with the sum of all their updates.
+ */
+int main(void)
+{
+    pthread_t other;
+    int ret = pthread_create(&other, NULL, cdsg_loop, NULL);
+
+    assert(ret == 0);
+
+    /* Release the spinning thread and run the loop here as well. */
+    start = true;
+    cdsg_loop(NULL);
+
+    ret = pthread_join(other, NULL);
+    assert(ret == 0);
+
+    /* Each of the 2 threads adds n_iterations * 1 and n_iterations * 2. */
+    assert(val[0] == n_iterations * 2);
+    assert(val[1] == n_iterations * 4);
+
+    return EXIT_SUCCESS;
+}
--
2.38.1
© 2016 - 2024 Red Hat, Inc.