[PATCH] tests/tcg/s390x: Add cdsg.c

Ilya Leoshkevich posted 1 patch 1 year, 4 months ago
Failed in applying to current master (apply log)
There is a newer version of this series
tests/tcg/s390x/Makefile.target |  4 ++
tests/tcg/s390x/cdsg.c          | 73 +++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+)
create mode 100644 tests/tcg/s390x/cdsg.c
[PATCH] tests/tcg/s390x: Add cdsg.c
Posted by Ilya Leoshkevich 1 year, 4 months ago
Add a simple test to prevent regressions.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 tests/tcg/s390x/Makefile.target |  4 ++
 tests/tcg/s390x/cdsg.c          | 73 +++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 tests/tcg/s390x/cdsg.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
 TESTS+=div
 TESTS+=clst
 TESTS+=long-double
+TESTS+=cdsg
 
 Z13_TESTS=vistr
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
 
 TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..83313699f7d
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,73 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+static unsigned long val[2] __attribute__((__aligned__(16)));
+
+void *cdsg_loop(void *arg)
+{
+    unsigned long orig0, orig1, new0, new1;
+    register unsigned long r0 asm("r0");
+    register unsigned long r1 asm("r1");
+    register unsigned long r2 asm("r2");
+    register unsigned long r3 asm("r3");
+    int cc;
+    int i;
+
+    while (!start) {
+    }
+
+    orig0 = val[0];
+    orig1 = val[1];
+    for (i = 0; i < 1000;) {
+        new0 = orig0 + 1;
+        new1 = orig1 + 2;
+
+        r0 = orig0;
+        r1 = orig1;
+        r2 = new0;
+        r3 = new1;
+        asm("cdsg %[r0],%[r2],%[db2]\n"
+            "ipm %[cc]"
+            : [r0] "+r" (r0)
+            , [r1] "+r" (r1)
+            , [db2] "=m" (val)
+            , [cc] "=r" (cc)
+            : [r2] "r" (r2)
+            , [r3] "r" (r3)
+            : "cc");
+        orig0 = r0;
+        orig1 = r1;
+        cc = (cc >> 28) & 3;
+
+        if (cc == 0) {
+            orig0 = new0;
+            orig1 = new1;
+            i++;
+        } else {
+            assert(cc == 1);
+        }
+    }
+
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t thread;
+    int ret;
+
+    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+    assert(ret == 0);
+    start = true;
+    cdsg_loop(NULL);
+    ret = pthread_join(thread, NULL);
+    assert(ret == 0);
+
+    assert(val[0] == 2000);
+    assert(val[1] == 4000);
+
+    return EXIT_SUCCESS;
+}
-- 
2.38.1
Re: [PATCH] tests/tcg/s390x: Add cdsg.c
Posted by David Hildenbrand 1 year, 4 months ago
On 29.11.22 00:48, Ilya Leoshkevich wrote:
> Add a simple test to prevent regressions.
> 
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>   tests/tcg/s390x/Makefile.target |  4 ++
>   tests/tcg/s390x/cdsg.c          | 73 +++++++++++++++++++++++++++++++++
>   2 files changed, 77 insertions(+)
>   create mode 100644 tests/tcg/s390x/cdsg.c
> 
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index 1d454270c0e..523214dac33 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -27,6 +27,7 @@ TESTS+=noexec
>   TESTS+=div
>   TESTS+=clst
>   TESTS+=long-double
> +TESTS+=cdsg
>   
>   Z13_TESTS=vistr
>   $(Z13_TESTS): CFLAGS+=-march=z13 -O2
> @@ -66,3 +67,6 @@ sha512-mvx: sha512.c
>   	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>   
>   TESTS+=sha512-mvx
> +
> +cdsg: CFLAGS+=-pthread
> +cdsg: LDFLAGS+=-pthread
> diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
> new file mode 100644
> index 00000000000..83313699f7d
> --- /dev/null
> +++ b/tests/tcg/s390x/cdsg.c
> @@ -0,0 +1,73 @@
> +#include <assert.h>
> +#include <pthread.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +
> +static volatile bool start;
> +static unsigned long val[2] __attribute__((__aligned__(16)));
> +
> +void *cdsg_loop(void *arg)
> +{
> +    unsigned long orig0, orig1, new0, new1;
> +    register unsigned long r0 asm("r0");
> +    register unsigned long r1 asm("r1");
> +    register unsigned long r2 asm("r2");
> +    register unsigned long r3 asm("r3");
> +    int cc;
> +    int i;
> +
> +    while (!start) {
> +    }
> +
> +    orig0 = val[0];
> +    orig1 = val[1];
> +    for (i = 0; i < 1000;) {

Are 1000 iterations sufficient to catch the race window reliably?

> +        new0 = orig0 + 1;
> +        new1 = orig1 + 2;
> +
> +        r0 = orig0;
> +        r1 = orig1;
> +        r2 = new0;
> +        r3 = new1;
> +        asm("cdsg %[r0],%[r2],%[db2]\n"
> +            "ipm %[cc]"
> +            : [r0] "+r" (r0)
> +            , [r1] "+r" (r1)
> +            , [db2] "=m" (val)
> +            , [cc] "=r" (cc)
> +            : [r2] "r" (r2)
> +            , [r3] "r" (r3)
> +            : "cc");

Nit: I'd suggest a simple cdsg helper function that makes this code 
easier to digest.

> +        orig0 = r0;
> +        orig1 = r1;
> +        cc = (cc >> 28) & 3;
> +
> +        if (cc == 0) {
> +            orig0 = new0;
> +            orig1 = new1;
> +            i++;
> +        } else {
> +            assert(cc == 1);
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +int main(void)
> +{
> +    pthread_t thread;
> +    int ret;
> +
> +    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
> +    assert(ret == 0);
> +    start = true;
> +    cdsg_loop(NULL);
> +    ret = pthread_join(thread, NULL);
> +    assert(ret == 0);
> +
> +    assert(val[0] == 2000);
> +    assert(val[1] == 4000);
> +
> +    return EXIT_SUCCESS;
> +}

-- 
Thanks,

David / dhildenb
Re: [PATCH] tests/tcg/s390x: Add cdsg.c
Posted by Ilya Leoshkevich 1 year, 4 months ago
On Tue, Nov 29, 2022 at 09:54:13AM +0100, David Hildenbrand wrote:
> On 29.11.22 00:48, Ilya Leoshkevich wrote:
> > Add a simple test to prevent regressions.
> > 
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> >   tests/tcg/s390x/Makefile.target |  4 ++
> >   tests/tcg/s390x/cdsg.c          | 73 +++++++++++++++++++++++++++++++++
> >   2 files changed, 77 insertions(+)
> >   create mode 100644 tests/tcg/s390x/cdsg.c
> > 
> > diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> > index 1d454270c0e..523214dac33 100644
> > --- a/tests/tcg/s390x/Makefile.target
> > +++ b/tests/tcg/s390x/Makefile.target
> > @@ -27,6 +27,7 @@ TESTS+=noexec
> >   TESTS+=div
> >   TESTS+=clst
> >   TESTS+=long-double
> > +TESTS+=cdsg
> >   Z13_TESTS=vistr
> >   $(Z13_TESTS): CFLAGS+=-march=z13 -O2
> > @@ -66,3 +67,6 @@ sha512-mvx: sha512.c
> >   	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> >   TESTS+=sha512-mvx
> > +
> > +cdsg: CFLAGS+=-pthread
> > +cdsg: LDFLAGS+=-pthread
> > diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
> > new file mode 100644
> > index 00000000000..83313699f7d
> > --- /dev/null
> > +++ b/tests/tcg/s390x/cdsg.c
> > @@ -0,0 +1,73 @@
> > +#include <assert.h>
> > +#include <pthread.h>
> > +#include <stdbool.h>
> > +#include <stdlib.h>
> > +
> > +static volatile bool start;
> > +static unsigned long val[2] __attribute__((__aligned__(16)));
> > +
> > +void *cdsg_loop(void *arg)
> > +{
> > +    unsigned long orig0, orig1, new0, new1;
> > +    register unsigned long r0 asm("r0");
> > +    register unsigned long r1 asm("r1");
> > +    register unsigned long r2 asm("r2");
> > +    register unsigned long r3 asm("r3");
> > +    int cc;
> > +    int i;
> > +
> > +    while (!start) {
> > +    }
> > +
> > +    orig0 = val[0];
> > +    orig1 = val[1];
> > +    for (i = 0; i < 1000;) {
> 
> Are 1000 iterations sufficient to catch the race window reliably?

Good point, I had to raise it to 10k.
If I break the code like this:

--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3509,7 +3509,7 @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
 {
     gen_atomic_cx_i128 gen;
 
-    if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
+    if (true) {
         tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
         return;
     }
 
the test with 10k iterations fails consistently.
And it's still fast:

$ time -p ./qemu-s390x ./tests/tcg/s390x-linux-user/cdsg
real 0.01

> > +        new0 = orig0 + 1;
> > +        new1 = orig1 + 2;
> > +
> > +        r0 = orig0;
> > +        r1 = orig1;
> > +        r2 = new0;
> > +        r3 = new1;
> > +        asm("cdsg %[r0],%[r2],%[db2]\n"
> > +            "ipm %[cc]"
> > +            : [r0] "+r" (r0)
> > +            , [r1] "+r" (r1)
> > +            , [db2] "=m" (val)
> > +            , [cc] "=r" (cc)
> > +            : [r2] "r" (r2)
> > +            , [r3] "r" (r3)
> > +            : "cc");
> 
> Nit: I'd suggest a simple cdsg helper function that makes this code easier
> to digest.

Ok.

> 
> > +        orig0 = r0;
> > +        orig1 = r1;
> > +        cc = (cc >> 28) & 3;
> > +
> > +        if (cc == 0) {
> > +            orig0 = new0;
> > +            orig1 = new1;
> > +            i++;
> > +        } else {
> > +            assert(cc == 1);
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +int main(void)
> > +{
> > +    pthread_t thread;
> > +    int ret;
> > +
> > +    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
> > +    assert(ret == 0);
> > +    start = true;
> > +    cdsg_loop(NULL);
> > +    ret = pthread_join(thread, NULL);
> > +    assert(ret == 0);
> > +
> > +    assert(val[0] == 2000);
> > +    assert(val[1] == 4000);
> > +
> > +    return EXIT_SUCCESS;
> > +}
> 
> -- 
> Thanks,
> 
> David / dhildenb
> 
>
[PATCH v2] tests/tcg/s390x: Add cdsg.c
Posted by Ilya Leoshkevich 1 year, 4 months ago
Add a simple test to prevent regressions.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 tests/tcg/s390x/Makefile.target |  4 ++
 tests/tcg/s390x/cdsg.c          | 84 +++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 tests/tcg/s390x/cdsg.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
 TESTS+=div
 TESTS+=clst
 TESTS+=long-double
+TESTS+=cdsg
 
 Z13_TESTS=vistr
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
 
 TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..28b5ac9a000
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,84 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+                       unsigned long new0, unsigned long new1,
+                       aligned_quadword *mem)
+{
+    register unsigned long r0 asm("r0");
+    register unsigned long r1 asm("r1");
+    register unsigned long r2 asm("r2");
+    register unsigned long r3 asm("r3");
+    int cc;
+
+    r0 = *orig0;
+    r1 = *orig1;
+    r2 = new0;
+    r3 = new1;
+    asm("cdsg %[r0],%[r2],%[db2]\n"
+        "ipm %[cc]"
+        : [r0] "+r" (r0)
+        , [r1] "+r" (r1)
+        , [db2] "+m" (*mem)
+        , [cc] "=r" (cc)
+        : [r2] "r" (r2)
+        , [r3] "r" (r3)
+        : "cc");
+    *orig0 = r0;
+    *orig1 = r1;
+
+    return (cc >> 28) & 3;
+}
+
+void *cdsg_loop(void *arg)
+{
+    unsigned long orig0, orig1, new0, new1;
+    int cc;
+    int i;
+
+    while (!start) {
+    }
+
+    orig0 = val[0];
+    orig1 = val[1];
+    for (i = 0; i < 1000;) {
+        new0 = orig0 + 1;
+        new1 = orig1 + 2;
+
+        cc = cdsg(&orig0, &orig1, new0, new1, &val);
+
+        if (cc == 0) {
+            orig0 = new0;
+            orig1 = new1;
+            i++;
+        } else {
+            assert(cc == 1);
+        }
+    }
+
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t thread;
+    int ret;
+
+    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+    assert(ret == 0);
+    start = true;
+    cdsg_loop(NULL);
+    ret = pthread_join(thread, NULL);
+    assert(ret == 0);
+
+    assert(val[0] == 2000);
+    assert(val[1] == 4000);
+
+    return EXIT_SUCCESS;
+}
-- 
2.38.1
[PATCH v3] tests/tcg/s390x: Add cdsg.c
Posted by Ilya Leoshkevich 1 year, 4 months ago
Add a simple test to prevent regressions.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---

Sorry, I just realized that the v2 I sent did not actually increase the
iteration count. For v3 I've decided to bump it further to 1 million,
since it's still fast enough:

$ time -p ./qemu-s390x ./tests/tcg/s390x-linux-user/cdsg
real 0.15

v2 -> v3: Increase iteration count to 1 million.
v1 -> v2: Add cdsg() wrapper.

 tests/tcg/s390x/Makefile.target |  4 ++
 tests/tcg/s390x/cdsg.c          | 85 +++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 tests/tcg/s390x/cdsg.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1d454270c0e..523214dac33 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -27,6 +27,7 @@ TESTS+=noexec
 TESTS+=div
 TESTS+=clst
 TESTS+=long-double
+TESTS+=cdsg
 
 Z13_TESTS=vistr
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
@@ -66,3 +67,6 @@ sha512-mvx: sha512.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
 
 TESTS+=sha512-mvx
+
+cdsg: CFLAGS+=-pthread
+cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
new file mode 100644
index 00000000000..c7a5246181d
--- /dev/null
+++ b/tests/tcg/s390x/cdsg.c
@@ -0,0 +1,85 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+static volatile bool start;
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
+static aligned_quadword val;
+static const int n_iterations = 1000000;
+
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
+                       unsigned long new0, unsigned long new1,
+                       aligned_quadword *mem)
+{
+    register unsigned long r0 asm("r0");
+    register unsigned long r1 asm("r1");
+    register unsigned long r2 asm("r2");
+    register unsigned long r3 asm("r3");
+    int cc;
+
+    r0 = *orig0;
+    r1 = *orig1;
+    r2 = new0;
+    r3 = new1;
+    asm("cdsg %[r0],%[r2],%[db2]\n"
+        "ipm %[cc]"
+        : [r0] "+r" (r0)
+        , [r1] "+r" (r1)
+        , [db2] "+m" (*mem)
+        , [cc] "=r" (cc)
+        : [r2] "r" (r2)
+        , [r3] "r" (r3)
+        : "cc");
+    *orig0 = r0;
+    *orig1 = r1;
+
+    return (cc >> 28) & 3;
+}
+
+void *cdsg_loop(void *arg)
+{
+    unsigned long orig0, orig1, new0, new1;
+    int cc;
+    int i;
+
+    while (!start) {
+    }
+
+    orig0 = val[0];
+    orig1 = val[1];
+    for (i = 0; i < n_iterations;) {
+        new0 = orig0 + 1;
+        new1 = orig1 + 2;
+
+        cc = cdsg(&orig0, &orig1, new0, new1, &val);
+
+        if (cc == 0) {
+            orig0 = new0;
+            orig1 = new1;
+            i++;
+        } else {
+            assert(cc == 1);
+        }
+    }
+
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t thread;
+    int ret;
+
+    ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
+    assert(ret == 0);
+    start = true;
+    cdsg_loop(NULL);
+    ret = pthread_join(thread, NULL);
+    assert(ret == 0);
+
+    assert(val[0] == n_iterations * 2);
+    assert(val[1] == n_iterations * 4);
+
+    return EXIT_SUCCESS;
+}
-- 
2.38.1