[PATCH] target/s390x: Fix 32-bit shifts

Ilya Leoshkevich posted 1 patch 2 years, 3 months ago
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20220110185918.219154-1-iii@linux.ibm.com
Maintainers: Thomas Huth <thuth@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Cornelia Huck <cohuck@redhat.com>, David Hildenbrand <david@redhat.com>
target/s390x/tcg/insn-data.def  | 36 ++++++++++++++++-----------------
target/s390x/tcg/translate.c    | 10 ++-------
tests/tcg/s390x/Makefile.target |  1 +
tests/tcg/s390x/sll.c           | 25 +++++++++++++++++++++++
4 files changed, 46 insertions(+), 26 deletions(-)
create mode 100644 tests/tcg/s390x/sll.c
[PATCH] target/s390x: Fix 32-bit shifts
Posted by Ilya Leoshkevich 2 years, 3 months ago
Both 32- and 64-bit shifts use the lowest 6 address bits. The current
code special-cases 32-bit shifts to use only 5 bits, which is not correct.

Fix by merging sh32 and sh64.

Fixes: cbe24bfa91d2 ("target-s390: Convert SHIFT, ROTATE SINGLE")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 target/s390x/tcg/insn-data.def  | 36 ++++++++++++++++-----------------
 target/s390x/tcg/translate.c    | 10 ++-------
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/sll.c           | 25 +++++++++++++++++++++++
 4 files changed, 46 insertions(+), 26 deletions(-)
 create mode 100644 tests/tcg/s390x/sll.c

diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index f0af458aee..348a15be72 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -747,8 +747,8 @@
     C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
 
 /* ROTATE LEFT SINGLE LOGICAL */
-    C(0xeb1d, RLL,     RSY_a, Z,   r3_o, sh32, new, r1_32, rll32, 0)
-    C(0xeb1c, RLLG,    RSY_a, Z,   r3_o, sh64, r1, 0, rll64, 0)
+    C(0xeb1d, RLL,     RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
+    C(0xeb1c, RLLG,    RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
 
 /* ROTATE THEN INSERT SELECTED BITS */
     C(0xec55, RISBG,   RIE_f, GIE, 0, r2, r1, 0, risbg, s64)
@@ -784,29 +784,29 @@
     C(0x0400, SPM,     RR_a,  Z,   r1, 0, 0, 0, spm, 0)
 
 /* SHIFT LEFT SINGLE */
-    D(0x8b00, SLA,     RS_a,  Z,   r1, sh32, new, r1_32, sla, 0, 31)
-    D(0xebdd, SLAK,    RSY_a, DO,  r3, sh32, new, r1_32, sla, 0, 31)
-    D(0xeb0b, SLAG,    RSY_a, Z,   r3, sh64, r1, 0, sla, 0, 63)
+    D(0x8b00, SLA,     RS_a,  Z,   r1, sh, new, r1_32, sla, 0, 31)
+    D(0xebdd, SLAK,    RSY_a, DO,  r3, sh, new, r1_32, sla, 0, 31)
+    D(0xeb0b, SLAG,    RSY_a, Z,   r3, sh, r1, 0, sla, 0, 63)
 /* SHIFT LEFT SINGLE LOGICAL */
-    C(0x8900, SLL,     RS_a,  Z,   r1_o, sh32, new, r1_32, sll, 0)
-    C(0xebdf, SLLK,    RSY_a, DO,  r3_o, sh32, new, r1_32, sll, 0)
-    C(0xeb0d, SLLG,    RSY_a, Z,   r3_o, sh64, r1, 0, sll, 0)
+    C(0x8900, SLL,     RS_a,  Z,   r1_o, sh, new, r1_32, sll, 0)
+    C(0xebdf, SLLK,    RSY_a, DO,  r3_o, sh, new, r1_32, sll, 0)
+    C(0xeb0d, SLLG,    RSY_a, Z,   r3_o, sh, r1, 0, sll, 0)
 /* SHIFT RIGHT SINGLE */
-    C(0x8a00, SRA,     RS_a,  Z,   r1_32s, sh32, new, r1_32, sra, s32)
-    C(0xebdc, SRAK,    RSY_a, DO,  r3_32s, sh32, new, r1_32, sra, s32)
-    C(0xeb0a, SRAG,    RSY_a, Z,   r3_o, sh64, r1, 0, sra, s64)
+    C(0x8a00, SRA,     RS_a,  Z,   r1_32s, sh, new, r1_32, sra, s32)
+    C(0xebdc, SRAK,    RSY_a, DO,  r3_32s, sh, new, r1_32, sra, s32)
+    C(0xeb0a, SRAG,    RSY_a, Z,   r3_o, sh, r1, 0, sra, s64)
 /* SHIFT RIGHT SINGLE LOGICAL */
-    C(0x8800, SRL,     RS_a,  Z,   r1_32u, sh32, new, r1_32, srl, 0)
-    C(0xebde, SRLK,    RSY_a, DO,  r3_32u, sh32, new, r1_32, srl, 0)
-    C(0xeb0c, SRLG,    RSY_a, Z,   r3_o, sh64, r1, 0, srl, 0)
+    C(0x8800, SRL,     RS_a,  Z,   r1_32u, sh, new, r1_32, srl, 0)
+    C(0xebde, SRLK,    RSY_a, DO,  r3_32u, sh, new, r1_32, srl, 0)
+    C(0xeb0c, SRLG,    RSY_a, Z,   r3_o, sh, r1, 0, srl, 0)
 /* SHIFT LEFT DOUBLE */
-    D(0x8f00, SLDA,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sla, 0, 31)
+    D(0x8f00, SLDA,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sla, 0, 31)
 /* SHIFT LEFT DOUBLE LOGICAL */
-    C(0x8d00, SLDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sll, 0)
+    C(0x8d00, SLDL,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sll, 0)
 /* SHIFT RIGHT DOUBLE */
-    C(0x8e00, SRDA,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sra, s64)
+    C(0x8e00, SRDA,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sra, s64)
 /* SHIFT RIGHT DOUBLE LOGICAL */
-    C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, srl, 0)
+    C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh, new, r1_D32, srl, 0)
 
 /* SQUARE ROOT */
     F(0xb314, SQEBR,   RRE,   Z,   0, e2, new, e1, sqeb, 0, IF_BFP)
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index f180853e7a..89e14b8f29 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -5922,17 +5922,11 @@ static void in2_ri2(DisasContext *s, DisasOps *o)
 }
 #define SPEC_in2_ri2 0
 
-static void in2_sh32(DisasContext *s, DisasOps *o)
-{
-    help_l2_shift(s, o, 31);
-}
-#define SPEC_in2_sh32 0
-
-static void in2_sh64(DisasContext *s, DisasOps *o)
+static void in2_sh(DisasContext *s, DisasOps *o)
 {
     help_l2_shift(s, o, 63);
 }
-#define SPEC_in2_sh64 0
+#define SPEC_in2_sh 0
 
 static void in2_m2_8u(DisasContext *s, DisasOps *o)
 {
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index cc64dd32d2..4212bab014 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -9,6 +9,7 @@ TESTS+=exrl-trtr
 TESTS+=pack
 TESTS+=mvo
 TESTS+=mvc
+TESTS+=sll
 TESTS+=trap
 TESTS+=signals-s390x
 
diff --git a/tests/tcg/s390x/sll.c b/tests/tcg/s390x/sll.c
new file mode 100644
index 0000000000..aba2a94676
--- /dev/null
+++ b/tests/tcg/s390x/sll.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <unistd.h>
+
+int main(void)
+{
+    uint64_t op1 = 0xb90281a3105939dfull;
+    uint64_t op2 = 0xb5e4df7e082e4c5eull;
+    uint64_t cc = 0xffffffffffffffffull;
+
+    asm("sll\t%[op1],0xd04(%[op2])"
+        "\n\tipm\t%[cc]"
+        : [op1] "+r" (op1),
+          [cc] "+r" (cc)
+        : [op2] "r" (op2)
+        : "cc");
+    if (op1 != 0xb90281a300000000ull) {
+        write(1, "bad result\n", 11);
+        return 1;
+    }
+    if (cc != 0xffffffff10ffffffull) {
+        write(1, "bad cc\n", 7);
+        return 1;
+    }
+    return 0;
+}
-- 
2.31.1


Re: [PATCH] target/s390x: Fix 32-bit shifts
Posted by David Hildenbrand 2 years, 3 months ago
On 10.01.22 19:59, Ilya Leoshkevich wrote:
> Both 32- and 64-bit shifts use the lowest 6 address bits. The current
> code special-cases 32-bit shifts to use only 5 bits, which is not correct.
> 

I assume that for 32-bit shifts we can currently only shift by up to 31,
not by 32 or more. So it's impossible to zero out a 32-bit register using
a shift right now.
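
As a minimal model of the problem (plain C, not the QEMU translator code;
sll32() and its mask parameter are hypothetical), the architecture takes the
low 6 bits of the computed address as the shift count for both 32- and 64-bit
shifts, while the code being removed masked 32-bit shifts with 31:

    #include <stdint.h>

    /* Hypothetical model: 32-bit SLL with a configurable count mask. */
    static uint32_t sll32(uint32_t r1, uint64_t addr, unsigned int mask)
    {
        unsigned int n = addr & mask;      /* shift count */
        return n >= 32 ? 0 : r1 << n;      /* guard against UB for n >= 32 */
    }

    /* sll32(x, 32, 31) == x  (old behaviour: count truncated to 0)
     * sll32(x, 32, 63) == 0  (architected behaviour: register is cleared) */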

Let's take a look at the details:

* RLL: IMHO it doesn't matter if we rotate by an additional 32 bits, the
       result is the same (see the sketch below). Not broken.
* SLA/SLAK: the numerical part is 31 bits, we don't care about shifting
            any further, the result for the numerical part is the same (0).
            Not broken.
* SLL/SLLK: we cannot shift by > 31 and create a 0 result. Broken.
* SRA/SRAK: same as SLA/SLAK. Not broken.
* SRL/SRLK: same as SLL/SLLK, broken.
* SLDA/SLDL ... should not be broken because they are 64-bit shifts.

So AFAIKS, only SLL/SLLK and SRL/SRLK need fixing to be able to shift by 32
or more.
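
To illustrate the RLL point above, a rotate sketch (plain C model, not the
translator; rol32() is hypothetical): only the count modulo 32 matters for a
32-bit rotate, so masking the count with 31 vs. 63 makes no difference.

    #include <stdint.h>

    /* 32-bit rotate left; rol32(x, n) == rol32(x, n + 32) for any x, n. */
    static uint32_t rol32(uint32_t x, unsigned int n)
    {
        n &= 31;
        return n ? (x << n) | (x >> (32 - n)) : x;
    }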

The issue with this patch is that I think it degrades the CC computation.
For 32-bit instructions we can now get a shift amount of up to 63, and I
think at least cc_calc_sla_32() is not prepared for that.
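
The general hazard (a sketch under the assumption that the CC helper uses
plain C shifts; shifted_out_bits() is hypothetical, not the actual
cc_calc_sla_32()): shifting a 32-bit value by 32 or more is undefined
behaviour in C, so a helper written for counts of at most 31 has to handle
larger counts explicitly, e.g.:

    #include <stdint.h>

    /* Which bits of a 32-bit value are shifted out by a left shift of
     * 'count' (0..63)?  Hypothetical helper, not QEMU code. */
    static uint32_t shifted_out_bits(uint32_t src, unsigned int count)
    {
        if (count == 0) {
            return 0;
        }
        if (count >= 32) {
            return src;                  /* every bit is shifted out */
        }
        return src >> (32 - count);      /* the top 'count' bits */
    }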


> Fix by merging sh32 and sh64.
> 
> Fixes: cbe24bfa91d2 ("target-s390: Convert SHIFT, ROTATE SINGLE")
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  target/s390x/tcg/insn-data.def  | 36 ++++++++++++++++-----------------
>  target/s390x/tcg/translate.c    | 10 ++-------
>  tests/tcg/s390x/Makefile.target |  1 +
>  tests/tcg/s390x/sll.c           | 25 +++++++++++++++++++++++
>  4 files changed, 46 insertions(+), 26 deletions(-)
>  create mode 100644 tests/tcg/s390x/sll.c
> 
> diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
> index f0af458aee..348a15be72 100644
> --- a/target/s390x/tcg/insn-data.def
> +++ b/target/s390x/tcg/insn-data.def
> @@ -747,8 +747,8 @@
>      C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
>  
>  /* ROTATE LEFT SINGLE LOGICAL */
> -    C(0xeb1d, RLL,     RSY_a, Z,   r3_o, sh32, new, r1_32, rll32, 0)
> -    C(0xeb1c, RLLG,    RSY_a, Z,   r3_o, sh64, r1, 0, rll64, 0)
> +    C(0xeb1d, RLL,     RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
> +    C(0xeb1c, RLLG,    RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
>  
>  /* ROTATE THEN INSERT SELECTED BITS */
>      C(0xec55, RISBG,   RIE_f, GIE, 0, r2, r1, 0, risbg, s64)
> @@ -784,29 +784,29 @@
>      C(0x0400, SPM,     RR_a,  Z,   r1, 0, 0, 0, spm, 0)
>  
>  /* SHIFT LEFT SINGLE */
> -    D(0x8b00, SLA,     RS_a,  Z,   r1, sh32, new, r1_32, sla, 0, 31)
> -    D(0xebdd, SLAK,    RSY_a, DO,  r3, sh32, new, r1_32, sla, 0, 31)
> -    D(0xeb0b, SLAG,    RSY_a, Z,   r3, sh64, r1, 0, sla, 0, 63)
> +    D(0x8b00, SLA,     RS_a,  Z,   r1, sh, new, r1_32, sla, 0, 31)
> +    D(0xebdd, SLAK,    RSY_a, DO,  r3, sh, new, r1_32, sla, 0, 31)
> +    D(0xeb0b, SLAG,    RSY_a, Z,   r3, sh, r1, 0, sla, 0, 63)
>  /* SHIFT LEFT SINGLE LOGICAL */
> -    C(0x8900, SLL,     RS_a,  Z,   r1_o, sh32, new, r1_32, sll, 0)
> -    C(0xebdf, SLLK,    RSY_a, DO,  r3_o, sh32, new, r1_32, sll, 0)
> -    C(0xeb0d, SLLG,    RSY_a, Z,   r3_o, sh64, r1, 0, sll, 0)
> +    C(0x8900, SLL,     RS_a,  Z,   r1_o, sh, new, r1_32, sll, 0)
> +    C(0xebdf, SLLK,    RSY_a, DO,  r3_o, sh, new, r1_32, sll, 0)
> +    C(0xeb0d, SLLG,    RSY_a, Z,   r3_o, sh, r1, 0, sll, 0)
>  /* SHIFT RIGHT SINGLE */
> -    C(0x8a00, SRA,     RS_a,  Z,   r1_32s, sh32, new, r1_32, sra, s32)
> -    C(0xebdc, SRAK,    RSY_a, DO,  r3_32s, sh32, new, r1_32, sra, s32)
> -    C(0xeb0a, SRAG,    RSY_a, Z,   r3_o, sh64, r1, 0, sra, s64)
> +    C(0x8a00, SRA,     RS_a,  Z,   r1_32s, sh, new, r1_32, sra, s32)
> +    C(0xebdc, SRAK,    RSY_a, DO,  r3_32s, sh, new, r1_32, sra, s32)
> +    C(0xeb0a, SRAG,    RSY_a, Z,   r3_o, sh, r1, 0, sra, s64)
>  /* SHIFT RIGHT SINGLE LOGICAL */
> -    C(0x8800, SRL,     RS_a,  Z,   r1_32u, sh32, new, r1_32, srl, 0)
> -    C(0xebde, SRLK,    RSY_a, DO,  r3_32u, sh32, new, r1_32, srl, 0)
> -    C(0xeb0c, SRLG,    RSY_a, Z,   r3_o, sh64, r1, 0, srl, 0)
> +    C(0x8800, SRL,     RS_a,  Z,   r1_32u, sh, new, r1_32, srl, 0)
> +    C(0xebde, SRLK,    RSY_a, DO,  r3_32u, sh, new, r1_32, srl, 0)
> +    C(0xeb0c, SRLG,    RSY_a, Z,   r3_o, sh, r1, 0, srl, 0)
>  /* SHIFT LEFT DOUBLE */
> -    D(0x8f00, SLDA,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sla, 0, 31)
> +    D(0x8f00, SLDA,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sla, 0, 31)

I'm confused. Is the 31 correct here? We're operating on a 64-bit value
and the sign bit shouldn't be in the middle ... but maybe I am missing
something?

>  /* SHIFT LEFT DOUBLE LOGICAL */
> -    C(0x8d00, SLDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sll, 0)
> +    C(0x8d00, SLDL,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sll, 0)
>  /* SHIFT RIGHT DOUBLE */
> -    C(0x8e00, SRDA,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sra, s64)
> +    C(0x8e00, SRDA,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sra, s64)
>  /* SHIFT RIGHT DOUBLE LOGICAL */
> -    C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, srl, 0)
> +    C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh, new, r1_D32, srl, 0)
>  
>  /* SQUARE ROOT */
>      F(0xb314, SQEBR,   RRE,   Z,   0, e2, new, e1, sqeb, 0, IF_BFP)
> diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
> index f180853e7a..89e14b8f29 100644
> --- a/target/s390x/tcg/translate.c
> +++ b/target/s390x/tcg/translate.c
> @@ -5922,17 +5922,11 @@ static void in2_ri2(DisasContext *s, DisasOps *o)
>  }
>  #define SPEC_in2_ri2 0
>  
> -static void in2_sh32(DisasContext *s, DisasOps *o)
> -{
> -    help_l2_shift(s, o, 31);
> -}
> -#define SPEC_in2_sh32 0
> -
> -static void in2_sh64(DisasContext *s, DisasOps *o)
> +static void in2_sh(DisasContext *s, DisasOps *o)
>  {
>      help_l2_shift(s, o, 63);

If we go down that path, we'd better inline help_l2_shift().

>  }
> -#define SPEC_in2_sh64 0
> +#define SPEC_in2_sh 0
>  
>  static void in2_m2_8u(DisasContext *s, DisasOps *o)
>  {
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index cc64dd32d2..4212bab014 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -9,6 +9,7 @@ TESTS+=exrl-trtr
>  TESTS+=pack
>  TESTS+=mvo
>  TESTS+=mvc
> +TESTS+=sll
>  TESTS+=trap
>  TESTS+=signals-s390x
>  
> diff --git a/tests/tcg/s390x/sll.c b/tests/tcg/s390x/sll.c
> new file mode 100644
> index 0000000000..aba2a94676
> --- /dev/null
> +++ b/tests/tcg/s390x/sll.c
> @@ -0,0 +1,25 @@
> +#include <stdint.h>
> +#include <unistd.h>
> +
> +int main(void)
> +{
> +    uint64_t op1 = 0xb90281a3105939dfull;
> +    uint64_t op2 = 0xb5e4df7e082e4c5eull;
> +    uint64_t cc = 0xffffffffffffffffull;
> +
> +    asm("sll\t%[op1],0xd04(%[op2])"
> +        "\n\tipm\t%[cc]"

Let's make this human-readable :)

asm("    sll %[op1],0xd04(%[op2])\n"
    "    ipm %[cc]"

Would it be better to use "asm volatile"?
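
For reference, one possible full form of that suggestion, a sketch that just
reuses the operand list from the patch unchanged:

    asm("    sll %[op1],0xd04(%[op2])\n"
        "    ipm %[cc]"
        : [op1] "+r" (op1),
          [cc] "+r" (cc)
        : [op2] "r" (op2)
        : "cc");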

> +        : [op1] "+r" (op1),
> +          [cc] "+r" (cc)
> +        : [op2] "r" (op2)
> +        : "cc");
> +    if (op1 != 0xb90281a300000000ull) {
> +        write(1, "bad result\n", 11);
> +        return 1;
> +    }
> +    if (cc != 0xffffffff10ffffffull) {
> +        write(1, "bad cc\n", 7);
> +        return 1;
> +    }
> +    return 0;
> +}

Can we split out the test into a separate patch?

-- 
Thanks,

David / dhildenb


Re: [PATCH] target/s390x: Fix 32-bit shifts
Posted by Ilya Leoshkevich 2 years, 3 months ago
On Tue, 2022-01-11 at 15:22 +0100, David Hildenbrand wrote:
> On 10.01.22 19:59, Ilya Leoshkevich wrote:
> > Both 32- and 64-bit shifts use the lowest 6 address bits. The current
> > code special-cases 32-bit shifts to use only 5 bits, which is not
> > correct.
> > 
> 
> I assume that for 32-bit shifts we can currently only shift by up to 31,
> not by 32 or more. So it's impossible to zero out a 32-bit register using
> a shift right now.

Thanks for having a detailed look!

That's my understanding of the problem as well.

> Let's take a look at the details:
> 
> * RLL: IMHO it doesn't matter if we rotate by an additional 32 bits, the
>        result is the same. Not broken.
> * SLA/SLAK: the numerical part is 31 bits, we don't care about shifting
>             any further, the result for the numerical part is the same (0).
>             Not broken.
> * SLL/SLLK: we cannot shift by > 31 and create a 0 result. Broken.
> * SRA/SRAK: same as SLA/SLAK. Not broken.
> * SRL/SRLK: same as SLL/SLLK, broken.
> * SLDA/SLDL ... should not be broken because they are 64-bit shifts.
> 
> So AFAIKS, only SLL/SLLK and SRL/SRLK need fixing to be able to shift by
> 32 or more.

I think others (except rotation) are affected too, because they cannot
distinguish between shifting by 0 and 32 bits.
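
For example (plain-C model, not the translator; sra32() is hypothetical and
assumes the compiler implements >> on negative values as an arithmetic shift,
as gcc and clang do):

    #include <stdint.h>

    /* Hypothetical model: 32-bit SRA with a configurable count mask. */
    static int32_t sra32(int32_t r1, uint64_t addr, unsigned int mask)
    {
        unsigned int n = addr & mask;
        if (n >= 32) {
            return r1 < 0 ? -1 : 0;      /* only sign bits remain */
        }
        return r1 >> n;
    }

    /* sra32(-8, 32, 31) == -8  (count truncated to 0, value unchanged)
     * sra32(-8, 32, 63) == -1  (count 32 is honoured)                  */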

> The issue with this patch is that I think it degrades the CC computation.
> For 32-bit instructions we can now get a shift amount of up to 63, and I
> think at least cc_calc_sla_32() is not prepared for that.

Ouch, that's now broken indeed. I've fixed it in v2 and added a test.

> > Fix by merging sh32 and sh64.
> > 
> > Fixes: cbe24bfa91d2 ("target-s390: Convert SHIFT, ROTATE SINGLE")
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> >  target/s390x/tcg/insn-data.def  | 36 ++++++++++++++++-------------
> > ----
> >  target/s390x/tcg/translate.c    | 10 ++-------
> >  tests/tcg/s390x/Makefile.target |  1 +
> >  tests/tcg/s390x/sll.c           | 25 +++++++++++++++++++++++
> >  4 files changed, 46 insertions(+), 26 deletions(-)
> >  create mode 100644 tests/tcg/s390x/sll.c
> > 
> > diff --git a/target/s390x/tcg/insn-data.def
> > b/target/s390x/tcg/insn-data.def
> > index f0af458aee..348a15be72 100644
> > --- a/target/s390x/tcg/insn-data.def
> > +++ b/target/s390x/tcg/insn-data.def
> > @@ -747,8 +747,8 @@
> >      C(0xb9e1, POPCNT,  RRE,   PC,  0, r2_o, r1, 0, popcnt, nz64)
> >  
> >  /* ROTATE LEFT SINGLE LOGICAL */
> > -    C(0xeb1d, RLL,     RSY_a, Z,   r3_o, sh32, new, r1_32, rll32,
> > 0)
> > -    C(0xeb1c, RLLG,    RSY_a, Z,   r3_o, sh64, r1, 0, rll64, 0)
> > +    C(0xeb1d, RLL,     RSY_a, Z,   r3_o, sh, new, r1_32, rll32, 0)
> > +    C(0xeb1c, RLLG,    RSY_a, Z,   r3_o, sh, r1, 0, rll64, 0)
> >  
> >  /* ROTATE THEN INSERT SELECTED BITS */
> >      C(0xec55, RISBG,   RIE_f, GIE, 0, r2, r1, 0, risbg, s64)
> > @@ -784,29 +784,29 @@
> >      C(0x0400, SPM,     RR_a,  Z,   r1, 0, 0, 0, spm, 0)
> >  
> >  /* SHIFT LEFT SINGLE */
> > -    D(0x8b00, SLA,     RS_a,  Z,   r1, sh32, new, r1_32, sla, 0,
> > 31)
> > -    D(0xebdd, SLAK,    RSY_a, DO,  r3, sh32, new, r1_32, sla, 0,
> > 31)
> > -    D(0xeb0b, SLAG,    RSY_a, Z,   r3, sh64, r1, 0, sla, 0, 63)
> > +    D(0x8b00, SLA,     RS_a,  Z,   r1, sh, new, r1_32, sla, 0, 31)
> > +    D(0xebdd, SLAK,    RSY_a, DO,  r3, sh, new, r1_32, sla, 0, 31)
> > +    D(0xeb0b, SLAG,    RSY_a, Z,   r3, sh, r1, 0, sla, 0, 63)
> >  /* SHIFT LEFT SINGLE LOGICAL */
> > -    C(0x8900, SLL,     RS_a,  Z,   r1_o, sh32, new, r1_32, sll, 0)
> > -    C(0xebdf, SLLK,    RSY_a, DO,  r3_o, sh32, new, r1_32, sll, 0)
> > -    C(0xeb0d, SLLG,    RSY_a, Z,   r3_o, sh64, r1, 0, sll, 0)
> > +    C(0x8900, SLL,     RS_a,  Z,   r1_o, sh, new, r1_32, sll, 0)
> > +    C(0xebdf, SLLK,    RSY_a, DO,  r3_o, sh, new, r1_32, sll, 0)
> > +    C(0xeb0d, SLLG,    RSY_a, Z,   r3_o, sh, r1, 0, sll, 0)
> >  /* SHIFT RIGHT SINGLE */
> > -    C(0x8a00, SRA,     RS_a,  Z,   r1_32s, sh32, new, r1_32, sra,
> > s32)
> > -    C(0xebdc, SRAK,    RSY_a, DO,  r3_32s, sh32, new, r1_32, sra,
> > s32)
> > -    C(0xeb0a, SRAG,    RSY_a, Z,   r3_o, sh64, r1, 0, sra, s64)
> > +    C(0x8a00, SRA,     RS_a,  Z,   r1_32s, sh, new, r1_32, sra,
> > s32)
> > +    C(0xebdc, SRAK,    RSY_a, DO,  r3_32s, sh, new, r1_32, sra,
> > s32)
> > +    C(0xeb0a, SRAG,    RSY_a, Z,   r3_o, sh, r1, 0, sra, s64)
> >  /* SHIFT RIGHT SINGLE LOGICAL */
> > -    C(0x8800, SRL,     RS_a,  Z,   r1_32u, sh32, new, r1_32, srl,
> > 0)
> > -    C(0xebde, SRLK,    RSY_a, DO,  r3_32u, sh32, new, r1_32, srl,
> > 0)
> > -    C(0xeb0c, SRLG,    RSY_a, Z,   r3_o, sh64, r1, 0, srl, 0)
> > +    C(0x8800, SRL,     RS_a,  Z,   r1_32u, sh, new, r1_32, srl, 0)
> > +    C(0xebde, SRLK,    RSY_a, DO,  r3_32u, sh, new, r1_32, srl, 0)
> > +    C(0xeb0c, SRLG,    RSY_a, Z,   r3_o, sh, r1, 0, srl, 0)
> >  /* SHIFT LEFT DOUBLE */
> > -    D(0x8f00, SLDA,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sla,
> > 0, 31)
> > +    D(0x8f00, SLDA,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sla,
> > 0, 31)
> 
> I'm confused. Is the 31 correct here? We're operating on a 64-bit value
> and the sign bit shouldn't be in the middle ... but maybe I am missing
> something?

Right, that's a bug. Fixed in v2.

> >  /* SHIFT LEFT DOUBLE LOGICAL */
> > -    C(0x8d00, SLDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sll,
> > 0)
> > +    C(0x8d00, SLDL,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sll,
> > 0)
> >  /* SHIFT RIGHT DOUBLE */
> > -    C(0x8e00, SRDA,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, sra,
> > s64)
> > +    C(0x8e00, SRDA,    RS_a,  Z,   r1_D32, sh, new, r1_D32, sra,
> > s64)
> >  /* SHIFT RIGHT DOUBLE LOGICAL */
> > -    C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh64, new, r1_D32, srl,
> > 0)
> > +    C(0x8c00, SRDL,    RS_a,  Z,   r1_D32, sh, new, r1_D32, srl,
> > 0)
> >  
> >  /* SQUARE ROOT */
> >      F(0xb314, SQEBR,   RRE,   Z,   0, e2, new, e1, sqeb, 0,
> > IF_BFP)
> > diff --git a/target/s390x/tcg/translate.c
> > b/target/s390x/tcg/translate.c
> > index f180853e7a..89e14b8f29 100644
> > --- a/target/s390x/tcg/translate.c
> > +++ b/target/s390x/tcg/translate.c
> > @@ -5922,17 +5922,11 @@ static void in2_ri2(DisasContext *s,
> > DisasOps *o)
> >  }
> >  #define SPEC_in2_ri2 0
> >  
> > -static void in2_sh32(DisasContext *s, DisasOps *o)
> > -{
> > -    help_l2_shift(s, o, 31);
> > -}
> > -#define SPEC_in2_sh32 0
> > -
> > -static void in2_sh64(DisasContext *s, DisasOps *o)
> > +static void in2_sh(DisasContext *s, DisasOps *o)
> >  {
> >      help_l2_shift(s, o, 63);
> 
> If we go down that path, we'd better inline help_l2_shift().

I still think we need to do that, so inlined in v2.

> >  }
> > -#define SPEC_in2_sh64 0
> > +#define SPEC_in2_sh 0
> >  
> >  static void in2_m2_8u(DisasContext *s, DisasOps *o)
> >  {
> > diff --git a/tests/tcg/s390x/Makefile.target
> > b/tests/tcg/s390x/Makefile.target
> > index cc64dd32d2..4212bab014 100644
> > --- a/tests/tcg/s390x/Makefile.target
> > +++ b/tests/tcg/s390x/Makefile.target
> > @@ -9,6 +9,7 @@ TESTS+=exrl-trtr
> >  TESTS+=pack
> >  TESTS+=mvo
> >  TESTS+=mvc
> > +TESTS+=sll
> >  TESTS+=trap
> >  TESTS+=signals-s390x
> >  
> > diff --git a/tests/tcg/s390x/sll.c b/tests/tcg/s390x/sll.c
> > new file mode 100644
> > index 0000000000..aba2a94676
> > --- /dev/null
> > +++ b/tests/tcg/s390x/sll.c
> > @@ -0,0 +1,25 @@
> > +#include <stdint.h>
> > +#include <unistd.h>
> > +
> > +int main(void)
> > +{
> > +    uint64_t op1 = 0xb90281a3105939dfull;
> > +    uint64_t op2 = 0xb5e4df7e082e4c5eull;
> > +    uint64_t cc = 0xffffffffffffffffull;
> > +
> > +    asm("sll\t%[op1],0xd04(%[op2])"
> > +        "\n\tipm\t%[cc]"
> 
> Let's make this human-readable :)
> 
> asm("    sll %[op1],0xd04(%[op2])\n"
>     "    ipm %[cc]"
> 
> Would it be better to use "asm volatile"?

I don't think we need volatile here - assuming the compiler is sane,
it should see that asm outputs are used and should not throw it away.
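
A small sketch of the rule being relied on (hypothetical s390x example, not
part of the test): a non-volatile asm statement whose outputs are all unused
may be optimised away, but here both op1 and cc feed the checks below, so the
compiler has to keep it.

    static int demo(void)
    {
        int x;

        /* Without volatile, this asm is kept only because x is consumed. */
        asm("lhi\t%0,1" : "=r" (x));
        return x;
    }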

> > +        : [op1] "+r" (op1),
> > +          [cc] "+r" (cc)
> > +        : [op2] "r" (op2)
> > +        : "cc");
> > +    if (op1 != 0xb90281a300000000ull) {
> > +        write(1, "bad result\n", 11);
> > +        return 1;
> > +    }
> > +    if (cc != 0xffffffff10ffffffull) {
> > +        write(1, "bad cc\n", 7);
> > +        return 1;
> > +    }
> > +    return 0;
> > +}
> 
> Can we split out the test into a separate patch?

Ok.

Best regards,
Ilya