[PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64

Richard Henderson posted 28 patches 3 years, 4 months ago
Maintainers: Andrzej Zaborowski <balrogg@gmail.com>, Aurelien Jarno <aurelien@aurel32.net>, Richard Henderson <richard.henderson@linaro.org>, Cornelia Huck <cohuck@redhat.com>, David Hildenbrand <david@redhat.com>, Palmer Dabbelt <palmer@dabbelt.com>, Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>, "Philippe Mathieu-Daudé" <f4bug@amsat.org>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Huacai Chen <chenhuacai@kernel.org>, Eduardo Habkost <ehabkost@redhat.com>, Thomas Huth <thuth@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Stefan Weil <sw@weilnetz.de>, Paolo Bonzini <pbonzini@redhat.com>, Alistair Francis <Alistair.Francis@wdc.com>, Yoshinori Sato <ysato@users.sourceforge.jp>
There is a newer version of this series
[PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64
Posted by Richard Henderson 3 years, 4 months ago
Implement the new semantics in the fallback expansion.
Change all callers to supply the flags that keep the
semantics unchanged locally.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op.h            |   8 +--
 target/arm/translate-a64.c      |  12 ++--
 target/arm/translate.c          |   2 +-
 target/i386/tcg/translate.c     |   2 +-
 target/mips/tcg/mxu_translate.c |   2 +-
 target/s390x/translate.c        |   4 +-
 target/sh4/translate.c          |   2 +-
 tcg/tcg-op.c                    | 121 ++++++++++++++++++++++----------
 8 files changed, 99 insertions(+), 54 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index ef8a008ea7..caf6ba2149 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -330,7 +330,7 @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
-void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
@@ -525,8 +525,8 @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
-void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg);
-void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
@@ -1185,7 +1185,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
 #define tcg_gen_ext32u_tl tcg_gen_mov_i32
 #define tcg_gen_ext32s_tl tcg_gen_mov_i32
 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
-#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
+#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
 #define tcg_gen_bswap_tl tcg_gen_bswap32_i32
 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8713dfec17..e0785ce859 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5437,15 +5437,15 @@ static void handle_rev32(DisasContext *s, unsigned int sf,
 
         /* bswap32_i64 requires zero high word */
         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
-        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
+        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
-        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
+        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
 
         tcg_temp_free_i64(tcg_tmp);
     } else {
         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
-        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
+        tcg_gen_bswap32_i64(tcg_rd, tcg_rd, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     }
 }
 
@@ -12436,10 +12436,12 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
             read_vec_element(s, tcg_tmp, rn, i, grp_size);
             switch (grp_size) {
             case MO_16:
-                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
+                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp,
+                                    TCG_BSWAP_IZ | TCG_BSWAP_OZ);
                 break;
             case MO_32:
-                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
+                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp,
+                                    TCG_BSWAP_IZ | TCG_BSWAP_OZ);
                 break;
             case MO_64:
                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 8e0e55c1e0..6b88163e3a 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -342,7 +342,7 @@ void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 {
     tcg_gen_ext16u_i32(var, var);
-    tcg_gen_bswap16_i32(var, var);
+    tcg_gen_bswap16_i32(var, var, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     tcg_gen_ext16s_i32(dest, var);
 }
 
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a7f5c0c8f2..e8a9dcd21a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -7203,7 +7203,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         {
             gen_op_mov_v_reg(s, MO_32, s->T0, reg);
             tcg_gen_ext32u_tl(s->T0, s->T0);
-            tcg_gen_bswap32_tl(s->T0, s->T0);
+            tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             gen_op_mov_reg_v(s, MO_32, reg, s->T0);
         }
         break;
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index fb0a811af6..c12cf78df7 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -861,7 +861,7 @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
 
     if (sel == 1) {
         /* S32LDDR */
-        tcg_gen_bswap32_tl(t1, t1);
+        tcg_gen_bswap32_tl(t1, t1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     }
     gen_store_mxu_gpr(t1, XRa);
 
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index e243624d2a..03dab9f350 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3939,13 +3939,13 @@ static DisasJumpType op_rosbg(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_rev16(DisasContext *s, DisasOps *o)
 {
-    tcg_gen_bswap16_i64(o->out, o->in2);
+    tcg_gen_bswap16_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_rev32(DisasContext *s, DisasOps *o)
 {
-    tcg_gen_bswap32_i64(o->out, o->in2);
+    tcg_gen_bswap32_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
     return DISAS_NEXT;
 }
 
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 9312790623..147219759b 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -677,7 +677,7 @@ static void _decode_opc(DisasContext * ctx)
 	{
             TCGv low = tcg_temp_new();
 	    tcg_gen_ext16u_i32(low, REG(B7_4));
-	    tcg_gen_bswap16_i32(low, low);
+	    tcg_gen_bswap16_i32(low, low, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16);
 	    tcg_temp_free(low);
 	}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index dc65577e2f..3763285bb0 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1001,20 +1001,35 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
     }
 }
 
-/* Note: we assume the two high bytes are set to zero */
-void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
 {
+    /* Only one extension flag may be present. */
+    tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
+
     if (TCG_TARGET_HAS_bswap16_i32) {
-        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,
-                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
+        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
     } else {
         TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
 
-        tcg_gen_ext8u_i32(t0, arg);
-        tcg_gen_shli_i32(t0, t0, 8);
-        tcg_gen_shri_i32(ret, arg, 8);
-        tcg_gen_or_i32(ret, ret, t0);
+        tcg_gen_shri_i32(t0, arg, 8);           /*  t0 = .xxa */
+        if (!(flags & TCG_BSWAP_IZ)) {
+            tcg_gen_ext8u_i32(t0, t0);          /*  t0 = ...a */
+        }
+
+        if (flags & TCG_BSWAP_OS) {
+            tcg_gen_shli_i32(t1, arg, 24);      /*  t1 = b... */
+            tcg_gen_sari_i32(t1, t1, 16);       /*  t1 = ssb. */
+        } else if (flags & TCG_BSWAP_OZ) {
+            tcg_gen_ext8u_i32(t1, arg);         /*  t1 = ...b */
+            tcg_gen_shli_i32(t1, t1, 8);        /*  t1 = ..b. */
+        } else {
+            tcg_gen_shli_i32(t1, arg, 8);       /*  t1 = xab. */
+        }
+
+        tcg_gen_or_i32(ret, t0, t1);            /* ret = ssba, ..ba or xaba */
         tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
     }
 }
 
@@ -1655,51 +1670,79 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
     }
 }
 
-/* Note: we assume the six high bytes are set to zero */
-void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
 {
+    /* Only one extension flag may be present. */
+    tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
+
     if (TCG_TARGET_REG_BITS == 32) {
-        tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
-        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
+        if (flags & TCG_BSWAP_OS) {
+            tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        } else {
+            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        }
     } else if (TCG_TARGET_HAS_bswap16_i64) {
-        tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg,
-                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
+        tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
 
-        tcg_gen_ext8u_i64(t0, arg);
-        tcg_gen_shli_i64(t0, t0, 8);
-        tcg_gen_shri_i64(ret, arg, 8);
-        tcg_gen_or_i64(ret, ret, t0);
+        tcg_gen_shri_i64(t0, arg, 8);           /*  t0 = .xxxxxxa */
+        if (!(flags & TCG_BSWAP_IZ)) {
+            tcg_gen_ext8u_i64(t0, t0);          /*  t0 = .......a */
+        }
+
+        if (flags & TCG_BSWAP_OS) {
+            tcg_gen_shli_i64(t1, arg, 56);      /*  t1 = b....... */
+            tcg_gen_sari_i64(t1, t1, 48);       /*  t1 = ssssssb. */
+        } else if (flags & TCG_BSWAP_OZ) {
+            tcg_gen_ext8u_i64(t1, arg);         /*  t1 = .......b */
+            tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ......b. */
+        } else {
+            tcg_gen_shli_i64(t1, arg, 8);       /*  t1 = xxxxxab. */
+        }
+
+        tcg_gen_or_i64(ret, t0, t1);            /* ret = t0 | t1 */
         tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
     }
 }
 
-/* Note: we assume the four high bytes are set to zero */
-void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
 {
+    /* Only one extension flag may be present. */
+    tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
+
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
-        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        if (flags & TCG_BSWAP_OS) {
+            tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        } else {
+            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        }
     } else if (TCG_TARGET_HAS_bswap32_i64) {
-        tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg,
-                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
+        tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
     } else {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
         TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
 
-                                        /* arg = ....abcd */
-        tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .....abc */
-        tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .....b.d */
-        tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .....a.c */
-        tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = ....b.d. */
-        tcg_gen_or_i64(ret, t0, t1);    /* ret = ....badc */
+                                            /* arg = xxxxabcd */
+        tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
+        tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
+        tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
+        tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
+        tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
 
-        tcg_gen_shli_i64(t1, ret, 48);  /*  t1 = dc...... */
-        tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ......ba */
-        tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
-        tcg_gen_or_i64(ret, t0, t1);    /* ret = ....dcba */
+        tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
+        tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
+        if (flags & TCG_BSWAP_OS) {
+            tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
+        } else {
+            tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
+        }
+        tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */
 
         tcg_temp_free_i64(t0);
         tcg_temp_free_i64(t1);
@@ -2846,7 +2889,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
         case MO_16:
-            tcg_gen_bswap16_i32(val, val);
+            tcg_gen_bswap16_i32(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             if (orig_memop & MO_SIGN) {
                 tcg_gen_ext16s_i32(val, val);
             }
@@ -2874,7 +2917,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
         switch (memop & MO_SIZE) {
         case MO_16:
             tcg_gen_ext16u_i32(swap, val);
-            tcg_gen_bswap16_i32(swap, swap);
+            tcg_gen_bswap16_i32(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             break;
         case MO_32:
             tcg_gen_bswap32_i32(swap, val);
@@ -2935,13 +2978,13 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
         case MO_16:
-            tcg_gen_bswap16_i64(val, val);
+            tcg_gen_bswap16_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             if (orig_memop & MO_SIGN) {
                 tcg_gen_ext16s_i64(val, val);
             }
             break;
         case MO_32:
-            tcg_gen_bswap32_i64(val, val);
+            tcg_gen_bswap32_i64(val, val, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             if (orig_memop & MO_SIGN) {
                 tcg_gen_ext32s_i64(val, val);
             }
@@ -2975,11 +3018,11 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
         switch (memop & MO_SIZE) {
         case MO_16:
             tcg_gen_ext16u_i64(swap, val);
-            tcg_gen_bswap16_i64(swap, swap);
+            tcg_gen_bswap16_i64(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             break;
         case MO_32:
             tcg_gen_ext32u_i64(swap, val);
-            tcg_gen_bswap32_i64(swap, swap);
+            tcg_gen_bswap32_i64(swap, swap, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
             break;
         case MO_64:
             tcg_gen_bswap64_i64(swap, val);
-- 
2.25.1


Re: [PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64
Posted by Philippe Mathieu-Daudé 3 years, 4 months ago
On 6/14/21 10:37 AM, Richard Henderson wrote:
> Implement the new semantics in the fallback expansion.
> Change all callers to supply the flags that keep the
> semantics unchanged locally.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/tcg/tcg-op.h            |   8 +--
>  target/arm/translate-a64.c      |  12 ++--
>  target/arm/translate.c          |   2 +-
>  target/i386/tcg/translate.c     |   2 +-
>  target/mips/tcg/mxu_translate.c |   2 +-
>  target/s390x/translate.c        |   4 +-
>  target/sh4/translate.c          |   2 +-

Various REV 16/32, would it be useful to have it as a TCG opcode?

>  tcg/tcg-op.c                    | 121 ++++++++++++++++++++++----------
>  8 files changed, 99 insertions(+), 54 deletions(-)

>      } else {
>          TCGv_i64 t0 = tcg_temp_new_i64();
>          TCGv_i64 t1 = tcg_temp_new_i64();
>          TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
>  
> -                                        /* arg = ....abcd */
> -        tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .....abc */
> -        tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .....b.d */
> -        tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .....a.c */
> -        tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = ....b.d. */
> -        tcg_gen_or_i64(ret, t0, t1);    /* ret = ....badc */
> +                                            /* arg = xxxxabcd */
> +        tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
> +        tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
> +        tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
> +        tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
> +        tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
>  
> -        tcg_gen_shli_i64(t1, ret, 48);  /*  t1 = dc...... */
> -        tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ......ba */
> -        tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
> -        tcg_gen_or_i64(ret, t0, t1);    /* ret = ....dcba */
> +        tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
> +        tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
> +        if (flags & TCG_BSWAP_OS) {
> +            tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
> +        } else {
> +            tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
> +        }
> +        tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */

Comment update appreciated, thanks.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Re: [PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64
Posted by Richard Henderson 3 years, 4 months ago
On 6/14/21 2:41 AM, Philippe Mathieu-Daudé wrote:
> On 6/14/21 10:37 AM, Richard Henderson wrote:
>> Implement the new semantics in the fallback expansion.
>> Change all callers to supply the flags that keep the
>> semantics unchanged locally.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   include/tcg/tcg-op.h            |   8 +--
>>   target/arm/translate-a64.c      |  12 ++--
>>   target/arm/translate.c          |   2 +-
>>   target/i386/tcg/translate.c     |   2 +-
>>   target/mips/tcg/mxu_translate.c |   2 +-
>>   target/s390x/translate.c        |   4 +-
>>   target/sh4/translate.c          |   2 +-
> 
> Various REV 16/32, would it be useful to have it as a TCG opcode?

Which operation are you proposing as tcg opcode?  The per-halfword swap akin to mips wsbh?
Yes, that operation also appears in arm (rev16) and ppc (brh).  So it's a reasonable
thing to do.


r~

Re: [PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64
Posted by Philippe Mathieu-Daudé 3 years, 4 months ago
On 6/14/21 5:58 PM, Richard Henderson wrote:
> On 6/14/21 2:41 AM, Philippe Mathieu-Daudé wrote:
>> On 6/14/21 10:37 AM, Richard Henderson wrote:
>>> Implement the new semantics in the fallback expansion.
>>> Change all callers to supply the flags that keep the
>>> semantics unchanged locally.
>>>
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>>   include/tcg/tcg-op.h            |   8 +--
>>>   target/arm/translate-a64.c      |  12 ++--
>>>   target/arm/translate.c          |   2 +-
>>>   target/i386/tcg/translate.c     |   2 +-
>>>   target/mips/tcg/mxu_translate.c |   2 +-
>>>   target/s390x/translate.c        |   4 +-
>>>   target/sh4/translate.c          |   2 +-
>>
>> Various REV 16/32, would it be useful to have it as a TCG opcode?
> 
> Which operation are you proposing as tcg opcode?  The per-halfword swap
> akin to mips wsbh?  Yes, that operation also appears in arm (rev16) and
> ppc (brh).  So it's a reasonable thing to do.

and REV32 for PPC BRW?

Another I noticed is popcnt.

Re: [PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64
Posted by Richard Henderson 3 years, 4 months ago
On 6/22/21 3:20 AM, Philippe Mathieu-Daudé wrote:
> Another I noticed is popcnt.

Already present as ctpop.  (Which is how we name the operation in host-utils.h too.)


r~

Re: [PATCH 17/28] tcg: Add flags argument to tcg_gen_bswap16_*, tcg_gen_bswap32_i64
Posted by Peter Maydell 3 years, 4 months ago
On Mon, 14 Jun 2021 at 09:52, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Implement the new semantics in the fallback expansion.
> Change all callers to supply the flags that keep the
> semantics unchanged locally.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index dc65577e2f..3763285bb0 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1001,20 +1001,35 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
>      }
>  }
>
> -/* Note: we assume the two high bytes are set to zero */
> -void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
> +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
>  {
> +    /* Only one extension flag may be present. */
> +    tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
> +
>      if (TCG_TARGET_HAS_bswap16_i32) {
> -        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg,
> -                         TCG_BSWAP_IZ | TCG_BSWAP_OZ);
> +        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
>      } else {
>          TCGv_i32 t0 = tcg_temp_new_i32();
> +        TCGv_i32 t1 = tcg_temp_new_i32();
>
> -        tcg_gen_ext8u_i32(t0, arg);
> -        tcg_gen_shli_i32(t0, t0, 8);
> -        tcg_gen_shri_i32(ret, arg, 8);
> -        tcg_gen_or_i32(ret, ret, t0);
> +        tcg_gen_shri_i32(t0, arg, 8);
> +        if (!(flags & TCG_BSWAP_IZ)) {
> +            tcg_gen_ext8u_i32(t0, t0);
> +        }
> +
> +        if (flags & TCG_BSWAP_OS) {
> +            tcg_gen_shli_i32(t1, t1, 24);

t1 hasn't been initialized yet. Should this be "tcg_gen_shli_i32(t1, arg, 24)" ?

> +            tcg_gen_sari_i32(t1, t1, 16);
> +        } else if (flags & TCG_BSWAP_OZ) {
> +            tcg_gen_ext8u_i32(t1, arg);
> +            tcg_gen_shli_i32(t1, t1, 8);
> +        } else {
> +            tcg_gen_shli_i32(t1, arg, 8);
> +        }
> +
> +        tcg_gen_or_i32(ret, t0, t1);
>          tcg_temp_free_i32(t0);
> +        tcg_temp_free_i32(t1);
>      }

>      } else {
>          TCGv_i64 t0 = tcg_temp_new_i64();
> +        TCGv_i64 t1 = tcg_temp_new_i64();
>
> -        tcg_gen_ext8u_i64(t0, arg);
> -        tcg_gen_shli_i64(t0, t0, 8);
> -        tcg_gen_shri_i64(ret, arg, 8);
> -        tcg_gen_or_i64(ret, ret, t0);
> +        tcg_gen_shri_i64(t0, arg, 8);
> +        if (!(flags & TCG_BSWAP_IZ)) {
> +            tcg_gen_ext8u_i64(t0, t0);
> +        }
> +
> +        if (flags & TCG_BSWAP_OS) {
> +            tcg_gen_shli_i64(t1, t1, 56);

Similarly here.

> +            tcg_gen_sari_i64(t1, t1, 48);
> +        } else if (flags & TCG_BSWAP_OZ) {
> +            tcg_gen_ext8u_i64(t1, arg);
> +            tcg_gen_shli_i64(t1, t1, 8);
> +        } else {
> +            tcg_gen_shli_i64(t1, arg, 8);
> +        }
> +
> +        tcg_gen_or_i64(ret, t0, t1);
>          tcg_temp_free_i64(t0);
> +        tcg_temp_free_i64(t1);
>      }
>  }

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM