On 2024/5/28 at 5:19 AM, Richard Henderson wrote:
> Fixes a bug in the immediate shifts, because the exact
> encoding depends on the element size.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/loongarch64/tcg-target.c.inc | 58 ++++++++++++++++++--------------
> 1 file changed, 32 insertions(+), 26 deletions(-)
Reviewed-by: Song Gao <gaosong@loongson.cn>
Thanks.
Song Gao
> diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
> index 54f7bc9d14..5d2a6b2ca2 100644
> --- a/tcg/loongarch64/tcg-target.c.inc
> +++ b/tcg/loongarch64/tcg-target.c.inc
> @@ -1901,6 +1901,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
> static const LoongArchInsn rotrv_vec_insn[4] = {
> OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
> };
> + static const LoongArchInsn rotri_vec_insn[4] = {
> + OPC_VROTRI_B, OPC_VROTRI_H, OPC_VROTRI_W, OPC_VROTRI_D
> + };
>
> a0 = args[0];
> a1 = args[1];
> @@ -2034,15 +2037,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
> case INDEX_op_sarv_vec:
> insn = sarv_vec_insn[vece];
> goto vdvjvk;
> - case INDEX_op_shli_vec:
> - tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
> - break;
> - case INDEX_op_shri_vec:
> - tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
> - break;
> - case INDEX_op_sari_vec:
> - tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
> - break;
> case INDEX_op_rotlv_vec:
> /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
> tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], TCG_VEC_TMP0, a2));
> @@ -2051,26 +2045,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
> case INDEX_op_rotrv_vec:
> insn = rotrv_vec_insn[vece];
> goto vdvjvk;
> + case INDEX_op_shli_vec:
> + insn = shli_vec_insn[vece];
> + goto vdvjukN;
> + case INDEX_op_shri_vec:
> + insn = shri_vec_insn[vece];
> + goto vdvjukN;
> + case INDEX_op_sari_vec:
> + insn = sari_vec_insn[vece];
> + goto vdvjukN;
> case INDEX_op_rotli_vec:
> /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
> a2 = extract32(-a2, 0, 3 + vece);
> - switch (vece) {
> - case MO_8:
> - tcg_out_opc_vrotri_b(s, a0, a1, a2);
> - break;
> - case MO_16:
> - tcg_out_opc_vrotri_h(s, a0, a1, a2);
> - break;
> - case MO_32:
> - tcg_out_opc_vrotri_w(s, a0, a1, a2);
> - break;
> - case MO_64:
> - tcg_out_opc_vrotri_d(s, a0, a1, a2);
> - break;
> - default:
> - g_assert_not_reached();
> - }
> - break;
> + insn = rotri_vec_insn[vece];
> + goto vdvjukN;
> case INDEX_op_bitsel_vec:
> /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
> tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
> @@ -2083,6 +2071,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
> vdvjvk:
> tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
> break;
> + vdvjukN:
> + switch (vece) {
> + case MO_8:
> + tcg_out32(s, encode_vdvjuk3_insn(insn, a0, a1, a2));
> + break;
> + case MO_16:
> + tcg_out32(s, encode_vdvjuk4_insn(insn, a0, a1, a2));
> + break;
> + case MO_32:
> + tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, a2));
> + break;
> + case MO_64:
> + tcg_out32(s, encode_vdvjuk6_insn(insn, a0, a1, a2));
> + break;
> + default:
> + g_assert_not_reached();
> + }
> + break;
> }
> }
>