[PATCH v5 16/30] tcg/loongarch64: Implement shl/shr/sar/rotl/rotr ops

WANG Xuerui posted 30 patches 1 year ago
There is a newer version of this series
Maintainers: Laurent Vivier <laurent@vivier.eu>, Riku Voipio <riku.voipio@iki.fi>, Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, WANG Xuerui <git@xen0n.name>
[PATCH v5 16/30] tcg/loongarch64: Implement shl/shr/sar/rotl/rotr ops
Posted by WANG Xuerui 1 year ago
Signed-off-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc     | 91 ++++++++++++++++++++++++++++
 tcg/loongarch64/tcg-target.h         |  4 +-
 3 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index 2975e03127..42f8e28741 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,6 +17,7 @@
 C_O0_I1(r)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
+C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
 C_O1_I2(r, 0, rZ)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 1ab690bab6..32676e83af 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -580,6 +580,85 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
         break;
 
+    case INDEX_op_shl_i32:
+        if (c2) {
+            tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
+        } else {
+            tcg_out_opc_sll_w(s, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_shl_i64:
+        if (c2) {
+            tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f);
+        } else {
+            tcg_out_opc_sll_d(s, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_shr_i32:
+        if (c2) {
+            tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f);
+        } else {
+            tcg_out_opc_srl_w(s, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_shr_i64:
+        if (c2) {
+            tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f);
+        } else {
+            tcg_out_opc_srl_d(s, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_sar_i32:
+        if (c2) {
+            tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f);
+        } else {
+            tcg_out_opc_sra_w(s, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_sar_i64:
+        if (c2) {
+            tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f);
+        } else {
+            tcg_out_opc_sra_d(s, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_rotl_i32:
+        /* transform into equivalent rotr/rotri */
+        if (c2) {
+            tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f);
+        } else {
+            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
+            tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0);
+        }
+        break;
+    case INDEX_op_rotl_i64:
+        /* transform into equivalent rotr/rotri */
+        if (c2) {
+            tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f);
+        } else {
+            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
+            tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0);
+        }
+        break;
+
+    case INDEX_op_rotr_i32:
+        if (c2) {
+            tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f);
+        } else {
+            tcg_out_opc_rotr_w(s, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_rotr_i64:
+        if (c2) {
+            tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f);
+        } else {
+            tcg_out_opc_rotr_d(s, a0, a1, a2);
+        }
+        break;
+
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     default:
@@ -629,6 +708,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
          */
         return C_O1_I2(r, r, rC);
 
+    case INDEX_op_shl_i32:
+    case INDEX_op_shl_i64:
+    case INDEX_op_shr_i32:
+    case INDEX_op_shr_i64:
+    case INDEX_op_sar_i32:
+    case INDEX_op_sar_i64:
+    case INDEX_op_rotl_i32:
+    case INDEX_op_rotl_i64:
+    case INDEX_op_rotr_i32:
+    case INDEX_op_rotr_i64:
+        return C_O1_I2(r, r, ri);
+
     case INDEX_op_and_i32:
     case INDEX_op_and_i64:
     case INDEX_op_nor_i32:
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index ef489cbc86..e59c2a7bec 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -96,7 +96,7 @@ typedef enum {
 #define TCG_TARGET_HAS_div_i32          0
 #define TCG_TARGET_HAS_rem_i32          0
 #define TCG_TARGET_HAS_div2_i32         0
-#define TCG_TARGET_HAS_rot_i32          0
+#define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     0
@@ -133,7 +133,7 @@ typedef enum {
 #define TCG_TARGET_HAS_div_i64          0
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_div2_i64         0
-#define TCG_TARGET_HAS_rot_i64          0
+#define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
-- 
2.33.0


Re: [PATCH v5 16/30] tcg/loongarch64: Implement shl/shr/sar/rotl/rotr ops
Posted by Philippe Mathieu-Daudé 1 year ago
On 9/24/21 19:25, WANG Xuerui wrote:
> Signed-off-by: WANG Xuerui <git@xen0n.name>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/loongarch64/tcg-target-con-set.h |  1 +
>   tcg/loongarch64/tcg-target.c.inc     | 91 ++++++++++++++++++++++++++++
>   tcg/loongarch64/tcg-target.h         |  4 +-
>   3 files changed, 94 insertions(+), 2 deletions(-)

> diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
> index 1ab690bab6..32676e83af 100644
> --- a/tcg/loongarch64/tcg-target.c.inc
> +++ b/tcg/loongarch64/tcg-target.c.inc
> @@ -580,6 +580,85 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>           tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
>           break;
>   
> +    case INDEX_op_shl_i32:
> +        if (c2) {

Why can't we use:

                tcg_debug_assert(a2 <= 0x1f);
                tcg_out_opc_slli_w(s, a0, a1, a2);

?

> +            tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
> +        } else {
> +            tcg_out_opc_sll_w(s, a0, a1, a2);
> +        }
> +        break;

Re: [PATCH v5 16/30] tcg/loongarch64: Implement shl/shr/sar/rotl/rotr ops
Posted by Richard Henderson 1 year ago
On 9/25/21 6:05 AM, Philippe Mathieu-Daudé wrote:
>> +    case INDEX_op_shl_i32:
>> +        if (c2) {
> 
> Why can't we use:
> 
>                 tcg_debug_assert(a2 <= 0x1f);
>                 tcg_out_opc_slli_w(s, a0, a1, a2);
> 
> ?

Because tcg/optimize.c can produce out-of-range shift counts.
We have this same masking in tcg/sparc/, going back at least as far as commit 1fd95946657.

Officially, the tcg backend generator must accept this, with UNSPECIFIED behaviour. 
Generally, such out-of-range shifts will be followed by a conditional move that overwrites 
the undefined result.  The tcg backend is not allowed to trap or assert.


r~

Re: [PATCH v5 16/30] tcg/loongarch64: Implement shl/shr/sar/rotl/rotr ops
Posted by Philippe Mathieu-Daudé 1 year ago
On 9/25/21 16:09, Richard Henderson wrote:
> On 9/25/21 6:05 AM, Philippe Mathieu-Daudé wrote:
>>> +    case INDEX_op_shl_i32:
>>> +        if (c2) {
>>
>> Why can't we use:
>>
>>                 tcg_debug_assert(a2 <= 0x1f);
>>                 tcg_out_opc_slli_w(s, a0, a1, a2);
>>
>> ?
> 
> Because tcg/optimize.c can produce out-of-range values.
> We have this same masking in tcg/sparc/ starting as far back as 
> 1fd95946657.
> 
> Officially, the tcg backend generator must accept this, with UNSPECIFIED 
> behaviour. Generally, such out-of-range shifts will be followed by a 
> conditional move that overwrites the undefined result.  The tcg backend 
> is not allowed to trap or assert.

Ah now I understand, TIL again :) Thanks.

So for this patch:
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>