[PATCH v2 13/51] tcg/optimize: Compute sign mask in fold_deposit

Posted by Richard Henderson

The input that overlaps the sign bit of the output can
have its s_mask propagated to the output s_mask.
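
For example (illustrative operand values, not part of the patch): a
32-bit deposit with ofs = 24 and len = 8 places the deposited field at
bits 24..31, so whatever sign repetitions t2 carries are shifted into
the top of the result, giving s_mask = t2->s_mask << 24.  With ofs = 0
and len = 8 instead, bits 8..31 still come from t1, so only the sign
repetitions t1 already had above the deposited field remain valid:
s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, 8).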

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index de32cc0323..e4f68241ce 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1646,8 +1646,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     TempOptInfo *t2 = arg_info(op->args[2]);
     int ofs = op->args[3];
     int len = op->args[4];
+    int width;
     TCGOpcode and_opc;
-    uint64_t z_mask;
+    uint64_t z_mask, s_mask;
 
     if (t1->is_const && t2->is_const) {
         return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1657,9 +1658,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     switch (ctx->type) {
     case TCG_TYPE_I32:
         and_opc = INDEX_op_and_i32;
+        width = 32;
         break;
     case TCG_TYPE_I64:
         and_opc = INDEX_op_and_i64;
+        width = 64;
         break;
     default:
         g_assert_not_reached();
@@ -1684,8 +1687,15 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
         return fold_and(ctx, op);
     }
 
+    /* The s_mask from the top portion of the deposit is still valid. */
+    if (ofs + len == width) {
+        s_mask = t2->s_mask << ofs;
+    } else {
+        s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
+    }
+
     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
-    return fold_masks_z(ctx, op, z_mask);
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_divide(OptContext *ctx, TCGOp *op)
-- 
2.43.0
Re: [PATCH v2 13/51] tcg/optimize: Compute sign mask in fold_deposit
Posted by Pierrick Bouvier
On 12/19/24 20:10, Richard Henderson wrote:
> The input that overlaps the sign bit of the output can
> have its s_mask propagated to the output s_mask.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>