On 12/19/24 20:10, Richard Henderson wrote:
> An input that overlaps the sign bit of the output can
> have its s_mask propagated to the output s_mask.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index de32cc0323..e4f68241ce 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -1646,8 +1646,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
> TempOptInfo *t2 = arg_info(op->args[2]);
> int ofs = op->args[3];
> int len = op->args[4];
> + int width;
> TCGOpcode and_opc;
> - uint64_t z_mask;
> + uint64_t z_mask, s_mask;
>
> if (t1->is_const && t2->is_const) {
> return tcg_opt_gen_movi(ctx, op, op->args[0],
> @@ -1657,9 +1658,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
> switch (ctx->type) {
> case TCG_TYPE_I32:
> and_opc = INDEX_op_and_i32;
> + width = 32;
> break;
> case TCG_TYPE_I64:
> and_opc = INDEX_op_and_i64;
> + width = 64;
> break;
> default:
> g_assert_not_reached();
> @@ -1684,8 +1687,15 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
> return fold_and(ctx, op);
> }
>
> + /* The s_mask from the top portion of the deposit is still valid. */
> + if (ofs + len == width) {
> + s_mask = t2->s_mask << ofs;
> + } else {
> + s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
> + }
> +
> z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
> - return fold_masks_z(ctx, op, z_mask);
> + return fold_masks_zs(ctx, op, z_mask, s_mask);
> }
>
> static bool fold_divide(OptContext *ctx, TCGOp *op)
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>