The expansions that we chose in tcg-op.c may be less than optimal.
Delay lowering until the optimize pass, so that we have propagated
constants and have computed the known zero/one masks.
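
For illustration only (not part of the patch), a minimal standalone C
sketch of the mask-driven decision that fold_extract2 now makes; the
helper name, enum and simplified interface are invented for the example:

  /*
   * Sketch of the lowering choice for
   *   extract2(lo, hi, shr) == (lo >> shr) | (hi << (width - shr)).
   * z_* is the mask of bits that may still be 1 after shifting the
   * corresponding input; o_* is the mask of bits known to be 1.
   */
  #include <stdint.h>
  #include <stdio.h>

  enum lowering { LOWER_MOVI, LOWER_SHR, LOWER_SHL, LOWER_EXTRACT2 };

  static enum lowering choose_lowering(uint64_t z_lo, uint64_t o_lo,
                                       uint64_t z_hi, uint64_t o_hi)
  {
      /* Every result bit is known: fold to a constant move. */
      if ((z_lo | z_hi) == (o_lo | o_hi)) {
          return LOWER_MOVI;
      }
      /* High input contributes only zeros: a right shift suffices. */
      if (z_hi == 0) {
          return LOWER_SHR;
      }
      /* Low input contributes only zeros: a left shift suffices. */
      if (z_lo == 0) {
          return LOWER_SHL;
      }
      /* Otherwise keep extract2, or expand to shr+deposit / shr+shl+or. */
      return LOWER_EXTRACT2;
  }

  int main(void)
  {
      /* High part known zero after shifting -> prints 1 (LOWER_SHR). */
      printf("%d\n", choose_lowering(0x00ffffffu, 0, 0, 0));
      return 0;
  }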
Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----
tcg/tcg-op.c | 9 ++------
2 files changed, 60 insertions(+), 12 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 890c8068fb..e6a16921c9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1933,21 +1933,74 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
uint64_t z2 = t2->z_mask;
uint64_t o1 = t1->o_mask;
uint64_t o2 = t2->o_mask;
+ uint64_t zr, or;
int shr = op->args[3];
+ int shl;
if (ctx->type == TCG_TYPE_I32) {
z1 = (uint32_t)z1 >> shr;
o1 = (uint32_t)o1 >> shr;
- z2 = (uint64_t)((int32_t)z2 << (32 - shr));
- o2 = (uint64_t)((int32_t)o2 << (32 - shr));
+ shl = 32 - shr;
+ z2 = (uint64_t)((int32_t)z2 << shl);
+ o2 = (uint64_t)((int32_t)o2 << shl);
} else {
z1 >>= shr;
o1 >>= shr;
- z2 <<= 64 - shr;
- o2 <<= 64 - shr;
+ shl = 64 - shr;
+ z2 <<= shl;
+ o2 <<= shl;
+ }
+ zr = z1 | z2;
+ or = o1 | o2;
+
+ if (zr == or) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], zr);
}
- return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
+ if (z2 == 0) {
+ /* High part zeros folds to simple right shift. */
+ op->opc = INDEX_op_shr;
+ op->args[2] = arg_new_constant(ctx, shr);
+ } else if (z1 == 0) {
+ /* Low part zeros folds to simple left shift. */
+ op->opc = INDEX_op_shl;
+ op->args[1] = op->args[2];
+ op->args[2] = arg_new_constant(ctx, shl);
+ } else if (!tcg_op_supported(INDEX_op_extract2, ctx->type, 0)) {
+ TCGArg tmp = arg_new_temp(ctx);
+ TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
+
+ op2->args[0] = tmp;
+ op2->args[1] = op->args[1];
+ op2->args[2] = arg_new_constant(ctx, shr);
+
+ if (TCG_TARGET_deposit_valid(ctx->type, shl, shr)) {
+ /*
+ * Deposit has more arguments than extract2,
+ * so we need to create a new TCGOp.
+ */
+ op2 = opt_insert_before(ctx, op, INDEX_op_deposit, 5);
+ op2->args[0] = op->args[0];
+ op2->args[1] = tmp;
+ op2->args[2] = op->args[2];
+ op2->args[3] = shl;
+ op2->args[4] = shr;
+
+ tcg_op_remove(ctx->tcg, op);
+ op = op2;
+ } else {
+ op2 = opt_insert_before(ctx, op, INDEX_op_shl, 3);
+ op2->args[0] = op->args[0];
+ op2->args[1] = op->args[2];
+ op2->args[2] = arg_new_constant(ctx, shl);
+
+ op->opc = INDEX_op_or;
+ op->args[1] = op->args[0];
+ op->args[2] = tmp;
+ }
+ }
+
+ return fold_masks_zo(ctx, op, zr, or);
}
static bool fold_exts(OptContext *ctx, TCGOp *op)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 96f72ba381..8a4fd14ad5 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1000,13 +1000,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
tcg_gen_mov_i32(ret, ah);
} else if (al == ah) {
tcg_gen_rotri_i32(ret, al, ofs);
- } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) {
- tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs);
} else {
- TCGv_i32 t0 = tcg_temp_ebb_new_i32();
- tcg_gen_shri_i32(t0, al, ofs);
- tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
- tcg_temp_free_i32(t0);
+ tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs);
}
}
@@ -2221,7 +2216,7 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
tcg_gen_mov_i64(ret, ah);
} else if (al == ah) {
tcg_gen_rotri_i64(ret, al, ofs);
- } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) {
+ } else if (TCG_TARGET_REG_BITS == 64) {
tcg_gen_op4i_i64(INDEX_op_extract2, ret, al, ah, ofs);
} else {
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
--
2.43.0