Better constraint for tcg_out_cmp, based on the comparison.
We can't yet remove the fallback to load constants into a
scratch because of tcg_out_cmp2, but that path should not
be as frequent.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target-con-set.h | 5 ++--
tcg/ppc/tcg-target-con-str.h | 1 +
tcg/ppc/tcg-target.c.inc | 48 ++++++++++++++++++++++++++++++------
3 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index bbd7b21247..aa1dac8740 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -11,7 +11,7 @@
*/
C_O0_I1(r)
C_O0_I2(r, r)
-C_O0_I2(r, ri)
+C_O0_I2(r, rC)
C_O0_I2(v, r)
C_O0_I3(r, r, r)
C_O0_I3(o, m, r)
@@ -26,13 +26,14 @@ C_O1_I2(r, rI, ri)
C_O1_I2(r, rI, rT)
C_O1_I2(r, r, r)
C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rC)
C_O1_I2(r, r, rI)
C_O1_I2(r, r, rT)
C_O1_I2(r, r, rU)
C_O1_I2(r, r, rZW)
C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
-C_O1_I4(r, r, ri, rZ, rZ)
+C_O1_I4(r, r, rC, rZ, rZ)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(r, r, r)
C_O2_I1(o, m, r)
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
index 20846901de..16b687216e 100644
--- a/tcg/ppc/tcg-target-con-str.h
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -16,6 +16,7 @@ REGS('v', ALL_VECTOR_REGS)
* Define constraint letters for constants:
* CONST(letter, TCG_CT_CONST_* bit set)
*/
+CONST('C', TCG_CT_CONST_CMP)
CONST('I', TCG_CT_CONST_S16)
CONST('M', TCG_CT_CONST_MONE)
CONST('T', TCG_CT_CONST_S32)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 13d43ef9ba..b20dbe036d 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -92,11 +92,13 @@
#define SZR (TCG_TARGET_REG_BITS / 8)
#define TCG_CT_CONST_S16 0x100
+#define TCG_CT_CONST_U16 0x200
#define TCG_CT_CONST_S32 0x400
#define TCG_CT_CONST_U32 0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ 0x4000
+#define TCG_CT_CONST_CMP 0x8000
#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS 0xffffffff00000000ull
@@ -296,9 +298,35 @@ static bool tcg_target_const_match(int64_t sval, int ct,
sval = (int32_t)sval;
}
+ if (ct & TCG_CT_CONST_CMP) {
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ ct |= TCG_CT_CONST_S16;
+ break;
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ ct |= TCG_CT_CONST_U16;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
return 1;
}
+ if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
+ return 1;
+ }
if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
return 1;
}
@@ -1682,7 +1710,10 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
- /* Simplify the comparisons below wrt CMPI. */
+ /*
+ * Simplify the comparisons below wrt CMPI.
+ * All of the tests are 16-bit, so a 32-bit sign extend always works.
+ */
if (type == TCG_TYPE_I32) {
arg2 = (int32_t)arg2;
}
@@ -3990,8 +4021,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_sar_i32:
case INDEX_op_rotl_i32:
case INDEX_op_rotr_i32:
- case INDEX_op_setcond_i32:
- case INDEX_op_negsetcond_i32:
case INDEX_op_and_i64:
case INDEX_op_andc_i64:
case INDEX_op_shl_i64:
@@ -3999,8 +4028,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_sar_i64:
case INDEX_op_rotl_i64:
case INDEX_op_rotr_i64:
- case INDEX_op_setcond_i64:
- case INDEX_op_negsetcond_i64:
return C_O1_I2(r, r, ri);
case INDEX_op_mul_i32:
@@ -4044,11 +4071,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
- return C_O0_I2(r, ri);
-
+ return C_O0_I2(r, rC);
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ case INDEX_op_negsetcond_i32:
+ case INDEX_op_negsetcond_i64:
+ return C_O1_I2(r, r, rC);
case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
- return C_O1_I4(r, r, ri, rZ, rZ);
+ return C_O1_I4(r, r, rC, rZ, rZ);
+
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:
return C_O1_I2(r, 0, rZ);
--
2.34.1