A release build should simply accept and emit the subtract.
I'm not even sure if this is reasonable to keep for debug.
Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 49 ++++++++++++++++++++++++++-------------
tcg/i386/tcg-target.c.inc | 13 ++++++++---
2 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a507c111cf..408647af7e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s)
do_addsub2:
nb_iargs = 4;
nb_oargs = 2;
- /* Test if the high part of the operation is dead, but not
- the low part. The result can be optimized to a simple
- add or sub. This happens often for x86_64 guest when the
- cpu mode is set to 32 bit. */
- if (arg_temp(op->args[1])->state == TS_DEAD) {
- if (arg_temp(op->args[0])->state == TS_DEAD) {
- goto do_remove;
- }
- /* Replace the opcode and adjust the args in place,
- leaving 3 unused args at the end. */
- op->opc = opc = opc_new;
- op->args[1] = op->args[2];
- op->args[2] = op->args[4];
- /* Fall through and mark the single-word operation live. */
- nb_iargs = 2;
- nb_oargs = 1;
+ /*
+ * Test if the high part of the operation is dead, but the low
+ * part is still live. The result can be optimized to a simple
+ * add or sub.
+ */
+ if (arg_temp(op->args[1])->state != TS_DEAD) {
+ goto do_not_remove;
}
+ if (arg_temp(op->args[0])->state == TS_DEAD) {
+ goto do_remove;
+ }
+ /*
+ * Replace the opcode and adjust the args in place, leaving 3
+ * unused args at the end. Canonicalize subi to andi.
+ */
+ op->args[1] = op->args[2];
+ {
+ TCGTemp *src2 = arg_temp(op->args[4]);
+ if (src2->kind == TEMP_CONST) {
+ if (opc_new == INDEX_op_sub_i32) {
+ src2 = tcg_constant_internal(TCG_TYPE_I32,
+ (int32_t)-src2->val);
+ opc_new = INDEX_op_add_i32;
+ } else if (opc_new == INDEX_op_sub_i64) {
+ src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val);
+ opc_new = INDEX_op_add_i64;
+ }
+ }
+ op->args[2] = temp_arg(src2);
+ }
+ op->opc = opc = opc_new;
+ /* Mark the single-word operation live. */
+ nb_iargs = 2;
+ nb_oargs = 1;
goto do_not_remove;
case INDEX_op_mulu2_i32:
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 0d97864174..39d03fa698 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2544,8 +2544,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = ARITH_ADD;
goto gen_arith;
OP_32_64(sub):
- c = ARITH_SUB;
- goto gen_arith;
+ /*
+ * Should have canonicalized all constants to add.
+ * Keep the constraint allowing any constant so that we catch this
+ * case without register allocation loading the constant first.
+ */
+ tcg_debug_assert(!const_a2);
+ tgen_arithr(s, ARITH_SUB + rexw, a0, a2);
+ break;
OP_32_64(and):
c = ARITH_AND;
goto gen_arith;
@@ -3325,7 +3331,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I2(r, r, re);
case INDEX_op_sub_i32:
- case INDEX_op_sub_i64:
case INDEX_op_mul_i32:
case INDEX_op_mul_i64:
case INDEX_op_or_i32:
@@ -3333,6 +3338,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_xor_i32:
case INDEX_op_xor_i64:
return C_O1_I2(r, 0, re);
+ case INDEX_op_sub_i64:
+ return C_O1_I2(r, 0, ri);
case INDEX_op_and_i32:
case INDEX_op_and_i64:
--
2.34.1
On 26/10/23 03:39, Richard Henderson wrote:
> A release build should simply accept and emit the subtract.
> I'm not even sure if this is reasonable to keep for debug.
>
> Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/tcg.c | 49 ++++++++++++++++++++++++++-------------
> tcg/i386/tcg-target.c.inc | 13 ++++++++---
> 2 files changed, 43 insertions(+), 19 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a507c111cf..408647af7e 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s)
> do_addsub2:
> nb_iargs = 4;
> nb_oargs = 2;
> - /* Test if the high part of the operation is dead, but not
> - the low part. The result can be optimized to a simple
> - add or sub. This happens often for x86_64 guest when the
> - cpu mode is set to 32 bit. */
> - if (arg_temp(op->args[1])->state == TS_DEAD) {
> - if (arg_temp(op->args[0])->state == TS_DEAD) {
> - goto do_remove;
> - }
> - /* Replace the opcode and adjust the args in place,
> - leaving 3 unused args at the end. */
> - op->opc = opc = opc_new;
> - op->args[1] = op->args[2];
> - op->args[2] = op->args[4];
> - /* Fall through and mark the single-word operation live. */
> - nb_iargs = 2;
> - nb_oargs = 1;
> + /*
> + * Test if the high part of the operation is dead, but the low
> + * part is still live. The result can be optimized to a simple
> + * add or sub.
> + */
> + if (arg_temp(op->args[1])->state != TS_DEAD) {
> + goto do_not_remove;
> }
> + if (arg_temp(op->args[0])->state == TS_DEAD) {
> + goto do_remove;
> + }
> + /*
> + * Replace the opcode and adjust the args in place, leaving 3
> + * unused args at the end. Canonicalize subi to andi.
Typo s/andi/addi/.
> + */
> + op->args[1] = op->args[2];
> + {
> + TCGTemp *src2 = arg_temp(op->args[4]);
> + if (src2->kind == TEMP_CONST) {
> + if (opc_new == INDEX_op_sub_i32) {
> + src2 = tcg_constant_internal(TCG_TYPE_I32,
> + (int32_t)-src2->val);
> + opc_new = INDEX_op_add_i32;
> + } else if (opc_new == INDEX_op_sub_i64) {
> + src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val);
> + opc_new = INDEX_op_add_i64;
> + }
> + }
> + op->args[2] = temp_arg(src2);
> + }
> + op->opc = opc = opc_new;
> + /* Mark the single-word operation live. */
> + nb_iargs = 2;
> + nb_oargs = 1;
> goto do_not_remove;
For tcg/tcg.c with your S-o-b and a reworded description:
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
On 10/25/23 18:39, Richard Henderson wrote:
> A release build should simply accept and emit the subtract.
> I'm not even sure if this is reasonable to keep for debug.
>
> Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/tcg.c | 49 ++++++++++++++++++++++++++-------------
> tcg/i386/tcg-target.c.inc | 13 ++++++++---
> 2 files changed, 43 insertions(+), 19 deletions(-)
Oops, accidental merge of two patches.
The tcg.c change *is* required.
r~
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a507c111cf..408647af7e 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s)
> do_addsub2:
> nb_iargs = 4;
> nb_oargs = 2;
> - /* Test if the high part of the operation is dead, but not
> - the low part. The result can be optimized to a simple
> - add or sub. This happens often for x86_64 guest when the
> - cpu mode is set to 32 bit. */
> - if (arg_temp(op->args[1])->state == TS_DEAD) {
> - if (arg_temp(op->args[0])->state == TS_DEAD) {
> - goto do_remove;
> - }
> - /* Replace the opcode and adjust the args in place,
> - leaving 3 unused args at the end. */
> - op->opc = opc = opc_new;
> - op->args[1] = op->args[2];
> - op->args[2] = op->args[4];
> - /* Fall through and mark the single-word operation live. */
> - nb_iargs = 2;
> - nb_oargs = 1;
> + /*
> + * Test if the high part of the operation is dead, but the low
> + * part is still live. The result can be optimized to a simple
> + * add or sub.
> + */
> + if (arg_temp(op->args[1])->state != TS_DEAD) {
> + goto do_not_remove;
> }
> + if (arg_temp(op->args[0])->state == TS_DEAD) {
> + goto do_remove;
> + }
> + /*
> + * Replace the opcode and adjust the args in place, leaving 3
> + * unused args at the end. Canonicalize subi to andi.
> + */
> + op->args[1] = op->args[2];
> + {
> + TCGTemp *src2 = arg_temp(op->args[4]);
> + if (src2->kind == TEMP_CONST) {
> + if (opc_new == INDEX_op_sub_i32) {
> + src2 = tcg_constant_internal(TCG_TYPE_I32,
> + (int32_t)-src2->val);
> + opc_new = INDEX_op_add_i32;
> + } else if (opc_new == INDEX_op_sub_i64) {
> + src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val);
> + opc_new = INDEX_op_add_i64;
> + }
> + }
> + op->args[2] = temp_arg(src2);
> + }
> + op->opc = opc = opc_new;
> + /* Mark the single-word operation live. */
> + nb_iargs = 2;
> + nb_oargs = 1;
> goto do_not_remove;
>
> case INDEX_op_mulu2_i32:
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index 0d97864174..39d03fa698 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -2544,8 +2544,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> c = ARITH_ADD;
> goto gen_arith;
> OP_32_64(sub):
> - c = ARITH_SUB;
> - goto gen_arith;
> + /*
> + * Should have canonicalized all constants to add.
> + * Keep the constraint allowing any constant so that we catch this
> + * case without register allocation loading the constant first.
> + */
> + tcg_debug_assert(!const_a2);
> + tgen_arithr(s, ARITH_SUB + rexw, a0, a2);
> + break;
> OP_32_64(and):
> c = ARITH_AND;
> goto gen_arith;
> @@ -3325,7 +3331,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
> return C_O1_I2(r, r, re);
>
> case INDEX_op_sub_i32:
> - case INDEX_op_sub_i64:
> case INDEX_op_mul_i32:
> case INDEX_op_mul_i64:
> case INDEX_op_or_i32:
> @@ -3333,6 +3338,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
> case INDEX_op_xor_i32:
> case INDEX_op_xor_i64:
> return C_O1_I2(r, 0, re);
> + case INDEX_op_sub_i64:
> + return C_O1_I2(r, 0, ri);
>
> case INDEX_op_and_i32:
> case INDEX_op_and_i64:
© 2016 - 2026 Red Hat, Inc.