[RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI

Paolo Bonzini posted 1 patch 1 year, 1 month ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20231025185956.72677-1-pbonzini@redhat.com
Maintainers: Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>
include/tcg/tcg.h     |  4 ++++
tcg/i386/tcg-target.h |  2 ++
tcg/tcg-op.c          | 20 ++++++++++++++++----
3 files changed, 22 insertions(+), 4 deletions(-)
[RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI
Posted by Paolo Bonzini 1 year, 1 month ago
On x86, this is more efficient because it enables generation of
more LEA instructions.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/tcg/tcg.h     |  4 ++++
 tcg/i386/tcg-target.h |  2 ++
 tcg/tcg-op.c          | 20 ++++++++++++++++----
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index a9282cdcc60..48e5aeef173 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -109,6 +109,10 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_sub2_i32         1
 #endif
 
+#ifndef TCG_TARGET_PREFERS_addi
+#define TCG_TARGET_PREFERS_addi         0
+#endif
+
 #ifndef TCG_TARGET_deposit_i32_valid
 #define TCG_TARGET_deposit_i32_valid(ofs, len) 1
 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 8417ea4899e..452c6eba2d6 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -227,6 +227,8 @@ typedef enum {
 #define TCG_TARGET_HAS_bitsel_vec       have_avx512vl
 #define TCG_TARGET_HAS_cmpsel_vec       -1
 
+#define TCG_TARGET_PREFERS_addi         1
+
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
      (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 828eb9ee460..48c667032de 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -151,6 +151,8 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
+    } else if (TCG_TARGET_PREFERS_addi) {
+        tcg_gen_add_i32(ret, arg1, tcg_constant_i32(-arg2));
     } else {
         tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
     }
@@ -1369,11 +1371,21 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
     if (arg2 == 0) {
         tcg_gen_mov_i64(ret, arg1);
     } else if (TCG_TARGET_REG_BITS == 64) {
-        tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
+        if (TCG_TARGET_PREFERS_addi) {
+            tcg_gen_add_i64(ret, arg1, tcg_constant_i64(-arg2));
+        } else {
+            tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
+        }
     } else {
-        tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
-                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                         tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+        if (TCG_TARGET_PREFERS_addi) {
+            tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+                             TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                             tcg_constant_i32(-arg2), tcg_constant_i32(-arg2 >> 32));
+        } else {
+            tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+                             TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                             tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+        }
     }
 }
 
-- 
2.41.0
Re: [RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI
Posted by Richard Henderson 1 year, 1 month ago
On 10/25/23 11:59, Paolo Bonzini wrote:
> On x86, this is more efficient because it enables generation of
> more LEA instructions.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   include/tcg/tcg.h     |  4 ++++
>   tcg/i386/tcg-target.h |  2 ++
>   tcg/tcg-op.c          | 20 ++++++++++++++++----
>   3 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
> index a9282cdcc60..48e5aeef173 100644
> --- a/include/tcg/tcg.h
> +++ b/include/tcg/tcg.h
> @@ -109,6 +109,10 @@ typedef uint64_t TCGRegSet;
>   #define TCG_TARGET_HAS_sub2_i32         1
>   #endif
>   
> +#ifndef TCG_TARGET_PREFERS_addi
> +#define TCG_TARGET_PREFERS_addi         0
> +#endif

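I would rather do this canonicalization unconditionally, for every target, instead of
gating it on a per-target TCG_TARGET_PREFERS_addi macro.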

Many of the tcg backends do this manually, e.g. s390x:

     case INDEX_op_sub_i64:
         a0 = args[0], a1 = args[1], a2 = args[2];
         if (const_args[2]) {
             a2 = -a2;
             goto do_addi_64;
         } else {
             tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
         }
         break;

While we could do something similar for i386, it would be better not to require every
backend to jump through such hoops.

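We would also want to perform this transformation in optimize.c, so that a subtraction
whose second operand only becomes constant after constant propagation is canonicalized
to an addition as well.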


r~